diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,18033 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6220839813374806, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0006220839813374805, + "grad_norm": 19.98119354248047, + "learning_rate": 4.9975e-05, + "log_odds_chosen": -0.9538012742996216, + "log_odds_ratio": -1.4484777450561523, + "logits/chosen": -0.3500179052352905, + "logits/rejected": -0.4888765215873718, + "logps/chosen": -2.206789255142212, + "logps/rejected": -1.3497388362884521, + "loss": 7.9648, + "nll_loss": 7.819947242736816, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.2206789255142212, + "rewards/margins": -0.08570503443479538, + "rewards/rejected": -0.13497388362884521, + "step": 1 + }, + { + "epoch": 0.001244167962674961, + "grad_norm": 13.379537582397461, + "learning_rate": 4.995e-05, + "log_odds_chosen": -0.23575380444526672, + "log_odds_ratio": -0.9271624088287354, + "logits/chosen": -0.40814173221588135, + "logits/rejected": -0.43905961513519287, + "logps/chosen": -1.7428513765335083, + "logps/rejected": -1.5162339210510254, + "loss": 6.0609, + "nll_loss": 5.96816873550415, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1742851585149765, + "rewards/margins": -0.02266174554824829, + "rewards/rejected": -0.15162339806556702, + "step": 2 + }, + { + "epoch": 0.0018662519440124418, + "grad_norm": 18.385046005249023, + "learning_rate": 4.992500000000001e-05, + "log_odds_chosen": -1.152876853942871, + "log_odds_ratio": -1.6186071634292603, + "logits/chosen": -0.18745173513889313, + "logits/rejected": -0.269758939743042, + "logps/chosen": -2.3206677436828613, + "logps/rejected": -1.3112003803253174, + "loss": 6.6148, + "nll_loss": 6.452923774719238, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.2320667803287506, + "rewards/margins": -0.10094673931598663, + "rewards/rejected": -0.13112004101276398, + "step": 3 + }, + { + "epoch": 0.002488335925349922, + "grad_norm": 9.557136535644531, + "learning_rate": 4.99e-05, + "log_odds_chosen": -1.4854508638381958, + "log_odds_ratio": -2.0142252445220947, + "logits/chosen": -0.028350114822387695, + "logits/rejected": -0.2540076971054077, + "logps/chosen": -2.8356778621673584, + "logps/rejected": -1.4353848695755005, + "loss": 5.7077, + "nll_loss": 5.506240367889404, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.28356778621673584, + "rewards/margins": -0.14002928137779236, + "rewards/rejected": -0.1435384899377823, + "step": 4 + }, + { + "epoch": 0.003110419906687403, + "grad_norm": 1.9710413217544556, + "learning_rate": 4.9875000000000006e-05, + "log_odds_chosen": -0.064725860953331, + "log_odds_ratio": -0.7509320974349976, + "logits/chosen": -0.2338867336511612, + "logits/rejected": -0.143581822514534, + "logps/chosen": -1.6930336952209473, + "logps/rejected": -1.6086996793746948, + "loss": 3.6175, + "nll_loss": 3.5423970222473145, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.16930335760116577, + "rewards/margins": -0.008433390408754349, + "rewards/rejected": -0.16086997091770172, + "step": 5 + }, + { + "epoch": 0.0037325038880248835, + "grad_norm": 4.176668643951416, + "learning_rate": 4.9850000000000006e-05, + "log_odds_chosen": -1.303108811378479, + "log_odds_ratio": -1.8442931175231934, + "logits/chosen": 0.178592711687088, + "logits/rejected": -0.07974191009998322, + "logps/chosen": -2.8217928409576416, + "logps/rejected": -1.5813729763031006, + "loss": 5.1449, + "nll_loss": 4.96045446395874, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.2821792662143707, + "rewards/margins": -0.12404197454452515, + "rewards/rejected": -0.15813729166984558, + "step": 6 + }, + { + "epoch": 0.004354587869362364, + "grad_norm": 3.5636956691741943, + "learning_rate": 4.9825000000000005e-05, + "log_odds_chosen": -1.0302621126174927, + "log_odds_ratio": -1.533990502357483, + "logits/chosen": 0.1397887021303177, + "logits/rejected": -0.12103983759880066, + "logps/chosen": -2.5798606872558594, + "logps/rejected": -1.6471679210662842, + "loss": 5.2595, + "nll_loss": 5.1061482429504395, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.2579860985279083, + "rewards/margins": -0.09326928108930588, + "rewards/rejected": -0.16471679508686066, + "step": 7 + }, + { + "epoch": 0.004976671850699844, + "grad_norm": 1.424960732460022, + "learning_rate": 4.9800000000000004e-05, + "log_odds_chosen": -0.057843249291181564, + "log_odds_ratio": -0.7315274477005005, + "logits/chosen": -0.21581152081489563, + "logits/rejected": -0.2254290133714676, + "logps/chosen": -1.4299900531768799, + "logps/rejected": -1.376258134841919, + "loss": 3.5319, + "nll_loss": 3.458702802658081, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.14299900829792023, + "rewards/margins": -0.00537317618727684, + "rewards/rejected": -0.1376258283853531, + "step": 8 + }, + { + "epoch": 0.005598755832037325, + "grad_norm": 2.506531000137329, + "learning_rate": 4.9775000000000004e-05, + "log_odds_chosen": -1.3545101881027222, + "log_odds_ratio": -1.7200111150741577, + "logits/chosen": 0.09172062575817108, + "logits/rejected": -0.2259369194507599, + "logps/chosen": -2.604728937149048, + "logps/rejected": -1.421167016029358, + "loss": 5.1963, + "nll_loss": 5.0243120193481445, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.2604728937149048, + "rewards/margins": -0.11835619807243347, + "rewards/rejected": -0.1421166956424713, + "step": 9 + }, + { + "epoch": 0.006220839813374806, + "grad_norm": 2.135577917098999, + "learning_rate": 4.975e-05, + "log_odds_chosen": -0.5466750264167786, + "log_odds_ratio": -1.1493580341339111, + "logits/chosen": -0.009926818311214447, + "logits/rejected": -0.12821561098098755, + "logps/chosen": -2.1568522453308105, + "logps/rejected": -1.6440058946609497, + "loss": 4.7799, + "nll_loss": 4.664962291717529, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.21568521857261658, + "rewards/margins": -0.05128464847803116, + "rewards/rejected": -0.16440057754516602, + "step": 10 + }, + { + "epoch": 0.006842923794712286, + "grad_norm": 2.0873756408691406, + "learning_rate": 4.9725e-05, + "log_odds_chosen": -0.22385576367378235, + "log_odds_ratio": -0.8634535074234009, + "logits/chosen": -0.07170040160417557, + "logits/rejected": -0.28907641768455505, + "logps/chosen": -1.4515858888626099, + "logps/rejected": -1.2991254329681396, + "loss": 4.4469, + "nll_loss": 4.360559463500977, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.145158588886261, + "rewards/margins": -0.015246042050421238, + "rewards/rejected": -0.12991255521774292, + "step": 11 + }, + { + "epoch": 0.007465007776049767, + "grad_norm": 1.0876375436782837, + "learning_rate": 4.97e-05, + "log_odds_chosen": -0.17661024630069733, + "log_odds_ratio": -0.8128032088279724, + "logits/chosen": -0.3114607334136963, + "logits/rejected": -0.4201509356498718, + "logps/chosen": -1.3225839138031006, + "logps/rejected": -1.1948829889297485, + "loss": 3.5573, + "nll_loss": 3.476047992706299, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.13225840032100677, + "rewards/margins": -0.012770086526870728, + "rewards/rejected": -0.11948829889297485, + "step": 12 + }, + { + "epoch": 0.008087091757387248, + "grad_norm": 1.08712637424469, + "learning_rate": 4.967500000000001e-05, + "log_odds_chosen": -0.38383999466896057, + "log_odds_ratio": -0.93343186378479, + "logits/chosen": -0.17842403054237366, + "logits/rejected": -0.4043556749820709, + "logps/chosen": -1.5285923480987549, + "logps/rejected": -1.2196223735809326, + "loss": 4.0367, + "nll_loss": 3.943343162536621, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.15285924077033997, + "rewards/margins": -0.030896998941898346, + "rewards/rejected": -0.12196224182844162, + "step": 13 + }, + { + "epoch": 0.008709175738724729, + "grad_norm": 1.449515461921692, + "learning_rate": 4.965e-05, + "log_odds_chosen": -0.6121888160705566, + "log_odds_ratio": -1.3804981708526611, + "logits/chosen": -0.06745365262031555, + "logits/rejected": -0.2206701636314392, + "logps/chosen": -2.0546789169311523, + "logps/rejected": -1.398402214050293, + "loss": 4.3677, + "nll_loss": 4.229663372039795, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.20546789467334747, + "rewards/margins": -0.06562765687704086, + "rewards/rejected": -0.13984021544456482, + "step": 14 + }, + { + "epoch": 0.00933125972006221, + "grad_norm": 0.8825786709785461, + "learning_rate": 4.962500000000001e-05, + "log_odds_chosen": -0.13495028018951416, + "log_odds_ratio": -0.8009682893753052, + "logits/chosen": -0.20614610612392426, + "logits/rejected": -0.45154061913490295, + "logps/chosen": -1.570542812347412, + "logps/rejected": -1.476640224456787, + "loss": 3.9433, + "nll_loss": 3.863245964050293, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.15705429017543793, + "rewards/margins": -0.009390261955559254, + "rewards/rejected": -0.14766402542591095, + "step": 15 + }, + { + "epoch": 0.009953343701399688, + "grad_norm": 0.7910265922546387, + "learning_rate": 4.96e-05, + "log_odds_chosen": -0.36577627062797546, + "log_odds_ratio": -0.9311540126800537, + "logits/chosen": -0.22354616224765778, + "logits/rejected": -0.29362282156944275, + "logps/chosen": -1.65301513671875, + "logps/rejected": -1.361577033996582, + "loss": 3.606, + "nll_loss": 3.5129306316375732, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.16530153155326843, + "rewards/margins": -0.0291438065469265, + "rewards/rejected": -0.13615772128105164, + "step": 16 + }, + { + "epoch": 0.010575427682737169, + "grad_norm": 0.9194669127464294, + "learning_rate": 4.9575000000000006e-05, + "log_odds_chosen": -0.17109160125255585, + "log_odds_ratio": -0.8144399523735046, + "logits/chosen": -0.0705355554819107, + "logits/rejected": -0.2670852541923523, + "logps/chosen": -1.7514052391052246, + "logps/rejected": -1.6155729293823242, + "loss": 4.2639, + "nll_loss": 4.182419776916504, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.17514052987098694, + "rewards/margins": -0.013583235442638397, + "rewards/rejected": -0.16155728697776794, + "step": 17 + }, + { + "epoch": 0.01119751166407465, + "grad_norm": 0.7311789393424988, + "learning_rate": 4.9550000000000005e-05, + "log_odds_chosen": -0.04682855308055878, + "log_odds_ratio": -0.7275374531745911, + "logits/chosen": -0.08018438518047333, + "logits/rejected": -0.20024976134300232, + "logps/chosen": -1.3416996002197266, + "logps/rejected": -1.3203401565551758, + "loss": 3.7728, + "nll_loss": 3.7000904083251953, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13416996598243713, + "rewards/margins": -0.00213593989610672, + "rewards/rejected": -0.132034033536911, + "step": 18 + }, + { + "epoch": 0.01181959564541213, + "grad_norm": 0.7463477253913879, + "learning_rate": 4.9525000000000004e-05, + "log_odds_chosen": -0.6258882284164429, + "log_odds_ratio": -1.0997464656829834, + "logits/chosen": -0.12337462604045868, + "logits/rejected": -0.34859755635261536, + "logps/chosen": -1.7963664531707764, + "logps/rejected": -1.2740449905395508, + "loss": 3.714, + "nll_loss": 3.603994369506836, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.1796366274356842, + "rewards/margins": -0.05223213881254196, + "rewards/rejected": -0.12740451097488403, + "step": 19 + }, + { + "epoch": 0.012441679626749611, + "grad_norm": 0.6245782375335693, + "learning_rate": 4.9500000000000004e-05, + "log_odds_chosen": -0.2411532998085022, + "log_odds_ratio": -0.8302733898162842, + "logits/chosen": -0.049967993050813675, + "logits/rejected": -0.2759324908256531, + "logps/chosen": -1.4726518392562866, + "logps/rejected": -1.3111158609390259, + "loss": 3.707, + "nll_loss": 3.6239380836486816, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.14726518094539642, + "rewards/margins": -0.01615358516573906, + "rewards/rejected": -0.13111159205436707, + "step": 20 + }, + { + "epoch": 0.013063763608087092, + "grad_norm": 0.7757321000099182, + "learning_rate": 4.9475e-05, + "log_odds_chosen": 0.0013154447078704834, + "log_odds_ratio": -0.7458984851837158, + "logits/chosen": -0.016477234661579132, + "logits/rejected": -0.1882762759923935, + "logps/chosen": -1.4737496376037598, + "logps/rejected": -1.4141770601272583, + "loss": 3.9635, + "nll_loss": 3.8889150619506836, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1473749727010727, + "rewards/margins": -0.005957265384495258, + "rewards/rejected": -0.1414177119731903, + "step": 21 + }, + { + "epoch": 0.013685847589424573, + "grad_norm": 0.5803574323654175, + "learning_rate": 4.945e-05, + "log_odds_chosen": -0.024330340325832367, + "log_odds_ratio": -0.7314656972885132, + "logits/chosen": -0.0876406729221344, + "logits/rejected": -0.25811609625816345, + "logps/chosen": -1.4882334470748901, + "logps/rejected": -1.4596867561340332, + "loss": 3.7711, + "nll_loss": 3.6979050636291504, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1488233506679535, + "rewards/margins": -0.002854675054550171, + "rewards/rejected": -0.14596867561340332, + "step": 22 + }, + { + "epoch": 0.014307931570762053, + "grad_norm": 0.5226195454597473, + "learning_rate": 4.9425e-05, + "log_odds_chosen": -0.0417926162481308, + "log_odds_ratio": -0.7332164645195007, + "logits/chosen": -0.12541602551937103, + "logits/rejected": -0.2495168298482895, + "logps/chosen": -1.5487079620361328, + "logps/rejected": -1.5171284675598145, + "loss": 3.6381, + "nll_loss": 3.5647568702697754, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.15487080812454224, + "rewards/margins": -0.0031579509377479553, + "rewards/rejected": -0.15171284973621368, + "step": 23 + }, + { + "epoch": 0.014930015552099534, + "grad_norm": 0.49786579608917236, + "learning_rate": 4.94e-05, + "log_odds_chosen": -0.24082081019878387, + "log_odds_ratio": -0.8401607275009155, + "logits/chosen": -0.15435853600502014, + "logits/rejected": -0.3182606101036072, + "logps/chosen": -1.4637329578399658, + "logps/rejected": -1.2792115211486816, + "loss": 3.6258, + "nll_loss": 3.5417871475219727, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.14637331664562225, + "rewards/margins": -0.01845216006040573, + "rewards/rejected": -0.12792114913463593, + "step": 24 + }, + { + "epoch": 0.015552099533437015, + "grad_norm": 0.7141457200050354, + "learning_rate": 4.937500000000001e-05, + "log_odds_chosen": 0.11563286930322647, + "log_odds_ratio": -0.6847242116928101, + "logits/chosen": -0.05548834800720215, + "logits/rejected": -0.25137871503829956, + "logps/chosen": -1.3125858306884766, + "logps/rejected": -1.3853249549865723, + "loss": 3.9615, + "nll_loss": 3.8930606842041016, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13125859200954437, + "rewards/margins": 0.007273899391293526, + "rewards/rejected": -0.13853248953819275, + "step": 25 + }, + { + "epoch": 0.016174183514774496, + "grad_norm": 0.48132890462875366, + "learning_rate": 4.935e-05, + "log_odds_chosen": -0.3378981947898865, + "log_odds_ratio": -1.048970341682434, + "logits/chosen": -0.27093982696533203, + "logits/rejected": -0.29352226853370667, + "logps/chosen": -1.8172366619110107, + "logps/rejected": -1.4850423336029053, + "loss": 3.3072, + "nll_loss": 3.2022593021392822, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.18172365427017212, + "rewards/margins": -0.03321942687034607, + "rewards/rejected": -0.14850424230098724, + "step": 26 + }, + { + "epoch": 0.016796267496111975, + "grad_norm": 0.465388685464859, + "learning_rate": 4.9325000000000006e-05, + "log_odds_chosen": 0.26982247829437256, + "log_odds_ratio": -0.5840736031532288, + "logits/chosen": -0.13838329911231995, + "logits/rejected": -0.14500632882118225, + "logps/chosen": -1.2994801998138428, + "logps/rejected": -1.5056928396224976, + "loss": 3.2789, + "nll_loss": 3.220458984375, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12994801998138428, + "rewards/margins": 0.020621273666620255, + "rewards/rejected": -0.15056928992271423, + "step": 27 + }, + { + "epoch": 0.017418351477449457, + "grad_norm": 0.4851706922054291, + "learning_rate": 4.93e-05, + "log_odds_chosen": 0.0167313814163208, + "log_odds_ratio": -0.7801838517189026, + "logits/chosen": -0.12882889807224274, + "logits/rejected": -0.27155885100364685, + "logps/chosen": -1.5362133979797363, + "logps/rejected": -1.5006215572357178, + "loss": 3.7467, + "nll_loss": 3.6686904430389404, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1536213457584381, + "rewards/margins": -0.0035591907799243927, + "rewards/rejected": -0.15006214380264282, + "step": 28 + }, + { + "epoch": 0.018040435458786936, + "grad_norm": 0.4745447039604187, + "learning_rate": 4.9275000000000005e-05, + "log_odds_chosen": -0.4235059916973114, + "log_odds_ratio": -0.944256067276001, + "logits/chosen": -0.15738001465797424, + "logits/rejected": -0.28892505168914795, + "logps/chosen": -1.4211246967315674, + "logps/rejected": -1.1277490854263306, + "loss": 3.5365, + "nll_loss": 3.4420742988586426, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.14211246371269226, + "rewards/margins": -0.029337557032704353, + "rewards/rejected": -0.11277490854263306, + "step": 29 + }, + { + "epoch": 0.01866251944012442, + "grad_norm": 0.507114052772522, + "learning_rate": 4.9250000000000004e-05, + "log_odds_chosen": -0.3637525737285614, + "log_odds_ratio": -0.9062818884849548, + "logits/chosen": -0.2181130200624466, + "logits/rejected": -0.37557047605514526, + "logps/chosen": -1.2170381546020508, + "logps/rejected": -0.9694937467575073, + "loss": 2.9864, + "nll_loss": 2.8957455158233643, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.12170383334159851, + "rewards/margins": -0.024754449725151062, + "rewards/rejected": -0.09694937616586685, + "step": 30 + }, + { + "epoch": 0.019284603421461897, + "grad_norm": 0.40544337034225464, + "learning_rate": 4.9225000000000004e-05, + "log_odds_chosen": -0.1934043914079666, + "log_odds_ratio": -0.8023362159729004, + "logits/chosen": -0.21577346324920654, + "logits/rejected": -0.29102498292922974, + "logps/chosen": -1.170575737953186, + "logps/rejected": -1.035292625427246, + "loss": 3.3617, + "nll_loss": 3.281477928161621, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.11705759167671204, + "rewards/margins": -0.013528317213058472, + "rewards/rejected": -0.10352927446365356, + "step": 31 + }, + { + "epoch": 0.019906687402799376, + "grad_norm": 0.5559415221214294, + "learning_rate": 4.92e-05, + "log_odds_chosen": -0.3727276027202606, + "log_odds_ratio": -0.9165624976158142, + "logits/chosen": -0.17616789042949677, + "logits/rejected": -0.41072139143943787, + "logps/chosen": -1.4517570734024048, + "logps/rejected": -1.1750273704528809, + "loss": 3.7894, + "nll_loss": 3.6977787017822266, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.14517571032047272, + "rewards/margins": -0.027672961354255676, + "rewards/rejected": -0.11750274896621704, + "step": 32 + }, + { + "epoch": 0.02052877138413686, + "grad_norm": 0.49967700242996216, + "learning_rate": 4.9175e-05, + "log_odds_chosen": -0.37599754333496094, + "log_odds_ratio": -1.035901427268982, + "logits/chosen": -0.19352321326732635, + "logits/rejected": -0.37395039200782776, + "logps/chosen": -1.4342128038406372, + "logps/rejected": -1.0744060277938843, + "loss": 3.4439, + "nll_loss": 3.3403570652008057, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.14342127740383148, + "rewards/margins": -0.03598066791892052, + "rewards/rejected": -0.10744060575962067, + "step": 33 + }, + { + "epoch": 0.021150855365474338, + "grad_norm": 0.40192580223083496, + "learning_rate": 4.915e-05, + "log_odds_chosen": 0.06716141849756241, + "log_odds_ratio": -0.68401700258255, + "logits/chosen": -0.2316037267446518, + "logits/rejected": -0.34950870275497437, + "logps/chosen": -1.3879081010818481, + "logps/rejected": -1.4275767803192139, + "loss": 3.3671, + "nll_loss": 3.2987279891967773, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1387908160686493, + "rewards/margins": 0.003966865129768848, + "rewards/rejected": -0.14275768399238586, + "step": 34 + }, + { + "epoch": 0.02177293934681182, + "grad_norm": 0.6308751106262207, + "learning_rate": 4.9125e-05, + "log_odds_chosen": 0.10806269943714142, + "log_odds_ratio": -0.6696207523345947, + "logits/chosen": -0.3126291334629059, + "logits/rejected": -0.3402310907840729, + "logps/chosen": -1.4202438592910767, + "logps/rejected": -1.5030903816223145, + "loss": 3.0162, + "nll_loss": 2.949201822280884, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14202438294887543, + "rewards/margins": 0.008284655399620533, + "rewards/rejected": -0.15030904114246368, + "step": 35 + }, + { + "epoch": 0.0223950233281493, + "grad_norm": 0.47672948241233826, + "learning_rate": 4.91e-05, + "log_odds_chosen": 0.6212818026542664, + "log_odds_ratio": -0.48152947425842285, + "logits/chosen": -0.2364703118801117, + "logits/rejected": -0.3322206139564514, + "logps/chosen": -0.9610673189163208, + "logps/rejected": -1.338320016860962, + "loss": 3.2005, + "nll_loss": 3.152357339859009, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09610673785209656, + "rewards/margins": 0.03772527724504471, + "rewards/rejected": -0.13383200764656067, + "step": 36 + }, + { + "epoch": 0.023017107309486782, + "grad_norm": 0.44931307435035706, + "learning_rate": 4.907500000000001e-05, + "log_odds_chosen": -0.29742100834846497, + "log_odds_ratio": -0.952938437461853, + "logits/chosen": -0.16727347671985626, + "logits/rejected": -0.3058803081512451, + "logps/chosen": -1.608022689819336, + "logps/rejected": -1.4014194011688232, + "loss": 3.4746, + "nll_loss": 3.3793392181396484, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.16080228984355927, + "rewards/margins": -0.020660335198044777, + "rewards/rejected": -0.14014194905757904, + "step": 37 + }, + { + "epoch": 0.02363919129082426, + "grad_norm": 0.42830324172973633, + "learning_rate": 4.905e-05, + "log_odds_chosen": -0.4679420292377472, + "log_odds_ratio": -1.0487958192825317, + "logits/chosen": -0.1773906946182251, + "logits/rejected": -0.206269308924675, + "logps/chosen": -1.745017647743225, + "logps/rejected": -1.3269473314285278, + "loss": 3.3234, + "nll_loss": 3.2185542583465576, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.17450176179409027, + "rewards/margins": -0.04180702194571495, + "rewards/rejected": -0.13269473612308502, + "step": 38 + }, + { + "epoch": 0.024261275272161743, + "grad_norm": 0.4050178825855255, + "learning_rate": 4.9025000000000006e-05, + "log_odds_chosen": 0.10232503712177277, + "log_odds_ratio": -0.6785426139831543, + "logits/chosen": -0.18703876435756683, + "logits/rejected": -0.3412337899208069, + "logps/chosen": -1.0300055742263794, + "logps/rejected": -1.0844731330871582, + "loss": 3.1217, + "nll_loss": 3.0538713932037354, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.10300055146217346, + "rewards/margins": 0.0054467711597681046, + "rewards/rejected": -0.10844732820987701, + "step": 39 + }, + { + "epoch": 0.024883359253499222, + "grad_norm": 0.4841116666793823, + "learning_rate": 4.9e-05, + "log_odds_chosen": 0.02147568017244339, + "log_odds_ratio": -0.6917948722839355, + "logits/chosen": -0.10562430322170258, + "logits/rejected": -0.30269837379455566, + "logps/chosen": -1.4125561714172363, + "logps/rejected": -1.440596580505371, + "loss": 3.449, + "nll_loss": 3.3798322677612305, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.14125561714172363, + "rewards/margins": 0.0028040409088134766, + "rewards/rejected": -0.1440596580505371, + "step": 40 + }, + { + "epoch": 0.0255054432348367, + "grad_norm": 0.4915813207626343, + "learning_rate": 4.8975000000000005e-05, + "log_odds_chosen": -0.11176743358373642, + "log_odds_ratio": -0.7612941265106201, + "logits/chosen": -0.08788780868053436, + "logits/rejected": -0.3535291254520416, + "logps/chosen": -1.260170578956604, + "logps/rejected": -1.17002534866333, + "loss": 3.5663, + "nll_loss": 3.490180015563965, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.12601706385612488, + "rewards/margins": -0.009014511480927467, + "rewards/rejected": -0.11700254678726196, + "step": 41 + }, + { + "epoch": 0.026127527216174184, + "grad_norm": 0.4824346601963043, + "learning_rate": 4.8950000000000004e-05, + "log_odds_chosen": -0.18203996121883392, + "log_odds_ratio": -0.80300372838974, + "logits/chosen": -0.13452517986297607, + "logits/rejected": -0.14837010204792023, + "logps/chosen": -1.6312255859375, + "logps/rejected": -1.490962266921997, + "loss": 3.3183, + "nll_loss": 3.2380266189575195, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.16312256455421448, + "rewards/margins": -0.014026330783963203, + "rewards/rejected": -0.14909623563289642, + "step": 42 + }, + { + "epoch": 0.026749611197511663, + "grad_norm": 0.40107211470603943, + "learning_rate": 4.8925e-05, + "log_odds_chosen": 0.1854020357131958, + "log_odds_ratio": -0.6462830305099487, + "logits/chosen": -0.15604250133037567, + "logits/rejected": -0.18608345091342926, + "logps/chosen": -1.3835902214050293, + "logps/rejected": -1.5467376708984375, + "loss": 3.2676, + "nll_loss": 3.203000545501709, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13835902512073517, + "rewards/margins": 0.016314741224050522, + "rewards/rejected": -0.154673770070076, + "step": 43 + }, + { + "epoch": 0.027371695178849145, + "grad_norm": 0.516572117805481, + "learning_rate": 4.89e-05, + "log_odds_chosen": -0.15254731476306915, + "log_odds_ratio": -0.9058659672737122, + "logits/chosen": -0.07723084092140198, + "logits/rejected": -0.3161468803882599, + "logps/chosen": -1.5304224491119385, + "logps/rejected": -1.3918745517730713, + "loss": 3.5929, + "nll_loss": 3.502323865890503, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1530422568321228, + "rewards/margins": -0.013854792341589928, + "rewards/rejected": -0.13918745517730713, + "step": 44 + }, + { + "epoch": 0.027993779160186624, + "grad_norm": 0.4352788031101227, + "learning_rate": 4.8875e-05, + "log_odds_chosen": 0.12119769304990768, + "log_odds_ratio": -0.6822906136512756, + "logits/chosen": -0.08103634417057037, + "logits/rejected": -0.23941874504089355, + "logps/chosen": -1.2429063320159912, + "logps/rejected": -1.3138731718063354, + "loss": 3.4242, + "nll_loss": 3.3560192584991455, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.12429064512252808, + "rewards/margins": 0.007096678018569946, + "rewards/rejected": -0.13138732314109802, + "step": 45 + }, + { + "epoch": 0.028615863141524107, + "grad_norm": 0.45666632056236267, + "learning_rate": 4.885e-05, + "log_odds_chosen": 0.277486115694046, + "log_odds_ratio": -0.6156935095787048, + "logits/chosen": -0.16658517718315125, + "logits/rejected": -0.2931288778781891, + "logps/chosen": -1.0638997554779053, + "logps/rejected": -1.2447154521942139, + "loss": 2.8714, + "nll_loss": 2.8098669052124023, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10638997703790665, + "rewards/margins": 0.018081560730934143, + "rewards/rejected": -0.12447153031826019, + "step": 46 + }, + { + "epoch": 0.029237947122861586, + "grad_norm": 0.4158986806869507, + "learning_rate": 4.8825e-05, + "log_odds_chosen": -0.03135260194540024, + "log_odds_ratio": -0.7471761107444763, + "logits/chosen": 0.007953077554702759, + "logits/rejected": -0.1722358912229538, + "logps/chosen": -1.3186936378479004, + "logps/rejected": -1.2820833921432495, + "loss": 3.5286, + "nll_loss": 3.453866958618164, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.131869375705719, + "rewards/margins": -0.0036610299721360207, + "rewards/rejected": -0.12820833921432495, + "step": 47 + }, + { + "epoch": 0.029860031104199068, + "grad_norm": 0.5239469408988953, + "learning_rate": 4.88e-05, + "log_odds_chosen": -0.14184284210205078, + "log_odds_ratio": -0.7821545600891113, + "logits/chosen": -0.00846000388264656, + "logits/rejected": -0.24844269454479218, + "logps/chosen": -1.4144936800003052, + "logps/rejected": -1.2858200073242188, + "loss": 3.157, + "nll_loss": 3.07881236076355, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.14144936203956604, + "rewards/margins": -0.01286737434566021, + "rewards/rejected": -0.12858200073242188, + "step": 48 + }, + { + "epoch": 0.030482115085536547, + "grad_norm": 0.42745500802993774, + "learning_rate": 4.8775000000000007e-05, + "log_odds_chosen": -0.016907602548599243, + "log_odds_ratio": -0.7335056066513062, + "logits/chosen": 0.027836868539452553, + "logits/rejected": -0.16093496978282928, + "logps/chosen": -1.3329463005065918, + "logps/rejected": -1.3012332916259766, + "loss": 3.7355, + "nll_loss": 3.6621429920196533, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.13329462707042694, + "rewards/margins": -0.00317130284383893, + "rewards/rejected": -0.1301233321428299, + "step": 49 + }, + { + "epoch": 0.03110419906687403, + "grad_norm": 0.5647901296615601, + "learning_rate": 4.875e-05, + "log_odds_chosen": 0.05693642795085907, + "log_odds_ratio": -0.7423352003097534, + "logits/chosen": 0.07700448483228683, + "logits/rejected": -0.21037913858890533, + "logps/chosen": -1.3298025131225586, + "logps/rejected": -1.3227578401565552, + "loss": 3.6339, + "nll_loss": 3.55966854095459, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13298025727272034, + "rewards/margins": -0.0007044710218906403, + "rewards/rejected": -0.13227578997612, + "step": 50 + }, + { + "epoch": 0.031726283048211505, + "grad_norm": 0.5185202360153198, + "learning_rate": 4.8725000000000005e-05, + "log_odds_chosen": 0.271283358335495, + "log_odds_ratio": -0.5842689275741577, + "logits/chosen": -0.0015755696222186089, + "logits/rejected": -0.17162489891052246, + "logps/chosen": -1.039259672164917, + "logps/rejected": -1.1926697492599487, + "loss": 3.2335, + "nll_loss": 3.1750574111938477, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10392597317695618, + "rewards/margins": 0.015341001562774181, + "rewards/rejected": -0.11926697194576263, + "step": 51 + }, + { + "epoch": 0.03234836702954899, + "grad_norm": 0.3914552927017212, + "learning_rate": 4.87e-05, + "log_odds_chosen": -0.09510594606399536, + "log_odds_ratio": -0.7722680568695068, + "logits/chosen": -0.11492118239402771, + "logits/rejected": -0.1295318901538849, + "logps/chosen": -1.2744344472885132, + "logps/rejected": -1.1613863706588745, + "loss": 2.7643, + "nll_loss": 2.6871042251586914, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.12744343280792236, + "rewards/margins": -0.011304810643196106, + "rewards/rejected": -0.11613863706588745, + "step": 52 + }, + { + "epoch": 0.03297045101088647, + "grad_norm": 0.42848122119903564, + "learning_rate": 4.8675000000000004e-05, + "log_odds_chosen": 0.13229113817214966, + "log_odds_ratio": -0.659803569316864, + "logits/chosen": -0.206315815448761, + "logits/rejected": -0.24223698675632477, + "logps/chosen": -1.1478495597839355, + "logps/rejected": -1.2279183864593506, + "loss": 2.5846, + "nll_loss": 2.5186636447906494, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11478495597839355, + "rewards/margins": 0.008006898686289787, + "rewards/rejected": -0.12279185652732849, + "step": 53 + }, + { + "epoch": 0.03359253499222395, + "grad_norm": 0.9718621373176575, + "learning_rate": 4.8650000000000003e-05, + "log_odds_chosen": 0.4774380326271057, + "log_odds_ratio": -0.5322307348251343, + "logits/chosen": 0.07056058198213577, + "logits/rejected": -0.18487195670604706, + "logps/chosen": -1.300857663154602, + "logps/rejected": -1.635850429534912, + "loss": 3.7447, + "nll_loss": 3.6914896965026855, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.130085751414299, + "rewards/margins": 0.033499278128147125, + "rewards/rejected": -0.16358503699302673, + "step": 54 + }, + { + "epoch": 0.03421461897356143, + "grad_norm": 0.341932475566864, + "learning_rate": 4.8625e-05, + "log_odds_chosen": 0.007643640041351318, + "log_odds_ratio": -0.7581207752227783, + "logits/chosen": -0.09210430085659027, + "logits/rejected": -0.0826970785856247, + "logps/chosen": -1.4047452211380005, + "logps/rejected": -1.3780653476715088, + "loss": 3.1252, + "nll_loss": 3.0493462085723877, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.14047454297542572, + "rewards/margins": -0.002668004482984543, + "rewards/rejected": -0.13780653476715088, + "step": 55 + }, + { + "epoch": 0.034836702954898914, + "grad_norm": 0.45145970582962036, + "learning_rate": 4.86e-05, + "log_odds_chosen": -0.24970951676368713, + "log_odds_ratio": -0.8379786014556885, + "logits/chosen": 0.09427239745855331, + "logits/rejected": 0.03530137240886688, + "logps/chosen": -1.4495787620544434, + "logps/rejected": -1.284151554107666, + "loss": 2.9932, + "nll_loss": 2.90944242477417, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.14495787024497986, + "rewards/margins": -0.0165427103638649, + "rewards/rejected": -0.12841516733169556, + "step": 56 + }, + { + "epoch": 0.03545878693623639, + "grad_norm": 0.7668087482452393, + "learning_rate": 4.8575e-05, + "log_odds_chosen": -0.44778311252593994, + "log_odds_ratio": -1.0893973112106323, + "logits/chosen": -0.056043416261672974, + "logits/rejected": -0.1815190464258194, + "logps/chosen": -1.829887866973877, + "logps/rejected": -1.4810354709625244, + "loss": 3.1517, + "nll_loss": 3.0427865982055664, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.18298880755901337, + "rewards/margins": -0.03488525003194809, + "rewards/rejected": -0.14810355007648468, + "step": 57 + }, + { + "epoch": 0.03608087091757387, + "grad_norm": 0.49874478578567505, + "learning_rate": 4.855e-05, + "log_odds_chosen": 0.3892138600349426, + "log_odds_ratio": -0.5497685670852661, + "logits/chosen": -0.11005600541830063, + "logits/rejected": -0.2566314935684204, + "logps/chosen": -1.1769773960113525, + "logps/rejected": -1.4603263139724731, + "loss": 3.0813, + "nll_loss": 3.026326894760132, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11769775301218033, + "rewards/margins": 0.028334885835647583, + "rewards/rejected": -0.14603263139724731, + "step": 58 + }, + { + "epoch": 0.03670295489891135, + "grad_norm": 0.3849422335624695, + "learning_rate": 4.8525e-05, + "log_odds_chosen": -0.050915684551000595, + "log_odds_ratio": -0.7625954151153564, + "logits/chosen": -0.010294832289218903, + "logits/rejected": -0.12012767791748047, + "logps/chosen": -1.289703607559204, + "logps/rejected": -1.2587721347808838, + "loss": 3.4305, + "nll_loss": 3.354198932647705, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12897036969661713, + "rewards/margins": -0.0030931569635868073, + "rewards/rejected": -0.12587721645832062, + "step": 59 + }, + { + "epoch": 0.03732503888024884, + "grad_norm": 0.3733402192592621, + "learning_rate": 4.85e-05, + "log_odds_chosen": 0.9943278431892395, + "log_odds_ratio": -0.46127861738204956, + "logits/chosen": -0.16634932160377502, + "logits/rejected": -0.24419239163398743, + "logps/chosen": -0.8558648824691772, + "logps/rejected": -1.3538252115249634, + "loss": 3.0007, + "nll_loss": 2.954594612121582, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08558649569749832, + "rewards/margins": 0.049796026200056076, + "rewards/rejected": -0.1353825181722641, + "step": 60 + }, + { + "epoch": 0.037947122861586316, + "grad_norm": 0.47226205468177795, + "learning_rate": 4.8475000000000006e-05, + "log_odds_chosen": 0.36546510457992554, + "log_odds_ratio": -0.5973978042602539, + "logits/chosen": 0.09770157933235168, + "logits/rejected": -0.09729360044002533, + "logps/chosen": -1.3515472412109375, + "logps/rejected": -1.606210470199585, + "loss": 3.6102, + "nll_loss": 3.5504493713378906, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13515472412109375, + "rewards/margins": 0.025466332212090492, + "rewards/rejected": -0.1606210470199585, + "step": 61 + }, + { + "epoch": 0.038569206842923795, + "grad_norm": 0.38134610652923584, + "learning_rate": 4.845e-05, + "log_odds_chosen": 0.3819832503795624, + "log_odds_ratio": -0.6043448448181152, + "logits/chosen": 0.1279037743806839, + "logits/rejected": 0.05211859941482544, + "logps/chosen": -1.3050546646118164, + "logps/rejected": -1.5339863300323486, + "loss": 3.3049, + "nll_loss": 3.244431972503662, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13050545752048492, + "rewards/margins": 0.02289317362010479, + "rewards/rejected": -0.15339863300323486, + "step": 62 + }, + { + "epoch": 0.039191290824261274, + "grad_norm": 0.5376470685005188, + "learning_rate": 4.8425000000000005e-05, + "log_odds_chosen": 0.21296370029449463, + "log_odds_ratio": -0.6956855058670044, + "logits/chosen": 0.005937471985816956, + "logits/rejected": -0.13821996748447418, + "logps/chosen": -1.2709373235702515, + "logps/rejected": -1.3110651969909668, + "loss": 3.4905, + "nll_loss": 3.4209353923797607, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12709373235702515, + "rewards/margins": 0.004012777470052242, + "rewards/rejected": -0.13110651075839996, + "step": 63 + }, + { + "epoch": 0.03981337480559875, + "grad_norm": 0.45539331436157227, + "learning_rate": 4.8400000000000004e-05, + "log_odds_chosen": -0.19902189075946808, + "log_odds_ratio": -0.8536033630371094, + "logits/chosen": 0.059451624751091, + "logits/rejected": -0.11644947528839111, + "logps/chosen": -1.4663325548171997, + "logps/rejected": -1.3308027982711792, + "loss": 3.6228, + "nll_loss": 3.5374748706817627, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.14663325250148773, + "rewards/margins": -0.013552968390285969, + "rewards/rejected": -0.13308028876781464, + "step": 64 + }, + { + "epoch": 0.04043545878693624, + "grad_norm": 0.3535555899143219, + "learning_rate": 4.8375000000000004e-05, + "log_odds_chosen": -0.03207793086767197, + "log_odds_ratio": -0.724827766418457, + "logits/chosen": 0.035917554050683975, + "logits/rejected": -0.06365230679512024, + "logps/chosen": -1.3977922201156616, + "logps/rejected": -1.3612167835235596, + "loss": 3.3147, + "nll_loss": 3.2421815395355225, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1397792100906372, + "rewards/margins": -0.0036575384438037872, + "rewards/rejected": -0.13612167537212372, + "step": 65 + }, + { + "epoch": 0.04105754276827372, + "grad_norm": 0.4546717405319214, + "learning_rate": 4.835e-05, + "log_odds_chosen": -0.03722512722015381, + "log_odds_ratio": -0.7913417220115662, + "logits/chosen": 0.09111925959587097, + "logits/rejected": -0.1404401957988739, + "logps/chosen": -1.3331866264343262, + "logps/rejected": -1.2620258331298828, + "loss": 3.2911, + "nll_loss": 3.2119736671447754, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.13331866264343262, + "rewards/margins": -0.007116083987057209, + "rewards/rejected": -0.12620258331298828, + "step": 66 + }, + { + "epoch": 0.0416796267496112, + "grad_norm": 0.30916666984558105, + "learning_rate": 4.8325e-05, + "log_odds_chosen": -0.3806450664997101, + "log_odds_ratio": -0.9495774507522583, + "logits/chosen": 0.22228950262069702, + "logits/rejected": 0.011185593903064728, + "logps/chosen": -1.36622953414917, + "logps/rejected": -1.1000258922576904, + "loss": 3.9335, + "nll_loss": 3.8385517597198486, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.13662296533584595, + "rewards/margins": -0.026620371267199516, + "rewards/rejected": -0.11000259220600128, + "step": 67 + }, + { + "epoch": 0.042301710730948676, + "grad_norm": 0.5843622088432312, + "learning_rate": 4.83e-05, + "log_odds_chosen": -0.04287657141685486, + "log_odds_ratio": -0.7503262758255005, + "logits/chosen": 0.12020996958017349, + "logits/rejected": -0.06287462264299393, + "logps/chosen": -1.2391448020935059, + "logps/rejected": -1.2382404804229736, + "loss": 2.7952, + "nll_loss": 2.72021484375, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.12391449511051178, + "rewards/margins": -9.043654426932335e-05, + "rewards/rejected": -0.12382405996322632, + "step": 68 + }, + { + "epoch": 0.04292379471228616, + "grad_norm": 0.40152105689048767, + "learning_rate": 4.8275e-05, + "log_odds_chosen": 0.48192113637924194, + "log_odds_ratio": -0.5793935060501099, + "logits/chosen": 0.024148011580109596, + "logits/rejected": -0.028608618304133415, + "logps/chosen": -1.2637081146240234, + "logps/rejected": -1.5951621532440186, + "loss": 3.0237, + "nll_loss": 2.9657363891601562, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12637081742286682, + "rewards/margins": 0.03314541280269623, + "rewards/rejected": -0.15951621532440186, + "step": 69 + }, + { + "epoch": 0.04354587869362364, + "grad_norm": 0.5005712509155273, + "learning_rate": 4.825e-05, + "log_odds_chosen": 0.16657370328903198, + "log_odds_ratio": -0.6632782220840454, + "logits/chosen": 0.0074752867221832275, + "logits/rejected": -0.15818460285663605, + "logps/chosen": -1.009189486503601, + "logps/rejected": -1.074352502822876, + "loss": 3.0056, + "nll_loss": 2.939265012741089, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1009189561009407, + "rewards/margins": 0.006516308058053255, + "rewards/rejected": -0.10743525624275208, + "step": 70 + }, + { + "epoch": 0.04416796267496112, + "grad_norm": 0.47246062755584717, + "learning_rate": 4.822500000000001e-05, + "log_odds_chosen": -0.008882712572813034, + "log_odds_ratio": -0.799690842628479, + "logits/chosen": 0.1656499207019806, + "logits/rejected": 0.012578403577208519, + "logps/chosen": -1.1688836812973022, + "logps/rejected": -1.1197009086608887, + "loss": 3.2172, + "nll_loss": 3.137253999710083, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1168883666396141, + "rewards/margins": -0.0049182698130607605, + "rewards/rejected": -0.11197009682655334, + "step": 71 + }, + { + "epoch": 0.0447900466562986, + "grad_norm": 0.5179511308670044, + "learning_rate": 4.82e-05, + "log_odds_chosen": -0.01626509428024292, + "log_odds_ratio": -0.7597264051437378, + "logits/chosen": -0.07426048815250397, + "logits/rejected": -0.21345210075378418, + "logps/chosen": -1.305432677268982, + "logps/rejected": -1.3045635223388672, + "loss": 2.7467, + "nll_loss": 2.6707706451416016, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13054326176643372, + "rewards/margins": -8.692499250173569e-05, + "rewards/rejected": -0.1304563581943512, + "step": 72 + }, + { + "epoch": 0.04541213063763608, + "grad_norm": 0.3978026211261749, + "learning_rate": 4.8175000000000005e-05, + "log_odds_chosen": -0.17721597850322723, + "log_odds_ratio": -0.8433041572570801, + "logits/chosen": -0.00016376003623008728, + "logits/rejected": -0.1693696677684784, + "logps/chosen": -1.6973767280578613, + "logps/rejected": -1.5059082508087158, + "loss": 3.4049, + "nll_loss": 3.3205349445343018, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.16973768174648285, + "rewards/margins": -0.019146855920553207, + "rewards/rejected": -0.15059083700180054, + "step": 73 + }, + { + "epoch": 0.046034214618973564, + "grad_norm": 0.44062405824661255, + "learning_rate": 4.815e-05, + "log_odds_chosen": 0.29165250062942505, + "log_odds_ratio": -0.6030845642089844, + "logits/chosen": 0.03590073063969612, + "logits/rejected": -0.05357559770345688, + "logps/chosen": -0.9286098480224609, + "logps/rejected": -1.074052333831787, + "loss": 3.1473, + "nll_loss": 3.0869500637054443, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09286098182201385, + "rewards/margins": 0.014544256031513214, + "rewards/rejected": -0.10740524530410767, + "step": 74 + }, + { + "epoch": 0.04665629860031104, + "grad_norm": 0.3442816436290741, + "learning_rate": 4.8125000000000004e-05, + "log_odds_chosen": 0.2888907194137573, + "log_odds_ratio": -0.6076911687850952, + "logits/chosen": -0.12151262164115906, + "logits/rejected": -0.12172101438045502, + "logps/chosen": -1.0215524435043335, + "logps/rejected": -1.1889455318450928, + "loss": 2.7404, + "nll_loss": 2.6796624660491943, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10215524584054947, + "rewards/margins": 0.016739321872591972, + "rewards/rejected": -0.118894562125206, + "step": 75 + }, + { + "epoch": 0.04727838258164852, + "grad_norm": 0.3842046558856964, + "learning_rate": 4.8100000000000004e-05, + "log_odds_chosen": -0.25257712602615356, + "log_odds_ratio": -0.8657203912734985, + "logits/chosen": 0.023503951728343964, + "logits/rejected": -0.18105646967887878, + "logps/chosen": -1.4399148225784302, + "logps/rejected": -1.2502027750015259, + "loss": 3.2649, + "nll_loss": 3.178344488143921, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.14399148523807526, + "rewards/margins": -0.01897120475769043, + "rewards/rejected": -0.12502028048038483, + "step": 76 + }, + { + "epoch": 0.047900466562986, + "grad_norm": 0.3534882664680481, + "learning_rate": 4.8075e-05, + "log_odds_chosen": -0.21401476860046387, + "log_odds_ratio": -0.8372653722763062, + "logits/chosen": 0.14382417500019073, + "logits/rejected": -0.052027709782123566, + "logps/chosen": -1.4991471767425537, + "logps/rejected": -1.312038540840149, + "loss": 3.7097, + "nll_loss": 3.625964403152466, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.1499147266149521, + "rewards/margins": -0.01871085911989212, + "rewards/rejected": -0.13120386004447937, + "step": 77 + }, + { + "epoch": 0.04852255054432349, + "grad_norm": 0.8299946188926697, + "learning_rate": 4.805e-05, + "log_odds_chosen": -0.24282418191432953, + "log_odds_ratio": -0.8619076013565063, + "logits/chosen": -0.18960991501808167, + "logits/rejected": -0.34159693121910095, + "logps/chosen": -1.1464121341705322, + "logps/rejected": -0.992071270942688, + "loss": 2.5854, + "nll_loss": 2.4992449283599854, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.1146412119269371, + "rewards/margins": -0.015434084460139275, + "rewards/rejected": -0.09920713305473328, + "step": 78 + }, + { + "epoch": 0.049144634525660966, + "grad_norm": 0.5896576046943665, + "learning_rate": 4.8025e-05, + "log_odds_chosen": 0.14239203929901123, + "log_odds_ratio": -0.6772485375404358, + "logits/chosen": -0.02156006544828415, + "logits/rejected": -0.23830567300319672, + "logps/chosen": -1.3663321733474731, + "logps/rejected": -1.4884802103042603, + "loss": 3.0472, + "nll_loss": 2.9794466495513916, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13663321733474731, + "rewards/margins": 0.012214799411594868, + "rewards/rejected": -0.1488480269908905, + "step": 79 + }, + { + "epoch": 0.049766718506998445, + "grad_norm": 0.5111446976661682, + "learning_rate": 4.8e-05, + "log_odds_chosen": 0.06895725429058075, + "log_odds_ratio": -0.6709849834442139, + "logits/chosen": 0.03493687883019447, + "logits/rejected": -0.12141556292772293, + "logps/chosen": -1.3161884546279907, + "logps/rejected": -1.3568629026412964, + "loss": 2.9872, + "nll_loss": 2.9200947284698486, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13161885738372803, + "rewards/margins": 0.0040674470365047455, + "rewards/rejected": -0.13568630814552307, + "step": 80 + }, + { + "epoch": 0.050388802488335924, + "grad_norm": 0.5454034805297852, + "learning_rate": 4.7975e-05, + "log_odds_chosen": 0.013033639639616013, + "log_odds_ratio": -0.7662529945373535, + "logits/chosen": 0.09307534992694855, + "logits/rejected": -0.13599663972854614, + "logps/chosen": -1.2162799835205078, + "logps/rejected": -1.1834042072296143, + "loss": 3.0209, + "nll_loss": 2.9443132877349854, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12162800133228302, + "rewards/margins": -0.0032875724136829376, + "rewards/rejected": -0.11834041774272919, + "step": 81 + }, + { + "epoch": 0.0510108864696734, + "grad_norm": 0.4710577726364136, + "learning_rate": 4.795e-05, + "log_odds_chosen": 0.1277192234992981, + "log_odds_ratio": -0.6501528024673462, + "logits/chosen": 0.048733945935964584, + "logits/rejected": -0.10845337063074112, + "logps/chosen": -1.2608442306518555, + "logps/rejected": -1.3538391590118408, + "loss": 2.9852, + "nll_loss": 2.920222759246826, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12608441710472107, + "rewards/margins": 0.0092995036393404, + "rewards/rejected": -0.13538393378257751, + "step": 82 + }, + { + "epoch": 0.05163297045101089, + "grad_norm": 0.4011155068874359, + "learning_rate": 4.7925000000000006e-05, + "log_odds_chosen": 0.20483741164207458, + "log_odds_ratio": -0.6441969275474548, + "logits/chosen": 0.017529848963022232, + "logits/rejected": -0.17869825661182404, + "logps/chosen": -1.1669516563415527, + "logps/rejected": -1.279728889465332, + "loss": 2.7464, + "nll_loss": 2.6819615364074707, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11669516563415527, + "rewards/margins": 0.011277715675532818, + "rewards/rejected": -0.12797288596630096, + "step": 83 + }, + { + "epoch": 0.05225505443234837, + "grad_norm": 0.5642073750495911, + "learning_rate": 4.79e-05, + "log_odds_chosen": 0.16463297605514526, + "log_odds_ratio": -0.6702374219894409, + "logits/chosen": 0.25281310081481934, + "logits/rejected": -0.022312596440315247, + "logps/chosen": -1.236396074295044, + "logps/rejected": -1.3540095090866089, + "loss": 3.6312, + "nll_loss": 3.564155101776123, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12363961338996887, + "rewards/margins": 0.01176133006811142, + "rewards/rejected": -0.1354009509086609, + "step": 84 + }, + { + "epoch": 0.05287713841368585, + "grad_norm": 0.42271995544433594, + "learning_rate": 4.7875000000000005e-05, + "log_odds_chosen": 0.29148802161216736, + "log_odds_ratio": -0.6428624987602234, + "logits/chosen": 0.16957850754261017, + "logits/rejected": -0.029354337602853775, + "logps/chosen": -1.3640344142913818, + "logps/rejected": -1.4766488075256348, + "loss": 3.5189, + "nll_loss": 3.45458984375, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13640344142913818, + "rewards/margins": 0.01126144826412201, + "rewards/rejected": -0.1476649045944214, + "step": 85 + }, + { + "epoch": 0.053499222395023326, + "grad_norm": 0.6085870265960693, + "learning_rate": 4.785e-05, + "log_odds_chosen": 0.020148158073425293, + "log_odds_ratio": -0.6988396644592285, + "logits/chosen": -0.01158811990171671, + "logits/rejected": -0.15423518419265747, + "logps/chosen": -1.4441418647766113, + "logps/rejected": -1.4638988971710205, + "loss": 2.5733, + "nll_loss": 2.5033936500549316, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.14441418647766113, + "rewards/margins": 0.0019757067784667015, + "rewards/rejected": -0.146389901638031, + "step": 86 + }, + { + "epoch": 0.05412130637636081, + "grad_norm": 0.4892805814743042, + "learning_rate": 4.7825000000000004e-05, + "log_odds_chosen": 0.8066454529762268, + "log_odds_ratio": -0.4088033139705658, + "logits/chosen": 0.12198805809020996, + "logits/rejected": -0.12190286070108414, + "logps/chosen": -0.8199340105056763, + "logps/rejected": -1.2292183637619019, + "loss": 2.7766, + "nll_loss": 2.73576021194458, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08199340105056763, + "rewards/margins": 0.040928442031145096, + "rewards/rejected": -0.12292183935642242, + "step": 87 + }, + { + "epoch": 0.05474339035769829, + "grad_norm": 0.3810538351535797, + "learning_rate": 4.78e-05, + "log_odds_chosen": -0.106544628739357, + "log_odds_ratio": -0.7797843217849731, + "logits/chosen": 0.23251253366470337, + "logits/rejected": 0.10770893096923828, + "logps/chosen": -1.1218090057373047, + "logps/rejected": -1.0476629734039307, + "loss": 3.2626, + "nll_loss": 3.1846585273742676, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11218090355396271, + "rewards/margins": -0.007414607331156731, + "rewards/rejected": -0.10476629436016083, + "step": 88 + }, + { + "epoch": 0.05536547433903577, + "grad_norm": 0.5943082571029663, + "learning_rate": 4.7775e-05, + "log_odds_chosen": 0.18490558862686157, + "log_odds_ratio": -0.6679292917251587, + "logits/chosen": 0.14844851195812225, + "logits/rejected": -0.0743369460105896, + "logps/chosen": -1.268092155456543, + "logps/rejected": -1.3130736351013184, + "loss": 3.2991, + "nll_loss": 3.232351779937744, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12680920958518982, + "rewards/margins": 0.0044981567189097404, + "rewards/rejected": -0.13130736351013184, + "step": 89 + }, + { + "epoch": 0.05598755832037325, + "grad_norm": 0.5314677357673645, + "learning_rate": 4.775e-05, + "log_odds_chosen": 0.2689226269721985, + "log_odds_ratio": -0.6226869225502014, + "logits/chosen": 0.06861215829849243, + "logits/rejected": -0.163130983710289, + "logps/chosen": -1.3787274360656738, + "logps/rejected": -1.6074514389038086, + "loss": 2.6911, + "nll_loss": 2.6288278102874756, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13787275552749634, + "rewards/margins": 0.022872384637594223, + "rewards/rejected": -0.16074511408805847, + "step": 90 + }, + { + "epoch": 0.05660964230171073, + "grad_norm": 0.4301508367061615, + "learning_rate": 4.7725e-05, + "log_odds_chosen": 0.3461243510246277, + "log_odds_ratio": -0.646808385848999, + "logits/chosen": 0.24266819655895233, + "logits/rejected": 0.029673846438527107, + "logps/chosen": -1.2074958086013794, + "logps/rejected": -1.411824345588684, + "loss": 3.5601, + "nll_loss": 3.495389938354492, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1207495778799057, + "rewards/margins": 0.02043285220861435, + "rewards/rejected": -0.14118242263793945, + "step": 91 + }, + { + "epoch": 0.05723172628304821, + "grad_norm": 0.4627557694911957, + "learning_rate": 4.77e-05, + "log_odds_chosen": -0.28584611415863037, + "log_odds_ratio": -0.9114346504211426, + "logits/chosen": 0.21556691825389862, + "logits/rejected": 0.014135261997580528, + "logps/chosen": -1.367776870727539, + "logps/rejected": -1.1591300964355469, + "loss": 3.5889, + "nll_loss": 3.497756004333496, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.13677769899368286, + "rewards/margins": -0.02086469531059265, + "rewards/rejected": -0.11591300368309021, + "step": 92 + }, + { + "epoch": 0.05785381026438569, + "grad_norm": 0.37699708342552185, + "learning_rate": 4.7675e-05, + "log_odds_chosen": 0.3036189377307892, + "log_odds_ratio": -0.5816516876220703, + "logits/chosen": 0.01264739129692316, + "logits/rejected": -0.06274904310703278, + "logps/chosen": -0.9731736183166504, + "logps/rejected": -1.1666101217269897, + "loss": 3.0008, + "nll_loss": 2.942596673965454, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09731736779212952, + "rewards/margins": 0.019343651831150055, + "rewards/rejected": -0.11666101962327957, + "step": 93 + }, + { + "epoch": 0.05847589424572317, + "grad_norm": 0.4950994551181793, + "learning_rate": 4.765e-05, + "log_odds_chosen": -0.003652891144156456, + "log_odds_ratio": -0.7005108594894409, + "logits/chosen": 0.1562429666519165, + "logits/rejected": -0.07231743633747101, + "logps/chosen": -1.5042771100997925, + "logps/rejected": -1.506368637084961, + "loss": 3.2415, + "nll_loss": 3.1714537143707275, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1504277139902115, + "rewards/margins": 0.0002091517671942711, + "rewards/rejected": -0.15063685178756714, + "step": 94 + }, + { + "epoch": 0.05909797822706065, + "grad_norm": 0.5913958549499512, + "learning_rate": 4.7625000000000006e-05, + "log_odds_chosen": -0.5333858132362366, + "log_odds_ratio": -1.058704137802124, + "logits/chosen": 0.2847288250923157, + "logits/rejected": -0.04206930845975876, + "logps/chosen": -2.0648934841156006, + "logps/rejected": -1.5971152782440186, + "loss": 3.879, + "nll_loss": 3.7731385231018066, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.20648935437202454, + "rewards/margins": -0.04677780717611313, + "rewards/rejected": -0.1597115397453308, + "step": 95 + }, + { + "epoch": 0.059720062208398136, + "grad_norm": 0.46030476689338684, + "learning_rate": 4.76e-05, + "log_odds_chosen": -0.05350463092327118, + "log_odds_ratio": -0.7248205542564392, + "logits/chosen": 0.07910023629665375, + "logits/rejected": -0.06675226986408234, + "logps/chosen": -1.3112492561340332, + "logps/rejected": -1.2683042287826538, + "loss": 2.5608, + "nll_loss": 2.4883413314819336, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13112492859363556, + "rewards/margins": -0.0042945025488734245, + "rewards/rejected": -0.12683042883872986, + "step": 96 + }, + { + "epoch": 0.060342146189735615, + "grad_norm": 0.8850117921829224, + "learning_rate": 4.7575000000000004e-05, + "log_odds_chosen": -0.16680529713630676, + "log_odds_ratio": -0.918777585029602, + "logits/chosen": 0.09566190838813782, + "logits/rejected": -0.09266671538352966, + "logps/chosen": -1.5960246324539185, + "logps/rejected": -1.3469147682189941, + "loss": 3.1828, + "nll_loss": 3.0909605026245117, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.15960246324539185, + "rewards/margins": -0.024910978972911835, + "rewards/rejected": -0.13469147682189941, + "step": 97 + }, + { + "epoch": 0.060964230171073094, + "grad_norm": 0.43437162041664124, + "learning_rate": 4.755e-05, + "log_odds_chosen": 0.30574995279312134, + "log_odds_ratio": -0.6281970143318176, + "logits/chosen": 0.08859042823314667, + "logits/rejected": -0.02086031809449196, + "logps/chosen": -1.3456239700317383, + "logps/rejected": -1.5943999290466309, + "loss": 3.2651, + "nll_loss": 3.2022864818573, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1345623880624771, + "rewards/margins": 0.02487758919596672, + "rewards/rejected": -0.15943998098373413, + "step": 98 + }, + { + "epoch": 0.06158631415241057, + "grad_norm": 0.6766498684883118, + "learning_rate": 4.7525e-05, + "log_odds_chosen": -0.11186225712299347, + "log_odds_ratio": -0.7736210823059082, + "logits/chosen": 0.04021189361810684, + "logits/rejected": 0.03945222869515419, + "logps/chosen": -1.3682116270065308, + "logps/rejected": -1.301282286643982, + "loss": 2.6354, + "nll_loss": 2.5580055713653564, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13682116568088531, + "rewards/margins": -0.006692924536764622, + "rewards/rejected": -0.13012823462486267, + "step": 99 + }, + { + "epoch": 0.06220839813374806, + "grad_norm": 0.5133693218231201, + "learning_rate": 4.75e-05, + "log_odds_chosen": -0.17678174376487732, + "log_odds_ratio": -0.8037921786308289, + "logits/chosen": 0.03568641096353531, + "logits/rejected": -0.08334266394376755, + "logps/chosen": -1.4727808237075806, + "logps/rejected": -1.3427841663360596, + "loss": 2.8091, + "nll_loss": 2.7287254333496094, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.1472780704498291, + "rewards/margins": -0.012999659404158592, + "rewards/rejected": -0.13427841663360596, + "step": 100 + }, + { + "epoch": 0.06283048211508553, + "grad_norm": 0.437914103269577, + "learning_rate": 4.7475e-05, + "log_odds_chosen": -0.055506929755210876, + "log_odds_ratio": -0.7757474184036255, + "logits/chosen": 0.13743720948696136, + "logits/rejected": 0.028182677924633026, + "logps/chosen": -1.2761411666870117, + "logps/rejected": -1.158294677734375, + "loss": 3.2391, + "nll_loss": 3.161520481109619, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.1276141107082367, + "rewards/margins": -0.01178464200347662, + "rewards/rejected": -0.1158294752240181, + "step": 101 + }, + { + "epoch": 0.06345256609642301, + "grad_norm": 0.4476625919342041, + "learning_rate": 4.745e-05, + "log_odds_chosen": 0.2342412918806076, + "log_odds_ratio": -0.6413729786872864, + "logits/chosen": 0.168337881565094, + "logits/rejected": -0.14545048773288727, + "logps/chosen": -1.2778122425079346, + "logps/rejected": -1.4140448570251465, + "loss": 3.7524, + "nll_loss": 3.6882784366607666, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1277812272310257, + "rewards/margins": 0.013623261824250221, + "rewards/rejected": -0.14140449464321136, + "step": 102 + }, + { + "epoch": 0.0640746500777605, + "grad_norm": 0.5129625201225281, + "learning_rate": 4.7425e-05, + "log_odds_chosen": 0.08128762245178223, + "log_odds_ratio": -0.6819002628326416, + "logits/chosen": 0.10714941471815109, + "logits/rejected": -0.07025566697120667, + "logps/chosen": -1.333043098449707, + "logps/rejected": -1.3708577156066895, + "loss": 3.0904, + "nll_loss": 3.0221877098083496, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13330431282520294, + "rewards/margins": 0.0037814658135175705, + "rewards/rejected": -0.13708576560020447, + "step": 103 + }, + { + "epoch": 0.06469673405909798, + "grad_norm": 0.5524664521217346, + "learning_rate": 4.74e-05, + "log_odds_chosen": -0.28195393085479736, + "log_odds_ratio": -0.8670526742935181, + "logits/chosen": -0.04569420963525772, + "logits/rejected": -0.1721382588148117, + "logps/chosen": -1.4118688106536865, + "logps/rejected": -1.197587490081787, + "loss": 2.7953, + "nll_loss": 2.7085652351379395, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.1411868780851364, + "rewards/margins": -0.021428123116493225, + "rewards/rejected": -0.11975875496864319, + "step": 104 + }, + { + "epoch": 0.06531881804043546, + "grad_norm": 0.4822681248188019, + "learning_rate": 4.7375e-05, + "log_odds_chosen": 0.38326454162597656, + "log_odds_ratio": -0.5975579619407654, + "logits/chosen": -0.007261446211487055, + "logits/rejected": -0.030812345445156097, + "logps/chosen": -1.179614782333374, + "logps/rejected": -1.4915132522583008, + "loss": 2.6517, + "nll_loss": 2.5919699668884277, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11796149611473083, + "rewards/margins": 0.031189851462841034, + "rewards/rejected": -0.14915132522583008, + "step": 105 + }, + { + "epoch": 0.06594090202177294, + "grad_norm": 0.5283992886543274, + "learning_rate": 4.735e-05, + "log_odds_chosen": 0.45969775319099426, + "log_odds_ratio": -0.5704058408737183, + "logits/chosen": -0.005958788096904755, + "logits/rejected": -0.1403818130493164, + "logps/chosen": -1.1262407302856445, + "logps/rejected": -1.365699291229248, + "loss": 3.3859, + "nll_loss": 3.328857183456421, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11262409389019012, + "rewards/margins": 0.023945845663547516, + "rewards/rejected": -0.13656993210315704, + "step": 106 + }, + { + "epoch": 0.06656298600311042, + "grad_norm": 0.416676789522171, + "learning_rate": 4.7325000000000005e-05, + "log_odds_chosen": 0.1313173770904541, + "log_odds_ratio": -0.6711844205856323, + "logits/chosen": 0.157118558883667, + "logits/rejected": 0.035575054585933685, + "logps/chosen": -1.4387624263763428, + "logps/rejected": -1.5632802248001099, + "loss": 3.2781, + "nll_loss": 3.210995674133301, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.14387625455856323, + "rewards/margins": 0.012451781891286373, + "rewards/rejected": -0.156328022480011, + "step": 107 + }, + { + "epoch": 0.0671850699844479, + "grad_norm": 0.5572922229766846, + "learning_rate": 4.73e-05, + "log_odds_chosen": -0.12381504476070404, + "log_odds_ratio": -0.770376443862915, + "logits/chosen": 0.04753262549638748, + "logits/rejected": -0.051002245396375656, + "logps/chosen": -1.5710325241088867, + "logps/rejected": -1.46817147731781, + "loss": 2.8265, + "nll_loss": 2.749462127685547, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.15710324048995972, + "rewards/margins": -0.010286085307598114, + "rewards/rejected": -0.146817147731781, + "step": 108 + }, + { + "epoch": 0.06780715396578538, + "grad_norm": 0.3513568937778473, + "learning_rate": 4.7275000000000004e-05, + "log_odds_chosen": 0.05175573006272316, + "log_odds_ratio": -0.6735912561416626, + "logits/chosen": 0.08347096294164658, + "logits/rejected": 0.05597352236509323, + "logps/chosen": -1.2763020992279053, + "logps/rejected": -1.3162968158721924, + "loss": 2.8374, + "nll_loss": 2.77008056640625, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12763021886348724, + "rewards/margins": 0.0039994558319449425, + "rewards/rejected": -0.13162967562675476, + "step": 109 + }, + { + "epoch": 0.06842923794712286, + "grad_norm": 0.48008182644844055, + "learning_rate": 4.7249999999999997e-05, + "log_odds_chosen": 0.2633849084377289, + "log_odds_ratio": -0.5829953551292419, + "logits/chosen": 0.0013431366533041, + "logits/rejected": -0.17736227810382843, + "logps/chosen": -1.2399024963378906, + "logps/rejected": -1.4264099597930908, + "loss": 2.5525, + "nll_loss": 2.4942123889923096, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1239902600646019, + "rewards/margins": 0.018650764599442482, + "rewards/rejected": -0.14264102280139923, + "step": 110 + }, + { + "epoch": 0.06905132192846034, + "grad_norm": 0.4274824857711792, + "learning_rate": 4.7225e-05, + "log_odds_chosen": -0.1062813401222229, + "log_odds_ratio": -0.809943675994873, + "logits/chosen": 0.17697763442993164, + "logits/rejected": -0.11846765875816345, + "logps/chosen": -1.3306231498718262, + "logps/rejected": -1.2187504768371582, + "loss": 3.1534, + "nll_loss": 3.0723817348480225, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.13306231796741486, + "rewards/margins": -0.011187261901795864, + "rewards/rejected": -0.12187506258487701, + "step": 111 + }, + { + "epoch": 0.06967340590979783, + "grad_norm": 0.44988536834716797, + "learning_rate": 4.72e-05, + "log_odds_chosen": 0.298068106174469, + "log_odds_ratio": -0.5905066132545471, + "logits/chosen": 0.060463353991508484, + "logits/rejected": -0.10935623943805695, + "logps/chosen": -1.1479036808013916, + "logps/rejected": -1.342679738998413, + "loss": 2.709, + "nll_loss": 2.6499881744384766, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11479037255048752, + "rewards/margins": 0.01947760209441185, + "rewards/rejected": -0.13426797091960907, + "step": 112 + }, + { + "epoch": 0.07029548989113531, + "grad_norm": 0.4988582134246826, + "learning_rate": 4.7175e-05, + "log_odds_chosen": -0.08553829789161682, + "log_odds_ratio": -0.8603007793426514, + "logits/chosen": 0.39943423867225647, + "logits/rejected": 0.06061486154794693, + "logps/chosen": -1.4535963535308838, + "logps/rejected": -1.3087565898895264, + "loss": 3.8892, + "nll_loss": 3.8031439781188965, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14535963535308838, + "rewards/margins": -0.014483977109193802, + "rewards/rejected": -0.13087564706802368, + "step": 113 + }, + { + "epoch": 0.07091757387247279, + "grad_norm": 0.46725013852119446, + "learning_rate": 4.715e-05, + "log_odds_chosen": 0.641747772693634, + "log_odds_ratio": -0.5314986705780029, + "logits/chosen": 0.09639215469360352, + "logits/rejected": -0.023075614124536514, + "logps/chosen": -0.903424859046936, + "logps/rejected": -1.2310090065002441, + "loss": 3.0129, + "nll_loss": 2.9597768783569336, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09034248441457748, + "rewards/margins": 0.032758425921201706, + "rewards/rejected": -0.12310090661048889, + "step": 114 + }, + { + "epoch": 0.07153965785381027, + "grad_norm": 0.4860602617263794, + "learning_rate": 4.7125e-05, + "log_odds_chosen": -0.0031689107418060303, + "log_odds_ratio": -0.7295066118240356, + "logits/chosen": 0.21932338178157806, + "logits/rejected": 0.0026386789977550507, + "logps/chosen": -1.5008599758148193, + "logps/rejected": -1.5008916854858398, + "loss": 3.3598, + "nll_loss": 3.2868006229400635, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.15008598566055298, + "rewards/margins": 3.1907111406326294e-06, + "rewards/rejected": -0.15008917450904846, + "step": 115 + }, + { + "epoch": 0.07216174183514774, + "grad_norm": 0.8297399282455444, + "learning_rate": 4.71e-05, + "log_odds_chosen": -0.01504303514957428, + "log_odds_ratio": -0.7895686030387878, + "logits/chosen": 0.08432838320732117, + "logits/rejected": -0.08494340628385544, + "logps/chosen": -1.3483591079711914, + "logps/rejected": -1.3111402988433838, + "loss": 3.046, + "nll_loss": 2.9669947624206543, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13483591377735138, + "rewards/margins": -0.0037218770012259483, + "rewards/rejected": -0.13111403584480286, + "step": 116 + }, + { + "epoch": 0.07278382581648522, + "grad_norm": 0.4088709354400635, + "learning_rate": 4.7075e-05, + "log_odds_chosen": 0.20638790726661682, + "log_odds_ratio": -0.6793928146362305, + "logits/chosen": 0.2052862048149109, + "logits/rejected": 0.044292062520980835, + "logps/chosen": -1.2724294662475586, + "logps/rejected": -1.4322636127471924, + "loss": 3.539, + "nll_loss": 3.471055030822754, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.12724295258522034, + "rewards/margins": 0.01598340831696987, + "rewards/rejected": -0.14322635531425476, + "step": 117 + }, + { + "epoch": 0.0734059097978227, + "grad_norm": 0.42515477538108826, + "learning_rate": 4.705e-05, + "log_odds_chosen": -0.019400358200073242, + "log_odds_ratio": -0.7594976425170898, + "logits/chosen": 0.27322930097579956, + "logits/rejected": 0.07737819850444794, + "logps/chosen": -1.2583709955215454, + "logps/rejected": -1.2143146991729736, + "loss": 3.5821, + "nll_loss": 3.5061068534851074, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.12583710253238678, + "rewards/margins": -0.004405634477734566, + "rewards/rejected": -0.12143146991729736, + "step": 118 + }, + { + "epoch": 0.07402799377916018, + "grad_norm": 0.5038117170333862, + "learning_rate": 4.7025000000000005e-05, + "log_odds_chosen": 0.38098299503326416, + "log_odds_ratio": -0.5279546976089478, + "logits/chosen": 0.15353046357631683, + "logits/rejected": -0.20058253407478333, + "logps/chosen": -1.2580475807189941, + "logps/rejected": -1.5402312278747559, + "loss": 3.1463, + "nll_loss": 3.0934572219848633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12580475211143494, + "rewards/margins": 0.028218373656272888, + "rewards/rejected": -0.15402314066886902, + "step": 119 + }, + { + "epoch": 0.07465007776049767, + "grad_norm": 0.4213177561759949, + "learning_rate": 4.7e-05, + "log_odds_chosen": 0.2627849876880646, + "log_odds_ratio": -0.6001940965652466, + "logits/chosen": 0.3041577935218811, + "logits/rejected": -0.03829241171479225, + "logps/chosen": -1.1501599550247192, + "logps/rejected": -1.3457529544830322, + "loss": 3.6689, + "nll_loss": 3.6088767051696777, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11501598358154297, + "rewards/margins": 0.019559307023882866, + "rewards/rejected": -0.13457529246807098, + "step": 120 + }, + { + "epoch": 0.07527216174183515, + "grad_norm": 0.5030676126480103, + "learning_rate": 4.6975000000000003e-05, + "log_odds_chosen": 0.1841212958097458, + "log_odds_ratio": -0.6212969422340393, + "logits/chosen": 0.10191065073013306, + "logits/rejected": 0.07480275630950928, + "logps/chosen": -1.2450635433197021, + "logps/rejected": -1.3654481172561646, + "loss": 2.7331, + "nll_loss": 2.670966625213623, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12450635433197021, + "rewards/margins": 0.012038461863994598, + "rewards/rejected": -0.13654480874538422, + "step": 121 + }, + { + "epoch": 0.07589424572317263, + "grad_norm": 0.47793930768966675, + "learning_rate": 4.695e-05, + "log_odds_chosen": 0.22587494552135468, + "log_odds_ratio": -0.6521732211112976, + "logits/chosen": 0.14298784732818604, + "logits/rejected": 0.07297507673501968, + "logps/chosen": -1.1125988960266113, + "logps/rejected": -1.2532191276550293, + "loss": 3.0589, + "nll_loss": 2.993687391281128, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11125989258289337, + "rewards/margins": 0.014062018133699894, + "rewards/rejected": -0.12532192468643188, + "step": 122 + }, + { + "epoch": 0.07651632970451011, + "grad_norm": 0.585175096988678, + "learning_rate": 4.6925e-05, + "log_odds_chosen": 0.2622927725315094, + "log_odds_ratio": -0.598949670791626, + "logits/chosen": 0.06829071789979935, + "logits/rejected": -0.06984008848667145, + "logps/chosen": -1.279144287109375, + "logps/rejected": -1.4879951477050781, + "loss": 2.6114, + "nll_loss": 2.5515384674072266, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1279144287109375, + "rewards/margins": 0.020885098725557327, + "rewards/rejected": -0.14879952371120453, + "step": 123 + }, + { + "epoch": 0.07713841368584759, + "grad_norm": 0.5636870861053467, + "learning_rate": 4.69e-05, + "log_odds_chosen": 0.9784868955612183, + "log_odds_ratio": -0.40459758043289185, + "logits/chosen": 0.18325799703598022, + "logits/rejected": -0.04458653926849365, + "logps/chosen": -0.9027924537658691, + "logps/rejected": -1.5085422992706299, + "loss": 3.4475, + "nll_loss": 3.4070000648498535, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09027925133705139, + "rewards/margins": 0.060574986040592194, + "rewards/rejected": -0.150854229927063, + "step": 124 + }, + { + "epoch": 0.07776049766718507, + "grad_norm": 0.5631504654884338, + "learning_rate": 4.6875e-05, + "log_odds_chosen": 0.8957180976867676, + "log_odds_ratio": -0.46610015630722046, + "logits/chosen": 0.13964581489562988, + "logits/rejected": 0.029128514230251312, + "logps/chosen": -0.8781135678291321, + "logps/rejected": -1.3816094398498535, + "loss": 2.6724, + "nll_loss": 2.6258134841918945, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08781135827302933, + "rewards/margins": 0.05034959316253662, + "rewards/rejected": -0.13816094398498535, + "step": 125 + }, + { + "epoch": 0.07838258164852255, + "grad_norm": 0.3702596426010132, + "learning_rate": 4.685000000000001e-05, + "log_odds_chosen": 0.15894243121147156, + "log_odds_ratio": -0.6334323883056641, + "logits/chosen": 0.2041868418455124, + "logits/rejected": 0.06562945991754532, + "logps/chosen": -1.257232427597046, + "logps/rejected": -1.3767375946044922, + "loss": 3.3947, + "nll_loss": 3.3313791751861572, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12572325766086578, + "rewards/margins": 0.011950517073273659, + "rewards/rejected": -0.1376737654209137, + "step": 126 + }, + { + "epoch": 0.07900466562986003, + "grad_norm": 0.43313437700271606, + "learning_rate": 4.6825e-05, + "log_odds_chosen": 0.21152283251285553, + "log_odds_ratio": -0.6254419684410095, + "logits/chosen": -0.06763223558664322, + "logits/rejected": -0.05511503666639328, + "logps/chosen": -1.2304375171661377, + "logps/rejected": -1.3703246116638184, + "loss": 2.5647, + "nll_loss": 2.502199411392212, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12304375320672989, + "rewards/margins": 0.013988707214593887, + "rewards/rejected": -0.13703244924545288, + "step": 127 + }, + { + "epoch": 0.0796267496111975, + "grad_norm": 0.4836374521255493, + "learning_rate": 4.6800000000000006e-05, + "log_odds_chosen": -0.18248885869979858, + "log_odds_ratio": -0.8107894659042358, + "logits/chosen": 0.10594216734170914, + "logits/rejected": -0.056509390473365784, + "logps/chosen": -1.2352960109710693, + "logps/rejected": -1.1191729307174683, + "loss": 3.1493, + "nll_loss": 3.0681915283203125, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1235295981168747, + "rewards/margins": -0.011612308211624622, + "rewards/rejected": -0.11191728711128235, + "step": 128 + }, + { + "epoch": 0.080248833592535, + "grad_norm": 0.41119441390037537, + "learning_rate": 4.6775000000000005e-05, + "log_odds_chosen": 0.48518961668014526, + "log_odds_ratio": -0.49193328619003296, + "logits/chosen": 0.09460698068141937, + "logits/rejected": 0.13982588052749634, + "logps/chosen": -1.2239277362823486, + "logps/rejected": -1.5707978010177612, + "loss": 2.8458, + "nll_loss": 2.796567916870117, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12239278107881546, + "rewards/margins": 0.03468699753284454, + "rewards/rejected": -0.1570797860622406, + "step": 129 + }, + { + "epoch": 0.08087091757387248, + "grad_norm": 0.5971326231956482, + "learning_rate": 4.6750000000000005e-05, + "log_odds_chosen": 0.3527142405509949, + "log_odds_ratio": -0.6361883878707886, + "logits/chosen": 0.3607098460197449, + "logits/rejected": -0.14897310733795166, + "logps/chosen": -1.3266918659210205, + "logps/rejected": -1.5898045301437378, + "loss": 3.5671, + "nll_loss": 3.5034427642822266, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13266919553279877, + "rewards/margins": 0.02631126344203949, + "rewards/rejected": -0.15898045897483826, + "step": 130 + }, + { + "epoch": 0.08149300155520996, + "grad_norm": 0.33660298585891724, + "learning_rate": 4.6725000000000004e-05, + "log_odds_chosen": 0.866208553314209, + "log_odds_ratio": -0.4489937126636505, + "logits/chosen": 0.11533968150615692, + "logits/rejected": 0.0789806991815567, + "logps/chosen": -0.9988769888877869, + "logps/rejected": -1.605279803276062, + "loss": 2.9545, + "nll_loss": 2.9095656871795654, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09988771378993988, + "rewards/margins": 0.06064027547836304, + "rewards/rejected": -0.16052797436714172, + "step": 131 + }, + { + "epoch": 0.08211508553654744, + "grad_norm": 0.31415626406669617, + "learning_rate": 4.6700000000000003e-05, + "log_odds_chosen": 0.2750133275985718, + "log_odds_ratio": -0.668391227722168, + "logits/chosen": 0.2666417360305786, + "logits/rejected": -0.017590831965208054, + "logps/chosen": -1.0113110542297363, + "logps/rejected": -1.1601898670196533, + "loss": 3.5387, + "nll_loss": 3.471817970275879, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10113111138343811, + "rewards/margins": 0.014887874014675617, + "rewards/rejected": -0.11601898819208145, + "step": 132 + }, + { + "epoch": 0.08273716951788491, + "grad_norm": 0.5030128955841064, + "learning_rate": 4.6675e-05, + "log_odds_chosen": 1.185779094696045, + "log_odds_ratio": -0.3972381353378296, + "logits/chosen": 0.14357468485832214, + "logits/rejected": 0.00014878623187541962, + "logps/chosen": -1.0277682542800903, + "logps/rejected": -1.820441722869873, + "loss": 3.2187, + "nll_loss": 3.178988456726074, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10277682542800903, + "rewards/margins": 0.07926735281944275, + "rewards/rejected": -0.1820441633462906, + "step": 133 + }, + { + "epoch": 0.0833592534992224, + "grad_norm": 0.4640537202358246, + "learning_rate": 4.665e-05, + "log_odds_chosen": -0.1519235223531723, + "log_odds_ratio": -0.8262673616409302, + "logits/chosen": 0.11444417387247086, + "logits/rejected": -0.012055326253175735, + "logps/chosen": -1.376497745513916, + "logps/rejected": -1.2670477628707886, + "loss": 2.9043, + "nll_loss": 2.821722984313965, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1376497745513916, + "rewards/margins": -0.010944999754428864, + "rewards/rejected": -0.12670478224754333, + "step": 134 + }, + { + "epoch": 0.08398133748055987, + "grad_norm": 0.4529246389865875, + "learning_rate": 4.6625e-05, + "log_odds_chosen": 0.1725572943687439, + "log_odds_ratio": -0.6657087206840515, + "logits/chosen": 0.08027800917625427, + "logits/rejected": -0.09482064098119736, + "logps/chosen": -1.1149572134017944, + "logps/rejected": -1.2249038219451904, + "loss": 2.7318, + "nll_loss": 2.6652512550354004, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1114957183599472, + "rewards/margins": 0.01099465787410736, + "rewards/rejected": -0.12249037623405457, + "step": 135 + }, + { + "epoch": 0.08460342146189735, + "grad_norm": 0.4171338677406311, + "learning_rate": 4.660000000000001e-05, + "log_odds_chosen": -0.09017062187194824, + "log_odds_ratio": -0.7511317133903503, + "logits/chosen": 0.2360968291759491, + "logits/rejected": -0.010965634137392044, + "logps/chosen": -1.1789023876190186, + "logps/rejected": -1.0944397449493408, + "loss": 3.3572, + "nll_loss": 3.2820589542388916, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.11789023876190186, + "rewards/margins": -0.008446259424090385, + "rewards/rejected": -0.10944397747516632, + "step": 136 + }, + { + "epoch": 0.08522550544323483, + "grad_norm": 0.547868013381958, + "learning_rate": 4.6575e-05, + "log_odds_chosen": 0.7603722810745239, + "log_odds_ratio": -0.49443089962005615, + "logits/chosen": -0.03645198792219162, + "logits/rejected": -0.07120641320943832, + "logps/chosen": -1.1619699001312256, + "logps/rejected": -1.7555570602416992, + "loss": 2.7566, + "nll_loss": 2.707111120223999, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11619699001312256, + "rewards/margins": 0.05935873091220856, + "rewards/rejected": -0.17555572092533112, + "step": 137 + }, + { + "epoch": 0.08584758942457232, + "grad_norm": 0.3483281433582306, + "learning_rate": 4.655000000000001e-05, + "log_odds_chosen": 0.3946763873100281, + "log_odds_ratio": -0.6060354113578796, + "logits/chosen": 0.09304636716842651, + "logits/rejected": 0.01444307342171669, + "logps/chosen": -1.247818946838379, + "logps/rejected": -1.4207770824432373, + "loss": 3.0866, + "nll_loss": 3.025946855545044, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12478190660476685, + "rewards/margins": 0.017295803874731064, + "rewards/rejected": -0.1420777142047882, + "step": 138 + }, + { + "epoch": 0.0864696734059098, + "grad_norm": 0.4377744495868683, + "learning_rate": 4.6525e-05, + "log_odds_chosen": 0.5676741003990173, + "log_odds_ratio": -0.4999235272407532, + "logits/chosen": 0.18979693949222565, + "logits/rejected": -0.09224162995815277, + "logps/chosen": -1.0275253057479858, + "logps/rejected": -1.4091662168502808, + "loss": 3.1186, + "nll_loss": 3.068612575531006, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10275252908468246, + "rewards/margins": 0.03816410154104233, + "rewards/rejected": -0.140916645526886, + "step": 139 + }, + { + "epoch": 0.08709175738724728, + "grad_norm": 0.5015956163406372, + "learning_rate": 4.6500000000000005e-05, + "log_odds_chosen": 0.40913963317871094, + "log_odds_ratio": -0.5399808883666992, + "logits/chosen": 0.13208572566509247, + "logits/rejected": -0.042870864272117615, + "logps/chosen": -1.4589378833770752, + "logps/rejected": -1.76136314868927, + "loss": 2.8749, + "nll_loss": 2.8209028244018555, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14589379727840424, + "rewards/margins": 0.03024252876639366, + "rewards/rejected": -0.176136314868927, + "step": 140 + }, + { + "epoch": 0.08771384136858476, + "grad_norm": 0.5315511226654053, + "learning_rate": 4.6475000000000005e-05, + "log_odds_chosen": 0.23443233966827393, + "log_odds_ratio": -0.6079375743865967, + "logits/chosen": 0.10196913778781891, + "logits/rejected": -0.09439704567193985, + "logps/chosen": -1.2660562992095947, + "logps/rejected": -1.440688133239746, + "loss": 2.9088, + "nll_loss": 2.8479607105255127, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12660562992095947, + "rewards/margins": 0.01746317744255066, + "rewards/rejected": -0.14406880736351013, + "step": 141 + }, + { + "epoch": 0.08833592534992224, + "grad_norm": 0.48670142889022827, + "learning_rate": 4.6450000000000004e-05, + "log_odds_chosen": 0.20135235786437988, + "log_odds_ratio": -0.6504536271095276, + "logits/chosen": 0.057795051485300064, + "logits/rejected": -0.16002613306045532, + "logps/chosen": -1.202336072921753, + "logps/rejected": -1.336153268814087, + "loss": 2.9102, + "nll_loss": 2.8451108932495117, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12023360282182693, + "rewards/margins": 0.01338173821568489, + "rewards/rejected": -0.13361534476280212, + "step": 142 + }, + { + "epoch": 0.08895800933125972, + "grad_norm": 0.4415438771247864, + "learning_rate": 4.6425000000000004e-05, + "log_odds_chosen": 0.61316978931427, + "log_odds_ratio": -0.4622770845890045, + "logits/chosen": 0.04670334979891777, + "logits/rejected": 0.007671605795621872, + "logps/chosen": -1.3000093698501587, + "logps/rejected": -1.7552378177642822, + "loss": 2.9155, + "nll_loss": 2.869236707687378, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13000094890594482, + "rewards/margins": 0.045522838830947876, + "rewards/rejected": -0.1755237877368927, + "step": 143 + }, + { + "epoch": 0.0895800933125972, + "grad_norm": 0.5169774889945984, + "learning_rate": 4.64e-05, + "log_odds_chosen": 0.6011465787887573, + "log_odds_ratio": -0.5140647292137146, + "logits/chosen": 0.007771043106913567, + "logits/rejected": -0.15149809420108795, + "logps/chosen": -1.184515118598938, + "logps/rejected": -1.645982027053833, + "loss": 2.7975, + "nll_loss": 2.7460548877716064, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11845151335000992, + "rewards/margins": 0.0461466908454895, + "rewards/rejected": -0.16459819674491882, + "step": 144 + }, + { + "epoch": 0.09020217729393468, + "grad_norm": 0.6221879720687866, + "learning_rate": 4.6375e-05, + "log_odds_chosen": 0.6371638774871826, + "log_odds_ratio": -0.5189849138259888, + "logits/chosen": 0.051749397069215775, + "logits/rejected": -0.1522921919822693, + "logps/chosen": -0.8362572193145752, + "logps/rejected": -1.1811851263046265, + "loss": 3.1062, + "nll_loss": 3.0543196201324463, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08362571895122528, + "rewards/margins": 0.034492794424295425, + "rewards/rejected": -0.1181185245513916, + "step": 145 + }, + { + "epoch": 0.09082426127527216, + "grad_norm": 0.4802875518798828, + "learning_rate": 4.635e-05, + "log_odds_chosen": 0.3668875992298126, + "log_odds_ratio": -0.5806620717048645, + "logits/chosen": 0.10094377398490906, + "logits/rejected": -0.02433057874441147, + "logps/chosen": -1.2760828733444214, + "logps/rejected": -1.5276360511779785, + "loss": 2.9177, + "nll_loss": 2.859609842300415, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1276082843542099, + "rewards/margins": 0.02515532448887825, + "rewards/rejected": -0.15276360511779785, + "step": 146 + }, + { + "epoch": 0.09144634525660965, + "grad_norm": 0.4567488133907318, + "learning_rate": 4.6325e-05, + "log_odds_chosen": 0.6026850938796997, + "log_odds_ratio": -0.4573545753955841, + "logits/chosen": 0.19558559358119965, + "logits/rejected": 0.03107052482664585, + "logps/chosen": -1.185868263244629, + "logps/rejected": -1.6299797296524048, + "loss": 3.3654, + "nll_loss": 3.3196253776550293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11858682334423065, + "rewards/margins": 0.044411152601242065, + "rewards/rejected": -0.16299797594547272, + "step": 147 + }, + { + "epoch": 0.09206842923794713, + "grad_norm": 0.5302126407623291, + "learning_rate": 4.630000000000001e-05, + "log_odds_chosen": -0.01642867922782898, + "log_odds_ratio": -0.7874202728271484, + "logits/chosen": 0.11570745706558228, + "logits/rejected": -0.1371677815914154, + "logps/chosen": -1.3408689498901367, + "logps/rejected": -1.3910057544708252, + "loss": 3.5827, + "nll_loss": 3.5039186477661133, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13408689200878143, + "rewards/margins": 0.005013669840991497, + "rewards/rejected": -0.1391005516052246, + "step": 148 + }, + { + "epoch": 0.0926905132192846, + "grad_norm": 0.4184373617172241, + "learning_rate": 4.6275e-05, + "log_odds_chosen": 0.05463185906410217, + "log_odds_ratio": -0.7645725607872009, + "logits/chosen": 0.1671661138534546, + "logits/rejected": 0.034170206636190414, + "logps/chosen": -1.3616355657577515, + "logps/rejected": -1.3897101879119873, + "loss": 3.3596, + "nll_loss": 3.283134698867798, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.13616356253623962, + "rewards/margins": 0.002807457000017166, + "rewards/rejected": -0.13897103071212769, + "step": 149 + }, + { + "epoch": 0.09331259720062209, + "grad_norm": 0.42127570509910583, + "learning_rate": 4.6250000000000006e-05, + "log_odds_chosen": 0.08022980391979218, + "log_odds_ratio": -0.7030846476554871, + "logits/chosen": 0.0035371780395507812, + "logits/rejected": 0.003839358687400818, + "logps/chosen": -1.3736772537231445, + "logps/rejected": -1.378920078277588, + "loss": 2.8468, + "nll_loss": 2.7764601707458496, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13736772537231445, + "rewards/margins": 0.0005242684856057167, + "rewards/rejected": -0.1378920078277588, + "step": 150 + }, + { + "epoch": 0.09393468118195956, + "grad_norm": 0.4007570147514343, + "learning_rate": 4.6225e-05, + "log_odds_chosen": 0.06232370063662529, + "log_odds_ratio": -0.6894880533218384, + "logits/chosen": 0.028553307056427002, + "logits/rejected": 0.13209117949008942, + "logps/chosen": -1.5964021682739258, + "logps/rejected": -1.6478145122528076, + "loss": 2.7767, + "nll_loss": 2.707792282104492, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15964022278785706, + "rewards/margins": 0.005141226574778557, + "rewards/rejected": -0.16478145122528076, + "step": 151 + }, + { + "epoch": 0.09455676516329704, + "grad_norm": 0.506729006767273, + "learning_rate": 4.6200000000000005e-05, + "log_odds_chosen": 0.44813084602355957, + "log_odds_ratio": -0.6032027006149292, + "logits/chosen": 0.19062553346157074, + "logits/rejected": -0.01674717850983143, + "logps/chosen": -1.1239427328109741, + "logps/rejected": -1.383901834487915, + "loss": 3.1411, + "nll_loss": 3.0807762145996094, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11239427328109741, + "rewards/margins": 0.025995904579758644, + "rewards/rejected": -0.1383901834487915, + "step": 152 + }, + { + "epoch": 0.09517884914463452, + "grad_norm": 0.4609549641609192, + "learning_rate": 4.6175000000000004e-05, + "log_odds_chosen": 0.4586949646472931, + "log_odds_ratio": -0.5538212060928345, + "logits/chosen": 0.25490209460258484, + "logits/rejected": 0.08342747390270233, + "logps/chosen": -1.3676748275756836, + "logps/rejected": -1.6974760293960571, + "loss": 3.281, + "nll_loss": 3.2256217002868652, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13676749169826508, + "rewards/margins": 0.032980117946863174, + "rewards/rejected": -0.16974762082099915, + "step": 153 + }, + { + "epoch": 0.095800933125972, + "grad_norm": 0.3721259832382202, + "learning_rate": 4.6150000000000004e-05, + "log_odds_chosen": 0.5167128443717957, + "log_odds_ratio": -0.5634645223617554, + "logits/chosen": 0.06376560777425766, + "logits/rejected": -0.0771709680557251, + "logps/chosen": -1.1290371417999268, + "logps/rejected": -1.432267189025879, + "loss": 2.8284, + "nll_loss": 2.7720093727111816, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11290371417999268, + "rewards/margins": 0.03032299503684044, + "rewards/rejected": -0.1432267129421234, + "step": 154 + }, + { + "epoch": 0.09642301710730948, + "grad_norm": 0.43300989270210266, + "learning_rate": 4.6125e-05, + "log_odds_chosen": 0.20810005068778992, + "log_odds_ratio": -0.7468218207359314, + "logits/chosen": 0.2738891839981079, + "logits/rejected": 0.0035995468497276306, + "logps/chosen": -1.1184593439102173, + "logps/rejected": -1.2356008291244507, + "loss": 3.6973, + "nll_loss": 3.6226534843444824, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1118459403514862, + "rewards/margins": 0.011714148335158825, + "rewards/rejected": -0.12356008589267731, + "step": 155 + }, + { + "epoch": 0.09704510108864697, + "grad_norm": 0.4422686994075775, + "learning_rate": 4.61e-05, + "log_odds_chosen": 0.38565486669540405, + "log_odds_ratio": -0.5663627982139587, + "logits/chosen": 0.1406802535057068, + "logits/rejected": -0.10811451077461243, + "logps/chosen": -1.2533131837844849, + "logps/rejected": -1.501025915145874, + "loss": 2.9378, + "nll_loss": 2.881122589111328, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.125331312417984, + "rewards/margins": 0.02477126568555832, + "rewards/rejected": -0.15010258555412292, + "step": 156 + }, + { + "epoch": 0.09766718506998445, + "grad_norm": 0.43218180537223816, + "learning_rate": 4.6075e-05, + "log_odds_chosen": 0.7944599390029907, + "log_odds_ratio": -0.4741981029510498, + "logits/chosen": 0.13778428733348846, + "logits/rejected": 0.048661183565855026, + "logps/chosen": -1.2979130744934082, + "logps/rejected": -1.8968315124511719, + "loss": 3.1609, + "nll_loss": 3.1134564876556396, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12979131937026978, + "rewards/margins": 0.05989184230566025, + "rewards/rejected": -0.18968316912651062, + "step": 157 + }, + { + "epoch": 0.09828926905132193, + "grad_norm": 0.3956831991672516, + "learning_rate": 4.605e-05, + "log_odds_chosen": 0.27946868538856506, + "log_odds_ratio": -0.6237690448760986, + "logits/chosen": 0.3799353837966919, + "logits/rejected": 0.11462019383907318, + "logps/chosen": -1.2597386837005615, + "logps/rejected": -1.4242260456085205, + "loss": 3.4372, + "nll_loss": 3.374814987182617, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1259738653898239, + "rewards/margins": 0.016448739916086197, + "rewards/rejected": -0.142422616481781, + "step": 158 + }, + { + "epoch": 0.09891135303265941, + "grad_norm": 0.40490007400512695, + "learning_rate": 4.6025e-05, + "log_odds_chosen": 1.3628696203231812, + "log_odds_ratio": -0.38135695457458496, + "logits/chosen": 0.16640058159828186, + "logits/rejected": 0.0424039289355278, + "logps/chosen": -0.7394822835922241, + "logps/rejected": -1.6354302167892456, + "loss": 3.0489, + "nll_loss": 3.010765552520752, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07394823431968689, + "rewards/margins": 0.08959478884935379, + "rewards/rejected": -0.16354301571846008, + "step": 159 + }, + { + "epoch": 0.09953343701399689, + "grad_norm": 0.522266685962677, + "learning_rate": 4.600000000000001e-05, + "log_odds_chosen": 0.31891077756881714, + "log_odds_ratio": -0.6262969374656677, + "logits/chosen": 0.2351851910352707, + "logits/rejected": 0.0422259159386158, + "logps/chosen": -1.1578633785247803, + "logps/rejected": -1.3393428325653076, + "loss": 2.8211, + "nll_loss": 2.7584638595581055, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11578632891178131, + "rewards/margins": 0.018147945404052734, + "rewards/rejected": -0.13393428921699524, + "step": 160 + }, + { + "epoch": 0.10015552099533437, + "grad_norm": 0.4992579221725464, + "learning_rate": 4.5975e-05, + "log_odds_chosen": 0.36979615688323975, + "log_odds_ratio": -0.5988181829452515, + "logits/chosen": 0.2722373604774475, + "logits/rejected": 0.13797958195209503, + "logps/chosen": -1.223292589187622, + "logps/rejected": -1.4450644254684448, + "loss": 3.065, + "nll_loss": 3.0051493644714355, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.12232926487922668, + "rewards/margins": 0.022177184000611305, + "rewards/rejected": -0.14450645446777344, + "step": 161 + }, + { + "epoch": 0.10077760497667185, + "grad_norm": 0.48701006174087524, + "learning_rate": 4.5950000000000006e-05, + "log_odds_chosen": 0.34209126234054565, + "log_odds_ratio": -0.550736665725708, + "logits/chosen": 0.14484034478664398, + "logits/rejected": -0.13392439484596252, + "logps/chosen": -1.1791408061981201, + "logps/rejected": -1.4220118522644043, + "loss": 2.7731, + "nll_loss": 2.718043088912964, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11791408061981201, + "rewards/margins": 0.02428712509572506, + "rewards/rejected": -0.14220120012760162, + "step": 162 + }, + { + "epoch": 0.10139968895800933, + "grad_norm": 0.42111936211586, + "learning_rate": 4.5925e-05, + "log_odds_chosen": 0.09586916118860245, + "log_odds_ratio": -0.6500096321105957, + "logits/chosen": 0.3419564366340637, + "logits/rejected": 0.21585996448993683, + "logps/chosen": -1.4152202606201172, + "logps/rejected": -1.4942808151245117, + "loss": 3.3547, + "nll_loss": 3.2897045612335205, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14152203500270844, + "rewards/margins": 0.007906057871878147, + "rewards/rejected": -0.1494280993938446, + "step": 163 + }, + { + "epoch": 0.1020217729393468, + "grad_norm": 0.651195228099823, + "learning_rate": 4.5900000000000004e-05, + "log_odds_chosen": 0.007867768406867981, + "log_odds_ratio": -0.8247443437576294, + "logits/chosen": 0.15761840343475342, + "logits/rejected": 0.04952532798051834, + "logps/chosen": -1.4861036539077759, + "logps/rejected": -1.4350836277008057, + "loss": 2.6309, + "nll_loss": 2.548461437225342, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.14861036837100983, + "rewards/margins": -0.005102001130580902, + "rewards/rejected": -0.14350835978984833, + "step": 164 + }, + { + "epoch": 0.1026438569206843, + "grad_norm": 0.47373661398887634, + "learning_rate": 4.5875000000000004e-05, + "log_odds_chosen": 0.15654607117176056, + "log_odds_ratio": -0.67220139503479, + "logits/chosen": 0.42479315400123596, + "logits/rejected": 0.2729908227920532, + "logps/chosen": -1.3848153352737427, + "logps/rejected": -1.4821150302886963, + "loss": 3.1219, + "nll_loss": 3.054720878601074, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1384815275669098, + "rewards/margins": 0.009729975834488869, + "rewards/rejected": -0.1482115089893341, + "step": 165 + }, + { + "epoch": 0.10326594090202178, + "grad_norm": 0.39424991607666016, + "learning_rate": 4.585e-05, + "log_odds_chosen": 0.8976173400878906, + "log_odds_ratio": -0.5067555904388428, + "logits/chosen": 0.12819889187812805, + "logits/rejected": -0.08246054500341415, + "logps/chosen": -0.9714441299438477, + "logps/rejected": -1.3971306085586548, + "loss": 3.0076, + "nll_loss": 2.9568872451782227, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09714441001415253, + "rewards/margins": 0.04256865009665489, + "rewards/rejected": -0.13971306383609772, + "step": 166 + }, + { + "epoch": 0.10388802488335926, + "grad_norm": 0.4359923303127289, + "learning_rate": 4.5825e-05, + "log_odds_chosen": 0.13466989994049072, + "log_odds_ratio": -0.7068646550178528, + "logits/chosen": 0.29034674167633057, + "logits/rejected": -0.1330055147409439, + "logps/chosen": -1.3547630310058594, + "logps/rejected": -1.426397442817688, + "loss": 3.2544, + "nll_loss": 3.1836836338043213, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13547630608081818, + "rewards/margins": 0.007163448259234428, + "rewards/rejected": -0.14263975620269775, + "step": 167 + }, + { + "epoch": 0.10451010886469674, + "grad_norm": 0.47172248363494873, + "learning_rate": 4.58e-05, + "log_odds_chosen": 0.8191059827804565, + "log_odds_ratio": -0.5032205581665039, + "logits/chosen": 0.17938898503780365, + "logits/rejected": 0.05944395065307617, + "logps/chosen": -1.067983627319336, + "logps/rejected": -1.5608168840408325, + "loss": 2.9386, + "nll_loss": 2.888256311416626, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10679835081100464, + "rewards/margins": 0.04928332567214966, + "rewards/rejected": -0.1560816764831543, + "step": 168 + }, + { + "epoch": 0.10513219284603421, + "grad_norm": 0.5837737321853638, + "learning_rate": 4.5775e-05, + "log_odds_chosen": 0.21645092964172363, + "log_odds_ratio": -0.6055108904838562, + "logits/chosen": 0.42070725560188293, + "logits/rejected": 0.0440496951341629, + "logps/chosen": -1.0943289995193481, + "logps/rejected": -1.2313555479049683, + "loss": 3.6424, + "nll_loss": 3.5818536281585693, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1094328984618187, + "rewards/margins": 0.01370265707373619, + "rewards/rejected": -0.12313555181026459, + "step": 169 + }, + { + "epoch": 0.1057542768273717, + "grad_norm": 0.5199751257896423, + "learning_rate": 4.575e-05, + "log_odds_chosen": 0.17023658752441406, + "log_odds_ratio": -0.7053340077400208, + "logits/chosen": 0.16789722442626953, + "logits/rejected": 0.13283823430538177, + "logps/chosen": -1.2800236940383911, + "logps/rejected": -1.3655457496643066, + "loss": 2.931, + "nll_loss": 2.860494613647461, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1280023753643036, + "rewards/margins": 0.008552204817533493, + "rewards/rejected": -0.1365545690059662, + "step": 170 + }, + { + "epoch": 0.10637636080870917, + "grad_norm": 0.6239449381828308, + "learning_rate": 4.5725e-05, + "log_odds_chosen": -0.17279069125652313, + "log_odds_ratio": -0.8778814077377319, + "logits/chosen": 0.43515917658805847, + "logits/rejected": 0.1489274650812149, + "logps/chosen": -1.5174874067306519, + "logps/rejected": -1.3432010412216187, + "loss": 3.8779, + "nll_loss": 3.790090799331665, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.15174873173236847, + "rewards/margins": -0.017428629100322723, + "rewards/rejected": -0.13432011008262634, + "step": 171 + }, + { + "epoch": 0.10699844479004665, + "grad_norm": 0.5014814734458923, + "learning_rate": 4.5700000000000006e-05, + "log_odds_chosen": 0.5290875434875488, + "log_odds_ratio": -0.5955926179885864, + "logits/chosen": 0.05028457194566727, + "logits/rejected": -0.058185793459415436, + "logps/chosen": -1.0905635356903076, + "logps/rejected": -1.361629605293274, + "loss": 2.4669, + "nll_loss": 2.407294273376465, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10905636847019196, + "rewards/margins": 0.027106598019599915, + "rewards/rejected": -0.13616296648979187, + "step": 172 + }, + { + "epoch": 0.10762052877138413, + "grad_norm": 0.45314520597457886, + "learning_rate": 4.5675e-05, + "log_odds_chosen": 0.11316041648387909, + "log_odds_ratio": -0.6723555326461792, + "logits/chosen": 0.1640755981206894, + "logits/rejected": -0.034357067197561264, + "logps/chosen": -1.294669508934021, + "logps/rejected": -1.3866937160491943, + "loss": 2.9407, + "nll_loss": 2.8734371662139893, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1294669657945633, + "rewards/margins": 0.009202406741678715, + "rewards/rejected": -0.13866937160491943, + "step": 173 + }, + { + "epoch": 0.10824261275272162, + "grad_norm": 0.44885244965553284, + "learning_rate": 4.5650000000000005e-05, + "log_odds_chosen": 0.3160461485385895, + "log_odds_ratio": -0.5713240504264832, + "logits/chosen": 0.23067131638526917, + "logits/rejected": 0.04281052201986313, + "logps/chosen": -1.31607186794281, + "logps/rejected": -1.5356450080871582, + "loss": 3.2553, + "nll_loss": 3.1981256008148193, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13160717487335205, + "rewards/margins": 0.021957319229841232, + "rewards/rejected": -0.15356451272964478, + "step": 174 + }, + { + "epoch": 0.1088646967340591, + "grad_norm": 0.49723100662231445, + "learning_rate": 4.5625e-05, + "log_odds_chosen": 0.9637750387191772, + "log_odds_ratio": -0.4667288661003113, + "logits/chosen": 0.13240934908390045, + "logits/rejected": -0.04236384481191635, + "logps/chosen": -1.2129688262939453, + "logps/rejected": -1.8437656164169312, + "loss": 3.156, + "nll_loss": 3.109340190887451, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12129689753055573, + "rewards/margins": 0.06307967007160187, + "rewards/rejected": -0.1843765676021576, + "step": 175 + }, + { + "epoch": 0.10948678071539658, + "grad_norm": 0.5128828883171082, + "learning_rate": 4.5600000000000004e-05, + "log_odds_chosen": 0.3789896070957184, + "log_odds_ratio": -0.5478941798210144, + "logits/chosen": 0.15509214997291565, + "logits/rejected": 0.07832697033882141, + "logps/chosen": -1.4777848720550537, + "logps/rejected": -1.7933158874511719, + "loss": 2.8832, + "nll_loss": 2.8284482955932617, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1477784812450409, + "rewards/margins": 0.031553104519844055, + "rewards/rejected": -0.17933160066604614, + "step": 176 + }, + { + "epoch": 0.11010886469673406, + "grad_norm": 0.41308772563934326, + "learning_rate": 4.5575e-05, + "log_odds_chosen": -0.020543716847896576, + "log_odds_ratio": -0.8699101209640503, + "logits/chosen": 0.18716076016426086, + "logits/rejected": -0.06990818679332733, + "logps/chosen": -1.4498722553253174, + "logps/rejected": -1.3525209426879883, + "loss": 3.5567, + "nll_loss": 3.469757556915283, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.14498722553253174, + "rewards/margins": -0.009735142812132835, + "rewards/rejected": -0.13525208830833435, + "step": 177 + }, + { + "epoch": 0.11073094867807154, + "grad_norm": 0.48727282881736755, + "learning_rate": 4.555e-05, + "log_odds_chosen": 0.26913127303123474, + "log_odds_ratio": -0.6672435998916626, + "logits/chosen": 0.163034588098526, + "logits/rejected": -0.04085027799010277, + "logps/chosen": -1.3829305171966553, + "logps/rejected": -1.5941439867019653, + "loss": 3.2551, + "nll_loss": 3.1884212493896484, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13829305768013, + "rewards/margins": 0.021121343597769737, + "rewards/rejected": -0.1594144105911255, + "step": 178 + }, + { + "epoch": 0.11135303265940902, + "grad_norm": 0.3962324857711792, + "learning_rate": 4.5525e-05, + "log_odds_chosen": 0.2522580623626709, + "log_odds_ratio": -0.6461670994758606, + "logits/chosen": 0.006648369133472443, + "logits/rejected": -0.11574047803878784, + "logps/chosen": -1.311452865600586, + "logps/rejected": -1.4216885566711426, + "loss": 2.9589, + "nll_loss": 2.894331693649292, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13114528357982635, + "rewards/margins": 0.011023584753274918, + "rewards/rejected": -0.14216886460781097, + "step": 179 + }, + { + "epoch": 0.1119751166407465, + "grad_norm": 0.4192509353160858, + "learning_rate": 4.55e-05, + "log_odds_chosen": 0.3250114917755127, + "log_odds_ratio": -0.6104202270507812, + "logits/chosen": 0.20732258260250092, + "logits/rejected": 0.005627447739243507, + "logps/chosen": -1.2976369857788086, + "logps/rejected": -1.5418976545333862, + "loss": 3.5311, + "nll_loss": 3.4701075553894043, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12976369261741638, + "rewards/margins": 0.02442607283592224, + "rewards/rejected": -0.15418976545333862, + "step": 180 + }, + { + "epoch": 0.11259720062208398, + "grad_norm": 0.3853883445262909, + "learning_rate": 4.5475e-05, + "log_odds_chosen": 0.11205227673053741, + "log_odds_ratio": -0.65590500831604, + "logits/chosen": 0.15651991963386536, + "logits/rejected": -0.04156707227230072, + "logps/chosen": -1.3088514804840088, + "logps/rejected": -1.4056215286254883, + "loss": 3.1677, + "nll_loss": 3.102067470550537, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13088513910770416, + "rewards/margins": 0.009677015244960785, + "rewards/rejected": -0.14056214690208435, + "step": 181 + }, + { + "epoch": 0.11321928460342146, + "grad_norm": 0.560540497303009, + "learning_rate": 4.545000000000001e-05, + "log_odds_chosen": 0.7333866953849792, + "log_odds_ratio": -0.4243454039096832, + "logits/chosen": 0.22672435641288757, + "logits/rejected": -0.04371669888496399, + "logps/chosen": -0.94305020570755, + "logps/rejected": -1.453209638595581, + "loss": 2.9935, + "nll_loss": 2.951089859008789, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09430501610040665, + "rewards/margins": 0.0510159507393837, + "rewards/rejected": -0.14532096683979034, + "step": 182 + }, + { + "epoch": 0.11384136858475895, + "grad_norm": 0.3617045283317566, + "learning_rate": 4.5425e-05, + "log_odds_chosen": 1.3186687231063843, + "log_odds_ratio": -0.3125315308570862, + "logits/chosen": 0.09166554361581802, + "logits/rejected": 0.04617012292146683, + "logps/chosen": -0.8101480007171631, + "logps/rejected": -1.4856460094451904, + "loss": 3.1663, + "nll_loss": 3.1350221633911133, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08101479709148407, + "rewards/margins": 0.06754979491233826, + "rewards/rejected": -0.14856459200382233, + "step": 183 + }, + { + "epoch": 0.11446345256609643, + "grad_norm": 0.43170198798179626, + "learning_rate": 4.5400000000000006e-05, + "log_odds_chosen": 0.6821365356445312, + "log_odds_ratio": -0.5054466724395752, + "logits/chosen": 0.12610091269016266, + "logits/rejected": 0.06542984396219254, + "logps/chosen": -1.140608310699463, + "logps/rejected": -1.5901718139648438, + "loss": 3.0324, + "nll_loss": 2.9818594455718994, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11406083405017853, + "rewards/margins": 0.04495634511113167, + "rewards/rejected": -0.1590171754360199, + "step": 184 + }, + { + "epoch": 0.1150855365474339, + "grad_norm": 0.3195982277393341, + "learning_rate": 4.5375e-05, + "log_odds_chosen": 0.5379209518432617, + "log_odds_ratio": -0.49095743894577026, + "logits/chosen": 0.18578393757343292, + "logits/rejected": 0.09170369803905487, + "logps/chosen": -1.0872983932495117, + "logps/rejected": -1.476131558418274, + "loss": 3.5126, + "nll_loss": 3.4635496139526367, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10872984677553177, + "rewards/margins": 0.03888332098722458, + "rewards/rejected": -0.14761316776275635, + "step": 185 + }, + { + "epoch": 0.11570762052877138, + "grad_norm": 0.38618817925453186, + "learning_rate": 4.5350000000000005e-05, + "log_odds_chosen": 0.27880579233169556, + "log_odds_ratio": -0.5841034054756165, + "logits/chosen": 0.22152912616729736, + "logits/rejected": 0.037873830646276474, + "logps/chosen": -1.1515580415725708, + "logps/rejected": -1.3237394094467163, + "loss": 2.9728, + "nll_loss": 2.914379835128784, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11515580117702484, + "rewards/margins": 0.01721813902258873, + "rewards/rejected": -0.13237394392490387, + "step": 186 + }, + { + "epoch": 0.11632970451010886, + "grad_norm": 0.4079546332359314, + "learning_rate": 4.5325000000000004e-05, + "log_odds_chosen": -0.15632662177085876, + "log_odds_ratio": -0.7896630764007568, + "logits/chosen": 0.14888915419578552, + "logits/rejected": 0.015266455709934235, + "logps/chosen": -1.393578290939331, + "logps/rejected": -1.2698719501495361, + "loss": 2.9801, + "nll_loss": 2.9011240005493164, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1393578201532364, + "rewards/margins": -0.012370622716844082, + "rewards/rejected": -0.12698720395565033, + "step": 187 + }, + { + "epoch": 0.11695178849144634, + "grad_norm": 0.41905397176742554, + "learning_rate": 4.53e-05, + "log_odds_chosen": -0.014257103204727173, + "log_odds_ratio": -0.7734930515289307, + "logits/chosen": 0.22645491361618042, + "logits/rejected": 0.10181404650211334, + "logps/chosen": -1.4420051574707031, + "logps/rejected": -1.3750616312026978, + "loss": 3.305, + "nll_loss": 3.2276558876037598, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14420051872730255, + "rewards/margins": -0.006694357842206955, + "rewards/rejected": -0.1375061571598053, + "step": 188 + }, + { + "epoch": 0.11757387247278382, + "grad_norm": 0.4148879647254944, + "learning_rate": 4.5275e-05, + "log_odds_chosen": 0.3161711096763611, + "log_odds_ratio": -0.5657863616943359, + "logits/chosen": 0.09662674367427826, + "logits/rejected": -0.03456714004278183, + "logps/chosen": -1.122739553451538, + "logps/rejected": -1.358430027961731, + "loss": 2.703, + "nll_loss": 2.6464426517486572, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11227396130561829, + "rewards/margins": 0.023569051176309586, + "rewards/rejected": -0.13584300875663757, + "step": 189 + }, + { + "epoch": 0.1181959564541213, + "grad_norm": 1.207576870918274, + "learning_rate": 4.525e-05, + "log_odds_chosen": -0.09153705835342407, + "log_odds_ratio": -0.7909804582595825, + "logits/chosen": 0.0819367915391922, + "logits/rejected": 0.03701075166463852, + "logps/chosen": -1.6013344526290894, + "logps/rejected": -1.5169143676757812, + "loss": 3.0113, + "nll_loss": 2.932208776473999, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1601334512233734, + "rewards/margins": -0.008442009799182415, + "rewards/rejected": -0.15169143676757812, + "step": 190 + }, + { + "epoch": 0.1188180404354588, + "grad_norm": 0.42340028285980225, + "learning_rate": 4.5225e-05, + "log_odds_chosen": 0.7335370779037476, + "log_odds_ratio": -0.4513997435569763, + "logits/chosen": 0.15320263803005219, + "logits/rejected": -0.06381958723068237, + "logps/chosen": -0.8546745181083679, + "logps/rejected": -1.3147962093353271, + "loss": 3.2561, + "nll_loss": 3.210986375808716, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08546745777130127, + "rewards/margins": 0.046012166887521744, + "rewards/rejected": -0.13147962093353271, + "step": 191 + }, + { + "epoch": 0.11944012441679627, + "grad_norm": 0.4323185980319977, + "learning_rate": 4.52e-05, + "log_odds_chosen": 0.43490371108055115, + "log_odds_ratio": -0.5183134078979492, + "logits/chosen": 0.3056216835975647, + "logits/rejected": 0.032028887420892715, + "logps/chosen": -1.260435938835144, + "logps/rejected": -1.577862024307251, + "loss": 3.3018, + "nll_loss": 3.249936819076538, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12604360282421112, + "rewards/margins": 0.031742602586746216, + "rewards/rejected": -0.15778620541095734, + "step": 192 + }, + { + "epoch": 0.12006220839813375, + "grad_norm": 0.3796927332878113, + "learning_rate": 4.5175e-05, + "log_odds_chosen": 0.5728176236152649, + "log_odds_ratio": -0.4924409091472626, + "logits/chosen": 0.09786956757307053, + "logits/rejected": -0.08450242131948471, + "logps/chosen": -0.922787070274353, + "logps/rejected": -1.3142082691192627, + "loss": 2.9078, + "nll_loss": 2.858553886413574, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09227870404720306, + "rewards/margins": 0.039142120629549026, + "rewards/rejected": -0.1314208209514618, + "step": 193 + }, + { + "epoch": 0.12068429237947123, + "grad_norm": 0.7153462171554565, + "learning_rate": 4.5150000000000006e-05, + "log_odds_chosen": 0.5174391865730286, + "log_odds_ratio": -0.5253512263298035, + "logits/chosen": 0.3215792179107666, + "logits/rejected": 0.12055753916501999, + "logps/chosen": -1.3720208406448364, + "logps/rejected": -1.7549231052398682, + "loss": 3.0374, + "nll_loss": 2.98486328125, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13720208406448364, + "rewards/margins": 0.03829022869467735, + "rewards/rejected": -0.1754923164844513, + "step": 194 + }, + { + "epoch": 0.12130637636080871, + "grad_norm": 0.4331601560115814, + "learning_rate": 4.5125e-05, + "log_odds_chosen": 0.7227539420127869, + "log_odds_ratio": -0.4814952313899994, + "logits/chosen": 0.2730720341205597, + "logits/rejected": 0.1367463618516922, + "logps/chosen": -1.1998332738876343, + "logps/rejected": -1.7204265594482422, + "loss": 3.2678, + "nll_loss": 3.21962833404541, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11998332291841507, + "rewards/margins": 0.05205933377146721, + "rewards/rejected": -0.17204266786575317, + "step": 195 + }, + { + "epoch": 0.12192846034214619, + "grad_norm": 0.4868512749671936, + "learning_rate": 4.5100000000000005e-05, + "log_odds_chosen": -0.11273391544818878, + "log_odds_ratio": -0.7624410390853882, + "logits/chosen": 0.162788987159729, + "logits/rejected": 0.038346655666828156, + "logps/chosen": -1.2809481620788574, + "logps/rejected": -1.187037706375122, + "loss": 2.6403, + "nll_loss": 2.5640499591827393, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.12809482216835022, + "rewards/margins": -0.009391050785779953, + "rewards/rejected": -0.11870376765727997, + "step": 196 + }, + { + "epoch": 0.12255054432348367, + "grad_norm": 0.4865812659263611, + "learning_rate": 4.5075e-05, + "log_odds_chosen": -0.08828195929527283, + "log_odds_ratio": -0.826860249042511, + "logits/chosen": 0.2504577338695526, + "logits/rejected": 0.20617207884788513, + "logps/chosen": -1.288416862487793, + "logps/rejected": -1.2700859308242798, + "loss": 3.1873, + "nll_loss": 3.1046361923217773, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12884169816970825, + "rewards/margins": -0.001833103597164154, + "rewards/rejected": -0.1270085871219635, + "step": 197 + }, + { + "epoch": 0.12317262830482115, + "grad_norm": 0.605975866317749, + "learning_rate": 4.5050000000000004e-05, + "log_odds_chosen": 0.6784669756889343, + "log_odds_ratio": -0.4767940044403076, + "logits/chosen": 0.038463614881038666, + "logits/rejected": 0.1368597447872162, + "logps/chosen": -1.371410608291626, + "logps/rejected": -1.9200092554092407, + "loss": 2.0605, + "nll_loss": 2.012791633605957, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13714107871055603, + "rewards/margins": 0.054859861731529236, + "rewards/rejected": -0.19200092554092407, + "step": 198 + }, + { + "epoch": 0.12379471228615863, + "grad_norm": 0.3905656635761261, + "learning_rate": 4.5025000000000003e-05, + "log_odds_chosen": 0.27600833773612976, + "log_odds_ratio": -0.5758750438690186, + "logits/chosen": 0.24896469712257385, + "logits/rejected": 0.1932651847600937, + "logps/chosen": -1.2160751819610596, + "logps/rejected": -1.3880665302276611, + "loss": 3.6016, + "nll_loss": 3.5440330505371094, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12160752713680267, + "rewards/margins": 0.01719912886619568, + "rewards/rejected": -0.13880664110183716, + "step": 199 + }, + { + "epoch": 0.12441679626749612, + "grad_norm": 0.9062671065330505, + "learning_rate": 4.5e-05, + "log_odds_chosen": 0.029399242252111435, + "log_odds_ratio": -0.7444265484809875, + "logits/chosen": 0.16195963323116302, + "logits/rejected": -0.002658102661371231, + "logps/chosen": -1.62650465965271, + "logps/rejected": -1.6063220500946045, + "loss": 3.017, + "nll_loss": 2.9425477981567383, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.162650465965271, + "rewards/margins": -0.0020182570442557335, + "rewards/rejected": -0.16063222289085388, + "step": 200 + }, + { + "epoch": 0.12503888024883358, + "grad_norm": 0.7090647220611572, + "learning_rate": 4.4975e-05, + "log_odds_chosen": -0.061332136392593384, + "log_odds_ratio": -0.7987865209579468, + "logits/chosen": 0.1813337355852127, + "logits/rejected": -0.05886400490999222, + "logps/chosen": -1.4757061004638672, + "logps/rejected": -1.383807897567749, + "loss": 3.2882, + "nll_loss": 3.2082958221435547, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14757061004638672, + "rewards/margins": -0.009189811535179615, + "rewards/rejected": -0.13838079571723938, + "step": 201 + }, + { + "epoch": 0.12566096423017106, + "grad_norm": 0.37017297744750977, + "learning_rate": 4.495e-05, + "log_odds_chosen": 0.6404499411582947, + "log_odds_ratio": -0.5468106269836426, + "logits/chosen": 0.2760981321334839, + "logits/rejected": 0.07182542979717255, + "logps/chosen": -1.1483453512191772, + "logps/rejected": -1.6027138233184814, + "loss": 3.5294, + "nll_loss": 3.47471284866333, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11483453214168549, + "rewards/margins": 0.04543685540556908, + "rewards/rejected": -0.16027137637138367, + "step": 202 + }, + { + "epoch": 0.12628304821150854, + "grad_norm": 0.3965308368206024, + "learning_rate": 4.4925e-05, + "log_odds_chosen": -0.044021353125572205, + "log_odds_ratio": -0.7529421448707581, + "logits/chosen": 0.12207438051700592, + "logits/rejected": 0.07484577596187592, + "logps/chosen": -1.0992047786712646, + "logps/rejected": -1.0575580596923828, + "loss": 2.8192, + "nll_loss": 2.7438852787017822, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.10992048680782318, + "rewards/margins": -0.004164676181972027, + "rewards/rejected": -0.10575580596923828, + "step": 203 + }, + { + "epoch": 0.12690513219284602, + "grad_norm": 0.3560815155506134, + "learning_rate": 4.49e-05, + "log_odds_chosen": 0.6957288384437561, + "log_odds_ratio": -0.4497440457344055, + "logits/chosen": 0.4506039619445801, + "logits/rejected": 0.015391908586025238, + "logps/chosen": -1.1910122632980347, + "logps/rejected": -1.6560842990875244, + "loss": 4.0035, + "nll_loss": 3.9584884643554688, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11910124123096466, + "rewards/margins": 0.04650719463825226, + "rewards/rejected": -0.16560842096805573, + "step": 204 + }, + { + "epoch": 0.12752721617418353, + "grad_norm": 0.6403685212135315, + "learning_rate": 4.4875e-05, + "log_odds_chosen": 0.2122831642627716, + "log_odds_ratio": -0.8761559724807739, + "logits/chosen": 0.014004663564264774, + "logits/rejected": -0.14548785984516144, + "logps/chosen": -1.689424753189087, + "logps/rejected": -1.6934268474578857, + "loss": 2.4198, + "nll_loss": 2.332214117050171, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16894248127937317, + "rewards/margins": 0.00040020793676376343, + "rewards/rejected": -0.16934269666671753, + "step": 205 + }, + { + "epoch": 0.128149300155521, + "grad_norm": 0.4000180661678314, + "learning_rate": 4.4850000000000006e-05, + "log_odds_chosen": 1.1184585094451904, + "log_odds_ratio": -0.37039902806282043, + "logits/chosen": 0.2634533941745758, + "logits/rejected": 0.16103744506835938, + "logps/chosen": -1.027080774307251, + "logps/rejected": -1.6481044292449951, + "loss": 3.1462, + "nll_loss": 3.1091270446777344, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10270807147026062, + "rewards/margins": 0.06210235506296158, + "rewards/rejected": -0.1648104339838028, + "step": 206 + }, + { + "epoch": 0.12877138413685849, + "grad_norm": 0.5040024518966675, + "learning_rate": 4.4825e-05, + "log_odds_chosen": 0.2705846130847931, + "log_odds_ratio": -0.59270179271698, + "logits/chosen": 0.15955251455307007, + "logits/rejected": -0.05338404327630997, + "logps/chosen": -1.2470922470092773, + "logps/rejected": -1.4498063325881958, + "loss": 2.6152, + "nll_loss": 2.555975914001465, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12470922619104385, + "rewards/margins": 0.020271409302949905, + "rewards/rejected": -0.14498063921928406, + "step": 207 + }, + { + "epoch": 0.12939346811819596, + "grad_norm": 0.594912052154541, + "learning_rate": 4.4800000000000005e-05, + "log_odds_chosen": 0.7347403764724731, + "log_odds_ratio": -0.44468581676483154, + "logits/chosen": 0.14959490299224854, + "logits/rejected": -0.027468346059322357, + "logps/chosen": -1.372584581375122, + "logps/rejected": -1.9416903257369995, + "loss": 2.7127, + "nll_loss": 2.6681857109069824, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13725847005844116, + "rewards/margins": 0.056910570710897446, + "rewards/rejected": -0.1941690444946289, + "step": 208 + }, + { + "epoch": 0.13001555209953344, + "grad_norm": 0.5162251591682434, + "learning_rate": 4.4775e-05, + "log_odds_chosen": 0.7629825472831726, + "log_odds_ratio": -0.48512592911720276, + "logits/chosen": 0.2162114381790161, + "logits/rejected": 0.039904482662677765, + "logps/chosen": -1.1675233840942383, + "logps/rejected": -1.7713046073913574, + "loss": 2.8182, + "nll_loss": 2.769730806350708, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11675234138965607, + "rewards/margins": 0.06037811189889908, + "rewards/rejected": -0.17713046073913574, + "step": 209 + }, + { + "epoch": 0.13063763608087092, + "grad_norm": 0.4357577860355377, + "learning_rate": 4.4750000000000004e-05, + "log_odds_chosen": 0.16853998601436615, + "log_odds_ratio": -0.6546946167945862, + "logits/chosen": 0.28680700063705444, + "logits/rejected": 0.13408944010734558, + "logps/chosen": -1.4316234588623047, + "logps/rejected": -1.5849097967147827, + "loss": 3.1143, + "nll_loss": 3.0488011837005615, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.143162339925766, + "rewards/margins": 0.015328639186918736, + "rewards/rejected": -0.15849095582962036, + "step": 210 + }, + { + "epoch": 0.1312597200622084, + "grad_norm": 0.48053115606307983, + "learning_rate": 4.4725e-05, + "log_odds_chosen": 0.6203852891921997, + "log_odds_ratio": -0.47376835346221924, + "logits/chosen": 0.30184412002563477, + "logits/rejected": 0.14279696345329285, + "logps/chosen": -1.20229971408844, + "logps/rejected": -1.6811708211898804, + "loss": 3.1291, + "nll_loss": 3.0817553997039795, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12022997438907623, + "rewards/margins": 0.04788711294531822, + "rewards/rejected": -0.16811709105968475, + "step": 211 + }, + { + "epoch": 0.13188180404354588, + "grad_norm": 0.3863763213157654, + "learning_rate": 4.47e-05, + "log_odds_chosen": 0.23602965474128723, + "log_odds_ratio": -0.6337170004844666, + "logits/chosen": 0.14647379517555237, + "logits/rejected": 0.13201558589935303, + "logps/chosen": -1.5172404050827026, + "logps/rejected": -1.723448634147644, + "loss": 2.9906, + "nll_loss": 2.9272663593292236, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15172404050827026, + "rewards/margins": 0.02062082849442959, + "rewards/rejected": -0.1723448783159256, + "step": 212 + }, + { + "epoch": 0.13250388802488336, + "grad_norm": 0.5771374106407166, + "learning_rate": 4.4675e-05, + "log_odds_chosen": 0.6309947371482849, + "log_odds_ratio": -0.5399520397186279, + "logits/chosen": 0.12432387471199036, + "logits/rejected": 0.20462198555469513, + "logps/chosen": -1.5274455547332764, + "logps/rejected": -2.06624436378479, + "loss": 2.5767, + "nll_loss": 2.5227041244506836, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15274454653263092, + "rewards/margins": 0.053879883140325546, + "rewards/rejected": -0.20662443339824677, + "step": 213 + }, + { + "epoch": 0.13312597200622084, + "grad_norm": 0.41249603033065796, + "learning_rate": 4.465e-05, + "log_odds_chosen": -0.003160417079925537, + "log_odds_ratio": -0.777141273021698, + "logits/chosen": 0.14705267548561096, + "logits/rejected": -0.06464418023824692, + "logps/chosen": -1.5321584939956665, + "logps/rejected": -1.5345534086227417, + "loss": 3.1204, + "nll_loss": 3.042703866958618, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.15321585536003113, + "rewards/margins": 0.00023948121815919876, + "rewards/rejected": -0.15345533192157745, + "step": 214 + }, + { + "epoch": 0.13374805598755832, + "grad_norm": 0.4820147752761841, + "learning_rate": 4.4625e-05, + "log_odds_chosen": 0.2911366820335388, + "log_odds_ratio": -0.5818198323249817, + "logits/chosen": 0.2889520227909088, + "logits/rejected": 0.08205986022949219, + "logps/chosen": -1.4115300178527832, + "logps/rejected": -1.6441384553909302, + "loss": 2.9628, + "nll_loss": 2.904639959335327, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1411530077457428, + "rewards/margins": 0.02326083928346634, + "rewards/rejected": -0.16441385447978973, + "step": 215 + }, + { + "epoch": 0.1343701399688958, + "grad_norm": 0.41194218397140503, + "learning_rate": 4.46e-05, + "log_odds_chosen": 0.3945164382457733, + "log_odds_ratio": -0.6375629305839539, + "logits/chosen": 0.421595960855484, + "logits/rejected": 0.30017945170402527, + "logps/chosen": -1.3539952039718628, + "logps/rejected": -1.5963020324707031, + "loss": 3.5731, + "nll_loss": 3.509347915649414, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13539952039718628, + "rewards/margins": 0.024230685085058212, + "rewards/rejected": -0.1596302092075348, + "step": 216 + }, + { + "epoch": 0.13499222395023328, + "grad_norm": 0.503413200378418, + "learning_rate": 4.4575e-05, + "log_odds_chosen": 0.5858572721481323, + "log_odds_ratio": -0.5386984944343567, + "logits/chosen": 0.19849129021167755, + "logits/rejected": 0.03486599028110504, + "logps/chosen": -1.2306039333343506, + "logps/rejected": -1.630487322807312, + "loss": 3.1597, + "nll_loss": 3.105804443359375, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12306039035320282, + "rewards/margins": 0.03998832777142525, + "rewards/rejected": -0.16304872930049896, + "step": 217 + }, + { + "epoch": 0.13561430793157075, + "grad_norm": 0.5384315848350525, + "learning_rate": 4.4550000000000005e-05, + "log_odds_chosen": 1.3037192821502686, + "log_odds_ratio": -0.36916741728782654, + "logits/chosen": 0.14214058220386505, + "logits/rejected": 0.01107453927397728, + "logps/chosen": -0.9095080494880676, + "logps/rejected": -1.8086912631988525, + "loss": 2.5382, + "nll_loss": 2.5013184547424316, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09095080196857452, + "rewards/margins": 0.08991833031177521, + "rewards/rejected": -0.18086913228034973, + "step": 218 + }, + { + "epoch": 0.13623639191290823, + "grad_norm": 0.46685314178466797, + "learning_rate": 4.4525e-05, + "log_odds_chosen": 0.5023396611213684, + "log_odds_ratio": -0.5046614408493042, + "logits/chosen": 0.2844086289405823, + "logits/rejected": 0.1384527087211609, + "logps/chosen": -1.252253532409668, + "logps/rejected": -1.6022236347198486, + "loss": 2.6639, + "nll_loss": 2.613450527191162, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12522533535957336, + "rewards/margins": 0.034997016191482544, + "rewards/rejected": -0.1602223664522171, + "step": 219 + }, + { + "epoch": 0.1368584758942457, + "grad_norm": 0.37732696533203125, + "learning_rate": 4.4500000000000004e-05, + "log_odds_chosen": 0.475115031003952, + "log_odds_ratio": -0.5874269008636475, + "logits/chosen": 0.18952935934066772, + "logits/rejected": -0.05404677242040634, + "logps/chosen": -1.0435460805892944, + "logps/rejected": -1.356130599975586, + "loss": 3.2866, + "nll_loss": 3.227907419204712, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1043546050786972, + "rewards/margins": 0.03125845640897751, + "rewards/rejected": -0.13561305403709412, + "step": 220 + }, + { + "epoch": 0.1374805598755832, + "grad_norm": 0.39852970838546753, + "learning_rate": 4.4475e-05, + "log_odds_chosen": 0.40951022505760193, + "log_odds_ratio": -0.6001676917076111, + "logits/chosen": 0.1417674422264099, + "logits/rejected": -0.023162055760622025, + "logps/chosen": -1.1433157920837402, + "logps/rejected": -1.3727480173110962, + "loss": 2.8523, + "nll_loss": 2.7923316955566406, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11433158069849014, + "rewards/margins": 0.022943219169974327, + "rewards/rejected": -0.13727480173110962, + "step": 221 + }, + { + "epoch": 0.13810264385692067, + "grad_norm": 0.4464816153049469, + "learning_rate": 4.445e-05, + "log_odds_chosen": 0.983132541179657, + "log_odds_ratio": -0.3711632490158081, + "logits/chosen": 0.22909504175186157, + "logits/rejected": -0.061455175280570984, + "logps/chosen": -1.0717201232910156, + "logps/rejected": -1.7862846851348877, + "loss": 3.3108, + "nll_loss": 3.2737152576446533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10717201232910156, + "rewards/margins": 0.07145645469427109, + "rewards/rejected": -0.17862847447395325, + "step": 222 + }, + { + "epoch": 0.13872472783825818, + "grad_norm": 0.5403879880905151, + "learning_rate": 4.4425e-05, + "log_odds_chosen": 0.5938290357589722, + "log_odds_ratio": -0.4913523495197296, + "logits/chosen": 0.09688084572553635, + "logits/rejected": -0.05809571593999863, + "logps/chosen": -1.2423458099365234, + "logps/rejected": -1.6755822896957397, + "loss": 2.842, + "nll_loss": 2.7928619384765625, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12423457205295563, + "rewards/margins": 0.04332365095615387, + "rewards/rejected": -0.1675582379102707, + "step": 223 + }, + { + "epoch": 0.13934681181959566, + "grad_norm": 0.5137689113616943, + "learning_rate": 4.44e-05, + "log_odds_chosen": 0.9574803113937378, + "log_odds_ratio": -0.6414791345596313, + "logits/chosen": 0.41440945863723755, + "logits/rejected": 0.10038256645202637, + "logps/chosen": -1.3024613857269287, + "logps/rejected": -1.8441886901855469, + "loss": 4.0097, + "nll_loss": 3.94557523727417, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1302461475133896, + "rewards/margins": 0.05417274311184883, + "rewards/rejected": -0.18441888689994812, + "step": 224 + }, + { + "epoch": 0.13996889580093314, + "grad_norm": 0.34440910816192627, + "learning_rate": 4.4375e-05, + "log_odds_chosen": 0.7401940822601318, + "log_odds_ratio": -0.5135765671730042, + "logits/chosen": 0.34442248940467834, + "logits/rejected": -0.011198656633496284, + "logps/chosen": -1.2954169511795044, + "logps/rejected": -1.7026612758636475, + "loss": 3.9705, + "nll_loss": 3.9191346168518066, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12954169511795044, + "rewards/margins": 0.040724433958530426, + "rewards/rejected": -0.17026613652706146, + "step": 225 + }, + { + "epoch": 0.14059097978227061, + "grad_norm": 0.4693191349506378, + "learning_rate": 4.435e-05, + "log_odds_chosen": 0.46523332595825195, + "log_odds_ratio": -0.5510789752006531, + "logits/chosen": 0.09288059175014496, + "logits/rejected": -0.01915971003472805, + "logps/chosen": -1.0227751731872559, + "logps/rejected": -1.2773354053497314, + "loss": 2.7217, + "nll_loss": 2.6666271686553955, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10227751731872559, + "rewards/margins": 0.0254560187458992, + "rewards/rejected": -0.1277335286140442, + "step": 226 + }, + { + "epoch": 0.1412130637636081, + "grad_norm": 0.3790217936038971, + "learning_rate": 4.4325e-05, + "log_odds_chosen": 0.7664673924446106, + "log_odds_ratio": -0.4955465793609619, + "logits/chosen": 0.2821044921875, + "logits/rejected": 0.1695287972688675, + "logps/chosen": -1.0519059896469116, + "logps/rejected": -1.5946249961853027, + "loss": 3.3048, + "nll_loss": 3.2552666664123535, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10519059747457504, + "rewards/margins": 0.054271895438432693, + "rewards/rejected": -0.15946249663829803, + "step": 227 + }, + { + "epoch": 0.14183514774494557, + "grad_norm": 0.43475788831710815, + "learning_rate": 4.43e-05, + "log_odds_chosen": 1.3139894008636475, + "log_odds_ratio": -0.3370991349220276, + "logits/chosen": 0.12189202010631561, + "logits/rejected": -0.0020540207624435425, + "logps/chosen": -1.0322346687316895, + "logps/rejected": -2.0027456283569336, + "loss": 3.0615, + "nll_loss": 3.0277514457702637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10322347283363342, + "rewards/margins": 0.09705111384391785, + "rewards/rejected": -0.20027457177639008, + "step": 228 + }, + { + "epoch": 0.14245723172628305, + "grad_norm": 0.4523204565048218, + "learning_rate": 4.4275e-05, + "log_odds_chosen": 0.5393589735031128, + "log_odds_ratio": -0.5129015445709229, + "logits/chosen": 0.3066309988498688, + "logits/rejected": 0.01438068225979805, + "logps/chosen": -1.1355011463165283, + "logps/rejected": -1.4902321100234985, + "loss": 3.3797, + "nll_loss": 3.328399181365967, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1135501116514206, + "rewards/margins": 0.035473089665174484, + "rewards/rejected": -0.14902320504188538, + "step": 229 + }, + { + "epoch": 0.14307931570762053, + "grad_norm": 0.45688357949256897, + "learning_rate": 4.4250000000000005e-05, + "log_odds_chosen": 1.0163792371749878, + "log_odds_ratio": -0.42969611287117004, + "logits/chosen": 0.2794167995452881, + "logits/rejected": 0.0069199977442622185, + "logps/chosen": -1.1130561828613281, + "logps/rejected": -1.8443264961242676, + "loss": 3.1188, + "nll_loss": 3.0758509635925293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11130562424659729, + "rewards/margins": 0.07312701642513275, + "rewards/rejected": -0.18443265557289124, + "step": 230 + }, + { + "epoch": 0.143701399688958, + "grad_norm": 0.46562471985816956, + "learning_rate": 4.4225e-05, + "log_odds_chosen": 0.051547348499298096, + "log_odds_ratio": -0.6991626024246216, + "logits/chosen": 0.3503286838531494, + "logits/rejected": 0.16541802883148193, + "logps/chosen": -1.2530078887939453, + "logps/rejected": -1.2871830463409424, + "loss": 3.1336, + "nll_loss": 3.0636978149414062, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12530077993869781, + "rewards/margins": 0.003417517989873886, + "rewards/rejected": -0.128718301653862, + "step": 231 + }, + { + "epoch": 0.1443234836702955, + "grad_norm": 0.5583508610725403, + "learning_rate": 4.4200000000000004e-05, + "log_odds_chosen": 0.4175459146499634, + "log_odds_ratio": -0.5737831592559814, + "logits/chosen": 0.4354173243045807, + "logits/rejected": 0.12491598725318909, + "logps/chosen": -1.5473692417144775, + "logps/rejected": -1.9231562614440918, + "loss": 3.3368, + "nll_loss": 3.279399871826172, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15473692119121552, + "rewards/margins": 0.03757869452238083, + "rewards/rejected": -0.19231562316417694, + "step": 232 + }, + { + "epoch": 0.14494556765163297, + "grad_norm": 0.5220412611961365, + "learning_rate": 4.4174999999999996e-05, + "log_odds_chosen": 0.49191123247146606, + "log_odds_ratio": -0.6103449463844299, + "logits/chosen": 0.17081604897975922, + "logits/rejected": 0.05159229040145874, + "logps/chosen": -1.402343988418579, + "logps/rejected": -1.7350009679794312, + "loss": 3.3047, + "nll_loss": 3.2436368465423584, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14023439586162567, + "rewards/margins": 0.03326570987701416, + "rewards/rejected": -0.17350010573863983, + "step": 233 + }, + { + "epoch": 0.14556765163297045, + "grad_norm": 0.7865017652511597, + "learning_rate": 4.415e-05, + "log_odds_chosen": -0.3338541090488434, + "log_odds_ratio": -1.0038307905197144, + "logits/chosen": 0.20039094984531403, + "logits/rejected": 0.13230621814727783, + "logps/chosen": -1.9984074831008911, + "logps/rejected": -1.6774605512619019, + "loss": 2.6104, + "nll_loss": 2.5099916458129883, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1998407542705536, + "rewards/margins": -0.03209469094872475, + "rewards/rejected": -0.16774605214595795, + "step": 234 + }, + { + "epoch": 0.14618973561430793, + "grad_norm": 0.5441064834594727, + "learning_rate": 4.4125e-05, + "log_odds_chosen": 0.19742441177368164, + "log_odds_ratio": -0.6543236970901489, + "logits/chosen": 0.2758321762084961, + "logits/rejected": 0.2791656255722046, + "logps/chosen": -1.3114479780197144, + "logps/rejected": -1.4452178478240967, + "loss": 2.6863, + "nll_loss": 2.620828866958618, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.13114479184150696, + "rewards/margins": 0.013376971706748009, + "rewards/rejected": -0.1445217728614807, + "step": 235 + }, + { + "epoch": 0.1468118195956454, + "grad_norm": 0.4621172845363617, + "learning_rate": 4.41e-05, + "log_odds_chosen": 0.35999900102615356, + "log_odds_ratio": -0.6286050081253052, + "logits/chosen": 0.3370401859283447, + "logits/rejected": 0.1895410120487213, + "logps/chosen": -1.0965385437011719, + "logps/rejected": -1.2593486309051514, + "loss": 3.3825, + "nll_loss": 3.3196167945861816, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10965386033058167, + "rewards/margins": 0.016280999407172203, + "rewards/rejected": -0.12593486905097961, + "step": 236 + }, + { + "epoch": 0.14743390357698288, + "grad_norm": 0.4633600115776062, + "learning_rate": 4.4075e-05, + "log_odds_chosen": 0.15592439472675323, + "log_odds_ratio": -0.6589779853820801, + "logits/chosen": 0.24592751264572144, + "logits/rejected": 0.05784950405359268, + "logps/chosen": -1.350083827972412, + "logps/rejected": -1.4974408149719238, + "loss": 3.17, + "nll_loss": 3.1041297912597656, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13500836491584778, + "rewards/margins": 0.014735697768628597, + "rewards/rejected": -0.14974406361579895, + "step": 237 + }, + { + "epoch": 0.14805598755832036, + "grad_norm": 0.5822882652282715, + "learning_rate": 4.405e-05, + "log_odds_chosen": 0.5478257536888123, + "log_odds_ratio": -0.6282358169555664, + "logits/chosen": 0.28536713123321533, + "logits/rejected": 0.10193793475627899, + "logps/chosen": -1.4900511503219604, + "logps/rejected": -1.923351526260376, + "loss": 3.1027, + "nll_loss": 3.039842128753662, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14900511503219604, + "rewards/margins": 0.043330058455467224, + "rewards/rejected": -0.19233515858650208, + "step": 238 + }, + { + "epoch": 0.14867807153965784, + "grad_norm": 0.4006459712982178, + "learning_rate": 4.4025e-05, + "log_odds_chosen": 0.30393242835998535, + "log_odds_ratio": -0.5825948715209961, + "logits/chosen": 0.30995607376098633, + "logits/rejected": 0.09291841834783554, + "logps/chosen": -1.4452520608901978, + "logps/rejected": -1.6686973571777344, + "loss": 3.2763, + "nll_loss": 3.218071937561035, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1445252150297165, + "rewards/margins": 0.022344522178173065, + "rewards/rejected": -0.16686972975730896, + "step": 239 + }, + { + "epoch": 0.14930015552099535, + "grad_norm": 0.37317949533462524, + "learning_rate": 4.4000000000000006e-05, + "log_odds_chosen": 1.5197196006774902, + "log_odds_ratio": -0.4212367534637451, + "logits/chosen": 0.3777592182159424, + "logits/rejected": 0.2529478073120117, + "logps/chosen": -0.9898640513420105, + "logps/rejected": -2.145714282989502, + "loss": 3.4547, + "nll_loss": 3.4126036167144775, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09898640215396881, + "rewards/margins": 0.11558501422405243, + "rewards/rejected": -0.21457141637802124, + "step": 240 + }, + { + "epoch": 0.14992223950233283, + "grad_norm": 0.5220414996147156, + "learning_rate": 4.3975e-05, + "log_odds_chosen": 0.9724363684654236, + "log_odds_ratio": -0.39547187089920044, + "logits/chosen": 0.18093883991241455, + "logits/rejected": 0.09247509390115738, + "logps/chosen": -1.3090280294418335, + "logps/rejected": -1.9929441213607788, + "loss": 2.5084, + "nll_loss": 2.4688820838928223, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13090281188488007, + "rewards/margins": 0.06839160621166229, + "rewards/rejected": -0.19929441809654236, + "step": 241 + }, + { + "epoch": 0.1505443234836703, + "grad_norm": 0.41178998351097107, + "learning_rate": 4.3950000000000004e-05, + "log_odds_chosen": 0.03777070343494415, + "log_odds_ratio": -0.7433863282203674, + "logits/chosen": 0.18827003240585327, + "logits/rejected": 0.17734594643115997, + "logps/chosen": -1.3193129301071167, + "logps/rejected": -1.3846216201782227, + "loss": 2.8552, + "nll_loss": 2.7808141708374023, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13193130493164062, + "rewards/margins": 0.006530864164233208, + "rewards/rejected": -0.1384621560573578, + "step": 242 + }, + { + "epoch": 0.15116640746500778, + "grad_norm": 0.41735777258872986, + "learning_rate": 4.3925e-05, + "log_odds_chosen": 0.5820766687393188, + "log_odds_ratio": -0.4971098303794861, + "logits/chosen": 0.3844646215438843, + "logits/rejected": 0.1357293277978897, + "logps/chosen": -1.285773515701294, + "logps/rejected": -1.6937378644943237, + "loss": 3.6206, + "nll_loss": 3.5709385871887207, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1285773366689682, + "rewards/margins": 0.0407964326441288, + "rewards/rejected": -0.1693737804889679, + "step": 243 + }, + { + "epoch": 0.15178849144634526, + "grad_norm": 0.4252638816833496, + "learning_rate": 4.39e-05, + "log_odds_chosen": 0.9820137023925781, + "log_odds_ratio": -0.5139923095703125, + "logits/chosen": 0.09503781795501709, + "logits/rejected": 0.09257042407989502, + "logps/chosen": -1.1998246908187866, + "logps/rejected": -1.9771127700805664, + "loss": 2.4225, + "nll_loss": 2.371094226837158, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11998246610164642, + "rewards/margins": 0.07772879302501678, + "rewards/rejected": -0.1977112740278244, + "step": 244 + }, + { + "epoch": 0.15241057542768274, + "grad_norm": 0.35691797733306885, + "learning_rate": 4.3875e-05, + "log_odds_chosen": 0.15277954936027527, + "log_odds_ratio": -0.6356155872344971, + "logits/chosen": 0.1442718803882599, + "logits/rejected": 0.08599300682544708, + "logps/chosen": -1.3047629594802856, + "logps/rejected": -1.41819167137146, + "loss": 2.9185, + "nll_loss": 2.8548996448516846, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13047629594802856, + "rewards/margins": 0.011342879384756088, + "rewards/rejected": -0.14181917905807495, + "step": 245 + }, + { + "epoch": 0.15303265940902022, + "grad_norm": 0.3984750509262085, + "learning_rate": 4.385e-05, + "log_odds_chosen": 1.2786288261413574, + "log_odds_ratio": -0.36163750290870667, + "logits/chosen": 0.1432109922170639, + "logits/rejected": 0.06168469786643982, + "logps/chosen": -0.9494646191596985, + "logps/rejected": -1.8683538436889648, + "loss": 3.0516, + "nll_loss": 3.0154216289520264, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0949464663863182, + "rewards/margins": 0.0918889045715332, + "rewards/rejected": -0.1868353635072708, + "step": 246 + }, + { + "epoch": 0.1536547433903577, + "grad_norm": 0.35005396604537964, + "learning_rate": 4.3825e-05, + "log_odds_chosen": 0.36924123764038086, + "log_odds_ratio": -0.6463004946708679, + "logits/chosen": 0.08100152760744095, + "logits/rejected": -0.09407303482294083, + "logps/chosen": -1.0215667486190796, + "logps/rejected": -1.2288739681243896, + "loss": 2.9297, + "nll_loss": 2.865115165710449, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10215667635202408, + "rewards/margins": 0.020730726420879364, + "rewards/rejected": -0.12288740277290344, + "step": 247 + }, + { + "epoch": 0.15427682737169518, + "grad_norm": 0.5391296744346619, + "learning_rate": 4.38e-05, + "log_odds_chosen": 0.5213131904602051, + "log_odds_ratio": -0.5025854110717773, + "logits/chosen": 0.06299649924039841, + "logits/rejected": -0.09286680817604065, + "logps/chosen": -1.111344337463379, + "logps/rejected": -1.5201202630996704, + "loss": 3.0356, + "nll_loss": 2.9853503704071045, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11113443225622177, + "rewards/margins": 0.04087759926915169, + "rewards/rejected": -0.15201203525066376, + "step": 248 + }, + { + "epoch": 0.15489891135303266, + "grad_norm": 0.454863578081131, + "learning_rate": 4.3775e-05, + "log_odds_chosen": 1.0419408082962036, + "log_odds_ratio": -0.47779956459999084, + "logits/chosen": 0.06458957493305206, + "logits/rejected": -0.05411160737276077, + "logps/chosen": -1.1926398277282715, + "logps/rejected": -1.8617669343948364, + "loss": 3.2576, + "nll_loss": 3.2098097801208496, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11926397681236267, + "rewards/margins": 0.0669127106666565, + "rewards/rejected": -0.18617668747901917, + "step": 249 + }, + { + "epoch": 0.15552099533437014, + "grad_norm": 0.5558252334594727, + "learning_rate": 4.375e-05, + "log_odds_chosen": 1.1860463619232178, + "log_odds_ratio": -0.41251182556152344, + "logits/chosen": -0.08973684906959534, + "logits/rejected": -0.16550233960151672, + "logps/chosen": -1.0720642805099487, + "logps/rejected": -1.8928790092468262, + "loss": 2.5241, + "nll_loss": 2.4828810691833496, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10720643401145935, + "rewards/margins": 0.08208145946264267, + "rewards/rejected": -0.18928790092468262, + "step": 250 + }, + { + "epoch": 0.15614307931570762, + "grad_norm": 0.46578970551490784, + "learning_rate": 4.3725000000000006e-05, + "log_odds_chosen": 0.8737031817436218, + "log_odds_ratio": -0.45753180980682373, + "logits/chosen": 0.10488224774599075, + "logits/rejected": 0.002302175387740135, + "logps/chosen": -1.141048550605774, + "logps/rejected": -1.7917754650115967, + "loss": 2.7639, + "nll_loss": 2.718146562576294, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11410486698150635, + "rewards/margins": 0.0650726854801178, + "rewards/rejected": -0.17917755246162415, + "step": 251 + }, + { + "epoch": 0.1567651632970451, + "grad_norm": 0.5264093279838562, + "learning_rate": 4.3700000000000005e-05, + "log_odds_chosen": 1.2083812952041626, + "log_odds_ratio": -0.46954599022865295, + "logits/chosen": 0.10020960867404938, + "logits/rejected": 0.05089284107089043, + "logps/chosen": -1.2586607933044434, + "logps/rejected": -2.2547106742858887, + "loss": 2.6152, + "nll_loss": 2.568207025527954, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12586607038974762, + "rewards/margins": 0.09960497915744781, + "rewards/rejected": -0.22547104954719543, + "step": 252 + }, + { + "epoch": 0.15738724727838257, + "grad_norm": 0.39940425753593445, + "learning_rate": 4.3675000000000005e-05, + "log_odds_chosen": 0.8237320780754089, + "log_odds_ratio": -0.42615777254104614, + "logits/chosen": 0.12673884630203247, + "logits/rejected": 0.04520142823457718, + "logps/chosen": -1.2019363641738892, + "logps/rejected": -1.8468737602233887, + "loss": 3.0343, + "nll_loss": 2.991687536239624, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12019363045692444, + "rewards/margins": 0.06449373066425323, + "rewards/rejected": -0.18468736112117767, + "step": 253 + }, + { + "epoch": 0.15800933125972005, + "grad_norm": 0.36626678705215454, + "learning_rate": 4.3650000000000004e-05, + "log_odds_chosen": 0.9984500408172607, + "log_odds_ratio": -0.3665887117385864, + "logits/chosen": 0.18472406268119812, + "logits/rejected": 0.037876978516578674, + "logps/chosen": -1.0484715700149536, + "logps/rejected": -1.8369355201721191, + "loss": 3.5121, + "nll_loss": 3.4753966331481934, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10484716296195984, + "rewards/margins": 0.07884638756513596, + "rewards/rejected": -0.1836935579776764, + "step": 254 + }, + { + "epoch": 0.15863141524105753, + "grad_norm": 0.4521494507789612, + "learning_rate": 4.3625e-05, + "log_odds_chosen": 1.2302688360214233, + "log_odds_ratio": -0.3208959698677063, + "logits/chosen": 0.1981630027294159, + "logits/rejected": -0.019413426518440247, + "logps/chosen": -0.9359973669052124, + "logps/rejected": -1.7934815883636475, + "loss": 3.3395, + "nll_loss": 3.3074443340301514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09359973669052124, + "rewards/margins": 0.08574841916561127, + "rewards/rejected": -0.1793481707572937, + "step": 255 + }, + { + "epoch": 0.159253499222395, + "grad_norm": 0.4002397060394287, + "learning_rate": 4.36e-05, + "log_odds_chosen": 0.2759625315666199, + "log_odds_ratio": -0.5995438098907471, + "logits/chosen": 0.0834539383649826, + "logits/rejected": -0.03273439407348633, + "logps/chosen": -1.3060052394866943, + "logps/rejected": -1.5258723497390747, + "loss": 3.0473, + "nll_loss": 2.9873416423797607, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13060052692890167, + "rewards/margins": 0.021986715495586395, + "rewards/rejected": -0.15258723497390747, + "step": 256 + }, + { + "epoch": 0.1598755832037325, + "grad_norm": 0.6363208293914795, + "learning_rate": 4.3575e-05, + "log_odds_chosen": 0.5167827606201172, + "log_odds_ratio": -0.5308623909950256, + "logits/chosen": 0.23719561100006104, + "logits/rejected": 0.009174295701086521, + "logps/chosen": -1.3490158319473267, + "logps/rejected": -1.7873324155807495, + "loss": 3.2075, + "nll_loss": 3.1543946266174316, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13490158319473267, + "rewards/margins": 0.04383165389299393, + "rewards/rejected": -0.1787332445383072, + "step": 257 + }, + { + "epoch": 0.16049766718507, + "grad_norm": 0.4826381504535675, + "learning_rate": 4.355e-05, + "log_odds_chosen": 0.10458363592624664, + "log_odds_ratio": -0.709639310836792, + "logits/chosen": 0.22416506707668304, + "logits/rejected": 0.1090393140912056, + "logps/chosen": -1.410514235496521, + "logps/rejected": -1.4754109382629395, + "loss": 3.4102, + "nll_loss": 3.339193820953369, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.14105141162872314, + "rewards/margins": 0.006489668972790241, + "rewards/rejected": -0.1475410908460617, + "step": 258 + }, + { + "epoch": 0.16111975116640748, + "grad_norm": 0.41199758648872375, + "learning_rate": 4.352500000000001e-05, + "log_odds_chosen": 1.1602838039398193, + "log_odds_ratio": -0.3441213369369507, + "logits/chosen": 0.12007400393486023, + "logits/rejected": 0.005203016102313995, + "logps/chosen": -0.8913068175315857, + "logps/rejected": -1.666398286819458, + "loss": 3.228, + "nll_loss": 3.1936206817626953, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08913067728281021, + "rewards/margins": 0.07750913500785828, + "rewards/rejected": -0.16663981974124908, + "step": 259 + }, + { + "epoch": 0.16174183514774496, + "grad_norm": 0.5916374325752258, + "learning_rate": 4.35e-05, + "log_odds_chosen": 0.4282585680484772, + "log_odds_ratio": -0.5398960709571838, + "logits/chosen": 0.22822090983390808, + "logits/rejected": 0.1819199174642563, + "logps/chosen": -1.363776445388794, + "logps/rejected": -1.7082788944244385, + "loss": 2.7722, + "nll_loss": 2.7182321548461914, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13637763261795044, + "rewards/margins": 0.03445024788379669, + "rewards/rejected": -0.17082789540290833, + "step": 260 + }, + { + "epoch": 0.16236391912908243, + "grad_norm": 0.5163818001747131, + "learning_rate": 4.3475000000000006e-05, + "log_odds_chosen": 0.7817611694335938, + "log_odds_ratio": -0.419194757938385, + "logits/chosen": 0.11984409391880035, + "logits/rejected": -0.05257415026426315, + "logps/chosen": -1.1561753749847412, + "logps/rejected": -1.754962682723999, + "loss": 2.795, + "nll_loss": 2.7531259059906006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1156175285577774, + "rewards/margins": 0.05987873673439026, + "rewards/rejected": -0.17549628019332886, + "step": 261 + }, + { + "epoch": 0.1629860031104199, + "grad_norm": 0.3384513854980469, + "learning_rate": 4.345e-05, + "log_odds_chosen": 0.48024481534957886, + "log_odds_ratio": -0.5129582285881042, + "logits/chosen": 0.38080763816833496, + "logits/rejected": 0.12893861532211304, + "logps/chosen": -1.1857705116271973, + "logps/rejected": -1.510347604751587, + "loss": 3.62, + "nll_loss": 3.5687360763549805, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11857704818248749, + "rewards/margins": 0.032457705587148666, + "rewards/rejected": -0.15103475749492645, + "step": 262 + }, + { + "epoch": 0.1636080870917574, + "grad_norm": 0.3694182336330414, + "learning_rate": 4.3425000000000005e-05, + "log_odds_chosen": 1.5465469360351562, + "log_odds_ratio": -0.2594394385814667, + "logits/chosen": 0.19702796638011932, + "logits/rejected": 0.1825985312461853, + "logps/chosen": -1.0576568841934204, + "logps/rejected": -2.280040740966797, + "loss": 2.7614, + "nll_loss": 2.735440492630005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1057656854391098, + "rewards/margins": 0.12223838269710541, + "rewards/rejected": -0.2280040681362152, + "step": 263 + }, + { + "epoch": 0.16423017107309487, + "grad_norm": 0.4737184941768646, + "learning_rate": 4.3400000000000005e-05, + "log_odds_chosen": 1.012281060218811, + "log_odds_ratio": -0.45542672276496887, + "logits/chosen": 0.18417447805404663, + "logits/rejected": 0.2393220067024231, + "logps/chosen": -1.3665863275527954, + "logps/rejected": -2.137876272201538, + "loss": 3.0061, + "nll_loss": 2.9605910778045654, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13665863871574402, + "rewards/margins": 0.07712897658348083, + "rewards/rejected": -0.21378763020038605, + "step": 264 + }, + { + "epoch": 0.16485225505443235, + "grad_norm": 0.5989867448806763, + "learning_rate": 4.3375000000000004e-05, + "log_odds_chosen": 0.47888749837875366, + "log_odds_ratio": -0.5475578904151917, + "logits/chosen": 0.34955573081970215, + "logits/rejected": -0.01235947385430336, + "logps/chosen": -1.1995731592178345, + "logps/rejected": -1.5407181978225708, + "loss": 3.5055, + "nll_loss": 3.4507837295532227, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1199573203921318, + "rewards/margins": 0.034114498645067215, + "rewards/rejected": -0.15407180786132812, + "step": 265 + }, + { + "epoch": 0.16547433903576983, + "grad_norm": 0.45694050192832947, + "learning_rate": 4.335e-05, + "log_odds_chosen": 1.0779105424880981, + "log_odds_ratio": -0.4133705198764801, + "logits/chosen": 0.31415730714797974, + "logits/rejected": 0.17435882985591888, + "logps/chosen": -1.2905559539794922, + "logps/rejected": -2.1517629623413086, + "loss": 3.4923, + "nll_loss": 3.4509427547454834, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12905558943748474, + "rewards/margins": 0.08612069487571716, + "rewards/rejected": -0.2151763141155243, + "step": 266 + }, + { + "epoch": 0.1660964230171073, + "grad_norm": 0.5878811478614807, + "learning_rate": 4.3325e-05, + "log_odds_chosen": 0.61550372838974, + "log_odds_ratio": -0.5280717015266418, + "logits/chosen": 0.13362732529640198, + "logits/rejected": -0.045731253921985626, + "logps/chosen": -1.3563334941864014, + "logps/rejected": -1.7919440269470215, + "loss": 2.8887, + "nll_loss": 2.8359363079071045, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13563336431980133, + "rewards/margins": 0.04356103017926216, + "rewards/rejected": -0.1791943907737732, + "step": 267 + }, + { + "epoch": 0.1667185069984448, + "grad_norm": 0.32133936882019043, + "learning_rate": 4.33e-05, + "log_odds_chosen": 0.21047142148017883, + "log_odds_ratio": -0.7247399687767029, + "logits/chosen": 0.40043243765830994, + "logits/rejected": 0.05317821353673935, + "logps/chosen": -1.4496357440948486, + "logps/rejected": -1.6929826736450195, + "loss": 3.9373, + "nll_loss": 3.8648571968078613, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1449635773897171, + "rewards/margins": 0.02433469332754612, + "rewards/rejected": -0.16929826140403748, + "step": 268 + }, + { + "epoch": 0.16734059097978227, + "grad_norm": 0.39708441495895386, + "learning_rate": 4.3275e-05, + "log_odds_chosen": 0.2173278033733368, + "log_odds_ratio": -0.659453272819519, + "logits/chosen": 0.16504698991775513, + "logits/rejected": 0.015571564435958862, + "logps/chosen": -1.2936235666275024, + "logps/rejected": -1.4728864431381226, + "loss": 3.2912, + "nll_loss": 3.225303888320923, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12936235964298248, + "rewards/margins": 0.0179262924939394, + "rewards/rejected": -0.14728865027427673, + "step": 269 + }, + { + "epoch": 0.16796267496111975, + "grad_norm": 0.4461729824542999, + "learning_rate": 4.325e-05, + "log_odds_chosen": 0.7989851236343384, + "log_odds_ratio": -0.5427728295326233, + "logits/chosen": -0.0695042535662651, + "logits/rejected": -0.15135838091373444, + "logps/chosen": -1.2412259578704834, + "logps/rejected": -1.7757339477539062, + "loss": 2.8043, + "nll_loss": 2.7499935626983643, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12412260472774506, + "rewards/margins": 0.053450800478458405, + "rewards/rejected": -0.17757341265678406, + "step": 270 + }, + { + "epoch": 0.16858475894245722, + "grad_norm": 0.5164592266082764, + "learning_rate": 4.322500000000001e-05, + "log_odds_chosen": 0.5904808640480042, + "log_odds_ratio": -0.5027241110801697, + "logits/chosen": 0.006964411586523056, + "logits/rejected": -0.11874233931303024, + "logps/chosen": -1.0042153596878052, + "logps/rejected": -1.401916265487671, + "loss": 2.8845, + "nll_loss": 2.834200143814087, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10042153298854828, + "rewards/margins": 0.039770081639289856, + "rewards/rejected": -0.14019162952899933, + "step": 271 + }, + { + "epoch": 0.1692068429237947, + "grad_norm": 0.5214595198631287, + "learning_rate": 4.32e-05, + "log_odds_chosen": 0.24742233753204346, + "log_odds_ratio": -0.7152416110038757, + "logits/chosen": 0.1348581463098526, + "logits/rejected": -0.040653035044670105, + "logps/chosen": -1.2521984577178955, + "logps/rejected": -1.3138203620910645, + "loss": 3.3, + "nll_loss": 3.2285189628601074, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12521985173225403, + "rewards/margins": 0.006162186153233051, + "rewards/rejected": -0.1313820332288742, + "step": 272 + }, + { + "epoch": 0.16982892690513218, + "grad_norm": 0.40921929478645325, + "learning_rate": 4.3175000000000006e-05, + "log_odds_chosen": 0.8957703113555908, + "log_odds_ratio": -0.4180378317832947, + "logits/chosen": 0.04939873516559601, + "logits/rejected": -0.06932801753282547, + "logps/chosen": -0.9343599677085876, + "logps/rejected": -1.509351134300232, + "loss": 3.069, + "nll_loss": 3.027172088623047, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09343599528074265, + "rewards/margins": 0.05749912187457085, + "rewards/rejected": -0.1509351134300232, + "step": 273 + }, + { + "epoch": 0.17045101088646966, + "grad_norm": 0.5043439865112305, + "learning_rate": 4.315e-05, + "log_odds_chosen": 1.2644999027252197, + "log_odds_ratio": -0.43298041820526123, + "logits/chosen": 0.1321154087781906, + "logits/rejected": -0.09128782153129578, + "logps/chosen": -1.0918232202529907, + "logps/rejected": -2.1638400554656982, + "loss": 2.9592, + "nll_loss": 2.9159512519836426, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10918232053518295, + "rewards/margins": 0.10720168799161911, + "rewards/rejected": -0.21638402342796326, + "step": 274 + }, + { + "epoch": 0.17107309486780714, + "grad_norm": 0.4372323751449585, + "learning_rate": 4.3125000000000005e-05, + "log_odds_chosen": 0.33947059512138367, + "log_odds_ratio": -0.6284760236740112, + "logits/chosen": 0.08530203998088837, + "logits/rejected": -0.054735083132982254, + "logps/chosen": -1.3148372173309326, + "logps/rejected": -1.509586215019226, + "loss": 3.0704, + "nll_loss": 3.007549285888672, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13148371875286102, + "rewards/margins": 0.019474901258945465, + "rewards/rejected": -0.15095862746238708, + "step": 275 + }, + { + "epoch": 0.17169517884914465, + "grad_norm": 0.3891263008117676, + "learning_rate": 4.3100000000000004e-05, + "log_odds_chosen": 0.5235820412635803, + "log_odds_ratio": -0.6117573976516724, + "logits/chosen": 0.34458497166633606, + "logits/rejected": 0.05262750759720802, + "logps/chosen": -1.4382394552230835, + "logps/rejected": -1.8694814443588257, + "loss": 3.8207, + "nll_loss": 3.75954270362854, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14382395148277283, + "rewards/margins": 0.04312421381473541, + "rewards/rejected": -0.18694815039634705, + "step": 276 + }, + { + "epoch": 0.17231726283048213, + "grad_norm": 0.4415547251701355, + "learning_rate": 4.3075000000000003e-05, + "log_odds_chosen": 0.05041904002428055, + "log_odds_ratio": -0.6868739128112793, + "logits/chosen": 0.006680905818939209, + "logits/rejected": -0.09860651195049286, + "logps/chosen": -1.2028601169586182, + "logps/rejected": -1.2418127059936523, + "loss": 2.6857, + "nll_loss": 2.6169979572296143, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1202860176563263, + "rewards/margins": 0.0038952501490712166, + "rewards/rejected": -0.12418127059936523, + "step": 277 + }, + { + "epoch": 0.1729393468118196, + "grad_norm": 0.44455549120903015, + "learning_rate": 4.305e-05, + "log_odds_chosen": 1.729311466217041, + "log_odds_ratio": -0.3406631648540497, + "logits/chosen": 0.18643704056739807, + "logits/rejected": 0.041028942912817, + "logps/chosen": -0.9607067108154297, + "logps/rejected": -2.246669292449951, + "loss": 3.3204, + "nll_loss": 3.286289930343628, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09607066959142685, + "rewards/margins": 0.1285962462425232, + "rewards/rejected": -0.22466690838336945, + "step": 278 + }, + { + "epoch": 0.17356143079315708, + "grad_norm": 0.5485425591468811, + "learning_rate": 4.3025e-05, + "log_odds_chosen": 1.234410285949707, + "log_odds_ratio": -0.316910058259964, + "logits/chosen": 0.17609338462352753, + "logits/rejected": -0.03866102546453476, + "logps/chosen": -1.0069231986999512, + "logps/rejected": -1.8689861297607422, + "loss": 3.0301, + "nll_loss": 2.998425006866455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10069232434034348, + "rewards/margins": 0.08620627969503403, + "rewards/rejected": -0.1868986040353775, + "step": 279 + }, + { + "epoch": 0.17418351477449456, + "grad_norm": 0.5843217372894287, + "learning_rate": 4.3e-05, + "log_odds_chosen": 0.3664306402206421, + "log_odds_ratio": -0.5743635892868042, + "logits/chosen": 0.08559656143188477, + "logits/rejected": -0.036249175667762756, + "logps/chosen": -1.137782335281372, + "logps/rejected": -1.3918867111206055, + "loss": 2.5858, + "nll_loss": 2.528327226638794, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11377823352813721, + "rewards/margins": 0.02541043609380722, + "rewards/rejected": -0.13918867707252502, + "step": 280 + }, + { + "epoch": 0.17480559875583204, + "grad_norm": 0.4670501947402954, + "learning_rate": 4.2975e-05, + "log_odds_chosen": 0.6214780211448669, + "log_odds_ratio": -0.6075859069824219, + "logits/chosen": 0.2213689535856247, + "logits/rejected": 0.08010444790124893, + "logps/chosen": -1.2412688732147217, + "logps/rejected": -1.5694103240966797, + "loss": 3.1539, + "nll_loss": 3.0931215286254883, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12412689626216888, + "rewards/margins": 0.032814137637615204, + "rewards/rejected": -0.1569410264492035, + "step": 281 + }, + { + "epoch": 0.17542768273716952, + "grad_norm": 0.38018524646759033, + "learning_rate": 4.295e-05, + "log_odds_chosen": 0.8761900663375854, + "log_odds_ratio": -0.5851420164108276, + "logits/chosen": 0.04957732930779457, + "logits/rejected": 0.10427582263946533, + "logps/chosen": -1.2226829528808594, + "logps/rejected": -2.00304913520813, + "loss": 2.5916, + "nll_loss": 2.5330915451049805, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12226830422878265, + "rewards/margins": 0.0780366063117981, + "rewards/rejected": -0.20030491054058075, + "step": 282 + }, + { + "epoch": 0.176049766718507, + "grad_norm": 0.4785327613353729, + "learning_rate": 4.2925000000000007e-05, + "log_odds_chosen": 0.7665344476699829, + "log_odds_ratio": -0.5781853795051575, + "logits/chosen": 0.4440539479255676, + "logits/rejected": 0.12010644376277924, + "logps/chosen": -1.2878354787826538, + "logps/rejected": -1.7937930822372437, + "loss": 3.7711, + "nll_loss": 3.713322401046753, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12878355383872986, + "rewards/margins": 0.050595760345458984, + "rewards/rejected": -0.17937931418418884, + "step": 283 + }, + { + "epoch": 0.17667185069984448, + "grad_norm": 0.4155225157737732, + "learning_rate": 4.29e-05, + "log_odds_chosen": 1.0096347332000732, + "log_odds_ratio": -0.5321923494338989, + "logits/chosen": 0.3086691200733185, + "logits/rejected": 0.2555179297924042, + "logps/chosen": -1.3326821327209473, + "logps/rejected": -2.250950813293457, + "loss": 3.1272, + "nll_loss": 3.0739986896514893, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13326820731163025, + "rewards/margins": 0.09182686358690262, + "rewards/rejected": -0.22509507834911346, + "step": 284 + }, + { + "epoch": 0.17729393468118196, + "grad_norm": 0.4170939028263092, + "learning_rate": 4.2875000000000005e-05, + "log_odds_chosen": 0.6225306391716003, + "log_odds_ratio": -0.5137684345245361, + "logits/chosen": 0.4927368760108948, + "logits/rejected": 0.17728674411773682, + "logps/chosen": -1.3216047286987305, + "logps/rejected": -1.7400200366973877, + "loss": 3.8377, + "nll_loss": 3.786278247833252, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.132160484790802, + "rewards/margins": 0.0418415293097496, + "rewards/rejected": -0.1740020215511322, + "step": 285 + }, + { + "epoch": 0.17791601866251944, + "grad_norm": 0.5519422888755798, + "learning_rate": 4.285e-05, + "log_odds_chosen": 1.4737498760223389, + "log_odds_ratio": -0.38779395818710327, + "logits/chosen": 0.3228255808353424, + "logits/rejected": 0.10631482303142548, + "logps/chosen": -1.236034631729126, + "logps/rejected": -2.550682544708252, + "loss": 2.8773, + "nll_loss": 2.8385581970214844, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1236034631729126, + "rewards/margins": 0.13146479427814484, + "rewards/rejected": -0.25506827235221863, + "step": 286 + }, + { + "epoch": 0.17853810264385692, + "grad_norm": 0.3894568979740143, + "learning_rate": 4.2825000000000004e-05, + "log_odds_chosen": 1.6931242942810059, + "log_odds_ratio": -0.1958678513765335, + "logits/chosen": 0.2403871715068817, + "logits/rejected": 0.07207652926445007, + "logps/chosen": -1.2004201412200928, + "logps/rejected": -2.5714733600616455, + "loss": 3.0662, + "nll_loss": 3.0466134548187256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12004202604293823, + "rewards/margins": 0.1371053159236908, + "rewards/rejected": -0.25714734196662903, + "step": 287 + }, + { + "epoch": 0.1791601866251944, + "grad_norm": 0.49171674251556396, + "learning_rate": 4.2800000000000004e-05, + "log_odds_chosen": 0.856521487236023, + "log_odds_ratio": -0.4331633448600769, + "logits/chosen": 0.26269370317459106, + "logits/rejected": 0.2361827790737152, + "logps/chosen": -1.312029242515564, + "logps/rejected": -1.9908480644226074, + "loss": 3.1928, + "nll_loss": 3.1494884490966797, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13120292127132416, + "rewards/margins": 0.06788188219070435, + "rewards/rejected": -0.1990848183631897, + "step": 288 + }, + { + "epoch": 0.17978227060653187, + "grad_norm": 0.4975660443305969, + "learning_rate": 4.2775e-05, + "log_odds_chosen": 1.012892484664917, + "log_odds_ratio": -0.3519851267337799, + "logits/chosen": 0.21247489750385284, + "logits/rejected": 0.05814167857170105, + "logps/chosen": -1.152166485786438, + "logps/rejected": -1.9508147239685059, + "loss": 2.5502, + "nll_loss": 2.514974355697632, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11521665006875992, + "rewards/margins": 0.07986482977867126, + "rewards/rejected": -0.19508148729801178, + "step": 289 + }, + { + "epoch": 0.18040435458786935, + "grad_norm": 0.5224384665489197, + "learning_rate": 4.275e-05, + "log_odds_chosen": 1.7257745265960693, + "log_odds_ratio": -0.24555160105228424, + "logits/chosen": 0.47818949818611145, + "logits/rejected": 0.06968870759010315, + "logps/chosen": -0.8608075976371765, + "logps/rejected": -2.147359848022461, + "loss": 3.4919, + "nll_loss": 3.467371940612793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08608075976371765, + "rewards/margins": 0.12865521013736725, + "rewards/rejected": -0.2147359699010849, + "step": 290 + }, + { + "epoch": 0.18102643856920683, + "grad_norm": 0.4815826416015625, + "learning_rate": 4.2725e-05, + "log_odds_chosen": 1.6820149421691895, + "log_odds_ratio": -0.23076926171779633, + "logits/chosen": 0.33108270168304443, + "logits/rejected": 0.07832175493240356, + "logps/chosen": -0.956098198890686, + "logps/rejected": -2.2002487182617188, + "loss": 2.7417, + "nll_loss": 2.718578338623047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09560982137918472, + "rewards/margins": 0.12441505491733551, + "rewards/rejected": -0.22002488374710083, + "step": 291 + }, + { + "epoch": 0.1816485225505443, + "grad_norm": 0.37672296166419983, + "learning_rate": 4.27e-05, + "log_odds_chosen": 0.5850558876991272, + "log_odds_ratio": -0.4817024767398834, + "logits/chosen": 0.3864336907863617, + "logits/rejected": 0.1432633250951767, + "logps/chosen": -1.0847342014312744, + "logps/rejected": -1.523407220840454, + "loss": 3.3112, + "nll_loss": 3.2630155086517334, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10847342759370804, + "rewards/margins": 0.04386730492115021, + "rewards/rejected": -0.15234072506427765, + "step": 292 + }, + { + "epoch": 0.1822706065318818, + "grad_norm": 0.5808604955673218, + "learning_rate": 4.2675e-05, + "log_odds_chosen": 0.610927939414978, + "log_odds_ratio": -0.4852539002895355, + "logits/chosen": 0.35360458493232727, + "logits/rejected": 0.008299056440591812, + "logps/chosen": -1.268179178237915, + "logps/rejected": -1.7629632949829102, + "loss": 2.9865, + "nll_loss": 2.9379255771636963, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12681792676448822, + "rewards/margins": 0.04947839677333832, + "rewards/rejected": -0.17629633843898773, + "step": 293 + }, + { + "epoch": 0.1828926905132193, + "grad_norm": 0.412265807390213, + "learning_rate": 4.265e-05, + "log_odds_chosen": 1.2932488918304443, + "log_odds_ratio": -0.3016420006752014, + "logits/chosen": 0.3499048352241516, + "logits/rejected": 0.12447762489318848, + "logps/chosen": -1.0451427698135376, + "logps/rejected": -1.9540801048278809, + "loss": 3.1025, + "nll_loss": 3.0723202228546143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10451428592205048, + "rewards/margins": 0.09089374542236328, + "rewards/rejected": -0.19540801644325256, + "step": 294 + }, + { + "epoch": 0.18351477449455678, + "grad_norm": 0.44288045167922974, + "learning_rate": 4.2625000000000006e-05, + "log_odds_chosen": 0.4354422688484192, + "log_odds_ratio": -0.5339393615722656, + "logits/chosen": 0.2939504086971283, + "logits/rejected": 0.13727524876594543, + "logps/chosen": -1.2091028690338135, + "logps/rejected": -1.5410089492797852, + "loss": 2.9659, + "nll_loss": 2.9124677181243896, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12091030180454254, + "rewards/margins": 0.03319060429930687, + "rewards/rejected": -0.15410089492797852, + "step": 295 + }, + { + "epoch": 0.18413685847589426, + "grad_norm": 0.4726203680038452, + "learning_rate": 4.26e-05, + "log_odds_chosen": 0.4269523620605469, + "log_odds_ratio": -0.6357343196868896, + "logits/chosen": 0.42432665824890137, + "logits/rejected": 0.09816896915435791, + "logps/chosen": -1.2562463283538818, + "logps/rejected": -1.4864271879196167, + "loss": 3.435, + "nll_loss": 3.371392250061035, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1256246566772461, + "rewards/margins": 0.023018071427941322, + "rewards/rejected": -0.14864271879196167, + "step": 296 + }, + { + "epoch": 0.18475894245723173, + "grad_norm": 0.4871671199798584, + "learning_rate": 4.2575000000000005e-05, + "log_odds_chosen": 0.5284110307693481, + "log_odds_ratio": -0.6341941356658936, + "logits/chosen": 0.3682095408439636, + "logits/rejected": 0.31838396191596985, + "logps/chosen": -1.3797330856323242, + "logps/rejected": -1.8745726346969604, + "loss": 3.0913, + "nll_loss": 3.0278983116149902, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.13797330856323242, + "rewards/margins": 0.04948396235704422, + "rewards/rejected": -0.18745726346969604, + "step": 297 + }, + { + "epoch": 0.1853810264385692, + "grad_norm": 0.33851662278175354, + "learning_rate": 4.2550000000000004e-05, + "log_odds_chosen": 0.6119697093963623, + "log_odds_ratio": -0.6034431457519531, + "logits/chosen": 0.28094005584716797, + "logits/rejected": 0.2045813798904419, + "logps/chosen": -1.17061448097229, + "logps/rejected": -1.5927424430847168, + "loss": 3.4087, + "nll_loss": 3.3483834266662598, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11706145107746124, + "rewards/margins": 0.042212799191474915, + "rewards/rejected": -0.15927425026893616, + "step": 298 + }, + { + "epoch": 0.1860031104199067, + "grad_norm": 0.49449941515922546, + "learning_rate": 4.2525000000000004e-05, + "log_odds_chosen": 0.40445905923843384, + "log_odds_ratio": -0.5552644729614258, + "logits/chosen": 0.2672854959964752, + "logits/rejected": 0.16854549944400787, + "logps/chosen": -1.561603307723999, + "logps/rejected": -1.825971007347107, + "loss": 2.7683, + "nll_loss": 2.7127437591552734, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15616033971309662, + "rewards/margins": 0.026436757296323776, + "rewards/rejected": -0.1825971007347107, + "step": 299 + }, + { + "epoch": 0.18662519440124417, + "grad_norm": 0.48564156889915466, + "learning_rate": 4.25e-05, + "log_odds_chosen": 1.5461804866790771, + "log_odds_ratio": -0.26383474469184875, + "logits/chosen": 0.36446020007133484, + "logits/rejected": 0.2191128432750702, + "logps/chosen": -1.0896272659301758, + "logps/rejected": -2.29062557220459, + "loss": 3.5646, + "nll_loss": 3.5382304191589355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10896272957324982, + "rewards/margins": 0.12009983509778976, + "rewards/rejected": -0.22906255722045898, + "step": 300 + }, + { + "epoch": 0.18724727838258165, + "grad_norm": 0.39352017641067505, + "learning_rate": 4.2475e-05, + "log_odds_chosen": 1.5315042734146118, + "log_odds_ratio": -0.41960352659225464, + "logits/chosen": 0.3345024585723877, + "logits/rejected": 0.1347406506538391, + "logps/chosen": -1.1930088996887207, + "logps/rejected": -2.30621337890625, + "loss": 3.5042, + "nll_loss": 3.4622511863708496, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11930090188980103, + "rewards/margins": 0.11132044345140457, + "rewards/rejected": -0.230621337890625, + "step": 301 + }, + { + "epoch": 0.18786936236391913, + "grad_norm": 0.3949545621871948, + "learning_rate": 4.245e-05, + "log_odds_chosen": 0.7881875038146973, + "log_odds_ratio": -0.4549405574798584, + "logits/chosen": 0.1703367680311203, + "logits/rejected": 0.10550281405448914, + "logps/chosen": -1.1531308889389038, + "logps/rejected": -1.7436622381210327, + "loss": 3.0154, + "nll_loss": 2.9698915481567383, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1153130903840065, + "rewards/margins": 0.05905313044786453, + "rewards/rejected": -0.17436623573303223, + "step": 302 + }, + { + "epoch": 0.1884914463452566, + "grad_norm": 0.3909085690975189, + "learning_rate": 4.2425e-05, + "log_odds_chosen": 1.3018293380737305, + "log_odds_ratio": -0.33854174613952637, + "logits/chosen": 0.32386866211891174, + "logits/rejected": 0.33090081810951233, + "logps/chosen": -1.2735427618026733, + "logps/rejected": -2.3735671043395996, + "loss": 2.9068, + "nll_loss": 2.872992753982544, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12735427916049957, + "rewards/margins": 0.11000245064496994, + "rewards/rejected": -0.23735672235488892, + "step": 303 + }, + { + "epoch": 0.1891135303265941, + "grad_norm": 0.678255558013916, + "learning_rate": 4.24e-05, + "log_odds_chosen": 0.5951331853866577, + "log_odds_ratio": -0.5492010116577148, + "logits/chosen": 0.18354575335979462, + "logits/rejected": -0.09810749441385269, + "logps/chosen": -1.291944980621338, + "logps/rejected": -1.642785668373108, + "loss": 3.1579, + "nll_loss": 3.102977752685547, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1291944980621338, + "rewards/margins": 0.0350840725004673, + "rewards/rejected": -0.1642785668373108, + "step": 304 + }, + { + "epoch": 0.18973561430793157, + "grad_norm": 0.5240991115570068, + "learning_rate": 4.237500000000001e-05, + "log_odds_chosen": 3.6760783195495605, + "log_odds_ratio": -0.180177241563797, + "logits/chosen": 0.14156953990459442, + "logits/rejected": -0.06919633597135544, + "logps/chosen": -0.7791962623596191, + "logps/rejected": -3.654845714569092, + "loss": 3.0695, + "nll_loss": 3.051457643508911, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07791963219642639, + "rewards/margins": 0.2875649333000183, + "rewards/rejected": -0.3654845356941223, + "step": 305 + }, + { + "epoch": 0.19035769828926905, + "grad_norm": 0.4615384042263031, + "learning_rate": 4.235e-05, + "log_odds_chosen": 0.5181177258491516, + "log_odds_ratio": -0.5444080829620361, + "logits/chosen": 0.20214281976222992, + "logits/rejected": 0.17525801062583923, + "logps/chosen": -1.248335838317871, + "logps/rejected": -1.6521800756454468, + "loss": 2.82, + "nll_loss": 2.7655587196350098, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12483358383178711, + "rewards/margins": 0.0403844378888607, + "rewards/rejected": -0.16521801054477692, + "step": 306 + }, + { + "epoch": 0.19097978227060652, + "grad_norm": 0.4177151620388031, + "learning_rate": 4.2325000000000006e-05, + "log_odds_chosen": 0.533401608467102, + "log_odds_ratio": -0.520737886428833, + "logits/chosen": 0.1740420013666153, + "logits/rejected": 0.11574655026197433, + "logps/chosen": -1.2388856410980225, + "logps/rejected": -1.618835687637329, + "loss": 2.9597, + "nll_loss": 2.9076099395751953, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12388856709003448, + "rewards/margins": 0.03799499571323395, + "rewards/rejected": -0.16188356280326843, + "step": 307 + }, + { + "epoch": 0.191601866251944, + "grad_norm": 0.4279910624027252, + "learning_rate": 4.23e-05, + "log_odds_chosen": 1.0015525817871094, + "log_odds_ratio": -0.4986322224140167, + "logits/chosen": 0.4348335564136505, + "logits/rejected": 0.1550769805908203, + "logps/chosen": -1.2172647714614868, + "logps/rejected": -2.026916742324829, + "loss": 3.3799, + "nll_loss": 3.330019950866699, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12172648310661316, + "rewards/margins": 0.08096518367528915, + "rewards/rejected": -0.20269165933132172, + "step": 308 + }, + { + "epoch": 0.19222395023328148, + "grad_norm": 0.47602641582489014, + "learning_rate": 4.2275000000000004e-05, + "log_odds_chosen": 1.2695807218551636, + "log_odds_ratio": -0.34691569209098816, + "logits/chosen": 0.2914532721042633, + "logits/rejected": 0.17980057001113892, + "logps/chosen": -0.9040358662605286, + "logps/rejected": -1.8016421794891357, + "loss": 3.2324, + "nll_loss": 3.1976821422576904, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09040358662605286, + "rewards/margins": 0.08976064622402191, + "rewards/rejected": -0.18016423285007477, + "step": 309 + }, + { + "epoch": 0.19284603421461896, + "grad_norm": 0.4949467182159424, + "learning_rate": 4.2250000000000004e-05, + "log_odds_chosen": 0.9988858103752136, + "log_odds_ratio": -0.4942994713783264, + "logits/chosen": 0.2518168091773987, + "logits/rejected": 0.07765643298625946, + "logps/chosen": -1.2327001094818115, + "logps/rejected": -2.0293068885803223, + "loss": 2.9551, + "nll_loss": 2.905709743499756, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12327000498771667, + "rewards/margins": 0.07966066896915436, + "rewards/rejected": -0.20293068885803223, + "step": 310 + }, + { + "epoch": 0.19346811819595647, + "grad_norm": 0.4487239122390747, + "learning_rate": 4.2225e-05, + "log_odds_chosen": 0.5391438603401184, + "log_odds_ratio": -0.7081095576286316, + "logits/chosen": 0.42160993814468384, + "logits/rejected": 0.17291218042373657, + "logps/chosen": -1.851615309715271, + "logps/rejected": -2.3373863697052, + "loss": 3.9019, + "nll_loss": 3.831056833267212, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1851615458726883, + "rewards/margins": 0.04857710748910904, + "rewards/rejected": -0.23373863101005554, + "step": 311 + }, + { + "epoch": 0.19409020217729395, + "grad_norm": 0.47805315256118774, + "learning_rate": 4.22e-05, + "log_odds_chosen": 1.0429258346557617, + "log_odds_ratio": -0.42988187074661255, + "logits/chosen": 0.12882010638713837, + "logits/rejected": 0.03473813459277153, + "logps/chosen": -1.2987618446350098, + "logps/rejected": -2.2001380920410156, + "loss": 2.8064, + "nll_loss": 2.7633748054504395, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12987619638442993, + "rewards/margins": 0.09013761579990387, + "rewards/rejected": -0.2200138121843338, + "step": 312 + }, + { + "epoch": 0.19471228615863143, + "grad_norm": 0.633873462677002, + "learning_rate": 4.2175e-05, + "log_odds_chosen": 0.6489430665969849, + "log_odds_ratio": -0.5409046411514282, + "logits/chosen": 0.1894546002149582, + "logits/rejected": -0.016907554119825363, + "logps/chosen": -1.4098745584487915, + "logps/rejected": -1.8865466117858887, + "loss": 3.4695, + "nll_loss": 3.4154553413391113, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14098745584487915, + "rewards/margins": 0.047667186707258224, + "rewards/rejected": -0.18865466117858887, + "step": 313 + }, + { + "epoch": 0.1953343701399689, + "grad_norm": 0.4453391432762146, + "learning_rate": 4.215e-05, + "log_odds_chosen": 1.4420627355575562, + "log_odds_ratio": -0.4442042112350464, + "logits/chosen": 0.13987398147583008, + "logits/rejected": 0.13250389695167542, + "logps/chosen": -1.3193628787994385, + "logps/rejected": -2.651116371154785, + "loss": 2.7875, + "nll_loss": 2.743088722229004, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13193629682064056, + "rewards/margins": 0.133175328373909, + "rewards/rejected": -0.26511162519454956, + "step": 314 + }, + { + "epoch": 0.19595645412130638, + "grad_norm": 0.48520854115486145, + "learning_rate": 4.2125e-05, + "log_odds_chosen": 1.0938706398010254, + "log_odds_ratio": -0.3769736588001251, + "logits/chosen": 0.14695967733860016, + "logits/rejected": 0.03137361258268356, + "logps/chosen": -1.062088966369629, + "logps/rejected": -1.953197717666626, + "loss": 2.8743, + "nll_loss": 2.8366215229034424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10620889812707901, + "rewards/margins": 0.08911087363958359, + "rewards/rejected": -0.1953197717666626, + "step": 315 + }, + { + "epoch": 0.19657853810264386, + "grad_norm": 0.4708939492702484, + "learning_rate": 4.21e-05, + "log_odds_chosen": 0.7692523002624512, + "log_odds_ratio": -0.4789087176322937, + "logits/chosen": 0.13523553311824799, + "logits/rejected": 0.05263520032167435, + "logps/chosen": -1.1547871828079224, + "logps/rejected": -1.744168996810913, + "loss": 3.1827, + "nll_loss": 3.134774684906006, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11547872424125671, + "rewards/margins": 0.05893819034099579, + "rewards/rejected": -0.1744169145822525, + "step": 316 + }, + { + "epoch": 0.19720062208398134, + "grad_norm": 0.5101141929626465, + "learning_rate": 4.2075000000000006e-05, + "log_odds_chosen": 0.9251267910003662, + "log_odds_ratio": -0.420250803232193, + "logits/chosen": 0.1897871047258377, + "logits/rejected": 0.0760931670665741, + "logps/chosen": -1.4300227165222168, + "logps/rejected": -2.243600845336914, + "loss": 3.0869, + "nll_loss": 3.044924259185791, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14300227165222168, + "rewards/margins": 0.08135779947042465, + "rewards/rejected": -0.22436007857322693, + "step": 317 + }, + { + "epoch": 0.19782270606531882, + "grad_norm": 0.4665040373802185, + "learning_rate": 4.205e-05, + "log_odds_chosen": 1.6941817998886108, + "log_odds_ratio": -0.3107852339744568, + "logits/chosen": 0.10233496129512787, + "logits/rejected": -0.04454576224088669, + "logps/chosen": -1.426357388496399, + "logps/rejected": -2.9099607467651367, + "loss": 3.0493, + "nll_loss": 3.018172025680542, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14263573288917542, + "rewards/margins": 0.14836034178733826, + "rewards/rejected": -0.29099607467651367, + "step": 318 + }, + { + "epoch": 0.1984447900466563, + "grad_norm": 0.8431224822998047, + "learning_rate": 4.2025000000000005e-05, + "log_odds_chosen": 0.7668442130088806, + "log_odds_ratio": -0.489085853099823, + "logits/chosen": 0.1494210809469223, + "logits/rejected": 0.06121387332677841, + "logps/chosen": -1.5238502025604248, + "logps/rejected": -2.1793744564056396, + "loss": 3.1432, + "nll_loss": 3.0943074226379395, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15238502621650696, + "rewards/margins": 0.06555242836475372, + "rewards/rejected": -0.2179374396800995, + "step": 319 + }, + { + "epoch": 0.19906687402799378, + "grad_norm": 0.6626170873641968, + "learning_rate": 4.2e-05, + "log_odds_chosen": 1.027713656425476, + "log_odds_ratio": -0.37676990032196045, + "logits/chosen": 0.16855540871620178, + "logits/rejected": -0.08387507498264313, + "logps/chosen": -1.222766637802124, + "logps/rejected": -2.0757288932800293, + "loss": 2.8647, + "nll_loss": 2.827024459838867, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1222766637802124, + "rewards/margins": 0.08529624342918396, + "rewards/rejected": -0.20757290720939636, + "step": 320 + }, + { + "epoch": 0.19968895800933126, + "grad_norm": 0.4691254794597626, + "learning_rate": 4.1975000000000004e-05, + "log_odds_chosen": 1.1109933853149414, + "log_odds_ratio": -0.37901267409324646, + "logits/chosen": 0.14058086276054382, + "logits/rejected": -0.0027409703470766544, + "logps/chosen": -0.9070495367050171, + "logps/rejected": -1.6633799076080322, + "loss": 2.8839, + "nll_loss": 2.846041202545166, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09070496261119843, + "rewards/margins": 0.07563304156064987, + "rewards/rejected": -0.1663379967212677, + "step": 321 + }, + { + "epoch": 0.20031104199066874, + "grad_norm": 0.5388383865356445, + "learning_rate": 4.195e-05, + "log_odds_chosen": 1.4941861629486084, + "log_odds_ratio": -0.2680909335613251, + "logits/chosen": 0.11168316006660461, + "logits/rejected": -0.132093608379364, + "logps/chosen": -0.9982537031173706, + "logps/rejected": -1.9839081764221191, + "loss": 2.9087, + "nll_loss": 2.881884813308716, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09982537478208542, + "rewards/margins": 0.09856545180082321, + "rewards/rejected": -0.19839082658290863, + "step": 322 + }, + { + "epoch": 0.20093312597200622, + "grad_norm": 0.39389094710350037, + "learning_rate": 4.1925e-05, + "log_odds_chosen": 1.2304775714874268, + "log_odds_ratio": -0.4389082193374634, + "logits/chosen": 0.12545819580554962, + "logits/rejected": 0.040004927664995193, + "logps/chosen": -1.345543384552002, + "logps/rejected": -2.4424848556518555, + "loss": 3.1398, + "nll_loss": 3.095932722091675, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13455434143543243, + "rewards/margins": 0.10969413816928864, + "rewards/rejected": -0.24424846470355988, + "step": 323 + }, + { + "epoch": 0.2015552099533437, + "grad_norm": 0.45025521516799927, + "learning_rate": 4.19e-05, + "log_odds_chosen": 0.9204633831977844, + "log_odds_ratio": -0.5545144081115723, + "logits/chosen": 0.04150621220469475, + "logits/rejected": -0.08123153448104858, + "logps/chosen": -1.1536731719970703, + "logps/rejected": -1.8100297451019287, + "loss": 3.0471, + "nll_loss": 2.9916718006134033, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11536730825901031, + "rewards/margins": 0.06563566625118256, + "rewards/rejected": -0.18100297451019287, + "step": 324 + }, + { + "epoch": 0.20217729393468117, + "grad_norm": 0.4303993880748749, + "learning_rate": 4.1875e-05, + "log_odds_chosen": 1.6544878482818604, + "log_odds_ratio": -0.19887760281562805, + "logits/chosen": 0.08420052379369736, + "logits/rejected": 0.0031083375215530396, + "logps/chosen": -1.3029048442840576, + "logps/rejected": -2.700894594192505, + "loss": 3.1539, + "nll_loss": 3.1340394020080566, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13029049336910248, + "rewards/margins": 0.13979899883270264, + "rewards/rejected": -0.2700894773006439, + "step": 325 + }, + { + "epoch": 0.20279937791601865, + "grad_norm": 1.0563331842422485, + "learning_rate": 4.185e-05, + "log_odds_chosen": 0.21350765228271484, + "log_odds_ratio": -0.6822980642318726, + "logits/chosen": -0.024682089686393738, + "logits/rejected": -0.08352095633745193, + "logps/chosen": -1.4727632999420166, + "logps/rejected": -1.5891326665878296, + "loss": 2.8543, + "nll_loss": 2.7860758304595947, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1472763568162918, + "rewards/margins": 0.011636918410658836, + "rewards/rejected": -0.1589132696390152, + "step": 326 + }, + { + "epoch": 0.20342146189735613, + "grad_norm": 0.48655053973197937, + "learning_rate": 4.1825e-05, + "log_odds_chosen": 0.36451324820518494, + "log_odds_ratio": -0.6466756463050842, + "logits/chosen": 0.02400357648730278, + "logits/rejected": -0.04008547589182854, + "logps/chosen": -1.255650520324707, + "logps/rejected": -1.5385801792144775, + "loss": 3.1882, + "nll_loss": 3.1234936714172363, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1255650520324707, + "rewards/margins": 0.02829296514391899, + "rewards/rejected": -0.15385802090168, + "step": 327 + }, + { + "epoch": 0.2040435458786936, + "grad_norm": 0.5478668212890625, + "learning_rate": 4.18e-05, + "log_odds_chosen": 1.6087690591812134, + "log_odds_ratio": -0.31839194893836975, + "logits/chosen": 0.11044462025165558, + "logits/rejected": -0.06441525369882584, + "logps/chosen": -1.167734146118164, + "logps/rejected": -2.54228138923645, + "loss": 2.9786, + "nll_loss": 2.9467685222625732, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11677341163158417, + "rewards/margins": 0.13745473325252533, + "rewards/rejected": -0.2542281448841095, + "step": 328 + }, + { + "epoch": 0.20466562986003112, + "grad_norm": 0.3749350309371948, + "learning_rate": 4.1775000000000006e-05, + "log_odds_chosen": 0.4586121439933777, + "log_odds_ratio": -0.5654648542404175, + "logits/chosen": 0.09604926407337189, + "logits/rejected": -0.018427755683660507, + "logps/chosen": -1.2352399826049805, + "logps/rejected": -1.6107468605041504, + "loss": 3.0382, + "nll_loss": 2.981649875640869, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1235240027308464, + "rewards/margins": 0.03755068778991699, + "rewards/rejected": -0.161074697971344, + "step": 329 + }, + { + "epoch": 0.2052877138413686, + "grad_norm": 0.4368545413017273, + "learning_rate": 4.175e-05, + "log_odds_chosen": 0.8839046955108643, + "log_odds_ratio": -0.5126104354858398, + "logits/chosen": -0.007048317231237888, + "logits/rejected": -0.09785065799951553, + "logps/chosen": -1.0619440078735352, + "logps/rejected": -1.7408676147460938, + "loss": 3.0543, + "nll_loss": 3.0030674934387207, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.106194406747818, + "rewards/margins": 0.06789234280586243, + "rewards/rejected": -0.17408674955368042, + "step": 330 + }, + { + "epoch": 0.20590979782270608, + "grad_norm": 0.5278705954551697, + "learning_rate": 4.1725000000000005e-05, + "log_odds_chosen": 0.30167797207832336, + "log_odds_ratio": -0.6502044796943665, + "logits/chosen": 0.032889269292354584, + "logits/rejected": 0.03355567902326584, + "logps/chosen": -1.4090936183929443, + "logps/rejected": -1.6204454898834229, + "loss": 3.1308, + "nll_loss": 3.0657973289489746, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1409093588590622, + "rewards/margins": 0.021135203540325165, + "rewards/rejected": -0.16204456984996796, + "step": 331 + }, + { + "epoch": 0.20653188180404355, + "grad_norm": 0.4414202868938446, + "learning_rate": 4.17e-05, + "log_odds_chosen": 1.4241623878479004, + "log_odds_ratio": -0.3517926335334778, + "logits/chosen": 0.11898308247327805, + "logits/rejected": -0.013245618902146816, + "logps/chosen": -0.9463114738464355, + "logps/rejected": -2.077383041381836, + "loss": 2.8483, + "nll_loss": 2.8131093978881836, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0946311503648758, + "rewards/margins": 0.11310717463493347, + "rewards/rejected": -0.20773832499980927, + "step": 332 + }, + { + "epoch": 0.20715396578538103, + "grad_norm": 0.38851961493492126, + "learning_rate": 4.1675e-05, + "log_odds_chosen": 0.28314775228500366, + "log_odds_ratio": -0.578311562538147, + "logits/chosen": -0.008570694364607334, + "logits/rejected": -0.05588866025209427, + "logps/chosen": -1.1838418245315552, + "logps/rejected": -1.3598947525024414, + "loss": 2.8154, + "nll_loss": 2.757525682449341, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11838418245315552, + "rewards/margins": 0.017605291679501534, + "rewards/rejected": -0.1359894871711731, + "step": 333 + }, + { + "epoch": 0.2077760497667185, + "grad_norm": 0.6326988935470581, + "learning_rate": 4.165e-05, + "log_odds_chosen": 0.140260249376297, + "log_odds_ratio": -0.677656888961792, + "logits/chosen": 0.025349613279104233, + "logits/rejected": -0.08781476318836212, + "logps/chosen": -1.341933250427246, + "logps/rejected": -1.4497263431549072, + "loss": 2.7444, + "nll_loss": 2.676629066467285, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1341933161020279, + "rewards/margins": 0.010779321193695068, + "rewards/rejected": -0.14497263729572296, + "step": 334 + }, + { + "epoch": 0.208398133748056, + "grad_norm": 0.4192201793193817, + "learning_rate": 4.1625e-05, + "log_odds_chosen": 1.2017258405685425, + "log_odds_ratio": -0.373602032661438, + "logits/chosen": 0.16877499222755432, + "logits/rejected": 0.06103990972042084, + "logps/chosen": -1.1729239225387573, + "logps/rejected": -1.9996774196624756, + "loss": 3.3254, + "nll_loss": 3.2880568504333496, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1172923892736435, + "rewards/margins": 0.08267535269260406, + "rewards/rejected": -0.19996774196624756, + "step": 335 + }, + { + "epoch": 0.20902021772939347, + "grad_norm": 0.5239099264144897, + "learning_rate": 4.16e-05, + "log_odds_chosen": 0.15256771445274353, + "log_odds_ratio": -0.6872279047966003, + "logits/chosen": 0.0012313630431890488, + "logits/rejected": -0.11143770813941956, + "logps/chosen": -1.2717194557189941, + "logps/rejected": -1.4060418605804443, + "loss": 2.7665, + "nll_loss": 2.6978044509887695, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12717196345329285, + "rewards/margins": 0.013432244770228863, + "rewards/rejected": -0.1406041979789734, + "step": 336 + }, + { + "epoch": 0.20964230171073095, + "grad_norm": 0.46690982580184937, + "learning_rate": 4.1575e-05, + "log_odds_chosen": 1.1506333351135254, + "log_odds_ratio": -0.396980345249176, + "logits/chosen": 0.14569717645645142, + "logits/rejected": -0.02970746159553528, + "logps/chosen": -0.9054533243179321, + "logps/rejected": -1.7300324440002441, + "loss": 3.1932, + "nll_loss": 3.153456926345825, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09054533392190933, + "rewards/margins": 0.08245791494846344, + "rewards/rejected": -0.17300325632095337, + "step": 337 + }, + { + "epoch": 0.21026438569206843, + "grad_norm": 0.4816306233406067, + "learning_rate": 4.155e-05, + "log_odds_chosen": 1.2588977813720703, + "log_odds_ratio": -0.38654831051826477, + "logits/chosen": 0.1284165233373642, + "logits/rejected": 0.061425551772117615, + "logps/chosen": -1.5059263706207275, + "logps/rejected": -2.580245018005371, + "loss": 2.7022, + "nll_loss": 2.6635427474975586, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1505926251411438, + "rewards/margins": 0.10743187367916107, + "rewards/rejected": -0.25802451372146606, + "step": 338 + }, + { + "epoch": 0.2108864696734059, + "grad_norm": 0.5407478213310242, + "learning_rate": 4.1525e-05, + "log_odds_chosen": 1.0435866117477417, + "log_odds_ratio": -0.4741755723953247, + "logits/chosen": 0.07317960262298584, + "logits/rejected": 0.008852528408169746, + "logps/chosen": -1.398842215538025, + "logps/rejected": -2.2561936378479004, + "loss": 3.1174, + "nll_loss": 3.0700323581695557, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13988423347473145, + "rewards/margins": 0.08573515713214874, + "rewards/rejected": -0.225619375705719, + "step": 339 + }, + { + "epoch": 0.2115085536547434, + "grad_norm": 0.5428896546363831, + "learning_rate": 4.15e-05, + "log_odds_chosen": 1.3254553079605103, + "log_odds_ratio": -0.3870584964752197, + "logits/chosen": 0.12351932376623154, + "logits/rejected": -0.04987223446369171, + "logps/chosen": -1.1178048849105835, + "logps/rejected": -2.1846683025360107, + "loss": 2.6345, + "nll_loss": 2.5957448482513428, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11178049445152283, + "rewards/margins": 0.10668633878231049, + "rewards/rejected": -0.2184668481349945, + "step": 340 + }, + { + "epoch": 0.21213063763608087, + "grad_norm": 0.49065813422203064, + "learning_rate": 4.1475000000000005e-05, + "log_odds_chosen": 0.6055381298065186, + "log_odds_ratio": -0.5067149996757507, + "logits/chosen": 0.11017728596925735, + "logits/rejected": -0.005071556195616722, + "logps/chosen": -1.3153109550476074, + "logps/rejected": -1.7827832698822021, + "loss": 3.0023, + "nll_loss": 2.9516162872314453, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13153107464313507, + "rewards/margins": 0.04674723744392395, + "rewards/rejected": -0.17827832698822021, + "step": 341 + }, + { + "epoch": 0.21275272161741834, + "grad_norm": 0.38548681139945984, + "learning_rate": 4.145e-05, + "log_odds_chosen": 1.6406880617141724, + "log_odds_ratio": -0.32489141821861267, + "logits/chosen": 0.20307300984859467, + "logits/rejected": 0.008104506880044937, + "logps/chosen": -0.8666763305664062, + "logps/rejected": -2.039579391479492, + "loss": 3.6414, + "nll_loss": 3.608938455581665, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08666763454675674, + "rewards/margins": 0.11729030311107635, + "rewards/rejected": -0.2039579451084137, + "step": 342 + }, + { + "epoch": 0.21337480559875582, + "grad_norm": 0.44505774974823, + "learning_rate": 4.1425000000000004e-05, + "log_odds_chosen": 0.9692625999450684, + "log_odds_ratio": -0.4369995594024658, + "logits/chosen": 0.17034012079238892, + "logits/rejected": -0.028462085872888565, + "logps/chosen": -1.098499059677124, + "logps/rejected": -1.6745946407318115, + "loss": 3.3937, + "nll_loss": 3.34997296333313, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10984990000724792, + "rewards/margins": 0.05760956183075905, + "rewards/rejected": -0.16745945811271667, + "step": 343 + }, + { + "epoch": 0.2139968895800933, + "grad_norm": 0.4816812574863434, + "learning_rate": 4.14e-05, + "log_odds_chosen": 1.5928500890731812, + "log_odds_ratio": -0.23111592233181, + "logits/chosen": 0.188461571931839, + "logits/rejected": 0.022169630974531174, + "logps/chosen": -0.9164315462112427, + "logps/rejected": -2.1156816482543945, + "loss": 2.7098, + "nll_loss": 2.6867213249206543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09164315462112427, + "rewards/margins": 0.11992499977350235, + "rewards/rejected": -0.21156814694404602, + "step": 344 + }, + { + "epoch": 0.21461897356143078, + "grad_norm": 0.39990314841270447, + "learning_rate": 4.1375e-05, + "log_odds_chosen": 0.49777552485466003, + "log_odds_ratio": -0.6073991656303406, + "logits/chosen": 0.18223583698272705, + "logits/rejected": 0.04722006618976593, + "logps/chosen": -1.0849920511245728, + "logps/rejected": -1.4609473943710327, + "loss": 3.2362, + "nll_loss": 3.1754815578460693, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10849921405315399, + "rewards/margins": 0.03759553283452988, + "rewards/rejected": -0.14609473943710327, + "step": 345 + }, + { + "epoch": 0.21524105754276826, + "grad_norm": 0.5857303738594055, + "learning_rate": 4.135e-05, + "log_odds_chosen": 0.844973087310791, + "log_odds_ratio": -0.46381595730781555, + "logits/chosen": 0.2590794265270233, + "logits/rejected": 0.1343073546886444, + "logps/chosen": -1.3666094541549683, + "logps/rejected": -2.0936036109924316, + "loss": 3.0643, + "nll_loss": 3.0179357528686523, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13666093349456787, + "rewards/margins": 0.0726994052529335, + "rewards/rejected": -0.20936036109924316, + "step": 346 + }, + { + "epoch": 0.21586314152410577, + "grad_norm": 0.567475438117981, + "learning_rate": 4.1325e-05, + "log_odds_chosen": 0.8636181354522705, + "log_odds_ratio": -0.40255892276763916, + "logits/chosen": 0.20596131682395935, + "logits/rejected": 0.061537884175777435, + "logps/chosen": -1.3861594200134277, + "logps/rejected": -2.1171822547912598, + "loss": 2.8103, + "nll_loss": 2.770002603530884, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1386159509420395, + "rewards/margins": 0.07310228049755096, + "rewards/rejected": -0.21171823143959045, + "step": 347 + }, + { + "epoch": 0.21648522550544325, + "grad_norm": 0.5971509218215942, + "learning_rate": 4.13e-05, + "log_odds_chosen": 2.9550795555114746, + "log_odds_ratio": -0.1911197304725647, + "logits/chosen": 0.29377758502960205, + "logits/rejected": 0.28102779388427734, + "logps/chosen": -1.3482720851898193, + "logps/rejected": -4.058900833129883, + "loss": 3.0978, + "nll_loss": 3.0786807537078857, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13482721149921417, + "rewards/margins": 0.2710628807544708, + "rewards/rejected": -0.4058900773525238, + "step": 348 + }, + { + "epoch": 0.21710730948678073, + "grad_norm": 0.44494226574897766, + "learning_rate": 4.1275e-05, + "log_odds_chosen": 1.0941739082336426, + "log_odds_ratio": -0.4475710391998291, + "logits/chosen": 0.15722906589508057, + "logits/rejected": 0.02086580917239189, + "logps/chosen": -1.191224455833435, + "logps/rejected": -2.0817713737487793, + "loss": 2.9021, + "nll_loss": 2.8573296070098877, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1191224530339241, + "rewards/margins": 0.08905468881130219, + "rewards/rejected": -0.20817714929580688, + "step": 349 + }, + { + "epoch": 0.2177293934681182, + "grad_norm": 0.5429697632789612, + "learning_rate": 4.125e-05, + "log_odds_chosen": 1.2548682689666748, + "log_odds_ratio": -0.37429577112197876, + "logits/chosen": 0.28128620982170105, + "logits/rejected": 0.08453608304262161, + "logps/chosen": -1.363343596458435, + "logps/rejected": -2.3076062202453613, + "loss": 3.444, + "nll_loss": 3.4065957069396973, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1363343596458435, + "rewards/margins": 0.09442625939846039, + "rewards/rejected": -0.2307606041431427, + "step": 350 + }, + { + "epoch": 0.21835147744945568, + "grad_norm": 0.41189780831336975, + "learning_rate": 4.1225e-05, + "log_odds_chosen": 1.1283422708511353, + "log_odds_ratio": -0.41111886501312256, + "logits/chosen": 0.27242082357406616, + "logits/rejected": 0.169078066945076, + "logps/chosen": -1.1591994762420654, + "logps/rejected": -2.119546413421631, + "loss": 3.0923, + "nll_loss": 3.0512022972106934, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11591994762420654, + "rewards/margins": 0.0960346907377243, + "rewards/rejected": -0.21195463836193085, + "step": 351 + }, + { + "epoch": 0.21897356143079316, + "grad_norm": 0.4804132878780365, + "learning_rate": 4.12e-05, + "log_odds_chosen": 2.3194806575775146, + "log_odds_ratio": -0.2600080370903015, + "logits/chosen": 0.2595370411872864, + "logits/rejected": 0.08692187070846558, + "logps/chosen": -0.8231872320175171, + "logps/rejected": -2.4552807807922363, + "loss": 3.3194, + "nll_loss": 3.293379306793213, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08231872320175171, + "rewards/margins": 0.16320934891700745, + "rewards/rejected": -0.24552807211875916, + "step": 352 + }, + { + "epoch": 0.21959564541213064, + "grad_norm": 0.5321614146232605, + "learning_rate": 4.1175000000000005e-05, + "log_odds_chosen": 0.7400184869766235, + "log_odds_ratio": -0.46396252512931824, + "logits/chosen": 0.18774618208408356, + "logits/rejected": 0.041869793087244034, + "logps/chosen": -1.439223051071167, + "logps/rejected": -2.082319736480713, + "loss": 2.9231, + "nll_loss": 2.8767247200012207, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14392229914665222, + "rewards/margins": 0.06430967152118683, + "rewards/rejected": -0.20823195576667786, + "step": 353 + }, + { + "epoch": 0.22021772939346812, + "grad_norm": 0.48990121483802795, + "learning_rate": 4.115e-05, + "log_odds_chosen": 1.467127799987793, + "log_odds_ratio": -0.3306187391281128, + "logits/chosen": 0.21457459032535553, + "logits/rejected": -0.0008951930794864893, + "logps/chosen": -1.305351734161377, + "logps/rejected": -2.5240726470947266, + "loss": 3.1787, + "nll_loss": 3.1456456184387207, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13053518533706665, + "rewards/margins": 0.12187208235263824, + "rewards/rejected": -0.2524072527885437, + "step": 354 + }, + { + "epoch": 0.2208398133748056, + "grad_norm": 0.4076468348503113, + "learning_rate": 4.1125000000000004e-05, + "log_odds_chosen": 3.02394962310791, + "log_odds_ratio": -0.1947459578514099, + "logits/chosen": 0.15360787510871887, + "logits/rejected": 0.13919305801391602, + "logps/chosen": -0.9943020939826965, + "logps/rejected": -3.5736258029937744, + "loss": 2.964, + "nll_loss": 2.944533348083496, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09943021088838577, + "rewards/margins": 0.2579323649406433, + "rewards/rejected": -0.3573625683784485, + "step": 355 + }, + { + "epoch": 0.22146189735614308, + "grad_norm": 0.4909219443798065, + "learning_rate": 4.11e-05, + "log_odds_chosen": 1.00634765625, + "log_odds_ratio": -0.43545883893966675, + "logits/chosen": 0.14945323765277863, + "logits/rejected": 0.019120551645755768, + "logps/chosen": -1.1865514516830444, + "logps/rejected": -2.035024642944336, + "loss": 3.0395, + "nll_loss": 2.995992660522461, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11865514516830444, + "rewards/margins": 0.08484730869531631, + "rewards/rejected": -0.20350246131420135, + "step": 356 + }, + { + "epoch": 0.22208398133748056, + "grad_norm": 0.7641182541847229, + "learning_rate": 4.1075e-05, + "log_odds_chosen": 1.090247392654419, + "log_odds_ratio": -0.37240341305732727, + "logits/chosen": 0.05090628191828728, + "logits/rejected": -0.03716772794723511, + "logps/chosen": -1.2965291738510132, + "logps/rejected": -2.2305068969726562, + "loss": 2.3492, + "nll_loss": 2.3119568824768066, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12965291738510132, + "rewards/margins": 0.09339778125286102, + "rewards/rejected": -0.22305069863796234, + "step": 357 + }, + { + "epoch": 0.22270606531881804, + "grad_norm": 0.5538558959960938, + "learning_rate": 4.105e-05, + "log_odds_chosen": 0.3712920546531677, + "log_odds_ratio": -0.6206623315811157, + "logits/chosen": 0.120402030646801, + "logits/rejected": -0.043820858001708984, + "logps/chosen": -1.399167537689209, + "logps/rejected": -1.6653398275375366, + "loss": 2.9107, + "nll_loss": 2.848662853240967, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13991674780845642, + "rewards/margins": 0.02661724016070366, + "rewards/rejected": -0.16653397679328918, + "step": 358 + }, + { + "epoch": 0.22332814930015552, + "grad_norm": 0.4002559185028076, + "learning_rate": 4.1025e-05, + "log_odds_chosen": 0.884466290473938, + "log_odds_ratio": -0.485196590423584, + "logits/chosen": 0.16781805455684662, + "logits/rejected": 0.09334772825241089, + "logps/chosen": -1.0677001476287842, + "logps/rejected": -1.5183968544006348, + "loss": 3.1326, + "nll_loss": 3.084113597869873, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10677002370357513, + "rewards/margins": 0.04506966099143028, + "rewards/rejected": -0.15183967351913452, + "step": 359 + }, + { + "epoch": 0.223950233281493, + "grad_norm": 0.45029738545417786, + "learning_rate": 4.1e-05, + "log_odds_chosen": 0.6543417572975159, + "log_odds_ratio": -0.5618710517883301, + "logits/chosen": 0.2441825568675995, + "logits/rejected": 0.06579945981502533, + "logps/chosen": -1.5026423931121826, + "logps/rejected": -2.0812129974365234, + "loss": 3.486, + "nll_loss": 3.4297986030578613, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15026423335075378, + "rewards/margins": 0.05785707011818886, + "rewards/rejected": -0.20812132954597473, + "step": 360 + }, + { + "epoch": 0.22457231726283047, + "grad_norm": 0.40222129225730896, + "learning_rate": 4.0975e-05, + "log_odds_chosen": 0.6278929710388184, + "log_odds_ratio": -0.5374911427497864, + "logits/chosen": 0.1057760939002037, + "logits/rejected": 0.1274702399969101, + "logps/chosen": -1.3516254425048828, + "logps/rejected": -1.9070372581481934, + "loss": 3.0704, + "nll_loss": 3.0166256427764893, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13516254723072052, + "rewards/margins": 0.05554117262363434, + "rewards/rejected": -0.19070371985435486, + "step": 361 + }, + { + "epoch": 0.22519440124416795, + "grad_norm": 0.40902429819107056, + "learning_rate": 4.095e-05, + "log_odds_chosen": 1.3791449069976807, + "log_odds_ratio": -0.3504829406738281, + "logits/chosen": 0.09938657283782959, + "logits/rejected": -0.04903809353709221, + "logps/chosen": -0.8722810745239258, + "logps/rejected": -1.6328226327896118, + "loss": 3.063, + "nll_loss": 3.027944326400757, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08722810447216034, + "rewards/margins": 0.0760541558265686, + "rewards/rejected": -0.16328227519989014, + "step": 362 + }, + { + "epoch": 0.22581648522550543, + "grad_norm": 0.5102895498275757, + "learning_rate": 4.0925000000000005e-05, + "log_odds_chosen": 1.6143276691436768, + "log_odds_ratio": -0.24948588013648987, + "logits/chosen": 0.1433638036251068, + "logits/rejected": 0.04843227565288544, + "logps/chosen": -1.1994624137878418, + "logps/rejected": -2.5450100898742676, + "loss": 2.8281, + "nll_loss": 2.803130626678467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11994623392820358, + "rewards/margins": 0.13455477356910706, + "rewards/rejected": -0.2545010447502136, + "step": 363 + }, + { + "epoch": 0.2264385692068429, + "grad_norm": 0.44875141978263855, + "learning_rate": 4.09e-05, + "log_odds_chosen": 0.5202021598815918, + "log_odds_ratio": -0.5619540214538574, + "logits/chosen": 0.09863647073507309, + "logits/rejected": -0.04804396256804466, + "logps/chosen": -1.4303956031799316, + "logps/rejected": -1.8165171146392822, + "loss": 2.7145, + "nll_loss": 2.658271312713623, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14303956925868988, + "rewards/margins": 0.038612157106399536, + "rewards/rejected": -0.1816517412662506, + "step": 364 + }, + { + "epoch": 0.22706065318818042, + "grad_norm": 0.4456920325756073, + "learning_rate": 4.0875000000000004e-05, + "log_odds_chosen": 1.450744390487671, + "log_odds_ratio": -0.3320462107658386, + "logits/chosen": 0.09102889895439148, + "logits/rejected": -0.05849744379520416, + "logps/chosen": -1.1014940738677979, + "logps/rejected": -2.2115225791931152, + "loss": 3.3094, + "nll_loss": 3.2761690616607666, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11014941334724426, + "rewards/margins": 0.11100286990404129, + "rewards/rejected": -0.22115227580070496, + "step": 365 + }, + { + "epoch": 0.2276827371695179, + "grad_norm": 0.45694804191589355, + "learning_rate": 4.085e-05, + "log_odds_chosen": 0.8244272470474243, + "log_odds_ratio": -0.4660112261772156, + "logits/chosen": 0.12547065317630768, + "logits/rejected": -0.11931107938289642, + "logps/chosen": -1.0817716121673584, + "logps/rejected": -1.7403573989868164, + "loss": 2.971, + "nll_loss": 2.9244022369384766, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10817715525627136, + "rewards/margins": 0.06585858762264252, + "rewards/rejected": -0.17403574287891388, + "step": 366 + }, + { + "epoch": 0.22830482115085537, + "grad_norm": 0.5407382249832153, + "learning_rate": 4.0825e-05, + "log_odds_chosen": 2.503481864929199, + "log_odds_ratio": -0.13278703391551971, + "logits/chosen": 0.10355640947818756, + "logits/rejected": -0.04844285547733307, + "logps/chosen": -0.958575963973999, + "logps/rejected": -2.8335940837860107, + "loss": 2.8018, + "nll_loss": 2.788564682006836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09585760533809662, + "rewards/margins": 0.18750178813934326, + "rewards/rejected": -0.2833594083786011, + "step": 367 + }, + { + "epoch": 0.22892690513219285, + "grad_norm": 0.6709921956062317, + "learning_rate": 4.08e-05, + "log_odds_chosen": 0.13700546324253082, + "log_odds_ratio": -0.6914812326431274, + "logits/chosen": 0.29757586121559143, + "logits/rejected": 0.058456920087337494, + "logps/chosen": -1.8119229078292847, + "logps/rejected": -1.8780629634857178, + "loss": 3.6408, + "nll_loss": 3.5716493129730225, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1811922788619995, + "rewards/margins": 0.006614033132791519, + "rewards/rejected": -0.18780632317066193, + "step": 368 + }, + { + "epoch": 0.22954898911353033, + "grad_norm": 0.5146610736846924, + "learning_rate": 4.0775e-05, + "log_odds_chosen": 2.349278688430786, + "log_odds_ratio": -0.31061261892318726, + "logits/chosen": 0.2917703092098236, + "logits/rejected": 0.0938117578625679, + "logps/chosen": -1.0622313022613525, + "logps/rejected": -3.1076037883758545, + "loss": 3.223, + "nll_loss": 3.1919517517089844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10622313618659973, + "rewards/margins": 0.20453724265098572, + "rewards/rejected": -0.31076037883758545, + "step": 369 + }, + { + "epoch": 0.2301710730948678, + "grad_norm": 0.32432612776756287, + "learning_rate": 4.075e-05, + "log_odds_chosen": 1.531700849533081, + "log_odds_ratio": -0.26523175835609436, + "logits/chosen": 0.28544509410858154, + "logits/rejected": 0.16868844628334045, + "logps/chosen": -1.1210910081863403, + "logps/rejected": -2.238771915435791, + "loss": 3.6429, + "nll_loss": 3.61639666557312, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11210910975933075, + "rewards/margins": 0.11176810413599014, + "rewards/rejected": -0.2238771915435791, + "step": 370 + }, + { + "epoch": 0.2307931570762053, + "grad_norm": 0.4330305755138397, + "learning_rate": 4.0725e-05, + "log_odds_chosen": 1.4429383277893066, + "log_odds_ratio": -0.29110294580459595, + "logits/chosen": 0.26996076107025146, + "logits/rejected": 0.07536883652210236, + "logps/chosen": -1.1473499536514282, + "logps/rejected": -2.227917432785034, + "loss": 3.2222, + "nll_loss": 3.1930618286132812, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11473499983549118, + "rewards/margins": 0.10805673897266388, + "rewards/rejected": -0.22279173135757446, + "step": 371 + }, + { + "epoch": 0.23141524105754277, + "grad_norm": 0.5261003375053406, + "learning_rate": 4.07e-05, + "log_odds_chosen": 2.001164197921753, + "log_odds_ratio": -0.22053340077400208, + "logits/chosen": 0.22258536517620087, + "logits/rejected": -0.05359187722206116, + "logps/chosen": -0.984519362449646, + "logps/rejected": -2.612748146057129, + "loss": 3.3361, + "nll_loss": 3.314016342163086, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09845194220542908, + "rewards/margins": 0.1628229022026062, + "rewards/rejected": -0.2612748146057129, + "step": 372 + }, + { + "epoch": 0.23203732503888025, + "grad_norm": 0.5155344009399414, + "learning_rate": 4.0675e-05, + "log_odds_chosen": 0.6205844879150391, + "log_odds_ratio": -0.536081075668335, + "logits/chosen": 0.16748444736003876, + "logits/rejected": 0.03588568791747093, + "logps/chosen": -0.976515531539917, + "logps/rejected": -1.502734899520874, + "loss": 3.0293, + "nll_loss": 2.9756860733032227, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09765155613422394, + "rewards/margins": 0.052621930837631226, + "rewards/rejected": -0.15027348697185516, + "step": 373 + }, + { + "epoch": 0.23265940902021773, + "grad_norm": 0.46208807826042175, + "learning_rate": 4.065e-05, + "log_odds_chosen": 0.9866193532943726, + "log_odds_ratio": -0.41447627544403076, + "logits/chosen": 0.19046258926391602, + "logits/rejected": 0.1209864392876625, + "logps/chosen": -0.9953656196594238, + "logps/rejected": -1.6837455034255981, + "loss": 3.1588, + "nll_loss": 3.1173107624053955, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09953656792640686, + "rewards/margins": 0.0688379779458046, + "rewards/rejected": -0.16837453842163086, + "step": 374 + }, + { + "epoch": 0.2332814930015552, + "grad_norm": 0.5262720584869385, + "learning_rate": 4.0625000000000005e-05, + "log_odds_chosen": 1.1617014408111572, + "log_odds_ratio": -0.3742392063140869, + "logits/chosen": 0.2684021592140198, + "logits/rejected": 0.0713948905467987, + "logps/chosen": -1.4048610925674438, + "logps/rejected": -2.383072853088379, + "loss": 3.3322, + "nll_loss": 3.29473876953125, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14048609137535095, + "rewards/margins": 0.0978211984038353, + "rewards/rejected": -0.23830729722976685, + "step": 375 + }, + { + "epoch": 0.23390357698289269, + "grad_norm": 0.4657245874404907, + "learning_rate": 4.0600000000000004e-05, + "log_odds_chosen": 1.0480836629867554, + "log_odds_ratio": -0.3715401887893677, + "logits/chosen": 0.17920681834220886, + "logits/rejected": -0.03246612474322319, + "logps/chosen": -1.4259849786758423, + "logps/rejected": -2.313983201980591, + "loss": 3.0117, + "nll_loss": 2.9745419025421143, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14259850978851318, + "rewards/margins": 0.08879982680082321, + "rewards/rejected": -0.2313983291387558, + "step": 376 + }, + { + "epoch": 0.23452566096423016, + "grad_norm": 0.4098886549472809, + "learning_rate": 4.0575000000000004e-05, + "log_odds_chosen": 0.9914601445198059, + "log_odds_ratio": -0.43344154953956604, + "logits/chosen": 0.24701879918575287, + "logits/rejected": 0.019207902252674103, + "logps/chosen": -1.3967047929763794, + "logps/rejected": -2.260988235473633, + "loss": 3.6364, + "nll_loss": 3.5931003093719482, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1396704763174057, + "rewards/margins": 0.08642837405204773, + "rewards/rejected": -0.22609883546829224, + "step": 377 + }, + { + "epoch": 0.23514774494556764, + "grad_norm": 0.47420018911361694, + "learning_rate": 4.055e-05, + "log_odds_chosen": 2.0201539993286133, + "log_odds_ratio": -0.2169860154390335, + "logits/chosen": 0.25134286284446716, + "logits/rejected": 0.0921395868062973, + "logps/chosen": -1.110857605934143, + "logps/rejected": -2.820281744003296, + "loss": 2.9052, + "nll_loss": 2.8835182189941406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11108576506376266, + "rewards/margins": 0.17094242572784424, + "rewards/rejected": -0.2820281982421875, + "step": 378 + }, + { + "epoch": 0.23576982892690512, + "grad_norm": 0.6942557096481323, + "learning_rate": 4.0525e-05, + "log_odds_chosen": 1.0329240560531616, + "log_odds_ratio": -0.7378867268562317, + "logits/chosen": 0.11842384189367294, + "logits/rejected": 0.05716584250330925, + "logps/chosen": -1.7523798942565918, + "logps/rejected": -2.5439324378967285, + "loss": 2.6703, + "nll_loss": 2.596536159515381, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1752379834651947, + "rewards/margins": 0.07915525138378143, + "rewards/rejected": -0.25439321994781494, + "step": 379 + }, + { + "epoch": 0.2363919129082426, + "grad_norm": 0.49551472067832947, + "learning_rate": 4.05e-05, + "log_odds_chosen": 1.2499074935913086, + "log_odds_ratio": -0.4396931529045105, + "logits/chosen": 0.3040961027145386, + "logits/rejected": 0.13865968585014343, + "logps/chosen": -1.3086928129196167, + "logps/rejected": -2.284111499786377, + "loss": 3.2365, + "nll_loss": 3.192500114440918, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1308692842721939, + "rewards/margins": 0.09754188358783722, + "rewards/rejected": -0.22841116786003113, + "step": 380 + }, + { + "epoch": 0.23701399688958008, + "grad_norm": 0.4651765823364258, + "learning_rate": 4.0475e-05, + "log_odds_chosen": 0.9740829467773438, + "log_odds_ratio": -0.4101206362247467, + "logits/chosen": 0.18327367305755615, + "logits/rejected": -0.007276562973856926, + "logps/chosen": -1.0384430885314941, + "logps/rejected": -1.7929588556289673, + "loss": 2.9892, + "nll_loss": 2.9481401443481445, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10384431481361389, + "rewards/margins": 0.0754515752196312, + "rewards/rejected": -0.17929589748382568, + "step": 381 + }, + { + "epoch": 0.2376360808709176, + "grad_norm": 0.4907761216163635, + "learning_rate": 4.045000000000001e-05, + "log_odds_chosen": 0.9015824794769287, + "log_odds_ratio": -0.4331977963447571, + "logits/chosen": 0.11043369770050049, + "logits/rejected": 0.11448623239994049, + "logps/chosen": -1.1956536769866943, + "logps/rejected": -1.9301912784576416, + "loss": 3.053, + "nll_loss": 3.009702682495117, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11956537514925003, + "rewards/margins": 0.07345376163721085, + "rewards/rejected": -0.19301912188529968, + "step": 382 + }, + { + "epoch": 0.23825816485225507, + "grad_norm": 0.45944109559059143, + "learning_rate": 4.0425e-05, + "log_odds_chosen": 0.9134513735771179, + "log_odds_ratio": -0.4373936057090759, + "logits/chosen": 0.05043463408946991, + "logits/rejected": 0.10286575555801392, + "logps/chosen": -1.2276078462600708, + "logps/rejected": -1.9750827550888062, + "loss": 2.8257, + "nll_loss": 2.781928777694702, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12276078015565872, + "rewards/margins": 0.0747474879026413, + "rewards/rejected": -0.19750827550888062, + "step": 383 + }, + { + "epoch": 0.23888024883359255, + "grad_norm": 0.5201630592346191, + "learning_rate": 4.0400000000000006e-05, + "log_odds_chosen": 2.114027738571167, + "log_odds_ratio": -0.15464414656162262, + "logits/chosen": 0.2518041431903839, + "logits/rejected": 0.06087159365415573, + "logps/chosen": -0.7840594053268433, + "logps/rejected": -2.2629692554473877, + "loss": 3.3323, + "nll_loss": 3.316868782043457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0784059390425682, + "rewards/margins": 0.14789099991321564, + "rewards/rejected": -0.22629694640636444, + "step": 384 + }, + { + "epoch": 0.23950233281493002, + "grad_norm": 0.44370707869529724, + "learning_rate": 4.0375e-05, + "log_odds_chosen": 1.3410742282867432, + "log_odds_ratio": -0.322849839925766, + "logits/chosen": -0.04521143063902855, + "logits/rejected": -0.05069145932793617, + "logps/chosen": -1.05382239818573, + "logps/rejected": -2.046505928039551, + "loss": 2.6729, + "nll_loss": 2.6405997276306152, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10538224130868912, + "rewards/margins": 0.0992683619260788, + "rewards/rejected": -0.2046506106853485, + "step": 385 + }, + { + "epoch": 0.2401244167962675, + "grad_norm": 0.40675365924835205, + "learning_rate": 4.0350000000000005e-05, + "log_odds_chosen": 1.3835175037384033, + "log_odds_ratio": -0.26051661372184753, + "logits/chosen": 0.23836039006710052, + "logits/rejected": 0.06631730496883392, + "logps/chosen": -1.0143346786499023, + "logps/rejected": -2.0367674827575684, + "loss": 3.5034, + "nll_loss": 3.4773647785186768, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10143347829580307, + "rewards/margins": 0.10224328190088272, + "rewards/rejected": -0.2036767452955246, + "step": 386 + }, + { + "epoch": 0.24074650077760498, + "grad_norm": 0.46325162053108215, + "learning_rate": 4.0325000000000004e-05, + "log_odds_chosen": 0.7983840107917786, + "log_odds_ratio": -0.5282416939735413, + "logits/chosen": 0.2139953374862671, + "logits/rejected": 0.19283059239387512, + "logps/chosen": -1.2492709159851074, + "logps/rejected": -1.9619114398956299, + "loss": 2.8089, + "nll_loss": 2.756054639816284, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1249270886182785, + "rewards/margins": 0.07126405835151672, + "rewards/rejected": -0.19619114696979523, + "step": 387 + }, + { + "epoch": 0.24136858475894246, + "grad_norm": 0.5103418231010437, + "learning_rate": 4.0300000000000004e-05, + "log_odds_chosen": 1.5594842433929443, + "log_odds_ratio": -0.2448807656764984, + "logits/chosen": 0.17046277225017548, + "logits/rejected": 0.07869112491607666, + "logps/chosen": -1.0376362800598145, + "logps/rejected": -2.278496503829956, + "loss": 2.7251, + "nll_loss": 2.7006213665008545, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1037636250257492, + "rewards/margins": 0.12408602982759476, + "rewards/rejected": -0.22784966230392456, + "step": 388 + }, + { + "epoch": 0.24199066874027994, + "grad_norm": 0.3780629634857178, + "learning_rate": 4.0275e-05, + "log_odds_chosen": 1.3771535158157349, + "log_odds_ratio": -0.3319317102432251, + "logits/chosen": 0.32596805691719055, + "logits/rejected": 0.1879710555076599, + "logps/chosen": -1.2158101797103882, + "logps/rejected": -2.255094289779663, + "loss": 3.3506, + "nll_loss": 3.3174211978912354, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12158101797103882, + "rewards/margins": 0.10392840951681137, + "rewards/rejected": -0.22550944983959198, + "step": 389 + }, + { + "epoch": 0.24261275272161742, + "grad_norm": 0.4106887876987457, + "learning_rate": 4.025e-05, + "log_odds_chosen": 0.6944050788879395, + "log_odds_ratio": -0.46452128887176514, + "logits/chosen": 0.10568028688430786, + "logits/rejected": 0.0720917209982872, + "logps/chosen": -1.2601232528686523, + "logps/rejected": -1.7703832387924194, + "loss": 2.8362, + "nll_loss": 2.789783239364624, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12601232528686523, + "rewards/margins": 0.051026009023189545, + "rewards/rejected": -0.17703832685947418, + "step": 390 + }, + { + "epoch": 0.2432348367029549, + "grad_norm": 0.39343398809432983, + "learning_rate": 4.0225e-05, + "log_odds_chosen": 0.9674738645553589, + "log_odds_ratio": -0.5284614562988281, + "logits/chosen": 0.10175783932209015, + "logits/rejected": 0.03822772949934006, + "logps/chosen": -1.2551448345184326, + "logps/rejected": -2.0882956981658936, + "loss": 3.0856, + "nll_loss": 3.0327351093292236, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12551449239253998, + "rewards/margins": 0.08331510424613953, + "rewards/rejected": -0.2088295817375183, + "step": 391 + }, + { + "epoch": 0.24385692068429238, + "grad_norm": 0.4384991228580475, + "learning_rate": 4.02e-05, + "log_odds_chosen": 0.7777218818664551, + "log_odds_ratio": -0.4195319414138794, + "logits/chosen": 0.3055810332298279, + "logits/rejected": 0.05448678508400917, + "logps/chosen": -1.110863447189331, + "logps/rejected": -1.6843622922897339, + "loss": 3.2888, + "nll_loss": 3.2468209266662598, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11108633875846863, + "rewards/margins": 0.05734989792108536, + "rewards/rejected": -0.1684362292289734, + "step": 392 + }, + { + "epoch": 0.24447900466562986, + "grad_norm": 0.5314688682556152, + "learning_rate": 4.0175e-05, + "log_odds_chosen": 0.9773632884025574, + "log_odds_ratio": -0.4269845485687256, + "logits/chosen": 0.17569248378276825, + "logits/rejected": 0.1796281337738037, + "logps/chosen": -1.25931978225708, + "logps/rejected": -2.1036195755004883, + "loss": 2.7412, + "nll_loss": 2.6984646320343018, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.125931978225708, + "rewards/margins": 0.08442999422550201, + "rewards/rejected": -0.21036197245121002, + "step": 393 + }, + { + "epoch": 0.24510108864696734, + "grad_norm": 0.5598838329315186, + "learning_rate": 4.015000000000001e-05, + "log_odds_chosen": 1.459074854850769, + "log_odds_ratio": -0.30608847737312317, + "logits/chosen": 0.1628699004650116, + "logits/rejected": 0.10045505315065384, + "logps/chosen": -1.0321791172027588, + "logps/rejected": -2.095522880554199, + "loss": 2.6775, + "nll_loss": 2.646920680999756, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10321791470050812, + "rewards/margins": 0.10633436590433121, + "rewards/rejected": -0.20955227315425873, + "step": 394 + }, + { + "epoch": 0.24572317262830481, + "grad_norm": 0.31368446350097656, + "learning_rate": 4.0125e-05, + "log_odds_chosen": 1.034085750579834, + "log_odds_ratio": -0.4138377904891968, + "logits/chosen": 0.2146773636341095, + "logits/rejected": 0.06570446491241455, + "logps/chosen": -0.9849491715431213, + "logps/rejected": -1.6467113494873047, + "loss": 3.8255, + "nll_loss": 3.784114360809326, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09849491715431213, + "rewards/margins": 0.06617622822523117, + "rewards/rejected": -0.1646711528301239, + "step": 395 + }, + { + "epoch": 0.2463452566096423, + "grad_norm": 0.5984493494033813, + "learning_rate": 4.0100000000000006e-05, + "log_odds_chosen": 1.0793639421463013, + "log_odds_ratio": -0.47389787435531616, + "logits/chosen": 0.25743645429611206, + "logits/rejected": 0.023621711879968643, + "logps/chosen": -1.2558321952819824, + "logps/rejected": -2.103111505508423, + "loss": 3.2582, + "nll_loss": 3.2107701301574707, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.125583216547966, + "rewards/margins": 0.084727942943573, + "rewards/rejected": -0.210311159491539, + "step": 396 + }, + { + "epoch": 0.24696734059097977, + "grad_norm": 0.37724167108535767, + "learning_rate": 4.0075e-05, + "log_odds_chosen": 0.8990364670753479, + "log_odds_ratio": -0.4033519923686981, + "logits/chosen": 0.26665544509887695, + "logits/rejected": 0.15528859198093414, + "logps/chosen": -1.1553454399108887, + "logps/rejected": -1.854149580001831, + "loss": 3.484, + "nll_loss": 3.443643093109131, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11553453654050827, + "rewards/margins": 0.0698804259300232, + "rewards/rejected": -0.18541496992111206, + "step": 397 + }, + { + "epoch": 0.24758942457231725, + "grad_norm": 0.8459545373916626, + "learning_rate": 4.0050000000000004e-05, + "log_odds_chosen": 1.516906976699829, + "log_odds_ratio": -0.2759634852409363, + "logits/chosen": 0.23380643129348755, + "logits/rejected": 0.14687879383563995, + "logps/chosen": -1.2292195558547974, + "logps/rejected": -2.45884108543396, + "loss": 2.9524, + "nll_loss": 2.924772024154663, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12292195856571198, + "rewards/margins": 0.12296216189861298, + "rewards/rejected": -0.24588412046432495, + "step": 398 + }, + { + "epoch": 0.24821150855365473, + "grad_norm": 0.7587859630584717, + "learning_rate": 4.0025000000000004e-05, + "log_odds_chosen": 1.5884180068969727, + "log_odds_ratio": -0.3095462918281555, + "logits/chosen": 0.18802863359451294, + "logits/rejected": -0.01595812290906906, + "logps/chosen": -1.3920193910598755, + "logps/rejected": -2.773435115814209, + "loss": 3.247, + "nll_loss": 3.2160043716430664, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13920193910598755, + "rewards/margins": 0.13814157247543335, + "rewards/rejected": -0.2773435115814209, + "step": 399 + }, + { + "epoch": 0.24883359253499224, + "grad_norm": 0.7157498598098755, + "learning_rate": 4e-05, + "log_odds_chosen": 0.774174690246582, + "log_odds_ratio": -0.6406145691871643, + "logits/chosen": 0.3137771189212799, + "logits/rejected": 0.028129838407039642, + "logps/chosen": -1.4273900985717773, + "logps/rejected": -2.0852622985839844, + "loss": 4.025, + "nll_loss": 3.9609427452087402, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.14273901283740997, + "rewards/margins": 0.06578721851110458, + "rewards/rejected": -0.20852622389793396, + "step": 400 + }, + { + "epoch": 0.24945567651632972, + "grad_norm": 0.6268599033355713, + "learning_rate": 3.9975e-05, + "log_odds_chosen": 1.2294279336929321, + "log_odds_ratio": -0.4779622554779053, + "logits/chosen": 0.16509678959846497, + "logits/rejected": 0.07056587189435959, + "logps/chosen": -1.2676770687103271, + "logps/rejected": -2.3259618282318115, + "loss": 3.3218, + "nll_loss": 3.274020195007324, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12676770985126495, + "rewards/margins": 0.10582847893238068, + "rewards/rejected": -0.23259618878364563, + "step": 401 + }, + { + "epoch": 0.25007776049766717, + "grad_norm": 0.5052048563957214, + "learning_rate": 3.995e-05, + "log_odds_chosen": 1.3162705898284912, + "log_odds_ratio": -0.32979390025138855, + "logits/chosen": 0.14668475091457367, + "logits/rejected": 0.01875954680144787, + "logps/chosen": -1.279245138168335, + "logps/rejected": -2.367394208908081, + "loss": 3.0466, + "nll_loss": 3.013622522354126, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12792451679706573, + "rewards/margins": 0.10881491005420685, + "rewards/rejected": -0.23673942685127258, + "step": 402 + }, + { + "epoch": 0.2506998444790047, + "grad_norm": 0.586941659450531, + "learning_rate": 3.9925e-05, + "log_odds_chosen": 0.97333163022995, + "log_odds_ratio": -0.4262157678604126, + "logits/chosen": 0.199526846408844, + "logits/rejected": 0.12170709669589996, + "logps/chosen": -1.0942950248718262, + "logps/rejected": -1.8334949016571045, + "loss": 2.9178, + "nll_loss": 2.8751866817474365, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1094295084476471, + "rewards/margins": 0.07391998916864395, + "rewards/rejected": -0.18334949016571045, + "step": 403 + }, + { + "epoch": 0.2513219284603421, + "grad_norm": 0.7138959765434265, + "learning_rate": 3.99e-05, + "log_odds_chosen": 1.5978549718856812, + "log_odds_ratio": -0.6236208081245422, + "logits/chosen": 0.08655344694852829, + "logits/rejected": 0.06523916870355606, + "logps/chosen": -1.0683348178863525, + "logps/rejected": -2.3083322048187256, + "loss": 3.0347, + "nll_loss": 2.972341537475586, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10683348774909973, + "rewards/margins": 0.12399972975254059, + "rewards/rejected": -0.2308332324028015, + "step": 404 + }, + { + "epoch": 0.25194401244167963, + "grad_norm": 0.4803338050842285, + "learning_rate": 3.9875e-05, + "log_odds_chosen": 0.7191067337989807, + "log_odds_ratio": -0.4708095192909241, + "logits/chosen": 0.049791183322668076, + "logits/rejected": -0.03897171467542648, + "logps/chosen": -1.1012663841247559, + "logps/rejected": -1.5942356586456299, + "loss": 3.0085, + "nll_loss": 2.961468458175659, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11012664437294006, + "rewards/margins": 0.049296922981739044, + "rewards/rejected": -0.1594235748052597, + "step": 405 + }, + { + "epoch": 0.2525660964230171, + "grad_norm": 0.5744175314903259, + "learning_rate": 3.9850000000000006e-05, + "log_odds_chosen": 0.09449946880340576, + "log_odds_ratio": -0.6956384778022766, + "logits/chosen": 0.15730737149715424, + "logits/rejected": -0.03350464627146721, + "logps/chosen": -1.461653709411621, + "logps/rejected": -1.5220292806625366, + "loss": 3.3611, + "nll_loss": 3.2915573120117188, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1461653709411621, + "rewards/margins": 0.006037542596459389, + "rewards/rejected": -0.15220291912555695, + "step": 406 + }, + { + "epoch": 0.2531881804043546, + "grad_norm": 0.4293562173843384, + "learning_rate": 3.9825e-05, + "log_odds_chosen": 0.5901978611946106, + "log_odds_ratio": -0.5310933589935303, + "logits/chosen": 0.02693868800997734, + "logits/rejected": -0.0009972341358661652, + "logps/chosen": -1.046227216720581, + "logps/rejected": -1.3914040327072144, + "loss": 2.7241, + "nll_loss": 2.670952320098877, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1046227216720581, + "rewards/margins": 0.03451768681406975, + "rewards/rejected": -0.13914041221141815, + "step": 407 + }, + { + "epoch": 0.25381026438569204, + "grad_norm": 1.1754698753356934, + "learning_rate": 3.9800000000000005e-05, + "log_odds_chosen": 1.0236449241638184, + "log_odds_ratio": -0.46178126335144043, + "logits/chosen": 0.1028258204460144, + "logits/rejected": -0.047515541315078735, + "logps/chosen": -1.7946947813034058, + "logps/rejected": -2.7080202102661133, + "loss": 2.8661, + "nll_loss": 2.8199520111083984, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17946948111057281, + "rewards/margins": 0.09133254736661911, + "rewards/rejected": -0.27080202102661133, + "step": 408 + }, + { + "epoch": 0.25443234836702955, + "grad_norm": 0.4363967180252075, + "learning_rate": 3.9775e-05, + "log_odds_chosen": 1.6175098419189453, + "log_odds_ratio": -0.37422215938568115, + "logits/chosen": 0.22522154450416565, + "logits/rejected": 0.0489632673561573, + "logps/chosen": -0.9020571112632751, + "logps/rejected": -1.936967134475708, + "loss": 3.776, + "nll_loss": 3.7385733127593994, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09020572155714035, + "rewards/margins": 0.10349100083112717, + "rewards/rejected": -0.1936967372894287, + "step": 409 + }, + { + "epoch": 0.25505443234836706, + "grad_norm": 0.594385027885437, + "learning_rate": 3.9750000000000004e-05, + "log_odds_chosen": 1.2434051036834717, + "log_odds_ratio": -0.32507485151290894, + "logits/chosen": 0.14571481943130493, + "logits/rejected": 0.02298334613442421, + "logps/chosen": -1.0427602529525757, + "logps/rejected": -1.886343240737915, + "loss": 3.0004, + "nll_loss": 2.9678754806518555, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10427601635456085, + "rewards/margins": 0.08435830473899841, + "rewards/rejected": -0.18863432109355927, + "step": 410 + }, + { + "epoch": 0.2556765163297045, + "grad_norm": 0.41299790143966675, + "learning_rate": 3.9725e-05, + "log_odds_chosen": 0.741004228591919, + "log_odds_ratio": -0.4465721845626831, + "logits/chosen": 0.18152594566345215, + "logits/rejected": 0.13067328929901123, + "logps/chosen": -1.281395435333252, + "logps/rejected": -1.903831958770752, + "loss": 3.0622, + "nll_loss": 3.0175766944885254, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12813954055309296, + "rewards/margins": 0.06224365532398224, + "rewards/rejected": -0.1903831958770752, + "step": 411 + }, + { + "epoch": 0.256298600311042, + "grad_norm": 0.6275978088378906, + "learning_rate": 3.97e-05, + "log_odds_chosen": 0.0313977487385273, + "log_odds_ratio": -0.7415766716003418, + "logits/chosen": -0.03772881627082825, + "logits/rejected": 0.12028350681066513, + "logps/chosen": -1.4508265256881714, + "logps/rejected": -1.4406688213348389, + "loss": 2.3823, + "nll_loss": 2.30818772315979, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14508265256881714, + "rewards/margins": -0.001015770249068737, + "rewards/rejected": -0.14406687021255493, + "step": 412 + }, + { + "epoch": 0.25692068429237946, + "grad_norm": 0.4781981408596039, + "learning_rate": 3.9675e-05, + "log_odds_chosen": 1.5340921878814697, + "log_odds_ratio": -0.28959184885025024, + "logits/chosen": 0.34700697660446167, + "logits/rejected": 0.2925771176815033, + "logps/chosen": -1.3058507442474365, + "logps/rejected": -2.6288981437683105, + "loss": 3.491, + "nll_loss": 3.4620461463928223, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13058508932590485, + "rewards/margins": 0.13230472803115845, + "rewards/rejected": -0.2628898024559021, + "step": 413 + }, + { + "epoch": 0.25754276827371697, + "grad_norm": 0.494865357875824, + "learning_rate": 3.965e-05, + "log_odds_chosen": 1.5314979553222656, + "log_odds_ratio": -0.25367340445518494, + "logits/chosen": 0.18154457211494446, + "logits/rejected": 0.1370812952518463, + "logps/chosen": -0.9703949689865112, + "logps/rejected": -1.9872558116912842, + "loss": 3.0317, + "nll_loss": 3.006338119506836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09703950583934784, + "rewards/margins": 0.10168607532978058, + "rewards/rejected": -0.19872558116912842, + "step": 414 + }, + { + "epoch": 0.2581648522550544, + "grad_norm": 0.45149925351142883, + "learning_rate": 3.9625e-05, + "log_odds_chosen": 0.49176663160324097, + "log_odds_ratio": -0.6137273907661438, + "logits/chosen": 0.19695249199867249, + "logits/rejected": 0.18039798736572266, + "logps/chosen": -1.330711841583252, + "logps/rejected": -1.8058357238769531, + "loss": 3.122, + "nll_loss": 3.0606541633605957, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1330711841583252, + "rewards/margins": 0.047512397170066833, + "rewards/rejected": -0.18058358132839203, + "step": 415 + }, + { + "epoch": 0.25878693623639193, + "grad_norm": 0.4677426517009735, + "learning_rate": 3.960000000000001e-05, + "log_odds_chosen": 1.4863536357879639, + "log_odds_ratio": -0.3277415931224823, + "logits/chosen": 0.3289963901042938, + "logits/rejected": 0.295125275850296, + "logps/chosen": -1.3954858779907227, + "logps/rejected": -2.662785530090332, + "loss": 3.3443, + "nll_loss": 3.3114874362945557, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13954858481884003, + "rewards/margins": 0.12672998011112213, + "rewards/rejected": -0.26627856492996216, + "step": 416 + }, + { + "epoch": 0.2594090202177294, + "grad_norm": 0.5604446530342102, + "learning_rate": 3.9575e-05, + "log_odds_chosen": 1.654098629951477, + "log_odds_ratio": -0.34370043873786926, + "logits/chosen": 0.44623494148254395, + "logits/rejected": 0.26756906509399414, + "logps/chosen": -1.5715718269348145, + "logps/rejected": -3.0832409858703613, + "loss": 3.74, + "nll_loss": 3.7056450843811035, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15715718269348145, + "rewards/margins": 0.1511669158935547, + "rewards/rejected": -0.30832409858703613, + "step": 417 + }, + { + "epoch": 0.2600311041990669, + "grad_norm": 0.5016684532165527, + "learning_rate": 3.9550000000000006e-05, + "log_odds_chosen": 1.0671627521514893, + "log_odds_ratio": -0.34129106998443604, + "logits/chosen": 0.09102344512939453, + "logits/rejected": 0.1018139123916626, + "logps/chosen": -1.1463091373443604, + "logps/rejected": -1.9536468982696533, + "loss": 2.8157, + "nll_loss": 2.781564474105835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11463090777397156, + "rewards/margins": 0.0807337760925293, + "rewards/rejected": -0.19536468386650085, + "step": 418 + }, + { + "epoch": 0.26065318818040434, + "grad_norm": 0.9377952218055725, + "learning_rate": 3.9525e-05, + "log_odds_chosen": 2.2686891555786133, + "log_odds_ratio": -0.14280670881271362, + "logits/chosen": 0.29692742228507996, + "logits/rejected": 0.15223044157028198, + "logps/chosen": -1.185152292251587, + "logps/rejected": -3.158766269683838, + "loss": 2.8944, + "nll_loss": 2.880141258239746, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11851523816585541, + "rewards/margins": 0.19736140966415405, + "rewards/rejected": -0.31587663292884827, + "step": 419 + }, + { + "epoch": 0.26127527216174184, + "grad_norm": 0.5251947045326233, + "learning_rate": 3.9500000000000005e-05, + "log_odds_chosen": 1.5507677793502808, + "log_odds_ratio": -0.3057706654071808, + "logits/chosen": 0.32404136657714844, + "logits/rejected": 0.18444237112998962, + "logps/chosen": -1.1873574256896973, + "logps/rejected": -2.446171522140503, + "loss": 3.1758, + "nll_loss": 3.1452338695526123, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11873574554920197, + "rewards/margins": 0.1258814036846161, + "rewards/rejected": -0.24461714923381805, + "step": 420 + }, + { + "epoch": 0.2618973561430793, + "grad_norm": 0.4308774471282959, + "learning_rate": 3.9475000000000004e-05, + "log_odds_chosen": 0.3947753608226776, + "log_odds_ratio": -0.5612890124320984, + "logits/chosen": 0.13869617879390717, + "logits/rejected": 0.044950664043426514, + "logps/chosen": -1.2930006980895996, + "logps/rejected": -1.6258165836334229, + "loss": 2.9868, + "nll_loss": 2.9306952953338623, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.129300057888031, + "rewards/margins": 0.0332816019654274, + "rewards/rejected": -0.162581667304039, + "step": 421 + }, + { + "epoch": 0.2625194401244168, + "grad_norm": 0.5830161571502686, + "learning_rate": 3.9450000000000003e-05, + "log_odds_chosen": 1.1204309463500977, + "log_odds_ratio": -0.3383479714393616, + "logits/chosen": 0.07506772875785828, + "logits/rejected": -0.03575318679213524, + "logps/chosen": -1.3158361911773682, + "logps/rejected": -2.236865997314453, + "loss": 2.7736, + "nll_loss": 2.7397449016571045, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13158363103866577, + "rewards/margins": 0.09210299700498581, + "rewards/rejected": -0.22368663549423218, + "step": 422 + }, + { + "epoch": 0.26314152410575425, + "grad_norm": 0.44899922609329224, + "learning_rate": 3.9425e-05, + "log_odds_chosen": 0.8703987002372742, + "log_odds_ratio": -0.5454012155532837, + "logits/chosen": 0.2875048816204071, + "logits/rejected": 0.11582295596599579, + "logps/chosen": -1.363781213760376, + "logps/rejected": -2.0890636444091797, + "loss": 3.2192, + "nll_loss": 3.1646461486816406, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13637810945510864, + "rewards/margins": 0.07252823561429977, + "rewards/rejected": -0.208906352519989, + "step": 423 + }, + { + "epoch": 0.26376360808709176, + "grad_norm": 0.5044350028038025, + "learning_rate": 3.94e-05, + "log_odds_chosen": 1.7807879447937012, + "log_odds_ratio": -0.2641463577747345, + "logits/chosen": 0.25100329518318176, + "logits/rejected": 0.0806911289691925, + "logps/chosen": -1.1451863050460815, + "logps/rejected": -2.675082206726074, + "loss": 2.879, + "nll_loss": 2.852630138397217, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11451863497495651, + "rewards/margins": 0.15298959612846375, + "rewards/rejected": -0.26750820875167847, + "step": 424 + }, + { + "epoch": 0.2643856920684292, + "grad_norm": 0.5463127493858337, + "learning_rate": 3.9375e-05, + "log_odds_chosen": 1.7012643814086914, + "log_odds_ratio": -0.3870460093021393, + "logits/chosen": 0.1893111914396286, + "logits/rejected": 0.12804879248142242, + "logps/chosen": -1.3288249969482422, + "logps/rejected": -2.868129253387451, + "loss": 3.3723, + "nll_loss": 3.3335795402526855, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1328825205564499, + "rewards/margins": 0.1539304256439209, + "rewards/rejected": -0.286812961101532, + "step": 425 + }, + { + "epoch": 0.2650077760497667, + "grad_norm": 0.8619997501373291, + "learning_rate": 3.935e-05, + "log_odds_chosen": 0.8874719738960266, + "log_odds_ratio": -0.7657593488693237, + "logits/chosen": 0.08044306933879852, + "logits/rejected": -0.008549835532903671, + "logps/chosen": -1.7457143068313599, + "logps/rejected": -2.4340968132019043, + "loss": 2.9367, + "nll_loss": 2.8601346015930176, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1745714396238327, + "rewards/margins": 0.06883823126554489, + "rewards/rejected": -0.2434096783399582, + "step": 426 + }, + { + "epoch": 0.2656298600311042, + "grad_norm": 0.5804911851882935, + "learning_rate": 3.9325e-05, + "log_odds_chosen": 1.0181046724319458, + "log_odds_ratio": -0.5276904106140137, + "logits/chosen": 0.20519691705703735, + "logits/rejected": 0.11686623841524124, + "logps/chosen": -1.3472039699554443, + "logps/rejected": -2.2560369968414307, + "loss": 3.1032, + "nll_loss": 3.050477981567383, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13472039997577667, + "rewards/margins": 0.0908832848072052, + "rewards/rejected": -0.22560366988182068, + "step": 427 + }, + { + "epoch": 0.2662519440124417, + "grad_norm": 0.4489701986312866, + "learning_rate": 3.9300000000000007e-05, + "log_odds_chosen": 1.4261120557785034, + "log_odds_ratio": -0.3344850540161133, + "logits/chosen": 0.06815585494041443, + "logits/rejected": 0.005228975787758827, + "logps/chosen": -1.0918066501617432, + "logps/rejected": -2.263209342956543, + "loss": 2.801, + "nll_loss": 2.767580509185791, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10918065905570984, + "rewards/margins": 0.1171402856707573, + "rewards/rejected": -0.22632096707820892, + "step": 428 + }, + { + "epoch": 0.2668740279937792, + "grad_norm": 0.6455628871917725, + "learning_rate": 3.9275e-05, + "log_odds_chosen": 1.4339535236358643, + "log_odds_ratio": -0.32922857999801636, + "logits/chosen": 0.24753251671791077, + "logits/rejected": 0.17592373490333557, + "logps/chosen": -1.4441932439804077, + "logps/rejected": -2.687839984893799, + "loss": 3.094, + "nll_loss": 3.0610883235931396, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14441931247711182, + "rewards/margins": 0.1243646889925003, + "rewards/rejected": -0.2687840163707733, + "step": 429 + }, + { + "epoch": 0.26749611197511663, + "grad_norm": 0.29282695055007935, + "learning_rate": 3.9250000000000005e-05, + "log_odds_chosen": 1.340848445892334, + "log_odds_ratio": -0.34622523188591003, + "logits/chosen": 0.22931736707687378, + "logits/rejected": 0.14595317840576172, + "logps/chosen": -1.1169357299804688, + "logps/rejected": -2.1826369762420654, + "loss": 3.4491, + "nll_loss": 3.4144582748413086, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11169356852769852, + "rewards/margins": 0.10657013207674026, + "rewards/rejected": -0.21826371550559998, + "step": 430 + }, + { + "epoch": 0.26811819595645414, + "grad_norm": 0.3759579062461853, + "learning_rate": 3.9225e-05, + "log_odds_chosen": 0.9186004996299744, + "log_odds_ratio": -0.4763484001159668, + "logits/chosen": 0.2613193690776825, + "logits/rejected": 0.11609724164009094, + "logps/chosen": -0.910784125328064, + "logps/rejected": -1.6030433177947998, + "loss": 3.5052, + "nll_loss": 3.4575998783111572, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09107840061187744, + "rewards/margins": 0.06922592967748642, + "rewards/rejected": -0.16030432283878326, + "step": 431 + }, + { + "epoch": 0.2687402799377916, + "grad_norm": 0.4951987564563751, + "learning_rate": 3.9200000000000004e-05, + "log_odds_chosen": 1.3083531856536865, + "log_odds_ratio": -0.3793916702270508, + "logits/chosen": 0.3077886700630188, + "logits/rejected": 0.05748777836561203, + "logps/chosen": -1.2742183208465576, + "logps/rejected": -2.2997703552246094, + "loss": 3.7485, + "nll_loss": 3.710557460784912, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12742182612419128, + "rewards/margins": 0.10255520790815353, + "rewards/rejected": -0.22997704148292542, + "step": 432 + }, + { + "epoch": 0.2693623639191291, + "grad_norm": 0.4444817006587982, + "learning_rate": 3.9175000000000004e-05, + "log_odds_chosen": 1.2291901111602783, + "log_odds_ratio": -0.35217738151550293, + "logits/chosen": 0.2221524715423584, + "logits/rejected": 0.05530994012951851, + "logps/chosen": -0.9905591011047363, + "logps/rejected": -1.9143832921981812, + "loss": 3.3073, + "nll_loss": 3.272057056427002, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09905591607093811, + "rewards/margins": 0.09238240122795105, + "rewards/rejected": -0.19143833220005035, + "step": 433 + }, + { + "epoch": 0.26998444790046655, + "grad_norm": 0.6299095749855042, + "learning_rate": 3.915e-05, + "log_odds_chosen": 1.615001916885376, + "log_odds_ratio": -0.3864189386367798, + "logits/chosen": 0.24696266651153564, + "logits/rejected": 0.18707673251628876, + "logps/chosen": -1.9024136066436768, + "logps/rejected": -3.3632092475891113, + "loss": 2.8607, + "nll_loss": 2.822024345397949, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19024136662483215, + "rewards/margins": 0.14607959985733032, + "rewards/rejected": -0.3363209664821625, + "step": 434 + }, + { + "epoch": 0.27060653188180406, + "grad_norm": 0.5415927767753601, + "learning_rate": 3.9125e-05, + "log_odds_chosen": 4.024477481842041, + "log_odds_ratio": -0.14453338086605072, + "logits/chosen": 0.2853761315345764, + "logits/rejected": 0.15076112747192383, + "logps/chosen": -1.2033590078353882, + "logps/rejected": -4.912008285522461, + "loss": 2.7905, + "nll_loss": 2.776012897491455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1203359067440033, + "rewards/margins": 0.3708649277687073, + "rewards/rejected": -0.4912008047103882, + "step": 435 + }, + { + "epoch": 0.2712286158631415, + "grad_norm": 0.6048150658607483, + "learning_rate": 3.91e-05, + "log_odds_chosen": 0.31674981117248535, + "log_odds_ratio": -0.6299794912338257, + "logits/chosen": 0.13437308371067047, + "logits/rejected": 0.002337227575480938, + "logps/chosen": -1.596144199371338, + "logps/rejected": -1.849827527999878, + "loss": 2.6519, + "nll_loss": 2.588890790939331, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1596144139766693, + "rewards/margins": 0.025368336588144302, + "rewards/rejected": -0.1849827617406845, + "step": 436 + }, + { + "epoch": 0.271850699844479, + "grad_norm": 0.44196486473083496, + "learning_rate": 3.9075e-05, + "log_odds_chosen": 1.744307041168213, + "log_odds_ratio": -0.26184773445129395, + "logits/chosen": 0.14910796284675598, + "logits/rejected": 0.08620636910200119, + "logps/chosen": -1.1879832744598389, + "logps/rejected": -2.592353343963623, + "loss": 2.8721, + "nll_loss": 2.845905303955078, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11879833042621613, + "rewards/margins": 0.14043700695037842, + "rewards/rejected": -0.25923532247543335, + "step": 437 + }, + { + "epoch": 0.27247278382581647, + "grad_norm": 0.5670360326766968, + "learning_rate": 3.905e-05, + "log_odds_chosen": 2.0719993114471436, + "log_odds_ratio": -0.2230648547410965, + "logits/chosen": 0.24322016537189484, + "logits/rejected": 0.2413720339536667, + "logps/chosen": -1.1990163326263428, + "logps/rejected": -2.9432950019836426, + "loss": 2.7375, + "nll_loss": 2.715202569961548, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1199016347527504, + "rewards/margins": 0.17442789673805237, + "rewards/rejected": -0.29432952404022217, + "step": 438 + }, + { + "epoch": 0.273094867807154, + "grad_norm": 0.4377356767654419, + "learning_rate": 3.9025e-05, + "log_odds_chosen": 2.4671876430511475, + "log_odds_ratio": -0.4254373013973236, + "logits/chosen": 0.3846701383590698, + "logits/rejected": 0.2242540866136551, + "logps/chosen": -0.9989205598831177, + "logps/rejected": -3.226830244064331, + "loss": 3.7535, + "nll_loss": 3.7109501361846924, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09989205747842789, + "rewards/margins": 0.22279097139835358, + "rewards/rejected": -0.3226830065250397, + "step": 439 + }, + { + "epoch": 0.2737169517884914, + "grad_norm": 0.6218655109405518, + "learning_rate": 3.9000000000000006e-05, + "log_odds_chosen": 1.7963087558746338, + "log_odds_ratio": -0.24013981223106384, + "logits/chosen": 0.22267626225948334, + "logits/rejected": 0.15220728516578674, + "logps/chosen": -1.4764683246612549, + "logps/rejected": -3.08296275138855, + "loss": 2.6897, + "nll_loss": 2.665696620941162, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14764682948589325, + "rewards/margins": 0.16064943373203278, + "rewards/rejected": -0.308296263217926, + "step": 440 + }, + { + "epoch": 0.27433903576982893, + "grad_norm": 0.4783235788345337, + "learning_rate": 3.8975e-05, + "log_odds_chosen": 3.722914934158325, + "log_odds_ratio": -0.1403815746307373, + "logits/chosen": 0.3612845838069916, + "logits/rejected": 0.3681058883666992, + "logps/chosen": -1.2698994874954224, + "logps/rejected": -4.6545090675354, + "loss": 3.0846, + "nll_loss": 3.0705621242523193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1269899606704712, + "rewards/margins": 0.3384609520435333, + "rewards/rejected": -0.46545088291168213, + "step": 441 + }, + { + "epoch": 0.2749611197511664, + "grad_norm": 0.6110908389091492, + "learning_rate": 3.8950000000000005e-05, + "log_odds_chosen": 1.8417094945907593, + "log_odds_ratio": -0.23535192012786865, + "logits/chosen": 0.3509153127670288, + "logits/rejected": 0.10706407576799393, + "logps/chosen": -1.146148920059204, + "logps/rejected": -2.6451005935668945, + "loss": 3.4359, + "nll_loss": 3.4123191833496094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11461488902568817, + "rewards/margins": 0.14989519119262695, + "rewards/rejected": -0.2645100951194763, + "step": 442 + }, + { + "epoch": 0.2755832037325039, + "grad_norm": 0.378417044878006, + "learning_rate": 3.8925e-05, + "log_odds_chosen": 1.9329280853271484, + "log_odds_ratio": -0.31626445055007935, + "logits/chosen": 0.2477678805589676, + "logits/rejected": 0.19056656956672668, + "logps/chosen": -0.9245951175689697, + "logps/rejected": -2.271580219268799, + "loss": 3.5382, + "nll_loss": 3.5066051483154297, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09245951473712921, + "rewards/margins": 0.1346985101699829, + "rewards/rejected": -0.22715803980827332, + "step": 443 + }, + { + "epoch": 0.27620528771384134, + "grad_norm": 0.45432165265083313, + "learning_rate": 3.8900000000000004e-05, + "log_odds_chosen": 2.492096185684204, + "log_odds_ratio": -0.20271611213684082, + "logits/chosen": 0.1072557121515274, + "logits/rejected": 0.1309928297996521, + "logps/chosen": -1.2175402641296387, + "logps/rejected": -3.431077003479004, + "loss": 2.5011, + "nll_loss": 2.480823040008545, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12175402045249939, + "rewards/margins": 0.2213537096977234, + "rewards/rejected": -0.3431077003479004, + "step": 444 + }, + { + "epoch": 0.27682737169517885, + "grad_norm": 0.4896884262561798, + "learning_rate": 3.8875e-05, + "log_odds_chosen": 2.018673896789551, + "log_odds_ratio": -0.23892150819301605, + "logits/chosen": 0.18642401695251465, + "logits/rejected": 0.1272982805967331, + "logps/chosen": -1.0007119178771973, + "logps/rejected": -2.652472496032715, + "loss": 2.8004, + "nll_loss": 2.776474714279175, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10007119923830032, + "rewards/margins": 0.16517606377601624, + "rewards/rejected": -0.26524725556373596, + "step": 445 + }, + { + "epoch": 0.27744945567651635, + "grad_norm": 0.4476563036441803, + "learning_rate": 3.885e-05, + "log_odds_chosen": 1.5900170803070068, + "log_odds_ratio": -0.3700055480003357, + "logits/chosen": 0.41444650292396545, + "logits/rejected": 0.26796096563339233, + "logps/chosen": -1.4082850217819214, + "logps/rejected": -2.710878372192383, + "loss": 3.8047, + "nll_loss": 3.7677254676818848, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14082849025726318, + "rewards/margins": 0.13025934994220734, + "rewards/rejected": -0.2710878551006317, + "step": 446 + }, + { + "epoch": 0.2780715396578538, + "grad_norm": 0.44968464970588684, + "learning_rate": 3.8825e-05, + "log_odds_chosen": 1.3255045413970947, + "log_odds_ratio": -0.2901913523674011, + "logits/chosen": 0.2749924957752228, + "logits/rejected": 0.2749479413032532, + "logps/chosen": -1.219002604484558, + "logps/rejected": -2.211946964263916, + "loss": 3.2752, + "nll_loss": 3.246131420135498, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12190026044845581, + "rewards/margins": 0.09929443150758743, + "rewards/rejected": -0.22119468450546265, + "step": 447 + }, + { + "epoch": 0.2786936236391913, + "grad_norm": 0.6323361992835999, + "learning_rate": 3.88e-05, + "log_odds_chosen": 2.376352071762085, + "log_odds_ratio": -0.16851823031902313, + "logits/chosen": 0.2564919590950012, + "logits/rejected": 0.3483697772026062, + "logps/chosen": -1.1505968570709229, + "logps/rejected": -3.2237629890441895, + "loss": 2.4031, + "nll_loss": 2.386216640472412, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11505968123674393, + "rewards/margins": 0.20731663703918457, + "rewards/rejected": -0.3223763108253479, + "step": 448 + }, + { + "epoch": 0.27931570762052876, + "grad_norm": 0.4380817115306854, + "learning_rate": 3.8775e-05, + "log_odds_chosen": 1.486014723777771, + "log_odds_ratio": -0.3191292881965637, + "logits/chosen": 0.35001346468925476, + "logits/rejected": 0.26604193449020386, + "logps/chosen": -1.0675163269042969, + "logps/rejected": -2.318084478378296, + "loss": 3.3673, + "nll_loss": 3.3354063034057617, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10675162822008133, + "rewards/margins": 0.12505681812763214, + "rewards/rejected": -0.23180843889713287, + "step": 449 + }, + { + "epoch": 0.27993779160186627, + "grad_norm": 0.5374330282211304, + "learning_rate": 3.875e-05, + "log_odds_chosen": 2.0906572341918945, + "log_odds_ratio": -0.2168201506137848, + "logits/chosen": 0.2877808213233948, + "logits/rejected": 0.3449041545391083, + "logps/chosen": -1.3126355409622192, + "logps/rejected": -2.9990274906158447, + "loss": 3.0555, + "nll_loss": 3.033806085586548, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13126355409622192, + "rewards/margins": 0.1686391979455948, + "rewards/rejected": -0.2999027371406555, + "step": 450 + }, + { + "epoch": 0.2805598755832037, + "grad_norm": 0.6015781760215759, + "learning_rate": 3.8725e-05, + "log_odds_chosen": 2.5022025108337402, + "log_odds_ratio": -0.19380271434783936, + "logits/chosen": 0.3818150460720062, + "logits/rejected": 0.35132819414138794, + "logps/chosen": -1.2525804042816162, + "logps/rejected": -3.4683022499084473, + "loss": 3.4237, + "nll_loss": 3.4043030738830566, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12525805830955505, + "rewards/margins": 0.22157219052314758, + "rewards/rejected": -0.34683024883270264, + "step": 451 + }, + { + "epoch": 0.28118195956454123, + "grad_norm": 0.5427994132041931, + "learning_rate": 3.8700000000000006e-05, + "log_odds_chosen": 1.5231120586395264, + "log_odds_ratio": -0.28168755769729614, + "logits/chosen": 0.3691140115261078, + "logits/rejected": 0.37976768612861633, + "logps/chosen": -1.2954881191253662, + "logps/rejected": -2.606727361679077, + "loss": 3.1849, + "nll_loss": 3.1567115783691406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1295488178730011, + "rewards/margins": 0.13112393021583557, + "rewards/rejected": -0.26067274808883667, + "step": 452 + }, + { + "epoch": 0.2818040435458787, + "grad_norm": 0.6339102983474731, + "learning_rate": 3.8675e-05, + "log_odds_chosen": 4.337133407592773, + "log_odds_ratio": -0.21208305656909943, + "logits/chosen": 0.25384026765823364, + "logits/rejected": 0.20061862468719482, + "logps/chosen": -1.4610943794250488, + "logps/rejected": -5.5489044189453125, + "loss": 3.273, + "nll_loss": 3.251758098602295, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1461094468832016, + "rewards/margins": 0.4087810218334198, + "rewards/rejected": -0.5548904538154602, + "step": 453 + }, + { + "epoch": 0.2824261275272162, + "grad_norm": 1.4807077646255493, + "learning_rate": 3.8650000000000004e-05, + "log_odds_chosen": 2.719972848892212, + "log_odds_ratio": -0.1599472612142563, + "logits/chosen": 0.38763532042503357, + "logits/rejected": 0.3248913884162903, + "logps/chosen": -1.1536898612976074, + "logps/rejected": -3.492626905441284, + "loss": 3.2056, + "nll_loss": 3.1896491050720215, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11536898463964462, + "rewards/margins": 0.2338937222957611, + "rewards/rejected": -0.34926271438598633, + "step": 454 + }, + { + "epoch": 0.28304821150855364, + "grad_norm": 0.4285638928413391, + "learning_rate": 3.8625e-05, + "log_odds_chosen": 3.288182020187378, + "log_odds_ratio": -0.05790887400507927, + "logits/chosen": 0.2987384498119354, + "logits/rejected": 0.29059848189353943, + "logps/chosen": -1.2740700244903564, + "logps/rejected": -4.2349653244018555, + "loss": 3.027, + "nll_loss": 3.0212230682373047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1274069994688034, + "rewards/margins": 0.2960895299911499, + "rewards/rejected": -0.4234965443611145, + "step": 455 + }, + { + "epoch": 0.28367029548989114, + "grad_norm": 0.8652162551879883, + "learning_rate": 3.86e-05, + "log_odds_chosen": 1.5928850173950195, + "log_odds_ratio": -0.311477929353714, + "logits/chosen": 0.17478427290916443, + "logits/rejected": 0.1352236568927765, + "logps/chosen": -1.60139799118042, + "logps/rejected": -2.999675750732422, + "loss": 3.2718, + "nll_loss": 3.2406058311462402, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.160139799118042, + "rewards/margins": 0.13982778787612915, + "rewards/rejected": -0.29996758699417114, + "step": 456 + }, + { + "epoch": 0.2842923794712286, + "grad_norm": 0.3627634644508362, + "learning_rate": 3.8575e-05, + "log_odds_chosen": 2.181387186050415, + "log_odds_ratio": -0.3236231803894043, + "logits/chosen": 0.05983370169997215, + "logits/rejected": 0.02825239673256874, + "logps/chosen": -1.1143271923065186, + "logps/rejected": -2.988661766052246, + "loss": 3.0779, + "nll_loss": 3.0455780029296875, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11143273115158081, + "rewards/margins": 0.18743345141410828, + "rewards/rejected": -0.2988661825656891, + "step": 457 + }, + { + "epoch": 0.2849144634525661, + "grad_norm": 0.5619986057281494, + "learning_rate": 3.855e-05, + "log_odds_chosen": 1.8027219772338867, + "log_odds_ratio": -0.46999043226242065, + "logits/chosen": -0.01704075187444687, + "logits/rejected": 0.08329185098409653, + "logps/chosen": -1.0725547075271606, + "logps/rejected": -2.68211030960083, + "loss": 2.6529, + "nll_loss": 2.6059412956237793, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1072554737329483, + "rewards/margins": 0.16095557808876038, + "rewards/rejected": -0.2682110369205475, + "step": 458 + }, + { + "epoch": 0.28553654743390355, + "grad_norm": 0.4395221471786499, + "learning_rate": 3.8525e-05, + "log_odds_chosen": 3.0232295989990234, + "log_odds_ratio": -0.25974196195602417, + "logits/chosen": 0.24080729484558105, + "logits/rejected": 0.15837368369102478, + "logps/chosen": -0.9511310458183289, + "logps/rejected": -3.5407259464263916, + "loss": 3.3904, + "nll_loss": 3.364377021789551, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.095113106071949, + "rewards/margins": 0.25895950198173523, + "rewards/rejected": -0.35407260060310364, + "step": 459 + }, + { + "epoch": 0.28615863141524106, + "grad_norm": 0.39195650815963745, + "learning_rate": 3.85e-05, + "log_odds_chosen": 3.548543930053711, + "log_odds_ratio": -0.2414306104183197, + "logits/chosen": 0.20818278193473816, + "logits/rejected": 0.2002563178539276, + "logps/chosen": -0.9960837364196777, + "logps/rejected": -4.0006537437438965, + "loss": 3.0262, + "nll_loss": 3.002067804336548, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09960837662220001, + "rewards/margins": 0.3004570007324219, + "rewards/rejected": -0.40006542205810547, + "step": 460 + }, + { + "epoch": 0.2867807153965785, + "grad_norm": 0.6457217931747437, + "learning_rate": 3.8475e-05, + "log_odds_chosen": 2.5402870178222656, + "log_odds_ratio": -0.3850615918636322, + "logits/chosen": 0.11771635711193085, + "logits/rejected": 0.03514918312430382, + "logps/chosen": -1.3628007173538208, + "logps/rejected": -3.764535427093506, + "loss": 2.7094, + "nll_loss": 2.670870304107666, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13628007471561432, + "rewards/margins": 0.24017345905303955, + "rewards/rejected": -0.3764535188674927, + "step": 461 + }, + { + "epoch": 0.287402799377916, + "grad_norm": 0.5981135368347168, + "learning_rate": 3.845e-05, + "log_odds_chosen": 1.2403737306594849, + "log_odds_ratio": -0.6255621314048767, + "logits/chosen": 0.06767088919878006, + "logits/rejected": 0.015279887244105339, + "logps/chosen": -1.2029376029968262, + "logps/rejected": -2.2572832107543945, + "loss": 2.8872, + "nll_loss": 2.8246262073516846, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12029377371072769, + "rewards/margins": 0.10543454438447952, + "rewards/rejected": -0.22572831809520721, + "step": 462 + }, + { + "epoch": 0.2880248833592535, + "grad_norm": 0.6233921647071838, + "learning_rate": 3.8425e-05, + "log_odds_chosen": 2.528999090194702, + "log_odds_ratio": -0.2973198890686035, + "logits/chosen": 0.1612185388803482, + "logits/rejected": 0.18806105852127075, + "logps/chosen": -1.4305648803710938, + "logps/rejected": -3.712616443634033, + "loss": 2.868, + "nll_loss": 2.838242292404175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1430564969778061, + "rewards/margins": 0.22820515930652618, + "rewards/rejected": -0.3712616562843323, + "step": 463 + }, + { + "epoch": 0.288646967340591, + "grad_norm": 0.43462324142456055, + "learning_rate": 3.8400000000000005e-05, + "log_odds_chosen": 3.214184522628784, + "log_odds_ratio": -0.06043536216020584, + "logits/chosen": 0.21719320118427277, + "logits/rejected": 0.25715163350105286, + "logps/chosen": -1.121093511581421, + "logps/rejected": -3.9434640407562256, + "loss": 2.9904, + "nll_loss": 2.9843649864196777, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11210934817790985, + "rewards/margins": 0.28223708271980286, + "rewards/rejected": -0.3943464159965515, + "step": 464 + }, + { + "epoch": 0.2892690513219285, + "grad_norm": 0.5641807913780212, + "learning_rate": 3.8375e-05, + "log_odds_chosen": 4.3896636962890625, + "log_odds_ratio": -0.18577708303928375, + "logits/chosen": 0.0453701950609684, + "logits/rejected": 0.07587449252605438, + "logps/chosen": -1.2915798425674438, + "logps/rejected": -5.313374996185303, + "loss": 2.8604, + "nll_loss": 2.841811418533325, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12915797531604767, + "rewards/margins": 0.4021795392036438, + "rewards/rejected": -0.5313374996185303, + "step": 465 + }, + { + "epoch": 0.28989113530326593, + "grad_norm": 0.7072799801826477, + "learning_rate": 3.8350000000000004e-05, + "log_odds_chosen": 3.1329102516174316, + "log_odds_ratio": -0.38895708322525024, + "logits/chosen": 0.36617377400398254, + "logits/rejected": 0.32684844732284546, + "logps/chosen": -1.486217975616455, + "logps/rejected": -4.195459842681885, + "loss": 3.3864, + "nll_loss": 3.347512722015381, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1486217975616455, + "rewards/margins": 0.2709242105484009, + "rewards/rejected": -0.4195460081100464, + "step": 466 + }, + { + "epoch": 0.29051321928460344, + "grad_norm": 0.5267555713653564, + "learning_rate": 3.8324999999999996e-05, + "log_odds_chosen": 1.2240618467330933, + "log_odds_ratio": -0.39744335412979126, + "logits/chosen": 0.22255158424377441, + "logits/rejected": 0.18081636726856232, + "logps/chosen": -1.1293349266052246, + "logps/rejected": -2.1319031715393066, + "loss": 2.8698, + "nll_loss": 2.830047130584717, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11293349415063858, + "rewards/margins": 0.10025681555271149, + "rewards/rejected": -0.21319028735160828, + "step": 467 + }, + { + "epoch": 0.2911353032659409, + "grad_norm": 0.6344406604766846, + "learning_rate": 3.83e-05, + "log_odds_chosen": 6.454854488372803, + "log_odds_ratio": -0.14626577496528625, + "logits/chosen": 0.26416561007499695, + "logits/rejected": 0.411790668964386, + "logps/chosen": -1.2627651691436768, + "logps/rejected": -7.43460750579834, + "loss": 3.1443, + "nll_loss": 3.129704475402832, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12627652287483215, + "rewards/margins": 0.6171842813491821, + "rewards/rejected": -0.7434607148170471, + "step": 468 + }, + { + "epoch": 0.2917573872472784, + "grad_norm": 2.3335068225860596, + "learning_rate": 3.8275e-05, + "log_odds_chosen": 5.498930931091309, + "log_odds_ratio": -0.5096065998077393, + "logits/chosen": 0.3187922239303589, + "logits/rejected": 0.4804098308086395, + "logps/chosen": -1.7013590335845947, + "logps/rejected": -6.9031877517700195, + "loss": 3.3374, + "nll_loss": 3.2864151000976562, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.17013590037822723, + "rewards/margins": 0.5201829075813293, + "rewards/rejected": -0.690318763256073, + "step": 469 + }, + { + "epoch": 0.29237947122861585, + "grad_norm": 0.3446737825870514, + "learning_rate": 3.825e-05, + "log_odds_chosen": 4.659626007080078, + "log_odds_ratio": -0.05383168160915375, + "logits/chosen": 0.3359685242176056, + "logits/rejected": 0.3225572407245636, + "logps/chosen": -1.2879467010498047, + "logps/rejected": -5.530665397644043, + "loss": 4.0427, + "nll_loss": 4.037296295166016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12879468500614166, + "rewards/margins": 0.4242718815803528, + "rewards/rejected": -0.5530665516853333, + "step": 470 + }, + { + "epoch": 0.29300155520995336, + "grad_norm": 0.6117813587188721, + "learning_rate": 3.8225e-05, + "log_odds_chosen": 3.6450319290161133, + "log_odds_ratio": -0.11542224884033203, + "logits/chosen": 0.22536617517471313, + "logits/rejected": 0.26635074615478516, + "logps/chosen": -0.9902899265289307, + "logps/rejected": -4.163326740264893, + "loss": 3.1959, + "nll_loss": 3.184352397918701, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09902900457382202, + "rewards/margins": 0.31730368733406067, + "rewards/rejected": -0.4163326919078827, + "step": 471 + }, + { + "epoch": 0.2936236391912908, + "grad_norm": 0.6647369861602783, + "learning_rate": 3.82e-05, + "log_odds_chosen": 4.759244441986084, + "log_odds_ratio": -0.22806090116500854, + "logits/chosen": 0.2276982069015503, + "logits/rejected": 0.2902960777282715, + "logps/chosen": -1.0806303024291992, + "logps/rejected": -5.489457607269287, + "loss": 2.7768, + "nll_loss": 2.754019260406494, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10806304216384888, + "rewards/margins": 0.44088274240493774, + "rewards/rejected": -0.5489457845687866, + "step": 472 + }, + { + "epoch": 0.2942457231726283, + "grad_norm": 0.4787210524082184, + "learning_rate": 3.8175e-05, + "log_odds_chosen": 3.1084511280059814, + "log_odds_ratio": -0.15226207673549652, + "logits/chosen": 0.3156070113182068, + "logits/rejected": 0.38743242621421814, + "logps/chosen": -1.3634812831878662, + "logps/rejected": -4.172502040863037, + "loss": 3.6274, + "nll_loss": 3.612142324447632, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13634812831878662, + "rewards/margins": 0.28090211749076843, + "rewards/rejected": -0.41725024580955505, + "step": 473 + }, + { + "epoch": 0.29486780715396577, + "grad_norm": 0.4799170196056366, + "learning_rate": 3.8150000000000006e-05, + "log_odds_chosen": 5.302570343017578, + "log_odds_ratio": -0.17532306909561157, + "logits/chosen": 0.2545361816883087, + "logits/rejected": 0.4782401919364929, + "logps/chosen": -1.1034696102142334, + "logps/rejected": -6.034841537475586, + "loss": 2.6736, + "nll_loss": 2.6560819149017334, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1103469654917717, + "rewards/margins": 0.4931372106075287, + "rewards/rejected": -0.6034841537475586, + "step": 474 + }, + { + "epoch": 0.2954898911353033, + "grad_norm": 0.43535855412483215, + "learning_rate": 3.8125e-05, + "log_odds_chosen": 7.762436389923096, + "log_odds_ratio": -0.13623476028442383, + "logits/chosen": 0.1623404324054718, + "logits/rejected": 0.2579556405544281, + "logps/chosen": -0.7613686323165894, + "logps/rejected": -7.789710521697998, + "loss": 3.0684, + "nll_loss": 3.054818630218506, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07613686472177505, + "rewards/margins": 0.7028341889381409, + "rewards/rejected": -0.7789710760116577, + "step": 475 + }, + { + "epoch": 0.2961119751166407, + "grad_norm": 1.3790374994277954, + "learning_rate": 3.8100000000000005e-05, + "log_odds_chosen": 3.3232266902923584, + "log_odds_ratio": -0.1542171686887741, + "logits/chosen": 0.13157705962657928, + "logits/rejected": 0.14837421476840973, + "logps/chosen": -1.329514741897583, + "logps/rejected": -4.3654656410217285, + "loss": 2.7209, + "nll_loss": 2.7054383754730225, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13295146822929382, + "rewards/margins": 0.3035951256752014, + "rewards/rejected": -0.43654656410217285, + "step": 476 + }, + { + "epoch": 0.29673405909797823, + "grad_norm": 0.4154740869998932, + "learning_rate": 3.8075e-05, + "log_odds_chosen": 3.3031208515167236, + "log_odds_ratio": -0.19959819316864014, + "logits/chosen": 0.29767587780952454, + "logits/rejected": 0.3195898234844208, + "logps/chosen": -1.2283899784088135, + "logps/rejected": -4.255227088928223, + "loss": 3.426, + "nll_loss": 3.4059970378875732, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12283900380134583, + "rewards/margins": 0.3026837110519409, + "rewards/rejected": -0.42552274465560913, + "step": 477 + }, + { + "epoch": 0.2973561430793157, + "grad_norm": 0.440824955701828, + "learning_rate": 3.805e-05, + "log_odds_chosen": 5.133951187133789, + "log_odds_ratio": -0.1343551129102707, + "logits/chosen": -0.004576465114951134, + "logits/rejected": 0.09814856946468353, + "logps/chosen": -1.0036342144012451, + "logps/rejected": -5.681285381317139, + "loss": 2.9374, + "nll_loss": 2.923962116241455, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10036341845989227, + "rewards/margins": 0.46776506304740906, + "rewards/rejected": -0.5681285262107849, + "step": 478 + }, + { + "epoch": 0.2979782270606532, + "grad_norm": 0.4802106022834778, + "learning_rate": 3.8025e-05, + "log_odds_chosen": 4.080648422241211, + "log_odds_ratio": -0.15446895360946655, + "logits/chosen": 0.006258752197027206, + "logits/rejected": 0.04351950064301491, + "logps/chosen": -1.1790379285812378, + "logps/rejected": -4.813777923583984, + "loss": 2.8196, + "nll_loss": 2.804121732711792, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11790378391742706, + "rewards/margins": 0.3634740114212036, + "rewards/rejected": -0.48137781023979187, + "step": 479 + }, + { + "epoch": 0.2986003110419907, + "grad_norm": 0.4847407341003418, + "learning_rate": 3.8e-05, + "log_odds_chosen": 4.479074478149414, + "log_odds_ratio": -0.12381869554519653, + "logits/chosen": 0.2093774825334549, + "logits/rejected": 0.29991966485977173, + "logps/chosen": -1.0310206413269043, + "logps/rejected": -5.081138610839844, + "loss": 2.6951, + "nll_loss": 2.682741165161133, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10310205817222595, + "rewards/margins": 0.4050118029117584, + "rewards/rejected": -0.5081138610839844, + "step": 480 + }, + { + "epoch": 0.29922239502332815, + "grad_norm": 0.9063189029693604, + "learning_rate": 3.7975e-05, + "log_odds_chosen": 2.8616793155670166, + "log_odds_ratio": -0.12458339333534241, + "logits/chosen": 0.1728355586528778, + "logits/rejected": 0.1352435052394867, + "logps/chosen": -1.2799311876296997, + "logps/rejected": -3.802501678466797, + "loss": 3.3188, + "nll_loss": 3.306375026702881, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1279931366443634, + "rewards/margins": 0.25225701928138733, + "rewards/rejected": -0.38025015592575073, + "step": 481 + }, + { + "epoch": 0.29984447900466565, + "grad_norm": 0.42489972710609436, + "learning_rate": 3.795e-05, + "log_odds_chosen": 5.341192245483398, + "log_odds_ratio": -0.26994985342025757, + "logits/chosen": 0.02017594873905182, + "logits/rejected": 0.0014428049325942993, + "logps/chosen": -1.1768755912780762, + "logps/rejected": -6.195101261138916, + "loss": 2.8912, + "nll_loss": 2.8642163276672363, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11768755316734314, + "rewards/margins": 0.5018225312232971, + "rewards/rejected": -0.6195101737976074, + "step": 482 + }, + { + "epoch": 0.3004665629860031, + "grad_norm": 0.4654333293437958, + "learning_rate": 3.7925e-05, + "log_odds_chosen": 2.1485209465026855, + "log_odds_ratio": -0.2876686751842499, + "logits/chosen": 0.02476568892598152, + "logits/rejected": 0.06350227445363998, + "logps/chosen": -1.2585699558258057, + "logps/rejected": -3.1734275817871094, + "loss": 3.0375, + "nll_loss": 3.008713960647583, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12585701048374176, + "rewards/margins": 0.19148576259613037, + "rewards/rejected": -0.31734275817871094, + "step": 483 + }, + { + "epoch": 0.3010886469673406, + "grad_norm": 1.332303524017334, + "learning_rate": 3.79e-05, + "log_odds_chosen": 1.5843894481658936, + "log_odds_ratio": -0.5189324617385864, + "logits/chosen": 0.19418621063232422, + "logits/rejected": 0.06159728765487671, + "logps/chosen": -1.9245703220367432, + "logps/rejected": -3.3311855792999268, + "loss": 3.4679, + "nll_loss": 3.4160549640655518, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19245702028274536, + "rewards/margins": 0.14066153764724731, + "rewards/rejected": -0.3331185579299927, + "step": 484 + }, + { + "epoch": 0.30171073094867806, + "grad_norm": 0.6246785521507263, + "learning_rate": 3.7875e-05, + "log_odds_chosen": 4.188319206237793, + "log_odds_ratio": -0.4303605556488037, + "logits/chosen": 0.15692827105522156, + "logits/rejected": 0.29972484707832336, + "logps/chosen": -1.1631245613098145, + "logps/rejected": -5.161314964294434, + "loss": 2.4339, + "nll_loss": 2.390859365463257, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11631244421005249, + "rewards/margins": 0.3998190760612488, + "rewards/rejected": -0.5161314606666565, + "step": 485 + }, + { + "epoch": 0.30233281493001557, + "grad_norm": 0.4516681134700775, + "learning_rate": 3.7850000000000005e-05, + "log_odds_chosen": 3.0724825859069824, + "log_odds_ratio": -0.13238348066806793, + "logits/chosen": 0.14594241976737976, + "logits/rejected": 0.19076189398765564, + "logps/chosen": -0.9410706758499146, + "logps/rejected": -3.3255326747894287, + "loss": 2.9302, + "nll_loss": 2.9170100688934326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09410707652568817, + "rewards/margins": 0.2384462058544159, + "rewards/rejected": -0.33255326747894287, + "step": 486 + }, + { + "epoch": 0.302954898911353, + "grad_norm": 0.44087883830070496, + "learning_rate": 3.7825e-05, + "log_odds_chosen": 3.9114091396331787, + "log_odds_ratio": -0.2950913608074188, + "logits/chosen": 0.15809951722621918, + "logits/rejected": 0.14425241947174072, + "logps/chosen": -1.040985345840454, + "logps/rejected": -4.549111366271973, + "loss": 3.16, + "nll_loss": 3.1304802894592285, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10409855097532272, + "rewards/margins": 0.3508126437664032, + "rewards/rejected": -0.45491117238998413, + "step": 487 + }, + { + "epoch": 0.30357698289269053, + "grad_norm": 0.556199312210083, + "learning_rate": 3.7800000000000004e-05, + "log_odds_chosen": 5.235114097595215, + "log_odds_ratio": -0.041219230741262436, + "logits/chosen": 0.14572152495384216, + "logits/rejected": 0.3106115758419037, + "logps/chosen": -1.1664040088653564, + "logps/rejected": -5.896844863891602, + "loss": 3.1604, + "nll_loss": 3.1562514305114746, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11664040386676788, + "rewards/margins": 0.47304409742355347, + "rewards/rejected": -0.5896844863891602, + "step": 488 + }, + { + "epoch": 0.304199066874028, + "grad_norm": 0.5189743041992188, + "learning_rate": 3.7775e-05, + "log_odds_chosen": 7.963743209838867, + "log_odds_ratio": -0.1266322284936905, + "logits/chosen": 0.3704788088798523, + "logits/rejected": 0.6580029726028442, + "logps/chosen": -1.3174169063568115, + "logps/rejected": -9.020464897155762, + "loss": 3.4173, + "nll_loss": 3.4046013355255127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1317417025566101, + "rewards/margins": 0.7703048586845398, + "rewards/rejected": -0.9020465016365051, + "step": 489 + }, + { + "epoch": 0.3048211508553655, + "grad_norm": 0.4847100079059601, + "learning_rate": 3.775e-05, + "log_odds_chosen": 9.21757984161377, + "log_odds_ratio": -0.10597797483205795, + "logits/chosen": 0.16972588002681732, + "logits/rejected": 0.3820986747741699, + "logps/chosen": -1.2503827810287476, + "logps/rejected": -10.164284706115723, + "loss": 3.2205, + "nll_loss": 3.2099387645721436, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.125038281083107, + "rewards/margins": 0.8913902044296265, + "rewards/rejected": -1.0164284706115723, + "step": 490 + }, + { + "epoch": 0.30544323483670294, + "grad_norm": 0.8318912386894226, + "learning_rate": 3.7725e-05, + "log_odds_chosen": 9.271547317504883, + "log_odds_ratio": -0.021590309217572212, + "logits/chosen": 0.3189174234867096, + "logits/rejected": 0.49795106053352356, + "logps/chosen": -1.382540225982666, + "logps/rejected": -10.197100639343262, + "loss": 3.9656, + "nll_loss": 3.963407039642334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13825401663780212, + "rewards/margins": 0.8814560174942017, + "rewards/rejected": -1.0197100639343262, + "step": 491 + }, + { + "epoch": 0.30606531881804044, + "grad_norm": 0.40964096784591675, + "learning_rate": 3.77e-05, + "log_odds_chosen": 5.522929668426514, + "log_odds_ratio": -0.1387336701154709, + "logits/chosen": 0.29802757501602173, + "logits/rejected": 0.4043997526168823, + "logps/chosen": -1.073734164237976, + "logps/rejected": -6.166069507598877, + "loss": 3.7311, + "nll_loss": 3.7172298431396484, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1073734238743782, + "rewards/margins": 0.5092335343360901, + "rewards/rejected": -0.6166069507598877, + "step": 492 + }, + { + "epoch": 0.3066874027993779, + "grad_norm": 0.5874708294868469, + "learning_rate": 3.7675e-05, + "log_odds_chosen": 12.224292755126953, + "log_odds_ratio": -0.021359411999583244, + "logits/chosen": 0.136999249458313, + "logits/rejected": 0.7200852632522583, + "logps/chosen": -0.9732953906059265, + "logps/rejected": -12.676000595092773, + "loss": 2.3038, + "nll_loss": 2.3016741275787354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09732954204082489, + "rewards/margins": 1.1702704429626465, + "rewards/rejected": -1.2676000595092773, + "step": 493 + }, + { + "epoch": 0.3073094867807154, + "grad_norm": 0.5743467807769775, + "learning_rate": 3.765e-05, + "log_odds_chosen": 12.042713165283203, + "log_odds_ratio": -0.070807546377182, + "logits/chosen": 0.2361012101173401, + "logits/rejected": 0.562972903251648, + "logps/chosen": -1.5356069803237915, + "logps/rejected": -13.233451843261719, + "loss": 3.359, + "nll_loss": 3.3519067764282227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15356069803237915, + "rewards/margins": 1.1697845458984375, + "rewards/rejected": -1.3233451843261719, + "step": 494 + }, + { + "epoch": 0.30793157076205285, + "grad_norm": 1.004876732826233, + "learning_rate": 3.7625e-05, + "log_odds_chosen": 6.287815570831299, + "log_odds_ratio": -0.2341613471508026, + "logits/chosen": 0.1172030121088028, + "logits/rejected": 0.23774453997612, + "logps/chosen": -1.0990171432495117, + "logps/rejected": -6.949299335479736, + "loss": 3.1907, + "nll_loss": 3.167259454727173, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10990171879529953, + "rewards/margins": 0.5850282311439514, + "rewards/rejected": -0.6949299573898315, + "step": 495 + }, + { + "epoch": 0.30855365474339036, + "grad_norm": 3.0034093856811523, + "learning_rate": 3.76e-05, + "log_odds_chosen": 7.123260498046875, + "log_odds_ratio": -0.10044410824775696, + "logits/chosen": 0.23248106241226196, + "logits/rejected": 0.4872136116027832, + "logps/chosen": -1.2667349576950073, + "logps/rejected": -8.054340362548828, + "loss": 3.0054, + "nll_loss": 2.9953811168670654, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12667348980903625, + "rewards/margins": 0.6787605285644531, + "rewards/rejected": -0.8054340481758118, + "step": 496 + }, + { + "epoch": 0.3091757387247278, + "grad_norm": 1.1735531091690063, + "learning_rate": 3.7575e-05, + "log_odds_chosen": 8.58930492401123, + "log_odds_ratio": -0.04813535511493683, + "logits/chosen": 0.26603323221206665, + "logits/rejected": 0.5334312915802002, + "logps/chosen": -1.2953003644943237, + "logps/rejected": -9.486214637756348, + "loss": 3.1711, + "nll_loss": 3.166259288787842, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12953002750873566, + "rewards/margins": 0.8190913796424866, + "rewards/rejected": -0.9486214518547058, + "step": 497 + }, + { + "epoch": 0.3097978227060653, + "grad_norm": 0.6103359460830688, + "learning_rate": 3.7550000000000005e-05, + "log_odds_chosen": 2.1337530612945557, + "log_odds_ratio": -0.2864910066127777, + "logits/chosen": 0.15103283524513245, + "logits/rejected": 0.25624915957450867, + "logps/chosen": -1.439260482788086, + "logps/rejected": -3.371507406234741, + "loss": 3.1723, + "nll_loss": 3.143685817718506, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14392603933811188, + "rewards/margins": 0.1932247132062912, + "rewards/rejected": -0.33715078234672546, + "step": 498 + }, + { + "epoch": 0.3104199066874028, + "grad_norm": 0.43494969606399536, + "learning_rate": 3.7525e-05, + "log_odds_chosen": 3.525885820388794, + "log_odds_ratio": -0.3266465663909912, + "logits/chosen": 0.008623511530458927, + "logits/rejected": 0.14156433939933777, + "logps/chosen": -1.0080041885375977, + "logps/rejected": -4.299755096435547, + "loss": 2.4517, + "nll_loss": 2.4190571308135986, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10080042481422424, + "rewards/margins": 0.32917505502700806, + "rewards/rejected": -0.4299754798412323, + "step": 499 + }, + { + "epoch": 0.3110419906687403, + "grad_norm": 0.4379410147666931, + "learning_rate": 3.7500000000000003e-05, + "log_odds_chosen": 1.4496530294418335, + "log_odds_ratio": -0.3601369857788086, + "logits/chosen": 0.027029283344745636, + "logits/rejected": 0.10909566283226013, + "logps/chosen": -1.3700931072235107, + "logps/rejected": -2.6318204402923584, + "loss": 2.8374, + "nll_loss": 2.801370620727539, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13700930774211884, + "rewards/margins": 0.1261727213859558, + "rewards/rejected": -0.26318204402923584, + "step": 500 + }, + { + "epoch": 0.3116640746500778, + "grad_norm": 0.631767988204956, + "learning_rate": 3.7475e-05, + "log_odds_chosen": 2.5015196800231934, + "log_odds_ratio": -0.40100595355033875, + "logits/chosen": -0.09599044919013977, + "logits/rejected": -0.06666186451911926, + "logps/chosen": -0.8881614804267883, + "logps/rejected": -3.0457301139831543, + "loss": 2.8761, + "nll_loss": 2.8360183238983154, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08881615102291107, + "rewards/margins": 0.2157568335533142, + "rewards/rejected": -0.3045729994773865, + "step": 501 + }, + { + "epoch": 0.31228615863141523, + "grad_norm": 0.5571648478507996, + "learning_rate": 3.745e-05, + "log_odds_chosen": 3.5235862731933594, + "log_odds_ratio": -0.2553158402442932, + "logits/chosen": 0.08271875232458115, + "logits/rejected": 0.159298837184906, + "logps/chosen": -1.5579495429992676, + "logps/rejected": -4.873930931091309, + "loss": 3.0358, + "nll_loss": 3.010316848754883, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15579496324062347, + "rewards/margins": 0.331598162651062, + "rewards/rejected": -0.4873931109905243, + "step": 502 + }, + { + "epoch": 0.31290824261275274, + "grad_norm": 0.9667811393737793, + "learning_rate": 3.7425e-05, + "log_odds_chosen": 1.5735365152359009, + "log_odds_ratio": -0.4731370806694031, + "logits/chosen": 0.03942291438579559, + "logits/rejected": -0.06971655786037445, + "logps/chosen": -1.4615811109542847, + "logps/rejected": -2.880722761154175, + "loss": 3.1011, + "nll_loss": 3.0537822246551514, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1461580991744995, + "rewards/margins": 0.14191415905952454, + "rewards/rejected": -0.28807225823402405, + "step": 503 + }, + { + "epoch": 0.3135303265940902, + "grad_norm": 0.496913880109787, + "learning_rate": 3.74e-05, + "log_odds_chosen": 2.018817901611328, + "log_odds_ratio": -0.37110626697540283, + "logits/chosen": -0.06371671706438065, + "logits/rejected": -0.035864636301994324, + "logps/chosen": -0.9149460792541504, + "logps/rejected": -2.4723567962646484, + "loss": 2.7536, + "nll_loss": 2.7165191173553467, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0914946049451828, + "rewards/margins": 0.15574106574058533, + "rewards/rejected": -0.24723568558692932, + "step": 504 + }, + { + "epoch": 0.3141524105754277, + "grad_norm": 0.45206576585769653, + "learning_rate": 3.737500000000001e-05, + "log_odds_chosen": 2.5706026554107666, + "log_odds_ratio": -0.44339919090270996, + "logits/chosen": 0.18639887869358063, + "logits/rejected": 0.09812915325164795, + "logps/chosen": -1.0650825500488281, + "logps/rejected": -3.2684202194213867, + "loss": 3.6104, + "nll_loss": 3.5660290718078613, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10650825500488281, + "rewards/margins": 0.2203337550163269, + "rewards/rejected": -0.3268420100212097, + "step": 505 + }, + { + "epoch": 0.31477449455676515, + "grad_norm": 0.46192917227745056, + "learning_rate": 3.735e-05, + "log_odds_chosen": 1.1359951496124268, + "log_odds_ratio": -0.45393139123916626, + "logits/chosen": 0.20836736261844635, + "logits/rejected": 0.045999638736248016, + "logps/chosen": -1.4677722454071045, + "logps/rejected": -2.4714083671569824, + "loss": 3.5338, + "nll_loss": 3.4884276390075684, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1467772126197815, + "rewards/margins": 0.10036361217498779, + "rewards/rejected": -0.24714083969593048, + "step": 506 + }, + { + "epoch": 0.31539657853810266, + "grad_norm": 0.8858140110969543, + "learning_rate": 3.7325000000000006e-05, + "log_odds_chosen": 0.8142578601837158, + "log_odds_ratio": -0.41193997859954834, + "logits/chosen": 0.23445630073547363, + "logits/rejected": 0.15942780673503876, + "logps/chosen": -1.313204050064087, + "logps/rejected": -1.9680757522583008, + "loss": 3.3672, + "nll_loss": 3.326014995574951, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13132041692733765, + "rewards/margins": 0.06548717617988586, + "rewards/rejected": -0.19680756330490112, + "step": 507 + }, + { + "epoch": 0.3160186625194401, + "grad_norm": 0.513054370880127, + "learning_rate": 3.73e-05, + "log_odds_chosen": 1.9759085178375244, + "log_odds_ratio": -0.40382054448127747, + "logits/chosen": 0.1355867236852646, + "logits/rejected": -0.025263607501983643, + "logps/chosen": -1.2560956478118896, + "logps/rejected": -3.0063164234161377, + "loss": 3.0785, + "nll_loss": 3.038083553314209, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12560957670211792, + "rewards/margins": 0.17502208054065704, + "rewards/rejected": -0.30063164234161377, + "step": 508 + }, + { + "epoch": 0.3166407465007776, + "grad_norm": 0.29589080810546875, + "learning_rate": 3.7275000000000005e-05, + "log_odds_chosen": 3.7314577102661133, + "log_odds_ratio": -0.2652418315410614, + "logits/chosen": 0.1787126213312149, + "logits/rejected": 0.24298204481601715, + "logps/chosen": -1.1753450632095337, + "logps/rejected": -4.593437194824219, + "loss": 3.1982, + "nll_loss": 3.1716480255126953, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11753450334072113, + "rewards/margins": 0.3418092131614685, + "rewards/rejected": -0.45934373140335083, + "step": 509 + }, + { + "epoch": 0.31726283048211507, + "grad_norm": 0.37716519832611084, + "learning_rate": 3.7250000000000004e-05, + "log_odds_chosen": 6.13275146484375, + "log_odds_ratio": -0.0573071613907814, + "logits/chosen": 0.1888727992773056, + "logits/rejected": 0.2561497092247009, + "logps/chosen": -1.123389720916748, + "logps/rejected": -6.81281852722168, + "loss": 3.15, + "nll_loss": 3.1442646980285645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11233897507190704, + "rewards/margins": 0.5689429044723511, + "rewards/rejected": -0.6812818050384521, + "step": 510 + }, + { + "epoch": 0.3178849144634526, + "grad_norm": 0.48151102662086487, + "learning_rate": 3.7225000000000004e-05, + "log_odds_chosen": 2.6327872276306152, + "log_odds_ratio": -0.2002970576286316, + "logits/chosen": 0.3196089267730713, + "logits/rejected": 0.27417632937431335, + "logps/chosen": -1.2908976078033447, + "logps/rejected": -3.636533260345459, + "loss": 3.504, + "nll_loss": 3.4839515686035156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12908975780010223, + "rewards/margins": 0.23456355929374695, + "rewards/rejected": -0.3636533319950104, + "step": 511 + }, + { + "epoch": 0.31850699844479, + "grad_norm": 0.43662747740745544, + "learning_rate": 3.72e-05, + "log_odds_chosen": 4.564843654632568, + "log_odds_ratio": -0.06708259880542755, + "logits/chosen": 0.24633480608463287, + "logits/rejected": 0.48307889699935913, + "logps/chosen": -0.9144176840782166, + "logps/rejected": -4.8161234855651855, + "loss": 3.12, + "nll_loss": 3.1133110523223877, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09144177287817001, + "rewards/margins": 0.39017054438591003, + "rewards/rejected": -0.48161232471466064, + "step": 512 + }, + { + "epoch": 0.31912908242612753, + "grad_norm": 0.5609356164932251, + "learning_rate": 3.7175e-05, + "log_odds_chosen": 5.504610538482666, + "log_odds_ratio": -0.08810669928789139, + "logits/chosen": 0.2721938490867615, + "logits/rejected": 0.42912524938583374, + "logps/chosen": -1.2787365913391113, + "logps/rejected": -6.235076904296875, + "loss": 3.0593, + "nll_loss": 3.050450086593628, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12787367403507233, + "rewards/margins": 0.49563395977020264, + "rewards/rejected": -0.6235076189041138, + "step": 513 + }, + { + "epoch": 0.319751166407465, + "grad_norm": 0.3104366958141327, + "learning_rate": 3.715e-05, + "log_odds_chosen": 6.418957710266113, + "log_odds_ratio": -0.17403581738471985, + "logits/chosen": 0.2422790229320526, + "logits/rejected": 0.45724615454673767, + "logps/chosen": -1.1649143695831299, + "logps/rejected": -7.193992614746094, + "loss": 3.2466, + "nll_loss": 3.2292304039001465, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11649143695831299, + "rewards/margins": 0.6029078364372253, + "rewards/rejected": -0.7193993330001831, + "step": 514 + }, + { + "epoch": 0.3203732503888025, + "grad_norm": 0.4039505422115326, + "learning_rate": 3.7125e-05, + "log_odds_chosen": 4.793672561645508, + "log_odds_ratio": -0.17864283919334412, + "logits/chosen": 0.31905102729797363, + "logits/rejected": 0.5531411170959473, + "logps/chosen": -0.8685755133628845, + "logps/rejected": -5.093353271484375, + "loss": 3.4313, + "nll_loss": 3.4134421348571777, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08685755729675293, + "rewards/margins": 0.4224777817726135, + "rewards/rejected": -0.5093352794647217, + "step": 515 + }, + { + "epoch": 0.32099533437014, + "grad_norm": 0.6209097504615784, + "learning_rate": 3.71e-05, + "log_odds_chosen": 13.188308715820312, + "log_odds_ratio": -0.03541084751486778, + "logits/chosen": 0.2741844952106476, + "logits/rejected": 0.9028793573379517, + "logps/chosen": -1.2709870338439941, + "logps/rejected": -13.850850105285645, + "loss": 2.5904, + "nll_loss": 2.5868520736694336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1270987093448639, + "rewards/margins": 1.257986307144165, + "rewards/rejected": -1.385085105895996, + "step": 516 + }, + { + "epoch": 0.32161741835147745, + "grad_norm": 0.4564976990222931, + "learning_rate": 3.707500000000001e-05, + "log_odds_chosen": 8.428442001342773, + "log_odds_ratio": -0.1521356999874115, + "logits/chosen": 0.31411972641944885, + "logits/rejected": 0.6657370328903198, + "logps/chosen": -1.1624163389205933, + "logps/rejected": -9.256325721740723, + "loss": 3.2951, + "nll_loss": 3.279919385910034, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11624162644147873, + "rewards/margins": 0.8093909025192261, + "rewards/rejected": -0.9256325960159302, + "step": 517 + }, + { + "epoch": 0.32223950233281495, + "grad_norm": 0.4547824263572693, + "learning_rate": 3.705e-05, + "log_odds_chosen": 8.413576126098633, + "log_odds_ratio": -0.18886855244636536, + "logits/chosen": 0.28079456090927124, + "logits/rejected": 0.5538430213928223, + "logps/chosen": -1.1201964616775513, + "logps/rejected": -9.210822105407715, + "loss": 3.1612, + "nll_loss": 3.1422665119171143, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11201964318752289, + "rewards/margins": 0.8090626001358032, + "rewards/rejected": -0.9210821986198425, + "step": 518 + }, + { + "epoch": 0.3228615863141524, + "grad_norm": 0.47383055090904236, + "learning_rate": 3.7025000000000005e-05, + "log_odds_chosen": 7.101673126220703, + "log_odds_ratio": -0.2151382565498352, + "logits/chosen": 0.19054706394672394, + "logits/rejected": 0.4452645480632782, + "logps/chosen": -0.9745645523071289, + "logps/rejected": -7.625181674957275, + "loss": 2.8218, + "nll_loss": 2.8003106117248535, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09745645523071289, + "rewards/margins": 0.6650617718696594, + "rewards/rejected": -0.7625181674957275, + "step": 519 + }, + { + "epoch": 0.3234836702954899, + "grad_norm": 0.6366752982139587, + "learning_rate": 3.7e-05, + "log_odds_chosen": 8.329351425170898, + "log_odds_ratio": -0.3641209304332733, + "logits/chosen": 0.14251962304115295, + "logits/rejected": 0.6523263454437256, + "logps/chosen": -1.3124771118164062, + "logps/rejected": -9.418330192565918, + "loss": 2.3793, + "nll_loss": 2.3428869247436523, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13124771416187286, + "rewards/margins": 0.8105852603912354, + "rewards/rejected": -0.9418330192565918, + "step": 520 + }, + { + "epoch": 0.32410575427682736, + "grad_norm": 0.4376268684864044, + "learning_rate": 3.6975000000000004e-05, + "log_odds_chosen": 10.756841659545898, + "log_odds_ratio": -0.2801904082298279, + "logits/chosen": 0.31616631150245667, + "logits/rejected": 0.5765305757522583, + "logps/chosen": -1.2397606372833252, + "logps/rejected": -11.706258773803711, + "loss": 3.3466, + "nll_loss": 3.318598508834839, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12397606670856476, + "rewards/margins": 1.046649694442749, + "rewards/rejected": -1.1706258058547974, + "step": 521 + }, + { + "epoch": 0.32472783825816487, + "grad_norm": 0.4194689393043518, + "learning_rate": 3.6950000000000004e-05, + "log_odds_chosen": 9.908319473266602, + "log_odds_ratio": -0.1611042022705078, + "logits/chosen": 0.32544493675231934, + "logits/rejected": 0.6462383270263672, + "logps/chosen": -1.2010586261749268, + "logps/rejected": -10.787912368774414, + "loss": 3.4332, + "nll_loss": 3.4171175956726074, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12010586261749268, + "rewards/margins": 0.9586854577064514, + "rewards/rejected": -1.0787913799285889, + "step": 522 + }, + { + "epoch": 0.3253499222395023, + "grad_norm": 1.505491018295288, + "learning_rate": 3.6925e-05, + "log_odds_chosen": 14.340056419372559, + "log_odds_ratio": -0.0028261165134608746, + "logits/chosen": 0.2382410168647766, + "logits/rejected": 0.7966728806495667, + "logps/chosen": -1.5478522777557373, + "logps/rejected": -15.318090438842773, + "loss": 2.7164, + "nll_loss": 2.716071128845215, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15478524565696716, + "rewards/margins": 1.3770238161087036, + "rewards/rejected": -1.5318089723587036, + "step": 523 + }, + { + "epoch": 0.3259720062208398, + "grad_norm": 0.4445713460445404, + "learning_rate": 3.69e-05, + "log_odds_chosen": 10.326703071594238, + "log_odds_ratio": -0.0006719048251397908, + "logits/chosen": 0.380736768245697, + "logits/rejected": 1.0418426990509033, + "logps/chosen": -1.3673847913742065, + "logps/rejected": -11.366878509521484, + "loss": 3.4216, + "nll_loss": 3.4214975833892822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13673847913742065, + "rewards/margins": 0.9999493956565857, + "rewards/rejected": -1.1366878747940063, + "step": 524 + }, + { + "epoch": 0.3265940902021773, + "grad_norm": 0.7201062440872192, + "learning_rate": 3.6875e-05, + "log_odds_chosen": 10.034008979797363, + "log_odds_ratio": -0.09337079524993896, + "logits/chosen": 0.24129562079906464, + "logits/rejected": 0.7426089644432068, + "logps/chosen": -1.3333206176757812, + "logps/rejected": -11.076189041137695, + "loss": 2.9608, + "nll_loss": 2.951493263244629, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13333207368850708, + "rewards/margins": 0.9742867946624756, + "rewards/rejected": -1.1076189279556274, + "step": 525 + }, + { + "epoch": 0.3272161741835148, + "grad_norm": 0.3761167526245117, + "learning_rate": 3.685e-05, + "log_odds_chosen": 3.660090923309326, + "log_odds_ratio": -0.5031037330627441, + "logits/chosen": 0.13932842016220093, + "logits/rejected": 0.2806503176689148, + "logps/chosen": -1.4467041492462158, + "logps/rejected": -5.056726455688477, + "loss": 3.1734, + "nll_loss": 3.1230688095092773, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14467042684555054, + "rewards/margins": 0.36100226640701294, + "rewards/rejected": -0.5056726932525635, + "step": 526 + }, + { + "epoch": 0.32783825816485224, + "grad_norm": 0.48558077216148376, + "learning_rate": 3.6825e-05, + "log_odds_chosen": 7.3520402908325195, + "log_odds_ratio": -0.22055096924304962, + "logits/chosen": 0.23364008963108063, + "logits/rejected": 0.43131324648857117, + "logps/chosen": -0.8987447023391724, + "logps/rejected": -7.75795316696167, + "loss": 3.0376, + "nll_loss": 3.0155234336853027, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08987447619438171, + "rewards/margins": 0.6859208345413208, + "rewards/rejected": -0.7757953405380249, + "step": 527 + }, + { + "epoch": 0.32846034214618974, + "grad_norm": 0.5533658266067505, + "learning_rate": 3.68e-05, + "log_odds_chosen": 7.092010498046875, + "log_odds_ratio": -0.21083347499370575, + "logits/chosen": 0.16821296513080597, + "logits/rejected": 0.5857659578323364, + "logps/chosen": -1.1237003803253174, + "logps/rejected": -7.805783748626709, + "loss": 2.9438, + "nll_loss": 2.9227511882781982, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11237004399299622, + "rewards/margins": 0.6682083606719971, + "rewards/rejected": -0.7805783748626709, + "step": 528 + }, + { + "epoch": 0.3290824261275272, + "grad_norm": 0.3960796594619751, + "learning_rate": 3.6775000000000006e-05, + "log_odds_chosen": 5.841064453125, + "log_odds_ratio": -0.2500973641872406, + "logits/chosen": 0.27127107977867126, + "logits/rejected": 0.43577903509140015, + "logps/chosen": -1.4152770042419434, + "logps/rejected": -7.040769100189209, + "loss": 3.6124, + "nll_loss": 3.587347984313965, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1415277123451233, + "rewards/margins": 0.5625491738319397, + "rewards/rejected": -0.704076886177063, + "step": 529 + }, + { + "epoch": 0.3297045101088647, + "grad_norm": 0.5108814835548401, + "learning_rate": 3.675e-05, + "log_odds_chosen": 4.806069850921631, + "log_odds_ratio": -0.23009052872657776, + "logits/chosen": -0.039525844156742096, + "logits/rejected": 0.47278881072998047, + "logps/chosen": -1.1424496173858643, + "logps/rejected": -5.656998157501221, + "loss": 2.4652, + "nll_loss": 2.442180633544922, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1142449676990509, + "rewards/margins": 0.4514548182487488, + "rewards/rejected": -0.5656998157501221, + "step": 530 + }, + { + "epoch": 0.33032659409020215, + "grad_norm": 0.4421898424625397, + "learning_rate": 3.6725000000000005e-05, + "log_odds_chosen": 4.877275466918945, + "log_odds_ratio": -0.16743728518486023, + "logits/chosen": 0.05251733213663101, + "logits/rejected": 0.2228899598121643, + "logps/chosen": -0.9268969297409058, + "logps/rejected": -5.308384418487549, + "loss": 2.8371, + "nll_loss": 2.820338010787964, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09268969297409058, + "rewards/margins": 0.4381487965583801, + "rewards/rejected": -0.5308384895324707, + "step": 531 + }, + { + "epoch": 0.33094867807153966, + "grad_norm": 0.33455726504325867, + "learning_rate": 3.6700000000000004e-05, + "log_odds_chosen": 3.417280912399292, + "log_odds_ratio": -0.38553696870803833, + "logits/chosen": 0.15889662504196167, + "logits/rejected": 0.3938848376274109, + "logps/chosen": -1.285894751548767, + "logps/rejected": -4.560215950012207, + "loss": 3.2018, + "nll_loss": 3.1632089614868164, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1285894811153412, + "rewards/margins": 0.3274320960044861, + "rewards/rejected": -0.45602160692214966, + "step": 532 + }, + { + "epoch": 0.33157076205287717, + "grad_norm": 0.3787088096141815, + "learning_rate": 3.6675000000000004e-05, + "log_odds_chosen": 9.068405151367188, + "log_odds_ratio": -0.08513309061527252, + "logits/chosen": 0.15244531631469727, + "logits/rejected": 0.667584240436554, + "logps/chosen": -1.1898918151855469, + "logps/rejected": -9.916119575500488, + "loss": 2.9166, + "nll_loss": 2.9080939292907715, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11898918449878693, + "rewards/margins": 0.8726227879524231, + "rewards/rejected": -0.9916119575500488, + "step": 533 + }, + { + "epoch": 0.3321928460342146, + "grad_norm": 0.39991286396980286, + "learning_rate": 3.665e-05, + "log_odds_chosen": 12.67526912689209, + "log_odds_ratio": -0.1024663895368576, + "logits/chosen": 0.21998846530914307, + "logits/rejected": 0.7742512226104736, + "logps/chosen": -1.0572272539138794, + "logps/rejected": -13.273682594299316, + "loss": 2.9368, + "nll_loss": 2.9265105724334717, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10572272539138794, + "rewards/margins": 1.2216455936431885, + "rewards/rejected": -1.3273682594299316, + "step": 534 + }, + { + "epoch": 0.3328149300155521, + "grad_norm": 0.49979427456855774, + "learning_rate": 3.6625e-05, + "log_odds_chosen": 1.2147586345672607, + "log_odds_ratio": -0.6691287755966187, + "logits/chosen": 0.12728288769721985, + "logits/rejected": 0.22928330302238464, + "logps/chosen": -1.2779240608215332, + "logps/rejected": -2.464627265930176, + "loss": 2.5579, + "nll_loss": 2.491028308868408, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.12779241800308228, + "rewards/margins": 0.11867032200098038, + "rewards/rejected": -0.24646273255348206, + "step": 535 + }, + { + "epoch": 0.3334370139968896, + "grad_norm": 0.5618945360183716, + "learning_rate": 3.66e-05, + "log_odds_chosen": 5.239035129547119, + "log_odds_ratio": -0.4188866913318634, + "logits/chosen": 0.32197320461273193, + "logits/rejected": 0.5903685092926025, + "logps/chosen": -1.3105459213256836, + "logps/rejected": -6.369636535644531, + "loss": 3.088, + "nll_loss": 3.0460805892944336, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1310545951128006, + "rewards/margins": 0.5059090852737427, + "rewards/rejected": -0.6369636654853821, + "step": 536 + }, + { + "epoch": 0.3340590979782271, + "grad_norm": 0.5546212792396545, + "learning_rate": 3.6575e-05, + "log_odds_chosen": 8.182401657104492, + "log_odds_ratio": -0.22162377834320068, + "logits/chosen": 0.09892675280570984, + "logits/rejected": 0.21920329332351685, + "logps/chosen": -1.0027060508728027, + "logps/rejected": -8.776458740234375, + "loss": 2.8454, + "nll_loss": 2.823225975036621, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1002705991268158, + "rewards/margins": 0.777375340461731, + "rewards/rejected": -0.8776458501815796, + "step": 537 + }, + { + "epoch": 0.33468118195956453, + "grad_norm": 0.47566530108451843, + "learning_rate": 3.655e-05, + "log_odds_chosen": 5.837797164916992, + "log_odds_ratio": -0.18034599721431732, + "logits/chosen": 0.30575647950172424, + "logits/rejected": 0.5681968331336975, + "logps/chosen": -1.1159521341323853, + "logps/rejected": -6.579317092895508, + "loss": 3.0867, + "nll_loss": 3.068636178970337, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11159522086381912, + "rewards/margins": 0.5463365316390991, + "rewards/rejected": -0.6579316854476929, + "step": 538 + }, + { + "epoch": 0.33530326594090204, + "grad_norm": 0.5338764786720276, + "learning_rate": 3.652500000000001e-05, + "log_odds_chosen": 9.230720520019531, + "log_odds_ratio": -0.18835598230361938, + "logits/chosen": 0.4432418942451477, + "logits/rejected": 1.0067334175109863, + "logps/chosen": -0.791349470615387, + "logps/rejected": -9.255400657653809, + "loss": 3.4137, + "nll_loss": 3.3948874473571777, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07913494855165482, + "rewards/margins": 0.8464051485061646, + "rewards/rejected": -0.9255399703979492, + "step": 539 + }, + { + "epoch": 0.3359253499222395, + "grad_norm": 0.4006078839302063, + "learning_rate": 3.65e-05, + "log_odds_chosen": 7.939189434051514, + "log_odds_ratio": -0.17464585602283478, + "logits/chosen": 0.3552432358264923, + "logits/rejected": 0.7513606548309326, + "logps/chosen": -1.0403473377227783, + "logps/rejected": -8.616355895996094, + "loss": 3.3614, + "nll_loss": 3.3439626693725586, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10403473675251007, + "rewards/margins": 0.7576008439064026, + "rewards/rejected": -0.8616355061531067, + "step": 540 + }, + { + "epoch": 0.336547433903577, + "grad_norm": 0.652712345123291, + "learning_rate": 3.6475000000000006e-05, + "log_odds_chosen": 7.361583709716797, + "log_odds_ratio": -0.44913148880004883, + "logits/chosen": 0.5314218997955322, + "logits/rejected": 0.9203664660453796, + "logps/chosen": -1.7339417934417725, + "logps/rejected": -8.831292152404785, + "loss": 3.4607, + "nll_loss": 3.4157681465148926, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.17339418828487396, + "rewards/margins": 0.7097350358963013, + "rewards/rejected": -0.8831292390823364, + "step": 541 + }, + { + "epoch": 0.33716951788491445, + "grad_norm": 0.5391427278518677, + "learning_rate": 3.645e-05, + "log_odds_chosen": 5.927590370178223, + "log_odds_ratio": -0.17163582146167755, + "logits/chosen": 0.45557430386543274, + "logits/rejected": 0.758292019367218, + "logps/chosen": -1.0969210863113403, + "logps/rejected": -6.686923980712891, + "loss": 3.0634, + "nll_loss": 3.0462679862976074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10969211906194687, + "rewards/margins": 0.5590003728866577, + "rewards/rejected": -0.6686924695968628, + "step": 542 + }, + { + "epoch": 0.33779160186625196, + "grad_norm": 0.5085594058036804, + "learning_rate": 3.6425000000000004e-05, + "log_odds_chosen": 6.805022239685059, + "log_odds_ratio": -0.04457619786262512, + "logits/chosen": 0.2579970359802246, + "logits/rejected": 0.8251427412033081, + "logps/chosen": -1.0637521743774414, + "logps/rejected": -7.367990016937256, + "loss": 2.7691, + "nll_loss": 2.7646713256835938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10637522488832474, + "rewards/margins": 0.6304237246513367, + "rewards/rejected": -0.7367990016937256, + "step": 543 + }, + { + "epoch": 0.3384136858475894, + "grad_norm": 1.546451449394226, + "learning_rate": 3.6400000000000004e-05, + "log_odds_chosen": 7.0844831466674805, + "log_odds_ratio": -0.15514059364795685, + "logits/chosen": 0.32124999165534973, + "logits/rejected": 0.5545608997344971, + "logps/chosen": -1.4081883430480957, + "logps/rejected": -8.002361297607422, + "loss": 3.4905, + "nll_loss": 3.475017547607422, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14081883430480957, + "rewards/margins": 0.6594172716140747, + "rewards/rejected": -0.8002361059188843, + "step": 544 + }, + { + "epoch": 0.3390357698289269, + "grad_norm": 0.9646157622337341, + "learning_rate": 3.6375e-05, + "log_odds_chosen": 5.977232933044434, + "log_odds_ratio": -0.31709274649620056, + "logits/chosen": 0.2529332637786865, + "logits/rejected": 0.3822195529937744, + "logps/chosen": -1.1004133224487305, + "logps/rejected": -6.800418376922607, + "loss": 3.3593, + "nll_loss": 3.3275961875915527, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11004132777452469, + "rewards/margins": 0.5700005292892456, + "rewards/rejected": -0.6800418496131897, + "step": 545 + }, + { + "epoch": 0.33965785381026437, + "grad_norm": 0.4293280839920044, + "learning_rate": 3.635e-05, + "log_odds_chosen": 8.562162399291992, + "log_odds_ratio": -0.13144966959953308, + "logits/chosen": 0.12223690003156662, + "logits/rejected": 0.3512270450592041, + "logps/chosen": -0.9846524596214294, + "logps/rejected": -9.026788711547852, + "loss": 2.9709, + "nll_loss": 2.957728385925293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09846524894237518, + "rewards/margins": 0.8042137026786804, + "rewards/rejected": -0.9026789665222168, + "step": 546 + }, + { + "epoch": 0.34027993779160187, + "grad_norm": 0.5592858195304871, + "learning_rate": 3.6325e-05, + "log_odds_chosen": 7.228453159332275, + "log_odds_ratio": -0.07215896248817444, + "logits/chosen": 0.19593659043312073, + "logits/rejected": 0.7204120755195618, + "logps/chosen": -1.225344181060791, + "logps/rejected": -8.123269081115723, + "loss": 3.223, + "nll_loss": 3.2157747745513916, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12253442406654358, + "rewards/margins": 0.6897925138473511, + "rewards/rejected": -0.8123269081115723, + "step": 547 + }, + { + "epoch": 0.3409020217729393, + "grad_norm": 0.3459852635860443, + "learning_rate": 3.63e-05, + "log_odds_chosen": 8.127766609191895, + "log_odds_ratio": -0.10982684791088104, + "logits/chosen": 0.18933087587356567, + "logits/rejected": 0.6136522889137268, + "logps/chosen": -1.2939542531967163, + "logps/rejected": -9.101163864135742, + "loss": 3.2767, + "nll_loss": 3.265726089477539, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12939542531967163, + "rewards/margins": 0.7807210683822632, + "rewards/rejected": -0.91011643409729, + "step": 548 + }, + { + "epoch": 0.34152410575427683, + "grad_norm": 0.5195790529251099, + "learning_rate": 3.6275e-05, + "log_odds_chosen": 10.92127799987793, + "log_odds_ratio": -0.03485168516635895, + "logits/chosen": 0.1153639703989029, + "logits/rejected": 1.0010063648223877, + "logps/chosen": -0.8691779375076294, + "logps/rejected": -11.154041290283203, + "loss": 2.4175, + "nll_loss": 2.414062976837158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08691780269145966, + "rewards/margins": 1.0284863710403442, + "rewards/rejected": -1.1154041290283203, + "step": 549 + }, + { + "epoch": 0.3421461897356143, + "grad_norm": 0.4225841760635376, + "learning_rate": 3.625e-05, + "log_odds_chosen": 11.029512405395508, + "log_odds_ratio": -0.003762052860110998, + "logits/chosen": 0.1677618771791458, + "logits/rejected": 0.9653577208518982, + "logps/chosen": -1.1759905815124512, + "logps/rejected": -11.536764144897461, + "loss": 3.2061, + "nll_loss": 3.2056827545166016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11759904772043228, + "rewards/margins": 1.0360772609710693, + "rewards/rejected": -1.1536762714385986, + "step": 550 + }, + { + "epoch": 0.3427682737169518, + "grad_norm": 0.4881388247013092, + "learning_rate": 3.6225000000000006e-05, + "log_odds_chosen": 10.496110916137695, + "log_odds_ratio": -0.012914421036839485, + "logits/chosen": 0.15544462203979492, + "logits/rejected": 0.910711944103241, + "logps/chosen": -1.2286415100097656, + "logps/rejected": -11.356847763061523, + "loss": 2.8724, + "nll_loss": 2.871134042739868, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12286416441202164, + "rewards/margins": 1.0128206014633179, + "rewards/rejected": -1.1356847286224365, + "step": 551 + }, + { + "epoch": 0.3433903576982893, + "grad_norm": 0.5085164308547974, + "learning_rate": 3.62e-05, + "log_odds_chosen": 3.2137742042541504, + "log_odds_ratio": -0.2135535031557083, + "logits/chosen": 0.052660077810287476, + "logits/rejected": 0.18513916432857513, + "logps/chosen": -1.4653301239013672, + "logps/rejected": -4.4103498458862305, + "loss": 3.3976, + "nll_loss": 3.37619686126709, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14653301239013672, + "rewards/margins": 0.294501930475235, + "rewards/rejected": -0.4410349726676941, + "step": 552 + }, + { + "epoch": 0.34401244167962675, + "grad_norm": 0.45229020714759827, + "learning_rate": 3.6175000000000005e-05, + "log_odds_chosen": 6.850950241088867, + "log_odds_ratio": -0.22252152860164642, + "logits/chosen": 0.07053791731595993, + "logits/rejected": 0.5561230182647705, + "logps/chosen": -1.2072479724884033, + "logps/rejected": -7.82691764831543, + "loss": 2.8457, + "nll_loss": 2.8234715461730957, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12072480469942093, + "rewards/margins": 0.6619669198989868, + "rewards/rejected": -0.7826917767524719, + "step": 553 + }, + { + "epoch": 0.34463452566096425, + "grad_norm": 0.38006460666656494, + "learning_rate": 3.615e-05, + "log_odds_chosen": 4.494772911071777, + "log_odds_ratio": -0.40767160058021545, + "logits/chosen": 0.1025007963180542, + "logits/rejected": 0.28547078371047974, + "logps/chosen": -1.0280405282974243, + "logps/rejected": -5.124785900115967, + "loss": 3.0916, + "nll_loss": 3.050813674926758, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10280404984951019, + "rewards/margins": 0.4096745550632477, + "rewards/rejected": -0.5124785900115967, + "step": 554 + }, + { + "epoch": 0.3452566096423017, + "grad_norm": 0.41790586709976196, + "learning_rate": 3.6125000000000004e-05, + "log_odds_chosen": 9.663520812988281, + "log_odds_ratio": -0.1240549236536026, + "logits/chosen": 0.013118164613842964, + "logits/rejected": 0.7078239917755127, + "logps/chosen": -1.0113468170166016, + "logps/rejected": -10.269187927246094, + "loss": 2.3077, + "nll_loss": 2.2952585220336914, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10113468766212463, + "rewards/margins": 0.9257840514183044, + "rewards/rejected": -1.026918888092041, + "step": 555 + }, + { + "epoch": 0.3458786936236392, + "grad_norm": 0.42868050932884216, + "learning_rate": 3.61e-05, + "log_odds_chosen": 11.801397323608398, + "log_odds_ratio": -0.05558867007493973, + "logits/chosen": 0.11525575071573257, + "logits/rejected": 0.5333128571510315, + "logps/chosen": -0.6986356377601624, + "logps/rejected": -11.682586669921875, + "loss": 2.5445, + "nll_loss": 2.538949728012085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06986355781555176, + "rewards/margins": 1.0983952283859253, + "rewards/rejected": -1.1682586669921875, + "step": 556 + }, + { + "epoch": 0.34650077760497666, + "grad_norm": 0.3941185772418976, + "learning_rate": 3.6075e-05, + "log_odds_chosen": 9.488451957702637, + "log_odds_ratio": -0.022738970816135406, + "logits/chosen": 0.2211320698261261, + "logits/rejected": 0.9476622343063354, + "logps/chosen": -1.3519847393035889, + "logps/rejected": -10.521648406982422, + "loss": 3.1268, + "nll_loss": 3.1244912147521973, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1351984739303589, + "rewards/margins": 0.9169663190841675, + "rewards/rejected": -1.0521647930145264, + "step": 557 + }, + { + "epoch": 0.34712286158631417, + "grad_norm": 0.5461376309394836, + "learning_rate": 3.605e-05, + "log_odds_chosen": 6.737714767456055, + "log_odds_ratio": -0.270801305770874, + "logits/chosen": 0.20157453417778015, + "logits/rejected": 0.566937267780304, + "logps/chosen": -1.1833453178405762, + "logps/rejected": -7.544872760772705, + "loss": 3.1792, + "nll_loss": 3.152073621749878, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11833453178405762, + "rewards/margins": 0.6361527442932129, + "rewards/rejected": -0.7544872760772705, + "step": 558 + }, + { + "epoch": 0.3477449455676516, + "grad_norm": 0.5232445597648621, + "learning_rate": 3.6025e-05, + "log_odds_chosen": 14.541240692138672, + "log_odds_ratio": -0.1170097142457962, + "logits/chosen": 0.005186443217098713, + "logits/rejected": 0.5835030674934387, + "logps/chosen": -1.1031867265701294, + "logps/rejected": -15.032424926757812, + "loss": 2.6264, + "nll_loss": 2.6146867275238037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11031867563724518, + "rewards/margins": 1.3929238319396973, + "rewards/rejected": -1.5032424926757812, + "step": 559 + }, + { + "epoch": 0.3483670295489891, + "grad_norm": 0.40398097038269043, + "learning_rate": 3.6e-05, + "log_odds_chosen": 10.217870712280273, + "log_odds_ratio": -0.021281030029058456, + "logits/chosen": 0.26363763213157654, + "logits/rejected": 0.7080330848693848, + "logps/chosen": -1.2973829507827759, + "logps/rejected": -11.150710105895996, + "loss": 3.5862, + "nll_loss": 3.58408260345459, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12973830103874207, + "rewards/margins": 0.985332727432251, + "rewards/rejected": -1.1150710582733154, + "step": 560 + }, + { + "epoch": 0.3489891135303266, + "grad_norm": 0.3686501085758209, + "learning_rate": 3.5975e-05, + "log_odds_chosen": 11.192573547363281, + "log_odds_ratio": -0.00902550108730793, + "logits/chosen": 0.07758531719446182, + "logits/rejected": 0.7558944225311279, + "logps/chosen": -0.9165749549865723, + "logps/rejected": -11.570627212524414, + "loss": 2.5896, + "nll_loss": 2.5887372493743896, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09165749698877335, + "rewards/margins": 1.0654051303863525, + "rewards/rejected": -1.1570627689361572, + "step": 561 + }, + { + "epoch": 0.3496111975116641, + "grad_norm": 0.3752516508102417, + "learning_rate": 3.595e-05, + "log_odds_chosen": 13.246245384216309, + "log_odds_ratio": -0.1449487805366516, + "logits/chosen": 0.2694312334060669, + "logits/rejected": 1.2381070852279663, + "logps/chosen": -1.137485384941101, + "logps/rejected": -14.047223091125488, + "loss": 3.0143, + "nll_loss": 2.999814033508301, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11374853551387787, + "rewards/margins": 1.2909739017486572, + "rewards/rejected": -1.4047224521636963, + "step": 562 + }, + { + "epoch": 0.35023328149300154, + "grad_norm": 0.3949683904647827, + "learning_rate": 3.5925000000000006e-05, + "log_odds_chosen": 8.974077224731445, + "log_odds_ratio": -0.0277986042201519, + "logits/chosen": 0.18479128181934357, + "logits/rejected": 0.6321989297866821, + "logps/chosen": -0.7476387023925781, + "logps/rejected": -8.921815872192383, + "loss": 3.007, + "nll_loss": 3.004196882247925, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07476387172937393, + "rewards/margins": 0.8174177408218384, + "rewards/rejected": -0.8921815752983093, + "step": 563 + }, + { + "epoch": 0.35085536547433904, + "grad_norm": 5.831315994262695, + "learning_rate": 3.59e-05, + "log_odds_chosen": 9.392239570617676, + "log_odds_ratio": -0.17418979108333588, + "logits/chosen": 0.1219196617603302, + "logits/rejected": 0.9300506114959717, + "logps/chosen": -1.233995795249939, + "logps/rejected": -10.267557144165039, + "loss": 2.6961, + "nll_loss": 2.6787209510803223, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12339960038661957, + "rewards/margins": 0.9033560752868652, + "rewards/rejected": -1.026755690574646, + "step": 564 + }, + { + "epoch": 0.3514774494556765, + "grad_norm": 0.4015295207500458, + "learning_rate": 3.5875000000000005e-05, + "log_odds_chosen": 11.647599220275879, + "log_odds_ratio": -0.007850930094718933, + "logits/chosen": 0.2969825267791748, + "logits/rejected": 0.9183492660522461, + "logps/chosen": -1.142836332321167, + "logps/rejected": -12.2813720703125, + "loss": 3.3011, + "nll_loss": 3.3002769947052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11428363621234894, + "rewards/margins": 1.1138535737991333, + "rewards/rejected": -1.2281371355056763, + "step": 565 + }, + { + "epoch": 0.352099533437014, + "grad_norm": 0.8392999768257141, + "learning_rate": 3.585e-05, + "log_odds_chosen": 15.985481262207031, + "log_odds_ratio": -0.0001611363550182432, + "logits/chosen": 0.33824992179870605, + "logits/rejected": 1.2024070024490356, + "logps/chosen": -1.6567131280899048, + "logps/rejected": -17.20331573486328, + "loss": 3.5828, + "nll_loss": 3.5828089714050293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16567131876945496, + "rewards/margins": 1.5546603202819824, + "rewards/rejected": -1.7203315496444702, + "step": 566 + }, + { + "epoch": 0.35272161741835145, + "grad_norm": 0.4848373830318451, + "learning_rate": 3.5825000000000003e-05, + "log_odds_chosen": 13.249602317810059, + "log_odds_ratio": -0.12133760005235672, + "logits/chosen": 0.1779986023902893, + "logits/rejected": 1.0257817506790161, + "logps/chosen": -1.069126009941101, + "logps/rejected": -13.913080215454102, + "loss": 2.6563, + "nll_loss": 2.644139289855957, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10691259801387787, + "rewards/margins": 1.284395456314087, + "rewards/rejected": -1.391308069229126, + "step": 567 + }, + { + "epoch": 0.35334370139968896, + "grad_norm": 0.4035547375679016, + "learning_rate": 3.58e-05, + "log_odds_chosen": 13.095964431762695, + "log_odds_ratio": -0.006714683026075363, + "logits/chosen": 0.09633934497833252, + "logits/rejected": 0.7238105535507202, + "logps/chosen": -0.9360955357551575, + "logps/rejected": -13.495253562927246, + "loss": 2.7449, + "nll_loss": 2.744229555130005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09360955655574799, + "rewards/margins": 1.255915880203247, + "rewards/rejected": -1.3495254516601562, + "step": 568 + }, + { + "epoch": 0.35396578538102647, + "grad_norm": 0.7661040425300598, + "learning_rate": 3.5775e-05, + "log_odds_chosen": 9.719877243041992, + "log_odds_ratio": -0.23540478944778442, + "logits/chosen": 0.3083692789077759, + "logits/rejected": 1.01234769821167, + "logps/chosen": -1.212859034538269, + "logps/rejected": -10.61805534362793, + "loss": 3.1301, + "nll_loss": 3.1065545082092285, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12128590047359467, + "rewards/margins": 0.9405196905136108, + "rewards/rejected": -1.0618056058883667, + "step": 569 + }, + { + "epoch": 0.3545878693623639, + "grad_norm": 0.3926217257976532, + "learning_rate": 3.575e-05, + "log_odds_chosen": 10.073858261108398, + "log_odds_ratio": -0.1235310509800911, + "logits/chosen": 0.13625189661979675, + "logits/rejected": 0.7520464658737183, + "logps/chosen": -1.2251389026641846, + "logps/rejected": -10.796701431274414, + "loss": 2.7511, + "nll_loss": 2.738753318786621, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12251389026641846, + "rewards/margins": 0.957156240940094, + "rewards/rejected": -1.0796700716018677, + "step": 570 + }, + { + "epoch": 0.3552099533437014, + "grad_norm": 0.8033375144004822, + "learning_rate": 3.5725e-05, + "log_odds_chosen": 4.0529279708862305, + "log_odds_ratio": -0.20374912023544312, + "logits/chosen": 0.1487824022769928, + "logits/rejected": 0.3493782877922058, + "logps/chosen": -1.0385345220565796, + "logps/rejected": -4.73582649230957, + "loss": 2.6228, + "nll_loss": 2.6024231910705566, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10385345667600632, + "rewards/margins": 0.369729220867157, + "rewards/rejected": -0.4735826551914215, + "step": 571 + }, + { + "epoch": 0.3558320373250389, + "grad_norm": 0.38423430919647217, + "learning_rate": 3.57e-05, + "log_odds_chosen": 7.926821708679199, + "log_odds_ratio": -0.0515306256711483, + "logits/chosen": 0.3775959610939026, + "logits/rejected": 0.7432839274406433, + "logps/chosen": -1.2405486106872559, + "logps/rejected": -8.742298126220703, + "loss": 3.7806, + "nll_loss": 3.775494337081909, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12405486404895782, + "rewards/margins": 0.7501749992370605, + "rewards/rejected": -0.874229907989502, + "step": 572 + }, + { + "epoch": 0.3564541213063764, + "grad_norm": 0.5379979610443115, + "learning_rate": 3.5675e-05, + "log_odds_chosen": 4.479024410247803, + "log_odds_ratio": -0.45168882608413696, + "logits/chosen": 0.07607246190309525, + "logits/rejected": 0.2333858758211136, + "logps/chosen": -1.1368803977966309, + "logps/rejected": -5.305628776550293, + "loss": 2.7889, + "nll_loss": 2.7437705993652344, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11368802934885025, + "rewards/margins": 0.41687485575675964, + "rewards/rejected": -0.5305629372596741, + "step": 573 + }, + { + "epoch": 0.35707620528771383, + "grad_norm": 1.3011409044265747, + "learning_rate": 3.565e-05, + "log_odds_chosen": 3.907680034637451, + "log_odds_ratio": -0.33975449204444885, + "logits/chosen": 0.2571965754032135, + "logits/rejected": 0.5251089930534363, + "logps/chosen": -1.092237949371338, + "logps/rejected": -4.7046799659729, + "loss": 3.1342, + "nll_loss": 3.1002023220062256, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10922379791736603, + "rewards/margins": 0.36124420166015625, + "rewards/rejected": -0.4704679846763611, + "step": 574 + }, + { + "epoch": 0.35769828926905134, + "grad_norm": 0.6529536247253418, + "learning_rate": 3.5625000000000005e-05, + "log_odds_chosen": 7.877595901489258, + "log_odds_ratio": -0.17618416249752045, + "logits/chosen": 0.3382919430732727, + "logits/rejected": 0.7670020461082458, + "logps/chosen": -1.239711880683899, + "logps/rejected": -8.82756233215332, + "loss": 3.5095, + "nll_loss": 3.4918410778045654, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12397119402885437, + "rewards/margins": 0.7587849497795105, + "rewards/rejected": -0.882756233215332, + "step": 575 + }, + { + "epoch": 0.3583203732503888, + "grad_norm": 0.4566708505153656, + "learning_rate": 3.56e-05, + "log_odds_chosen": 11.294231414794922, + "log_odds_ratio": -0.1296280026435852, + "logits/chosen": 0.2539590299129486, + "logits/rejected": 0.8094176054000854, + "logps/chosen": -1.7060835361480713, + "logps/rejected": -12.409913063049316, + "loss": 3.248, + "nll_loss": 3.234999656677246, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17060835659503937, + "rewards/margins": 1.070383071899414, + "rewards/rejected": -1.2409913539886475, + "step": 576 + }, + { + "epoch": 0.3589424572317263, + "grad_norm": 0.46674543619155884, + "learning_rate": 3.5575000000000004e-05, + "log_odds_chosen": 3.2900829315185547, + "log_odds_ratio": -0.31960904598236084, + "logits/chosen": 0.08701478689908981, + "logits/rejected": 0.1937023103237152, + "logps/chosen": -0.9611541628837585, + "logps/rejected": -3.7472667694091797, + "loss": 2.775, + "nll_loss": 2.743015766143799, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09611541777849197, + "rewards/margins": 0.27861127257347107, + "rewards/rejected": -0.37472671270370483, + "step": 577 + }, + { + "epoch": 0.35956454121306375, + "grad_norm": 0.3776179552078247, + "learning_rate": 3.555e-05, + "log_odds_chosen": 3.8524093627929688, + "log_odds_ratio": -0.1908583641052246, + "logits/chosen": 0.1532333493232727, + "logits/rejected": 0.4199594557285309, + "logps/chosen": -1.2285915613174438, + "logps/rejected": -4.827481269836426, + "loss": 3.0596, + "nll_loss": 3.0405075550079346, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1228591576218605, + "rewards/margins": 0.3598890006542206, + "rewards/rejected": -0.4827481508255005, + "step": 578 + }, + { + "epoch": 0.36018662519440126, + "grad_norm": 0.5118311643600464, + "learning_rate": 3.5525e-05, + "log_odds_chosen": 7.782455921173096, + "log_odds_ratio": -0.2796177864074707, + "logits/chosen": 0.08961069583892822, + "logits/rejected": 0.5031421780586243, + "logps/chosen": -1.2598867416381836, + "logps/rejected": -8.860183715820312, + "loss": 2.4754, + "nll_loss": 2.4474620819091797, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1259886771440506, + "rewards/margins": 0.760029673576355, + "rewards/rejected": -0.8860183954238892, + "step": 579 + }, + { + "epoch": 0.3608087091757387, + "grad_norm": 0.40134724974632263, + "learning_rate": 3.55e-05, + "log_odds_chosen": 10.93133544921875, + "log_odds_ratio": -0.1257864534854889, + "logits/chosen": 0.13825632631778717, + "logits/rejected": 0.43383318185806274, + "logps/chosen": -1.1258553266525269, + "logps/rejected": -11.62828540802002, + "loss": 3.1864, + "nll_loss": 3.173846960067749, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11258553713560104, + "rewards/margins": 1.0502430200576782, + "rewards/rejected": -1.1628284454345703, + "step": 580 + }, + { + "epoch": 0.3614307931570762, + "grad_norm": 0.37659627199172974, + "learning_rate": 3.5475e-05, + "log_odds_chosen": 9.985986709594727, + "log_odds_ratio": -0.02613000012934208, + "logits/chosen": 0.2525095045566559, + "logits/rejected": 0.599713146686554, + "logps/chosen": -1.3548399209976196, + "logps/rejected": -11.0252685546875, + "loss": 3.217, + "nll_loss": 3.214369535446167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1354840099811554, + "rewards/margins": 0.967042863368988, + "rewards/rejected": -1.1025269031524658, + "step": 581 + }, + { + "epoch": 0.36205287713841366, + "grad_norm": 0.4002346098423004, + "learning_rate": 3.545e-05, + "log_odds_chosen": 7.260525226593018, + "log_odds_ratio": -0.22730308771133423, + "logits/chosen": 0.3146141767501831, + "logits/rejected": 0.7007775902748108, + "logps/chosen": -1.2889691591262817, + "logps/rejected": -8.326926231384277, + "loss": 3.4248, + "nll_loss": 3.4020333290100098, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12889692187309265, + "rewards/margins": 0.7037956714630127, + "rewards/rejected": -0.8326926231384277, + "step": 582 + }, + { + "epoch": 0.36267496111975117, + "grad_norm": 0.4641818404197693, + "learning_rate": 3.5425e-05, + "log_odds_chosen": 8.128804206848145, + "log_odds_ratio": -0.26333796977996826, + "logits/chosen": 0.2824775278568268, + "logits/rejected": 0.928533673286438, + "logps/chosen": -1.3320082426071167, + "logps/rejected": -9.260583877563477, + "loss": 2.9645, + "nll_loss": 2.9382100105285645, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13320083916187286, + "rewards/margins": 0.7928575873374939, + "rewards/rejected": -0.9260584115982056, + "step": 583 + }, + { + "epoch": 0.3632970451010886, + "grad_norm": 0.4908921718597412, + "learning_rate": 3.54e-05, + "log_odds_chosen": 7.648657321929932, + "log_odds_ratio": -0.20667998492717743, + "logits/chosen": 0.1102341040968895, + "logits/rejected": 0.6346576809883118, + "logps/chosen": -1.1072837114334106, + "logps/rejected": -8.343591690063477, + "loss": 2.6697, + "nll_loss": 2.6490468978881836, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11072837561368942, + "rewards/margins": 0.7236307859420776, + "rewards/rejected": -0.8343591094017029, + "step": 584 + }, + { + "epoch": 0.36391912908242613, + "grad_norm": 0.42618241906166077, + "learning_rate": 3.5375e-05, + "log_odds_chosen": 6.4050774574279785, + "log_odds_ratio": -0.31210067868232727, + "logits/chosen": 0.232399120926857, + "logits/rejected": 0.5041719079017639, + "logps/chosen": -1.3073663711547852, + "logps/rejected": -7.530215263366699, + "loss": 3.0651, + "nll_loss": 3.033935785293579, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13073663413524628, + "rewards/margins": 0.6222848892211914, + "rewards/rejected": -0.7530215382575989, + "step": 585 + }, + { + "epoch": 0.3645412130637636, + "grad_norm": 0.3700605034828186, + "learning_rate": 3.535e-05, + "log_odds_chosen": 9.741218566894531, + "log_odds_ratio": -0.2235887050628662, + "logits/chosen": 0.17234095931053162, + "logits/rejected": 0.7051503658294678, + "logps/chosen": -1.2577295303344727, + "logps/rejected": -10.757645606994629, + "loss": 2.9902, + "nll_loss": 2.967867851257324, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12577295303344727, + "rewards/margins": 0.9499915838241577, + "rewards/rejected": -1.075764536857605, + "step": 586 + }, + { + "epoch": 0.3651632970451011, + "grad_norm": 0.4975121021270752, + "learning_rate": 3.5325000000000005e-05, + "log_odds_chosen": 8.96474838256836, + "log_odds_ratio": -0.33923766016960144, + "logits/chosen": 0.3609507977962494, + "logits/rejected": 0.8460711240768433, + "logps/chosen": -1.3260118961334229, + "logps/rejected": -10.090775489807129, + "loss": 3.1647, + "nll_loss": 3.1307742595672607, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13260118663311005, + "rewards/margins": 0.8764763474464417, + "rewards/rejected": -1.009077548980713, + "step": 587 + }, + { + "epoch": 0.3657853810264386, + "grad_norm": 0.7693401575088501, + "learning_rate": 3.53e-05, + "log_odds_chosen": 3.7467236518859863, + "log_odds_ratio": -0.6245080828666687, + "logits/chosen": 0.09779280424118042, + "logits/rejected": 0.26720985770225525, + "logps/chosen": -1.4264414310455322, + "logps/rejected": -4.837800025939941, + "loss": 2.7459, + "nll_loss": 2.6834800243377686, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14264413714408875, + "rewards/margins": 0.3411359190940857, + "rewards/rejected": -0.48378005623817444, + "step": 588 + }, + { + "epoch": 0.36640746500777605, + "grad_norm": 0.41673484444618225, + "learning_rate": 3.5275000000000004e-05, + "log_odds_chosen": 7.740867614746094, + "log_odds_ratio": -0.17404800653457642, + "logits/chosen": 0.23524239659309387, + "logits/rejected": 0.43157610297203064, + "logps/chosen": -0.9836545586585999, + "logps/rejected": -8.333390235900879, + "loss": 3.4373, + "nll_loss": 3.419887065887451, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09836546331644058, + "rewards/margins": 0.7349735498428345, + "rewards/rejected": -0.8333390951156616, + "step": 589 + }, + { + "epoch": 0.36702954898911355, + "grad_norm": 0.5059419870376587, + "learning_rate": 3.525e-05, + "log_odds_chosen": 4.646327018737793, + "log_odds_ratio": -0.17816919088363647, + "logits/chosen": 0.14172425866127014, + "logits/rejected": 0.35883334279060364, + "logps/chosen": -1.3644582033157349, + "logps/rejected": -5.79020881652832, + "loss": 2.8427, + "nll_loss": 2.8248753547668457, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1364458203315735, + "rewards/margins": 0.442575067281723, + "rewards/rejected": -0.5790208578109741, + "step": 590 + }, + { + "epoch": 0.367651632970451, + "grad_norm": 0.45872944593429565, + "learning_rate": 3.5225e-05, + "log_odds_chosen": 14.634015083312988, + "log_odds_ratio": -0.07316752523183823, + "logits/chosen": 0.20786404609680176, + "logits/rejected": 1.1298377513885498, + "logps/chosen": -1.0286473035812378, + "logps/rejected": -15.15748405456543, + "loss": 2.9729, + "nll_loss": 2.965557098388672, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10286473482847214, + "rewards/margins": 1.4128836393356323, + "rewards/rejected": -1.5157485008239746, + "step": 591 + }, + { + "epoch": 0.3682737169517885, + "grad_norm": 0.678117036819458, + "learning_rate": 3.52e-05, + "log_odds_chosen": 6.029167175292969, + "log_odds_ratio": -0.36632686853408813, + "logits/chosen": 0.255694717168808, + "logits/rejected": 0.5321189761161804, + "logps/chosen": -0.940081000328064, + "logps/rejected": -6.712700843811035, + "loss": 3.3547, + "nll_loss": 3.3180642127990723, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09400809556245804, + "rewards/margins": 0.5772619843482971, + "rewards/rejected": -0.6712701320648193, + "step": 592 + }, + { + "epoch": 0.36889580093312596, + "grad_norm": 0.415947824716568, + "learning_rate": 3.5175e-05, + "log_odds_chosen": 9.287468910217285, + "log_odds_ratio": -0.33420291543006897, + "logits/chosen": 0.1860431730747223, + "logits/rejected": 0.6892718076705933, + "logps/chosen": -1.1524367332458496, + "logps/rejected": -10.236968994140625, + "loss": 3.0043, + "nll_loss": 2.9708549976348877, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11524367332458496, + "rewards/margins": 0.9084532260894775, + "rewards/rejected": -1.0236968994140625, + "step": 593 + }, + { + "epoch": 0.36951788491446347, + "grad_norm": 0.6838355660438538, + "learning_rate": 3.515e-05, + "log_odds_chosen": 11.52259635925293, + "log_odds_ratio": -0.16391010582447052, + "logits/chosen": 0.14682823419570923, + "logits/rejected": 0.7776925563812256, + "logps/chosen": -1.0399855375289917, + "logps/rejected": -12.213512420654297, + "loss": 2.6591, + "nll_loss": 2.642709493637085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10399855673313141, + "rewards/margins": 1.1173527240753174, + "rewards/rejected": -1.2213512659072876, + "step": 594 + }, + { + "epoch": 0.3701399688958009, + "grad_norm": 0.5135987997055054, + "learning_rate": 3.5125e-05, + "log_odds_chosen": 10.131647109985352, + "log_odds_ratio": -0.15441890060901642, + "logits/chosen": 0.1124928668141365, + "logits/rejected": 0.4724903106689453, + "logps/chosen": -1.242720365524292, + "logps/rejected": -11.012077331542969, + "loss": 2.9245, + "nll_loss": 2.9090170860290527, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12427204102277756, + "rewards/margins": 0.9769356846809387, + "rewards/rejected": -1.1012077331542969, + "step": 595 + }, + { + "epoch": 0.3707620528771384, + "grad_norm": 40.30850601196289, + "learning_rate": 3.51e-05, + "log_odds_chosen": 12.990202903747559, + "log_odds_ratio": -0.9076607823371887, + "logits/chosen": 0.37599942088127136, + "logits/rejected": 1.0942538976669312, + "logps/chosen": -3.2952306270599365, + "logps/rejected": -15.915863037109375, + "loss": 3.9839, + "nll_loss": 3.893106460571289, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.32952308654785156, + "rewards/margins": 1.2620632648468018, + "rewards/rejected": -1.5915863513946533, + "step": 596 + }, + { + "epoch": 0.3713841368584759, + "grad_norm": 30.15015983581543, + "learning_rate": 3.5075000000000006e-05, + "log_odds_chosen": 10.713224411010742, + "log_odds_ratio": -0.17117588222026825, + "logits/chosen": 0.28844138979911804, + "logits/rejected": 0.7312687039375305, + "logps/chosen": -2.2271180152893066, + "logps/rejected": -12.698269844055176, + "loss": 3.5309, + "nll_loss": 3.5137851238250732, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22271178662776947, + "rewards/margins": 1.0471150875091553, + "rewards/rejected": -1.269826889038086, + "step": 597 + }, + { + "epoch": 0.3720062208398134, + "grad_norm": 0.4343259036540985, + "learning_rate": 3.505e-05, + "log_odds_chosen": 16.90182876586914, + "log_odds_ratio": -0.0005982535076327622, + "logits/chosen": 0.18842889368534088, + "logits/rejected": 0.9794631600379944, + "logps/chosen": -1.1533726453781128, + "logps/rejected": -17.613147735595703, + "loss": 3.0408, + "nll_loss": 3.0407166481018066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11533726751804352, + "rewards/margins": 1.64597749710083, + "rewards/rejected": -1.761314868927002, + "step": 598 + }, + { + "epoch": 0.37262830482115084, + "grad_norm": 0.38888421654701233, + "learning_rate": 3.5025000000000004e-05, + "log_odds_chosen": 6.894892692565918, + "log_odds_ratio": -0.13730782270431519, + "logits/chosen": 0.24703556299209595, + "logits/rejected": 0.6600065231323242, + "logps/chosen": -0.8034927845001221, + "logps/rejected": -7.0167083740234375, + "loss": 3.3792, + "nll_loss": 3.3654870986938477, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08034928143024445, + "rewards/margins": 0.6213215589523315, + "rewards/rejected": -0.7016708254814148, + "step": 599 + }, + { + "epoch": 0.37325038880248834, + "grad_norm": 0.3728528320789337, + "learning_rate": 3.5e-05, + "log_odds_chosen": 10.392243385314941, + "log_odds_ratio": -0.08781400322914124, + "logits/chosen": 0.14524298906326294, + "logits/rejected": 0.8344522714614868, + "logps/chosen": -1.0598855018615723, + "logps/rejected": -11.012116432189941, + "loss": 2.7949, + "nll_loss": 2.786162853240967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10598855465650558, + "rewards/margins": 0.9952231645584106, + "rewards/rejected": -1.101211667060852, + "step": 600 + }, + { + "epoch": 0.3738724727838258, + "grad_norm": 0.4189922511577606, + "learning_rate": 3.4975e-05, + "log_odds_chosen": 2.0390243530273438, + "log_odds_ratio": -0.39055636525154114, + "logits/chosen": 0.08198841661214828, + "logits/rejected": 0.19098129868507385, + "logps/chosen": -1.0478918552398682, + "logps/rejected": -2.8190433979034424, + "loss": 2.9721, + "nll_loss": 2.932997226715088, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10478918254375458, + "rewards/margins": 0.17711517214775085, + "rewards/rejected": -0.28190433979034424, + "step": 601 + }, + { + "epoch": 0.3744945567651633, + "grad_norm": 0.4921775758266449, + "learning_rate": 3.495e-05, + "log_odds_chosen": 3.0136878490448, + "log_odds_ratio": -0.41104233264923096, + "logits/chosen": 0.2594601511955261, + "logits/rejected": 0.3218687176704407, + "logps/chosen": -1.2040386199951172, + "logps/rejected": -4.0521650314331055, + "loss": 3.3135, + "nll_loss": 3.2724430561065674, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12040386348962784, + "rewards/margins": 0.2848126292228699, + "rewards/rejected": -0.4052164852619171, + "step": 602 + }, + { + "epoch": 0.37511664074650075, + "grad_norm": 0.5495923161506653, + "learning_rate": 3.4925e-05, + "log_odds_chosen": 2.3447442054748535, + "log_odds_ratio": -0.38162410259246826, + "logits/chosen": 0.23126523196697235, + "logits/rejected": 0.14873181283473969, + "logps/chosen": -1.3720605373382568, + "logps/rejected": -3.5602757930755615, + "loss": 3.5564, + "nll_loss": 3.5182876586914062, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1372060626745224, + "rewards/margins": 0.21882151067256927, + "rewards/rejected": -0.3560275733470917, + "step": 603 + }, + { + "epoch": 0.37573872472783826, + "grad_norm": 0.5212722420692444, + "learning_rate": 3.49e-05, + "log_odds_chosen": 6.591350078582764, + "log_odds_ratio": -0.1355379819869995, + "logits/chosen": 0.07449732720851898, + "logits/rejected": 0.338863730430603, + "logps/chosen": -1.1350582838058472, + "logps/rejected": -7.260507583618164, + "loss": 2.7158, + "nll_loss": 2.7022814750671387, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11350581794977188, + "rewards/margins": 0.6125448942184448, + "rewards/rejected": -0.7260507941246033, + "step": 604 + }, + { + "epoch": 0.37636080870917576, + "grad_norm": 0.47101324796676636, + "learning_rate": 3.4875e-05, + "log_odds_chosen": 2.375208616256714, + "log_odds_ratio": -0.4302332103252411, + "logits/chosen": 0.042256325483322144, + "logits/rejected": 0.19507254660129547, + "logps/chosen": -1.2770401239395142, + "logps/rejected": -3.526949882507324, + "loss": 2.6779, + "nll_loss": 2.6349117755889893, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12770400941371918, + "rewards/margins": 0.22499097883701324, + "rewards/rejected": -0.3526949882507324, + "step": 605 + }, + { + "epoch": 0.3769828926905132, + "grad_norm": 0.534155547618866, + "learning_rate": 3.485e-05, + "log_odds_chosen": 4.362737655639648, + "log_odds_ratio": -0.16578631103038788, + "logits/chosen": -0.0013291984796524048, + "logits/rejected": 0.4363200068473816, + "logps/chosen": -1.247031569480896, + "logps/rejected": -5.332918167114258, + "loss": 2.764, + "nll_loss": 2.7474396228790283, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12470315396785736, + "rewards/margins": 0.4085886478424072, + "rewards/rejected": -0.5332918167114258, + "step": 606 + }, + { + "epoch": 0.3776049766718507, + "grad_norm": 0.5048543810844421, + "learning_rate": 3.4825e-05, + "log_odds_chosen": 3.234736919403076, + "log_odds_ratio": -0.36818602681159973, + "logits/chosen": 0.2557424008846283, + "logits/rejected": 0.39144402742385864, + "logps/chosen": -1.2747143507003784, + "logps/rejected": -4.362112998962402, + "loss": 3.2001, + "nll_loss": 3.163267135620117, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1274714320898056, + "rewards/margins": 0.3087398409843445, + "rewards/rejected": -0.4362112581729889, + "step": 607 + }, + { + "epoch": 0.3782270606531882, + "grad_norm": 0.3789282441139221, + "learning_rate": 3.48e-05, + "log_odds_chosen": 5.484085559844971, + "log_odds_ratio": -0.24823862314224243, + "logits/chosen": 0.07289771735668182, + "logits/rejected": 0.3960074782371521, + "logps/chosen": -1.0095453262329102, + "logps/rejected": -6.180008411407471, + "loss": 3.062, + "nll_loss": 3.037135124206543, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10095453262329102, + "rewards/margins": 0.517046332359314, + "rewards/rejected": -0.6180008053779602, + "step": 608 + }, + { + "epoch": 0.3788491446345257, + "grad_norm": 0.45806336402893066, + "learning_rate": 3.4775000000000005e-05, + "log_odds_chosen": 5.055695533752441, + "log_odds_ratio": -0.07001174986362457, + "logits/chosen": 0.29797932505607605, + "logits/rejected": 0.7238092422485352, + "logps/chosen": -1.1003090143203735, + "logps/rejected": -5.755534648895264, + "loss": 3.4197, + "nll_loss": 3.4127392768859863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11003090441226959, + "rewards/margins": 0.4655225872993469, + "rewards/rejected": -0.5755534768104553, + "step": 609 + }, + { + "epoch": 0.37947122861586313, + "grad_norm": 0.4006802439689636, + "learning_rate": 3.475e-05, + "log_odds_chosen": 8.513465881347656, + "log_odds_ratio": -0.1920199692249298, + "logits/chosen": 0.18292070925235748, + "logits/rejected": 0.8109362721443176, + "logps/chosen": -1.025404691696167, + "logps/rejected": -8.980233192443848, + "loss": 3.2596, + "nll_loss": 3.2403993606567383, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10254047811031342, + "rewards/margins": 0.795482873916626, + "rewards/rejected": -0.8980233073234558, + "step": 610 + }, + { + "epoch": 0.38009331259720064, + "grad_norm": 0.49815765023231506, + "learning_rate": 3.4725000000000004e-05, + "log_odds_chosen": 5.950308799743652, + "log_odds_ratio": -0.26779890060424805, + "logits/chosen": 0.12611763179302216, + "logits/rejected": 0.2929439842700958, + "logps/chosen": -1.2592666149139404, + "logps/rejected": -6.927753925323486, + "loss": 3.173, + "nll_loss": 3.146212100982666, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.125926673412323, + "rewards/margins": 0.5668487548828125, + "rewards/rejected": -0.6927754878997803, + "step": 611 + }, + { + "epoch": 0.3807153965785381, + "grad_norm": 0.44551563262939453, + "learning_rate": 3.4699999999999996e-05, + "log_odds_chosen": 8.533232688903809, + "log_odds_ratio": -0.24877887964248657, + "logits/chosen": 0.3306812644004822, + "logits/rejected": 0.6930794715881348, + "logps/chosen": -0.9571521878242493, + "logps/rejected": -9.140741348266602, + "loss": 3.4917, + "nll_loss": 3.4668092727661133, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09571521729230881, + "rewards/margins": 0.8183589577674866, + "rewards/rejected": -0.914074182510376, + "step": 612 + }, + { + "epoch": 0.3813374805598756, + "grad_norm": 1.205088496208191, + "learning_rate": 3.4675e-05, + "log_odds_chosen": 11.991146087646484, + "log_odds_ratio": -0.0009046989143826067, + "logits/chosen": 0.16803990304470062, + "logits/rejected": 1.0780525207519531, + "logps/chosen": -1.2963323593139648, + "logps/rejected": -12.865903854370117, + "loss": 2.9082, + "nll_loss": 2.9080753326416016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12963323295116425, + "rewards/margins": 1.1569572687149048, + "rewards/rejected": -1.2865904569625854, + "step": 613 + }, + { + "epoch": 0.38195956454121305, + "grad_norm": 0.6109058260917664, + "learning_rate": 3.465e-05, + "log_odds_chosen": 10.368602752685547, + "log_odds_ratio": -0.05110776424407959, + "logits/chosen": 0.15692661702632904, + "logits/rejected": 0.9058154821395874, + "logps/chosen": -1.3313194513320923, + "logps/rejected": -11.122682571411133, + "loss": 3.1653, + "nll_loss": 3.160191297531128, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1331319510936737, + "rewards/margins": 0.9791362881660461, + "rewards/rejected": -1.112268328666687, + "step": 614 + }, + { + "epoch": 0.38258164852255055, + "grad_norm": 0.3697699308395386, + "learning_rate": 3.4625e-05, + "log_odds_chosen": 6.597249984741211, + "log_odds_ratio": -0.18403266370296478, + "logits/chosen": 0.2341020703315735, + "logits/rejected": 0.8733110427856445, + "logps/chosen": -1.0065994262695312, + "logps/rejected": -7.087155342102051, + "loss": 3.0979, + "nll_loss": 3.0794601440429688, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10065993666648865, + "rewards/margins": 0.6080557107925415, + "rewards/rejected": -0.708715558052063, + "step": 615 + }, + { + "epoch": 0.383203732503888, + "grad_norm": 0.43355652689933777, + "learning_rate": 3.46e-05, + "log_odds_chosen": 5.680283069610596, + "log_odds_ratio": -0.3767112195491791, + "logits/chosen": 0.03800162300467491, + "logits/rejected": 0.27057909965515137, + "logps/chosen": -1.3413360118865967, + "logps/rejected": -6.765846252441406, + "loss": 2.745, + "nll_loss": 2.707333564758301, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13413360714912415, + "rewards/margins": 0.542451024055481, + "rewards/rejected": -0.6765846014022827, + "step": 616 + }, + { + "epoch": 0.3838258164852255, + "grad_norm": 0.3831072151660919, + "learning_rate": 3.4575e-05, + "log_odds_chosen": 5.973001480102539, + "log_odds_ratio": -0.13384968042373657, + "logits/chosen": 0.09891879558563232, + "logits/rejected": 0.39111006259918213, + "logps/chosen": -1.1477863788604736, + "logps/rejected": -6.674656867980957, + "loss": 3.2631, + "nll_loss": 3.2497549057006836, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11477864533662796, + "rewards/margins": 0.5526870489120483, + "rewards/rejected": -0.6674657464027405, + "step": 617 + }, + { + "epoch": 0.38444790046656296, + "grad_norm": 0.4581452012062073, + "learning_rate": 3.455e-05, + "log_odds_chosen": 5.603791236877441, + "log_odds_ratio": -0.1752859354019165, + "logits/chosen": 0.04793360084295273, + "logits/rejected": 0.3412993550300598, + "logps/chosen": -1.081144094467163, + "logps/rejected": -6.33277702331543, + "loss": 3.1135, + "nll_loss": 3.0959632396698, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10811440646648407, + "rewards/margins": 0.5251632928848267, + "rewards/rejected": -0.6332777142524719, + "step": 618 + }, + { + "epoch": 0.38506998444790047, + "grad_norm": 0.421712189912796, + "learning_rate": 3.4525e-05, + "log_odds_chosen": 5.768167972564697, + "log_odds_ratio": -0.22745099663734436, + "logits/chosen": 0.03324780613183975, + "logits/rejected": 0.42689570784568787, + "logps/chosen": -1.1148405075073242, + "logps/rejected": -6.4015655517578125, + "loss": 2.9409, + "nll_loss": 2.9181556701660156, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11148406565189362, + "rewards/margins": 0.5286725163459778, + "rewards/rejected": -0.6401565670967102, + "step": 619 + }, + { + "epoch": 0.3856920684292379, + "grad_norm": 0.4084291458129883, + "learning_rate": 3.45e-05, + "log_odds_chosen": 6.440693378448486, + "log_odds_ratio": -0.1236582025885582, + "logits/chosen": 0.10940997302532196, + "logits/rejected": 0.420942485332489, + "logps/chosen": -1.1581194400787354, + "logps/rejected": -6.912134170532227, + "loss": 3.3024, + "nll_loss": 3.2900736331939697, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11581194400787354, + "rewards/margins": 0.5754014849662781, + "rewards/rejected": -0.6912134289741516, + "step": 620 + }, + { + "epoch": 0.38631415241057543, + "grad_norm": 0.5068473219871521, + "learning_rate": 3.4475000000000005e-05, + "log_odds_chosen": 6.7337493896484375, + "log_odds_ratio": -0.1989458054304123, + "logits/chosen": 0.19425322115421295, + "logits/rejected": 0.316842645406723, + "logps/chosen": -1.122262716293335, + "logps/rejected": -7.526772499084473, + "loss": 3.0876, + "nll_loss": 3.0677075386047363, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11222627013921738, + "rewards/margins": 0.6404510140419006, + "rewards/rejected": -0.7526772618293762, + "step": 621 + }, + { + "epoch": 0.38693623639191294, + "grad_norm": 0.38466599583625793, + "learning_rate": 3.445e-05, + "log_odds_chosen": 5.886903762817383, + "log_odds_ratio": -0.2482847273349762, + "logits/chosen": 0.2649783492088318, + "logits/rejected": 0.7941878437995911, + "logps/chosen": -1.2445883750915527, + "logps/rejected": -6.8715128898620605, + "loss": 3.1309, + "nll_loss": 3.1060690879821777, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12445883452892303, + "rewards/margins": 0.5626925230026245, + "rewards/rejected": -0.687151312828064, + "step": 622 + }, + { + "epoch": 0.3875583203732504, + "grad_norm": 0.40377840399742126, + "learning_rate": 3.4425e-05, + "log_odds_chosen": 8.402645111083984, + "log_odds_ratio": -0.2527872323989868, + "logits/chosen": 0.14706459641456604, + "logits/rejected": 0.5948149561882019, + "logps/chosen": -1.120314598083496, + "logps/rejected": -9.159903526306152, + "loss": 3.1371, + "nll_loss": 3.111776828765869, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11203145235776901, + "rewards/margins": 0.8039589524269104, + "rewards/rejected": -0.9159903526306152, + "step": 623 + }, + { + "epoch": 0.3881804043545879, + "grad_norm": 0.41054055094718933, + "learning_rate": 3.4399999999999996e-05, + "log_odds_chosen": 6.867403984069824, + "log_odds_ratio": -0.08644460141658783, + "logits/chosen": 0.2943398952484131, + "logits/rejected": 0.6636469960212708, + "logps/chosen": -0.9363681077957153, + "logps/rejected": -7.202298641204834, + "loss": 3.3252, + "nll_loss": 3.3165183067321777, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09363681823015213, + "rewards/margins": 0.6265930533409119, + "rewards/rejected": -0.7202298641204834, + "step": 624 + }, + { + "epoch": 0.38880248833592534, + "grad_norm": 0.4753408432006836, + "learning_rate": 3.4375e-05, + "log_odds_chosen": 6.773974418640137, + "log_odds_ratio": -0.2092844694852829, + "logits/chosen": 0.29289454221725464, + "logits/rejected": 0.7853541374206543, + "logps/chosen": -1.3344194889068604, + "logps/rejected": -7.880761623382568, + "loss": 3.0419, + "nll_loss": 3.0209245681762695, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1334419548511505, + "rewards/margins": 0.6546341776847839, + "rewards/rejected": -0.7880761623382568, + "step": 625 + }, + { + "epoch": 0.38942457231726285, + "grad_norm": 0.38249650597572327, + "learning_rate": 3.435e-05, + "log_odds_chosen": 9.644584655761719, + "log_odds_ratio": -0.07656724750995636, + "logits/chosen": 0.32878345251083374, + "logits/rejected": 0.9979957342147827, + "logps/chosen": -0.9833849668502808, + "logps/rejected": -10.156517028808594, + "loss": 3.1942, + "nll_loss": 3.186497688293457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09833849966526031, + "rewards/margins": 0.917313277721405, + "rewards/rejected": -1.015651822090149, + "step": 626 + }, + { + "epoch": 0.3900466562986003, + "grad_norm": 0.5387607216835022, + "learning_rate": 3.4325e-05, + "log_odds_chosen": 7.158836364746094, + "log_odds_ratio": -0.13461348414421082, + "logits/chosen": 0.11595956236124039, + "logits/rejected": 0.8866112232208252, + "logps/chosen": -1.2095855474472046, + "logps/rejected": -8.037429809570312, + "loss": 2.1498, + "nll_loss": 2.1363189220428467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12095855176448822, + "rewards/margins": 0.6827844977378845, + "rewards/rejected": -0.8037430047988892, + "step": 627 + }, + { + "epoch": 0.3906687402799378, + "grad_norm": 18.14006233215332, + "learning_rate": 3.430000000000001e-05, + "log_odds_chosen": 8.21702766418457, + "log_odds_ratio": -0.22415418922901154, + "logits/chosen": 0.5142999887466431, + "logits/rejected": 0.9347355961799622, + "logps/chosen": -3.1413345336914062, + "logps/rejected": -11.10336685180664, + "loss": 3.7885, + "nll_loss": 3.7660984992980957, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.31413349509239197, + "rewards/margins": 0.7962032556533813, + "rewards/rejected": -1.1103367805480957, + "step": 628 + }, + { + "epoch": 0.39129082426127526, + "grad_norm": 0.5293332934379578, + "learning_rate": 3.4275e-05, + "log_odds_chosen": 9.935027122497559, + "log_odds_ratio": -0.0972500815987587, + "logits/chosen": 0.372843861579895, + "logits/rejected": 0.5359551906585693, + "logps/chosen": -1.0797069072723389, + "logps/rejected": -10.594209671020508, + "loss": 3.561, + "nll_loss": 3.551283121109009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10797069221735, + "rewards/margins": 0.9514502882957458, + "rewards/rejected": -1.0594210624694824, + "step": 629 + }, + { + "epoch": 0.39191290824261277, + "grad_norm": 0.5737473368644714, + "learning_rate": 3.4250000000000006e-05, + "log_odds_chosen": 8.50829029083252, + "log_odds_ratio": -0.14946213364601135, + "logits/chosen": 0.21567851305007935, + "logits/rejected": 0.841961681842804, + "logps/chosen": -1.2254951000213623, + "logps/rejected": -9.41191291809082, + "loss": 2.6145, + "nll_loss": 2.599510669708252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12254951894283295, + "rewards/margins": 0.8186418414115906, + "rewards/rejected": -0.9411913156509399, + "step": 630 + }, + { + "epoch": 0.3925349922239502, + "grad_norm": 0.5078374147415161, + "learning_rate": 3.4225e-05, + "log_odds_chosen": 8.175843238830566, + "log_odds_ratio": -0.14807230234146118, + "logits/chosen": 0.4468088746070862, + "logits/rejected": 0.967327356338501, + "logps/chosen": -0.9474899768829346, + "logps/rejected": -8.668539047241211, + "loss": 2.8416, + "nll_loss": 2.8268425464630127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09474898874759674, + "rewards/margins": 0.7721049785614014, + "rewards/rejected": -0.8668538928031921, + "step": 631 + }, + { + "epoch": 0.3931570762052877, + "grad_norm": 0.43191152811050415, + "learning_rate": 3.4200000000000005e-05, + "log_odds_chosen": 4.073611736297607, + "log_odds_ratio": -0.16531451046466827, + "logits/chosen": 0.35050833225250244, + "logits/rejected": 0.45885440707206726, + "logps/chosen": -1.4971977472305298, + "logps/rejected": -5.289085388183594, + "loss": 3.174, + "nll_loss": 3.1574785709381104, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14971977472305298, + "rewards/margins": 0.3791887164115906, + "rewards/rejected": -0.5289084911346436, + "step": 632 + }, + { + "epoch": 0.3937791601866252, + "grad_norm": 0.48704293370246887, + "learning_rate": 3.4175000000000004e-05, + "log_odds_chosen": 4.4312849044799805, + "log_odds_ratio": -0.15987420082092285, + "logits/chosen": 0.3942926228046417, + "logits/rejected": 0.5708335638046265, + "logps/chosen": -1.2215713262557983, + "logps/rejected": -5.378739833831787, + "loss": 3.6228, + "nll_loss": 3.6067724227905273, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12215713411569595, + "rewards/margins": 0.41571688652038574, + "rewards/rejected": -0.5378739833831787, + "step": 633 + }, + { + "epoch": 0.3944012441679627, + "grad_norm": 0.5552240014076233, + "learning_rate": 3.415e-05, + "log_odds_chosen": 7.168707847595215, + "log_odds_ratio": -0.04835962504148483, + "logits/chosen": 0.1953887641429901, + "logits/rejected": 0.6829716563224792, + "logps/chosen": -0.9781539440155029, + "logps/rejected": -7.654153347015381, + "loss": 2.4954, + "nll_loss": 2.4905707836151123, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09781539440155029, + "rewards/margins": 0.6675999164581299, + "rewards/rejected": -0.765415370464325, + "step": 634 + }, + { + "epoch": 0.39502332814930013, + "grad_norm": 0.43904614448547363, + "learning_rate": 3.4125e-05, + "log_odds_chosen": 7.5145721435546875, + "log_odds_ratio": -0.01926579885184765, + "logits/chosen": 0.2834244668483734, + "logits/rejected": 0.6822664737701416, + "logps/chosen": -1.2787011861801147, + "logps/rejected": -8.410541534423828, + "loss": 3.4046, + "nll_loss": 3.4026920795440674, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1278701275587082, + "rewards/margins": 0.7131839990615845, + "rewards/rejected": -0.8410541415214539, + "step": 635 + }, + { + "epoch": 0.39564541213063764, + "grad_norm": 0.4341355860233307, + "learning_rate": 3.41e-05, + "log_odds_chosen": 7.731707572937012, + "log_odds_ratio": -0.02020195871591568, + "logits/chosen": 0.2916351556777954, + "logits/rejected": 0.7818939685821533, + "logps/chosen": -0.9819329977035522, + "logps/rejected": -8.171854972839355, + "loss": 2.8691, + "nll_loss": 2.8670694828033447, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09819329530000687, + "rewards/margins": 0.7189922332763672, + "rewards/rejected": -0.8171855211257935, + "step": 636 + }, + { + "epoch": 0.3962674961119751, + "grad_norm": 0.4297373294830322, + "learning_rate": 3.4075e-05, + "log_odds_chosen": 5.247067451477051, + "log_odds_ratio": -0.35224947333335876, + "logits/chosen": 0.16714119911193848, + "logits/rejected": 0.5036913752555847, + "logps/chosen": -1.042916178703308, + "logps/rejected": -6.024389743804932, + "loss": 2.7291, + "nll_loss": 2.6938908100128174, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10429162532091141, + "rewards/margins": 0.4981473386287689, + "rewards/rejected": -0.6024389266967773, + "step": 637 + }, + { + "epoch": 0.3968895800933126, + "grad_norm": 0.5243464112281799, + "learning_rate": 3.405e-05, + "log_odds_chosen": 8.308638572692871, + "log_odds_ratio": -0.2812676429748535, + "logits/chosen": 0.20677152276039124, + "logits/rejected": 0.3971843421459198, + "logps/chosen": -1.1305897235870361, + "logps/rejected": -9.007345199584961, + "loss": 3.0744, + "nll_loss": 3.0463199615478516, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11305898427963257, + "rewards/margins": 0.7876755595207214, + "rewards/rejected": -0.9007344841957092, + "step": 638 + }, + { + "epoch": 0.39751166407465005, + "grad_norm": 0.541507363319397, + "learning_rate": 3.4025e-05, + "log_odds_chosen": 5.680115699768066, + "log_odds_ratio": -0.08863461017608643, + "logits/chosen": 0.23920726776123047, + "logits/rejected": 0.6990938782691956, + "logps/chosen": -1.3689988851547241, + "logps/rejected": -6.771450996398926, + "loss": 2.8984, + "nll_loss": 2.889554023742676, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1368998885154724, + "rewards/margins": 0.5402451753616333, + "rewards/rejected": -0.6771451234817505, + "step": 639 + }, + { + "epoch": 0.39813374805598756, + "grad_norm": 0.44972142577171326, + "learning_rate": 3.4000000000000007e-05, + "log_odds_chosen": 5.760073661804199, + "log_odds_ratio": -0.20923081040382385, + "logits/chosen": 0.28065040707588196, + "logits/rejected": 0.6092424392700195, + "logps/chosen": -1.1194320917129517, + "logps/rejected": -6.457537651062012, + "loss": 2.9208, + "nll_loss": 2.899829864501953, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11194320023059845, + "rewards/margins": 0.533810555934906, + "rewards/rejected": -0.6457537412643433, + "step": 640 + }, + { + "epoch": 0.39875583203732506, + "grad_norm": 0.45881423354148865, + "learning_rate": 3.3975e-05, + "log_odds_chosen": 8.45309066772461, + "log_odds_ratio": -0.002736177993938327, + "logits/chosen": 0.1473774015903473, + "logits/rejected": 0.9392131567001343, + "logps/chosen": -1.1920280456542969, + "logps/rejected": -9.228044509887695, + "loss": 2.4849, + "nll_loss": 2.4846346378326416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11920280009508133, + "rewards/margins": 0.8036016225814819, + "rewards/rejected": -0.9228044152259827, + "step": 641 + }, + { + "epoch": 0.3993779160186625, + "grad_norm": 0.4157197177410126, + "learning_rate": 3.3950000000000005e-05, + "log_odds_chosen": 7.536615371704102, + "log_odds_ratio": -0.09330558776855469, + "logits/chosen": 0.31922441720962524, + "logits/rejected": 0.7367990016937256, + "logps/chosen": -1.2460134029388428, + "logps/rejected": -8.397119522094727, + "loss": 3.217, + "nll_loss": 3.2076451778411865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12460136413574219, + "rewards/margins": 0.7151105403900146, + "rewards/rejected": -0.8397119641304016, + "step": 642 + }, + { + "epoch": 0.4, + "grad_norm": 0.3730640113353729, + "learning_rate": 3.3925e-05, + "log_odds_chosen": 7.549066543579102, + "log_odds_ratio": -0.028249753639101982, + "logits/chosen": 0.43140530586242676, + "logits/rejected": 0.803489089012146, + "logps/chosen": -0.825218141078949, + "logps/rejected": -7.683599472045898, + "loss": 3.0414, + "nll_loss": 3.0385513305664062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08252181112766266, + "rewards/margins": 0.6858382225036621, + "rewards/rejected": -0.7683599591255188, + "step": 643 + }, + { + "epoch": 0.4006220839813375, + "grad_norm": 0.5043125748634338, + "learning_rate": 3.3900000000000004e-05, + "log_odds_chosen": 12.2427978515625, + "log_odds_ratio": -0.06448502838611603, + "logits/chosen": 0.22592878341674805, + "logits/rejected": 0.6281089782714844, + "logps/chosen": -0.8235211968421936, + "logps/rejected": -12.127344131469727, + "loss": 3.0036, + "nll_loss": 2.997192859649658, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08235213160514832, + "rewards/margins": 1.1303822994232178, + "rewards/rejected": -1.2127344608306885, + "step": 644 + }, + { + "epoch": 0.401244167962675, + "grad_norm": 0.5275508165359497, + "learning_rate": 3.3875000000000003e-05, + "log_odds_chosen": 8.873658180236816, + "log_odds_ratio": -0.16682618856430054, + "logits/chosen": 0.2479245662689209, + "logits/rejected": 0.6735783219337463, + "logps/chosen": -1.1058787107467651, + "logps/rejected": -9.506242752075195, + "loss": 2.9091, + "nll_loss": 2.8924357891082764, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11058788001537323, + "rewards/margins": 0.8400364518165588, + "rewards/rejected": -0.9506243467330933, + "step": 645 + }, + { + "epoch": 0.40186625194401243, + "grad_norm": 0.47356879711151123, + "learning_rate": 3.385e-05, + "log_odds_chosen": 6.3066816329956055, + "log_odds_ratio": -0.24541525542736053, + "logits/chosen": 0.4849867820739746, + "logits/rejected": 0.8165839314460754, + "logps/chosen": -1.1368855237960815, + "logps/rejected": -6.978367805480957, + "loss": 3.2928, + "nll_loss": 3.268289089202881, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11368854343891144, + "rewards/margins": 0.5841482877731323, + "rewards/rejected": -0.6978368163108826, + "step": 646 + }, + { + "epoch": 0.40248833592534994, + "grad_norm": 0.5155513882637024, + "learning_rate": 3.3825e-05, + "log_odds_chosen": 5.060737609863281, + "log_odds_ratio": -0.2929985821247101, + "logits/chosen": 0.2869529724121094, + "logits/rejected": 0.567604124546051, + "logps/chosen": -1.1311206817626953, + "logps/rejected": -5.907896995544434, + "loss": 3.0702, + "nll_loss": 3.0408787727355957, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11311206221580505, + "rewards/margins": 0.47767767310142517, + "rewards/rejected": -0.5907896757125854, + "step": 647 + }, + { + "epoch": 0.4031104199066874, + "grad_norm": 0.4957614243030548, + "learning_rate": 3.38e-05, + "log_odds_chosen": 7.915012836456299, + "log_odds_ratio": -0.07648847997188568, + "logits/chosen": 0.3334338963031769, + "logits/rejected": 0.8731058239936829, + "logps/chosen": -0.9914820790290833, + "logps/rejected": -8.388053894042969, + "loss": 2.8132, + "nll_loss": 2.805509090423584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0991482138633728, + "rewards/margins": 0.7396571636199951, + "rewards/rejected": -0.8388054370880127, + "step": 648 + }, + { + "epoch": 0.4037325038880249, + "grad_norm": 0.48051124811172485, + "learning_rate": 3.3775e-05, + "log_odds_chosen": 11.63776683807373, + "log_odds_ratio": -0.05890395864844322, + "logits/chosen": 0.15852481126785278, + "logits/rejected": 0.6033845543861389, + "logps/chosen": -0.6961344480514526, + "logps/rejected": -11.388937950134277, + "loss": 2.9851, + "nll_loss": 2.9792487621307373, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06961344182491302, + "rewards/margins": 1.0692802667617798, + "rewards/rejected": -1.138893723487854, + "step": 649 + }, + { + "epoch": 0.40435458786936235, + "grad_norm": 0.5910434722900391, + "learning_rate": 3.375000000000001e-05, + "log_odds_chosen": 13.171907424926758, + "log_odds_ratio": -0.08223630487918854, + "logits/chosen": 0.15894870460033417, + "logits/rejected": 0.8937514424324036, + "logps/chosen": -1.2927449941635132, + "logps/rejected": -14.164291381835938, + "loss": 2.5011, + "nll_loss": 2.4928855895996094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12927450239658356, + "rewards/margins": 1.2871546745300293, + "rewards/rejected": -1.4164291620254517, + "step": 650 + }, + { + "epoch": 0.40497667185069985, + "grad_norm": 2.0554184913635254, + "learning_rate": 3.3725e-05, + "log_odds_chosen": 13.773082733154297, + "log_odds_ratio": -0.07272257655858994, + "logits/chosen": 0.4365695118904114, + "logits/rejected": 1.2860937118530273, + "logps/chosen": -1.0678213834762573, + "logps/rejected": -14.37375545501709, + "loss": 2.8618, + "nll_loss": 2.8545725345611572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10678213834762573, + "rewards/margins": 1.330593466758728, + "rewards/rejected": -1.437375545501709, + "step": 651 + }, + { + "epoch": 0.4055987558320373, + "grad_norm": 2.0059454441070557, + "learning_rate": 3.3700000000000006e-05, + "log_odds_chosen": 13.936979293823242, + "log_odds_ratio": -0.09149403125047684, + "logits/chosen": 0.09907475113868713, + "logits/rejected": 0.763411283493042, + "logps/chosen": -1.1428678035736084, + "logps/rejected": -14.637571334838867, + "loss": 2.3492, + "nll_loss": 2.3400137424468994, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11428678035736084, + "rewards/margins": 1.3494703769683838, + "rewards/rejected": -1.4637572765350342, + "step": 652 + }, + { + "epoch": 0.4062208398133748, + "grad_norm": 0.46556660532951355, + "learning_rate": 3.3675e-05, + "log_odds_chosen": 7.606475830078125, + "log_odds_ratio": -0.18322187662124634, + "logits/chosen": 0.3579789996147156, + "logits/rejected": 0.8734443783760071, + "logps/chosen": -1.0546009540557861, + "logps/rejected": -8.23729419708252, + "loss": 3.08, + "nll_loss": 3.0616695880889893, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10546009242534637, + "rewards/margins": 0.7182692885398865, + "rewards/rejected": -0.8237293362617493, + "step": 653 + }, + { + "epoch": 0.40684292379471226, + "grad_norm": 0.4319630563259125, + "learning_rate": 3.3650000000000005e-05, + "log_odds_chosen": 9.043044090270996, + "log_odds_ratio": -0.17355716228485107, + "logits/chosen": 0.38297635316848755, + "logits/rejected": 0.8766739964485168, + "logps/chosen": -1.290746808052063, + "logps/rejected": -10.054768562316895, + "loss": 3.6632, + "nll_loss": 3.6458590030670166, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12907469272613525, + "rewards/margins": 0.8764022588729858, + "rewards/rejected": -1.005476951599121, + "step": 654 + }, + { + "epoch": 0.40746500777604977, + "grad_norm": 0.586379885673523, + "learning_rate": 3.3625000000000004e-05, + "log_odds_chosen": 10.436687469482422, + "log_odds_ratio": -0.08339973539113998, + "logits/chosen": 0.01694488525390625, + "logits/rejected": 0.7056764364242554, + "logps/chosen": -1.175871729850769, + "logps/rejected": -11.233123779296875, + "loss": 2.2024, + "nll_loss": 2.1940736770629883, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11758717149496078, + "rewards/margins": 1.0057252645492554, + "rewards/rejected": -1.1233124732971191, + "step": 655 + }, + { + "epoch": 0.4080870917573872, + "grad_norm": 0.5463007688522339, + "learning_rate": 3.3600000000000004e-05, + "log_odds_chosen": 14.558086395263672, + "log_odds_ratio": -0.020145747810602188, + "logits/chosen": 0.1619691252708435, + "logits/rejected": 0.8365485668182373, + "logps/chosen": -1.254634141921997, + "logps/rejected": -15.396265029907227, + "loss": 2.7958, + "nll_loss": 2.793788433074951, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12546342611312866, + "rewards/margins": 1.4141631126403809, + "rewards/rejected": -1.5396264791488647, + "step": 656 + }, + { + "epoch": 0.40870917573872473, + "grad_norm": 1.0127180814743042, + "learning_rate": 3.3575e-05, + "log_odds_chosen": 10.627214431762695, + "log_odds_ratio": -0.025262095034122467, + "logits/chosen": 0.15809279680252075, + "logits/rejected": 1.015772819519043, + "logps/chosen": -1.144571304321289, + "logps/rejected": -11.349461555480957, + "loss": 2.5338, + "nll_loss": 2.5312976837158203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11445712298154831, + "rewards/margins": 1.020488977432251, + "rewards/rejected": -1.1349461078643799, + "step": 657 + }, + { + "epoch": 0.40933125972006223, + "grad_norm": 0.5098369121551514, + "learning_rate": 3.355e-05, + "log_odds_chosen": 7.605299949645996, + "log_odds_ratio": -0.18348553776741028, + "logits/chosen": 0.14395418763160706, + "logits/rejected": 0.305155873298645, + "logps/chosen": -1.009643793106079, + "logps/rejected": -8.116901397705078, + "loss": 2.8782, + "nll_loss": 2.859889507293701, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10096438229084015, + "rewards/margins": 0.7107258439064026, + "rewards/rejected": -0.8116902112960815, + "step": 658 + }, + { + "epoch": 0.4099533437013997, + "grad_norm": 0.49793869256973267, + "learning_rate": 3.3525e-05, + "log_odds_chosen": 5.807119369506836, + "log_odds_ratio": -0.18563133478164673, + "logits/chosen": 0.1726893037557602, + "logits/rejected": 0.5850039720535278, + "logps/chosen": -1.2937705516815186, + "logps/rejected": -6.849543571472168, + "loss": 2.8515, + "nll_loss": 2.8329498767852783, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12937705218791962, + "rewards/margins": 0.555577278137207, + "rewards/rejected": -0.6849542856216431, + "step": 659 + }, + { + "epoch": 0.4105754276827372, + "grad_norm": 0.4080248773097992, + "learning_rate": 3.35e-05, + "log_odds_chosen": 7.798429489135742, + "log_odds_ratio": -0.033232737332582474, + "logits/chosen": 0.16965456306934357, + "logits/rejected": 0.5689667463302612, + "logps/chosen": -0.8590775728225708, + "logps/rejected": -8.06137466430664, + "loss": 3.1818, + "nll_loss": 3.178438663482666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08590776473283768, + "rewards/margins": 0.7202296853065491, + "rewards/rejected": -0.8061374425888062, + "step": 660 + }, + { + "epoch": 0.41119751166407464, + "grad_norm": 1.0456312894821167, + "learning_rate": 3.3475e-05, + "log_odds_chosen": 4.1362528800964355, + "log_odds_ratio": -0.4419694244861603, + "logits/chosen": 0.1379992961883545, + "logits/rejected": 0.17854338884353638, + "logps/chosen": -1.8760737180709839, + "logps/rejected": -5.827506065368652, + "loss": 3.5102, + "nll_loss": 3.4659581184387207, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18760737776756287, + "rewards/margins": 0.39514321088790894, + "rewards/rejected": -0.5827505588531494, + "step": 661 + }, + { + "epoch": 0.41181959564541215, + "grad_norm": 0.45937567949295044, + "learning_rate": 3.345000000000001e-05, + "log_odds_chosen": 6.3919997215271, + "log_odds_ratio": -0.18915338814258575, + "logits/chosen": 0.06113801896572113, + "logits/rejected": 0.46323448419570923, + "logps/chosen": -0.9827958345413208, + "logps/rejected": -6.9341278076171875, + "loss": 2.761, + "nll_loss": 2.742082118988037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09827959537506104, + "rewards/margins": 0.5951332449913025, + "rewards/rejected": -0.6934128403663635, + "step": 662 + }, + { + "epoch": 0.4124416796267496, + "grad_norm": 0.7443048357963562, + "learning_rate": 3.3425e-05, + "log_odds_chosen": 3.1607913970947266, + "log_odds_ratio": -0.3298979699611664, + "logits/chosen": -0.002762638032436371, + "logits/rejected": 0.15435074269771576, + "logps/chosen": -1.0491981506347656, + "logps/rejected": -3.879762649536133, + "loss": 2.6448, + "nll_loss": 2.6117641925811768, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10491982102394104, + "rewards/margins": 0.28305643796920776, + "rewards/rejected": -0.3879762291908264, + "step": 663 + }, + { + "epoch": 0.4130637636080871, + "grad_norm": 0.5198607444763184, + "learning_rate": 3.3400000000000005e-05, + "log_odds_chosen": 5.001226425170898, + "log_odds_ratio": -0.33517199754714966, + "logits/chosen": 0.09879479557275772, + "logits/rejected": 0.3197081387042999, + "logps/chosen": -1.149256944656372, + "logps/rejected": -5.96102237701416, + "loss": 2.7095, + "nll_loss": 2.6760072708129883, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11492569744586945, + "rewards/margins": 0.48117655515670776, + "rewards/rejected": -0.596102237701416, + "step": 664 + }, + { + "epoch": 0.41368584758942456, + "grad_norm": 0.6322446465492249, + "learning_rate": 3.3375e-05, + "log_odds_chosen": 8.142166137695312, + "log_odds_ratio": -0.5253583788871765, + "logits/chosen": 0.10554549843072891, + "logits/rejected": 0.32331740856170654, + "logps/chosen": -1.1191916465759277, + "logps/rejected": -8.784271240234375, + "loss": 3.2475, + "nll_loss": 3.194953441619873, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11191916465759277, + "rewards/margins": 0.7665079832077026, + "rewards/rejected": -0.8784270882606506, + "step": 665 + }, + { + "epoch": 0.41430793157076207, + "grad_norm": 0.3725062906742096, + "learning_rate": 3.3350000000000004e-05, + "log_odds_chosen": 2.328892707824707, + "log_odds_ratio": -0.46152687072753906, + "logits/chosen": 0.1010928526520729, + "logits/rejected": 0.24071413278579712, + "logps/chosen": -1.1850922107696533, + "logps/rejected": -3.2830145359039307, + "loss": 3.2168, + "nll_loss": 3.170639753341675, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1185092180967331, + "rewards/margins": 0.20979222655296326, + "rewards/rejected": -0.32830148935317993, + "step": 666 + }, + { + "epoch": 0.4149300155520995, + "grad_norm": 0.6286746263504028, + "learning_rate": 3.3325000000000004e-05, + "log_odds_chosen": 4.8994221687316895, + "log_odds_ratio": -0.09839385747909546, + "logits/chosen": 0.04399656876921654, + "logits/rejected": 0.19438612461090088, + "logps/chosen": -1.0532360076904297, + "logps/rejected": -5.433768272399902, + "loss": 2.3705, + "nll_loss": 2.360666275024414, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10532359033823013, + "rewards/margins": 0.43805328011512756, + "rewards/rejected": -0.5433768630027771, + "step": 667 + }, + { + "epoch": 0.415552099533437, + "grad_norm": 0.509515106678009, + "learning_rate": 3.33e-05, + "log_odds_chosen": 1.514922857284546, + "log_odds_ratio": -0.4025351405143738, + "logits/chosen": 0.14341723918914795, + "logits/rejected": 0.2947719693183899, + "logps/chosen": -1.520890474319458, + "logps/rejected": -2.891176223754883, + "loss": 3.3778, + "nll_loss": 3.3375864028930664, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15208904445171356, + "rewards/margins": 0.13702860474586487, + "rewards/rejected": -0.28911763429641724, + "step": 668 + }, + { + "epoch": 0.4161741835147745, + "grad_norm": 0.42655518651008606, + "learning_rate": 3.3275e-05, + "log_odds_chosen": 5.49832820892334, + "log_odds_ratio": -0.04279167205095291, + "logits/chosen": 0.17684528231620789, + "logits/rejected": 0.4185295104980469, + "logps/chosen": -1.1025071144104004, + "logps/rejected": -6.174997329711914, + "loss": 3.0329, + "nll_loss": 3.0286574363708496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11025071889162064, + "rewards/margins": 0.5072489976882935, + "rewards/rejected": -0.6174997091293335, + "step": 669 + }, + { + "epoch": 0.416796267496112, + "grad_norm": 0.45711860060691833, + "learning_rate": 3.325e-05, + "log_odds_chosen": 5.088669300079346, + "log_odds_ratio": -0.17721472680568695, + "logits/chosen": 0.21003636717796326, + "logits/rejected": 0.4002099633216858, + "logps/chosen": -0.9949367046356201, + "logps/rejected": -5.5031514167785645, + "loss": 3.3491, + "nll_loss": 3.33135724067688, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09949367493391037, + "rewards/margins": 0.45082151889801025, + "rewards/rejected": -0.5503151416778564, + "step": 670 + }, + { + "epoch": 0.41741835147744943, + "grad_norm": 0.4432086646556854, + "learning_rate": 3.3225e-05, + "log_odds_chosen": 7.916843414306641, + "log_odds_ratio": -0.1536819189786911, + "logits/chosen": 0.14663207530975342, + "logits/rejected": 0.4883155822753906, + "logps/chosen": -0.9252321124076843, + "logps/rejected": -8.24629020690918, + "loss": 2.6296, + "nll_loss": 2.6141862869262695, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09252320975065231, + "rewards/margins": 0.7321057915687561, + "rewards/rejected": -0.8246290683746338, + "step": 671 + }, + { + "epoch": 0.41804043545878694, + "grad_norm": 0.37037086486816406, + "learning_rate": 3.32e-05, + "log_odds_chosen": 6.104862213134766, + "log_odds_ratio": -0.2211923599243164, + "logits/chosen": 0.14146625995635986, + "logits/rejected": 0.2648002505302429, + "logps/chosen": -1.257808804512024, + "logps/rejected": -6.962691307067871, + "loss": 3.0737, + "nll_loss": 3.0516157150268555, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1257808804512024, + "rewards/margins": 0.5704882144927979, + "rewards/rejected": -0.696269154548645, + "step": 672 + }, + { + "epoch": 0.4186625194401244, + "grad_norm": 0.3770119845867157, + "learning_rate": 3.3175e-05, + "log_odds_chosen": 7.011809349060059, + "log_odds_ratio": -0.11317376792430878, + "logits/chosen": 0.364218533039093, + "logits/rejected": 0.4188498854637146, + "logps/chosen": -0.8818821907043457, + "logps/rejected": -7.176725387573242, + "loss": 3.8569, + "nll_loss": 3.845541477203369, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08818823099136353, + "rewards/margins": 0.6294843554496765, + "rewards/rejected": -0.71767258644104, + "step": 673 + }, + { + "epoch": 0.4192846034214619, + "grad_norm": 0.3674079179763794, + "learning_rate": 3.3150000000000006e-05, + "log_odds_chosen": 4.395227432250977, + "log_odds_ratio": -0.3885990381240845, + "logits/chosen": 0.1841599941253662, + "logits/rejected": 0.3264680504798889, + "logps/chosen": -1.2810193300247192, + "logps/rejected": -5.530620098114014, + "loss": 3.2217, + "nll_loss": 3.182861804962158, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12810194492340088, + "rewards/margins": 0.42496004700660706, + "rewards/rejected": -0.5530620217323303, + "step": 674 + }, + { + "epoch": 0.4199066874027994, + "grad_norm": 0.3473321199417114, + "learning_rate": 3.3125e-05, + "log_odds_chosen": 7.033344745635986, + "log_odds_ratio": -0.09653866291046143, + "logits/chosen": 0.022332118824124336, + "logits/rejected": 0.26113685965538025, + "logps/chosen": -0.9488670825958252, + "logps/rejected": -7.321063041687012, + "loss": 2.7077, + "nll_loss": 2.698029041290283, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09488672018051147, + "rewards/margins": 0.6372196078300476, + "rewards/rejected": -0.7321063280105591, + "step": 675 + }, + { + "epoch": 0.42052877138413686, + "grad_norm": 0.5475688576698303, + "learning_rate": 3.3100000000000005e-05, + "log_odds_chosen": 4.550343036651611, + "log_odds_ratio": -0.4450215697288513, + "logits/chosen": 0.22917914390563965, + "logits/rejected": 0.44544917345046997, + "logps/chosen": -1.1535780429840088, + "logps/rejected": -5.414106369018555, + "loss": 3.0895, + "nll_loss": 3.044969320297241, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11535780131816864, + "rewards/margins": 0.42605286836624146, + "rewards/rejected": -0.5414106249809265, + "step": 676 + }, + { + "epoch": 0.42115085536547436, + "grad_norm": 0.6116684079170227, + "learning_rate": 3.3075e-05, + "log_odds_chosen": 2.995664596557617, + "log_odds_ratio": -0.6276533603668213, + "logits/chosen": 0.14690755307674408, + "logits/rejected": 0.3131594657897949, + "logps/chosen": -1.151829719543457, + "logps/rejected": -3.948768138885498, + "loss": 2.504, + "nll_loss": 2.4411871433258057, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11518297344446182, + "rewards/margins": 0.2796938121318817, + "rewards/rejected": -0.39487677812576294, + "step": 677 + }, + { + "epoch": 0.4217729393468118, + "grad_norm": 0.4240874946117401, + "learning_rate": 3.3050000000000004e-05, + "log_odds_chosen": 5.284928321838379, + "log_odds_ratio": -0.183100625872612, + "logits/chosen": 0.1283799111843109, + "logits/rejected": 0.2122117280960083, + "logps/chosen": -0.9375210404396057, + "logps/rejected": -5.698101997375488, + "loss": 3.3374, + "nll_loss": 3.319121837615967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09375210851430893, + "rewards/margins": 0.47605806589126587, + "rewards/rejected": -0.569810152053833, + "step": 678 + }, + { + "epoch": 0.4223950233281493, + "grad_norm": 0.3675094246864319, + "learning_rate": 3.3025e-05, + "log_odds_chosen": 8.166585922241211, + "log_odds_ratio": -0.18817320466041565, + "logits/chosen": 0.23240971565246582, + "logits/rejected": 0.30073827505111694, + "logps/chosen": -0.8773943185806274, + "logps/rejected": -8.555867195129395, + "loss": 3.5129, + "nll_loss": 3.4940483570098877, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08773943036794662, + "rewards/margins": 0.7678471803665161, + "rewards/rejected": -0.8555866479873657, + "step": 679 + }, + { + "epoch": 0.4230171073094868, + "grad_norm": 0.3875802159309387, + "learning_rate": 3.3e-05, + "log_odds_chosen": 7.042316913604736, + "log_odds_ratio": -0.33785781264305115, + "logits/chosen": 0.2113608419895172, + "logits/rejected": 0.20565426349639893, + "logps/chosen": -1.0790364742279053, + "logps/rejected": -7.6537089347839355, + "loss": 3.4207, + "nll_loss": 3.3869504928588867, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10790365934371948, + "rewards/margins": 0.6574673056602478, + "rewards/rejected": -0.7653709053993225, + "step": 680 + }, + { + "epoch": 0.4236391912908243, + "grad_norm": 0.47894734144210815, + "learning_rate": 3.2975e-05, + "log_odds_chosen": 8.363826751708984, + "log_odds_ratio": -0.121647410094738, + "logits/chosen": 0.16830061376094818, + "logits/rejected": 0.7813224196434021, + "logps/chosen": -1.120478630065918, + "logps/rejected": -9.116507530212402, + "loss": 2.7327, + "nll_loss": 2.720547676086426, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11204788088798523, + "rewards/margins": 0.7996028661727905, + "rewards/rejected": -0.9116507768630981, + "step": 681 + }, + { + "epoch": 0.42426127527216173, + "grad_norm": 0.5742572546005249, + "learning_rate": 3.295e-05, + "log_odds_chosen": 8.133566856384277, + "log_odds_ratio": -0.07514964044094086, + "logits/chosen": 0.10974755883216858, + "logits/rejected": 0.43608176708221436, + "logps/chosen": -0.74546879529953, + "logps/rejected": -8.146604537963867, + "loss": 2.561, + "nll_loss": 2.553471803665161, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07454688102006912, + "rewards/margins": 0.7401136159896851, + "rewards/rejected": -0.8146604299545288, + "step": 682 + }, + { + "epoch": 0.42488335925349924, + "grad_norm": 0.4656708240509033, + "learning_rate": 3.2925e-05, + "log_odds_chosen": 5.774636745452881, + "log_odds_ratio": -0.13835880160331726, + "logits/chosen": 0.3287077248096466, + "logits/rejected": 0.6844013929367065, + "logps/chosen": -1.2045843601226807, + "logps/rejected": -6.656024932861328, + "loss": 3.3318, + "nll_loss": 3.3180010318756104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12045843154191971, + "rewards/margins": 0.5451440811157227, + "rewards/rejected": -0.6656025052070618, + "step": 683 + }, + { + "epoch": 0.4255054432348367, + "grad_norm": 0.4819745421409607, + "learning_rate": 3.29e-05, + "log_odds_chosen": 11.101076126098633, + "log_odds_ratio": -0.05002117529511452, + "logits/chosen": 0.4243965446949005, + "logits/rejected": 1.0198917388916016, + "logps/chosen": -1.1497440338134766, + "logps/rejected": -11.876899719238281, + "loss": 3.3201, + "nll_loss": 3.315087080001831, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11497440934181213, + "rewards/margins": 1.0727156400680542, + "rewards/rejected": -1.187690019607544, + "step": 684 + }, + { + "epoch": 0.4261275272161742, + "grad_norm": 0.5185660719871521, + "learning_rate": 3.2875e-05, + "log_odds_chosen": 11.11820125579834, + "log_odds_ratio": -0.0768592581152916, + "logits/chosen": 0.29388269782066345, + "logits/rejected": 0.9650092124938965, + "logps/chosen": -1.116542100906372, + "logps/rejected": -11.861469268798828, + "loss": 2.4838, + "nll_loss": 2.4760661125183105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11165420711040497, + "rewards/margins": 1.0744926929473877, + "rewards/rejected": -1.186146855354309, + "step": 685 + }, + { + "epoch": 0.42674961119751165, + "grad_norm": 0.6650855541229248, + "learning_rate": 3.2850000000000006e-05, + "log_odds_chosen": 5.216774940490723, + "log_odds_ratio": -0.18936312198638916, + "logits/chosen": 0.33131909370422363, + "logits/rejected": 0.5103744864463806, + "logps/chosen": -1.0327638387680054, + "logps/rejected": -5.904963493347168, + "loss": 2.7629, + "nll_loss": 2.7439844608306885, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10327637195587158, + "rewards/margins": 0.48721998929977417, + "rewards/rejected": -0.5904964208602905, + "step": 686 + }, + { + "epoch": 0.42737169517884915, + "grad_norm": 0.48068517446517944, + "learning_rate": 3.2825e-05, + "log_odds_chosen": 7.785678386688232, + "log_odds_ratio": -0.4847630262374878, + "logits/chosen": 0.31015443801879883, + "logits/rejected": 0.8882300853729248, + "logps/chosen": -1.257796049118042, + "logps/rejected": -8.93236255645752, + "loss": 3.1948, + "nll_loss": 3.146324872970581, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12577959895133972, + "rewards/margins": 0.767456591129303, + "rewards/rejected": -0.8932361602783203, + "step": 687 + }, + { + "epoch": 0.4279937791601866, + "grad_norm": 0.5043879747390747, + "learning_rate": 3.2800000000000004e-05, + "log_odds_chosen": 9.975924491882324, + "log_odds_ratio": -0.09700936079025269, + "logits/chosen": 0.4725090265274048, + "logits/rejected": 0.9532057642936707, + "logps/chosen": -1.3929022550582886, + "logps/rejected": -11.091150283813477, + "loss": 3.2531, + "nll_loss": 3.2434072494506836, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1392902284860611, + "rewards/margins": 0.9698247909545898, + "rewards/rejected": -1.1091151237487793, + "step": 688 + }, + { + "epoch": 0.4286158631415241, + "grad_norm": 0.379336953163147, + "learning_rate": 3.2775e-05, + "log_odds_chosen": 6.284054279327393, + "log_odds_ratio": -0.14844508469104767, + "logits/chosen": 0.22702661156654358, + "logits/rejected": 0.30767202377319336, + "logps/chosen": -0.7351300120353699, + "logps/rejected": -6.354801654815674, + "loss": 3.1944, + "nll_loss": 3.179506778717041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07351300120353699, + "rewards/margins": 0.561967134475708, + "rewards/rejected": -0.6354801058769226, + "step": 689 + }, + { + "epoch": 0.42923794712286156, + "grad_norm": 0.3885799050331116, + "learning_rate": 3.275e-05, + "log_odds_chosen": 8.412586212158203, + "log_odds_ratio": -0.14217258989810944, + "logits/chosen": 0.307309627532959, + "logits/rejected": 0.5339474081993103, + "logps/chosen": -1.0682765245437622, + "logps/rejected": -9.073349952697754, + "loss": 3.3597, + "nll_loss": 3.3454573154449463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10682766139507294, + "rewards/margins": 0.8005073070526123, + "rewards/rejected": -0.9073349833488464, + "step": 690 + }, + { + "epoch": 0.42986003110419907, + "grad_norm": 0.4812905192375183, + "learning_rate": 3.2725e-05, + "log_odds_chosen": 4.128777503967285, + "log_odds_ratio": -0.2909623384475708, + "logits/chosen": 0.2246783971786499, + "logits/rejected": 0.3128810524940491, + "logps/chosen": -0.982389509677887, + "logps/rejected": -4.593428134918213, + "loss": 3.1373, + "nll_loss": 3.1081809997558594, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09823895990848541, + "rewards/margins": 0.361103892326355, + "rewards/rejected": -0.4593428373336792, + "step": 691 + }, + { + "epoch": 0.4304821150855365, + "grad_norm": 0.4728715121746063, + "learning_rate": 3.27e-05, + "log_odds_chosen": 8.828766822814941, + "log_odds_ratio": -0.32048314809799194, + "logits/chosen": 0.239055335521698, + "logits/rejected": 0.45625340938568115, + "logps/chosen": -0.9721330404281616, + "logps/rejected": -9.417234420776367, + "loss": 3.3518, + "nll_loss": 3.319707155227661, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09721331298351288, + "rewards/margins": 0.8445100784301758, + "rewards/rejected": -0.9417234659194946, + "step": 692 + }, + { + "epoch": 0.431104199066874, + "grad_norm": 0.6615074872970581, + "learning_rate": 3.2675e-05, + "log_odds_chosen": 3.707711696624756, + "log_odds_ratio": -0.5199106931686401, + "logits/chosen": 0.13658814132213593, + "logits/rejected": 0.24072976410388947, + "logps/chosen": -1.3815832138061523, + "logps/rejected": -4.977440357208252, + "loss": 3.5352, + "nll_loss": 3.483212947845459, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13815832138061523, + "rewards/margins": 0.359585702419281, + "rewards/rejected": -0.49774405360221863, + "step": 693 + }, + { + "epoch": 0.43172628304821153, + "grad_norm": 0.5314946174621582, + "learning_rate": 3.265e-05, + "log_odds_chosen": 12.457331657409668, + "log_odds_ratio": -0.14057813584804535, + "logits/chosen": 0.2213817536830902, + "logits/rejected": 0.8500862121582031, + "logps/chosen": -1.0103843212127686, + "logps/rejected": -12.985528945922852, + "loss": 2.7115, + "nll_loss": 2.697409152984619, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10103843361139297, + "rewards/margins": 1.197514295578003, + "rewards/rejected": -1.2985528707504272, + "step": 694 + }, + { + "epoch": 0.432348367029549, + "grad_norm": 0.47115829586982727, + "learning_rate": 3.2625e-05, + "log_odds_chosen": 16.424076080322266, + "log_odds_ratio": -0.10967248678207397, + "logits/chosen": 0.17491212487220764, + "logits/rejected": 0.6353819370269775, + "logps/chosen": -1.1987203359603882, + "logps/rejected": -17.27529525756836, + "loss": 3.0507, + "nll_loss": 3.0397396087646484, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11987203359603882, + "rewards/margins": 1.6076574325561523, + "rewards/rejected": -1.727529525756836, + "step": 695 + }, + { + "epoch": 0.4329704510108865, + "grad_norm": 0.4146907329559326, + "learning_rate": 3.26e-05, + "log_odds_chosen": 10.058050155639648, + "log_odds_ratio": -0.004779008217155933, + "logits/chosen": 0.33498451113700867, + "logits/rejected": 0.9141867160797119, + "logps/chosen": -1.1475989818572998, + "logps/rejected": -10.654457092285156, + "loss": 3.33, + "nll_loss": 3.329507350921631, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1147598996758461, + "rewards/margins": 0.9506858587265015, + "rewards/rejected": -1.0654456615447998, + "step": 696 + }, + { + "epoch": 0.43359253499222394, + "grad_norm": 0.771777868270874, + "learning_rate": 3.2575e-05, + "log_odds_chosen": 5.501579761505127, + "log_odds_ratio": -0.4520403742790222, + "logits/chosen": 0.11421041190624237, + "logits/rejected": 0.4084751307964325, + "logps/chosen": -1.05423903465271, + "logps/rejected": -6.390564441680908, + "loss": 2.4735, + "nll_loss": 2.4283266067504883, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10542389750480652, + "rewards/margins": 0.5336325764656067, + "rewards/rejected": -0.6390565037727356, + "step": 697 + }, + { + "epoch": 0.43421461897356145, + "grad_norm": 0.41613906621932983, + "learning_rate": 3.2550000000000005e-05, + "log_odds_chosen": 11.464178085327148, + "log_odds_ratio": -0.3043498396873474, + "logits/chosen": 0.23589307069778442, + "logits/rejected": 0.7353752851486206, + "logps/chosen": -1.2409882545471191, + "logps/rejected": -12.499031066894531, + "loss": 3.0398, + "nll_loss": 3.0093822479248047, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12409882992506027, + "rewards/margins": 1.1258044242858887, + "rewards/rejected": -1.2499030828475952, + "step": 698 + }, + { + "epoch": 0.4348367029548989, + "grad_norm": 0.41328713297843933, + "learning_rate": 3.2525e-05, + "log_odds_chosen": 10.817594528198242, + "log_odds_ratio": -0.10718300193548203, + "logits/chosen": 0.25043705105781555, + "logits/rejected": 0.6431066393852234, + "logps/chosen": -0.8873676061630249, + "logps/rejected": -11.163918495178223, + "loss": 3.1033, + "nll_loss": 3.0926289558410645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08873676508665085, + "rewards/margins": 1.0276551246643066, + "rewards/rejected": -1.1163920164108276, + "step": 699 + }, + { + "epoch": 0.4354587869362364, + "grad_norm": 0.4063067138195038, + "learning_rate": 3.2500000000000004e-05, + "log_odds_chosen": 12.294190406799316, + "log_odds_ratio": -0.12343909591436386, + "logits/chosen": 0.33206403255462646, + "logits/rejected": 0.7490406632423401, + "logps/chosen": -0.9793272614479065, + "logps/rejected": -12.811656951904297, + "loss": 3.2405, + "nll_loss": 3.2281508445739746, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09793273359537125, + "rewards/margins": 1.1832330226898193, + "rewards/rejected": -1.2811657190322876, + "step": 700 + }, + { + "epoch": 0.43608087091757386, + "grad_norm": 9.865860939025879, + "learning_rate": 3.2474999999999997e-05, + "log_odds_chosen": 8.678180694580078, + "log_odds_ratio": -0.7480700016021729, + "logits/chosen": 0.48775994777679443, + "logits/rejected": 1.0069730281829834, + "logps/chosen": -3.711902618408203, + "logps/rejected": -12.224882125854492, + "loss": 3.892, + "nll_loss": 3.8171887397766113, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.37119027972221375, + "rewards/margins": 0.8512980341911316, + "rewards/rejected": -1.2224884033203125, + "step": 701 + }, + { + "epoch": 0.43670295489891137, + "grad_norm": 15.485320091247559, + "learning_rate": 3.245e-05, + "log_odds_chosen": 10.974223136901855, + "log_odds_ratio": -0.2399260401725769, + "logits/chosen": 0.2739264965057373, + "logits/rejected": 0.861457347869873, + "logps/chosen": -1.5031821727752686, + "logps/rejected": -12.213601112365723, + "loss": 3.1174, + "nll_loss": 3.093437671661377, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1503182202577591, + "rewards/margins": 1.0710419416427612, + "rewards/rejected": -1.221360206604004, + "step": 702 + }, + { + "epoch": 0.4373250388802488, + "grad_norm": 0.6337563395500183, + "learning_rate": 3.2425e-05, + "log_odds_chosen": 10.547877311706543, + "log_odds_ratio": -0.17687174677848816, + "logits/chosen": 0.16326065361499786, + "logits/rejected": 0.6496604084968567, + "logps/chosen": -1.749812364578247, + "logps/rejected": -12.043559074401855, + "loss": 3.144, + "nll_loss": 3.126358985900879, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1749812364578247, + "rewards/margins": 1.0293747186660767, + "rewards/rejected": -1.2043559551239014, + "step": 703 + }, + { + "epoch": 0.4379471228615863, + "grad_norm": 0.4531756043434143, + "learning_rate": 3.24e-05, + "log_odds_chosen": 7.200477600097656, + "log_odds_ratio": -0.31999287009239197, + "logits/chosen": 0.2357894331216812, + "logits/rejected": 0.5349158644676208, + "logps/chosen": -1.258987545967102, + "logps/rejected": -8.253671646118164, + "loss": 3.0629, + "nll_loss": 3.030871868133545, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12589874863624573, + "rewards/margins": 0.6994683146476746, + "rewards/rejected": -0.8253670930862427, + "step": 704 + }, + { + "epoch": 0.4385692068429238, + "grad_norm": 0.9053670167922974, + "learning_rate": 3.2375e-05, + "log_odds_chosen": 10.318868637084961, + "log_odds_ratio": -0.1663799285888672, + "logits/chosen": 0.26653003692626953, + "logits/rejected": 0.845885157585144, + "logps/chosen": -1.1329401731491089, + "logps/rejected": -11.080015182495117, + "loss": 2.6064, + "nll_loss": 2.589715003967285, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11329401284456253, + "rewards/margins": 0.994707465171814, + "rewards/rejected": -1.108001470565796, + "step": 705 + }, + { + "epoch": 0.4391912908242613, + "grad_norm": 0.7091290950775146, + "learning_rate": 3.235e-05, + "log_odds_chosen": 5.621170997619629, + "log_odds_ratio": -0.23729656636714935, + "logits/chosen": 0.16418294608592987, + "logits/rejected": 0.46262502670288086, + "logps/chosen": -1.284653663635254, + "logps/rejected": -6.603672027587891, + "loss": 2.5671, + "nll_loss": 2.543408155441284, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12846536934375763, + "rewards/margins": 0.5319017767906189, + "rewards/rejected": -0.6603671908378601, + "step": 706 + }, + { + "epoch": 0.43981337480559873, + "grad_norm": 0.4145105183124542, + "learning_rate": 3.2325e-05, + "log_odds_chosen": 11.086092948913574, + "log_odds_ratio": -0.12887164950370789, + "logits/chosen": 0.19201578199863434, + "logits/rejected": 0.8350374102592468, + "logps/chosen": -1.087384581565857, + "logps/rejected": -11.732353210449219, + "loss": 2.7902, + "nll_loss": 2.77732515335083, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10873845964670181, + "rewards/margins": 1.0644968748092651, + "rewards/rejected": -1.1732354164123535, + "step": 707 + }, + { + "epoch": 0.44043545878693624, + "grad_norm": 0.4064066708087921, + "learning_rate": 3.2300000000000006e-05, + "log_odds_chosen": 9.882148742675781, + "log_odds_ratio": -0.08438535779714584, + "logits/chosen": 0.2323889285326004, + "logits/rejected": 0.5477718710899353, + "logps/chosen": -1.0335191488265991, + "logps/rejected": -10.476679801940918, + "loss": 3.3664, + "nll_loss": 3.3579728603363037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10335192084312439, + "rewards/margins": 0.944316029548645, + "rewards/rejected": -1.0476679801940918, + "step": 708 + }, + { + "epoch": 0.4410575427682737, + "grad_norm": 0.46960577368736267, + "learning_rate": 3.2275e-05, + "log_odds_chosen": 9.509929656982422, + "log_odds_ratio": -0.04786451533436775, + "logits/chosen": 0.3471740186214447, + "logits/rejected": 0.7918584942817688, + "logps/chosen": -1.0728213787078857, + "logps/rejected": -10.050264358520508, + "loss": 3.4729, + "nll_loss": 3.468099355697632, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1072821393609047, + "rewards/margins": 0.8977442979812622, + "rewards/rejected": -1.0050263404846191, + "step": 709 + }, + { + "epoch": 0.4416796267496112, + "grad_norm": 0.4803198277950287, + "learning_rate": 3.2250000000000005e-05, + "log_odds_chosen": 9.757268905639648, + "log_odds_ratio": -0.057732146233320236, + "logits/chosen": 0.23063698410987854, + "logits/rejected": 0.6390030384063721, + "logps/chosen": -1.1186230182647705, + "logps/rejected": -10.331436157226562, + "loss": 2.9499, + "nll_loss": 2.944155693054199, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11186229437589645, + "rewards/margins": 0.9212814569473267, + "rewards/rejected": -1.0331437587738037, + "step": 710 + }, + { + "epoch": 0.4423017107309487, + "grad_norm": 0.5018370747566223, + "learning_rate": 3.2225e-05, + "log_odds_chosen": 7.726747512817383, + "log_odds_ratio": -0.13159742951393127, + "logits/chosen": 0.19366326928138733, + "logits/rejected": 0.6917151808738708, + "logps/chosen": -1.2360267639160156, + "logps/rejected": -8.579374313354492, + "loss": 2.7357, + "nll_loss": 2.722532272338867, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12360267341136932, + "rewards/margins": 0.7343348264694214, + "rewards/rejected": -0.8579374551773071, + "step": 711 + }, + { + "epoch": 0.44292379471228616, + "grad_norm": 0.38528966903686523, + "learning_rate": 3.2200000000000003e-05, + "log_odds_chosen": 7.1198835372924805, + "log_odds_ratio": -0.08342044800519943, + "logits/chosen": 0.08595260232686996, + "logits/rejected": 0.31091731786727905, + "logps/chosen": -0.8347682952880859, + "logps/rejected": -7.294371604919434, + "loss": 2.9365, + "nll_loss": 2.9281294345855713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08347682654857635, + "rewards/margins": 0.6459603309631348, + "rewards/rejected": -0.7294372320175171, + "step": 712 + }, + { + "epoch": 0.44354587869362366, + "grad_norm": 0.4061030447483063, + "learning_rate": 3.2175e-05, + "log_odds_chosen": 10.162424087524414, + "log_odds_ratio": -0.04226357489824295, + "logits/chosen": 0.4276748597621918, + "logits/rejected": 0.6757638454437256, + "logps/chosen": -1.3077592849731445, + "logps/rejected": -11.131202697753906, + "loss": 3.7205, + "nll_loss": 3.7162415981292725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13077592849731445, + "rewards/margins": 0.982344388961792, + "rewards/rejected": -1.1131203174591064, + "step": 713 + }, + { + "epoch": 0.4441679626749611, + "grad_norm": 0.4195731282234192, + "learning_rate": 3.215e-05, + "log_odds_chosen": 6.164430618286133, + "log_odds_ratio": -0.18825969099998474, + "logits/chosen": 0.3099673092365265, + "logits/rejected": 0.4048096835613251, + "logps/chosen": -1.0167288780212402, + "logps/rejected": -6.674760818481445, + "loss": 3.1177, + "nll_loss": 3.098839282989502, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10167289525270462, + "rewards/margins": 0.5658032298088074, + "rewards/rejected": -0.6674761176109314, + "step": 714 + }, + { + "epoch": 0.4447900466562986, + "grad_norm": 0.45698559284210205, + "learning_rate": 3.2125e-05, + "log_odds_chosen": 6.022528648376465, + "log_odds_ratio": -0.13800564408302307, + "logits/chosen": 0.34452202916145325, + "logits/rejected": 0.5671758055686951, + "logps/chosen": -1.0680052042007446, + "logps/rejected": -6.720895290374756, + "loss": 3.0917, + "nll_loss": 3.077878475189209, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10680052638053894, + "rewards/margins": 0.5652889609336853, + "rewards/rejected": -0.6720895171165466, + "step": 715 + }, + { + "epoch": 0.4454121306376361, + "grad_norm": 0.5258144736289978, + "learning_rate": 3.21e-05, + "log_odds_chosen": 7.468056678771973, + "log_odds_ratio": -0.05660433694720268, + "logits/chosen": 0.29723796248435974, + "logits/rejected": 0.726962685585022, + "logps/chosen": -1.1292157173156738, + "logps/rejected": -8.210090637207031, + "loss": 2.5706, + "nll_loss": 2.564983367919922, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1129215732216835, + "rewards/margins": 0.7080875039100647, + "rewards/rejected": -0.8210090398788452, + "step": 716 + }, + { + "epoch": 0.4460342146189736, + "grad_norm": 0.6525778770446777, + "learning_rate": 3.2075e-05, + "log_odds_chosen": 8.634244918823242, + "log_odds_ratio": -0.2098091095685959, + "logits/chosen": 0.3678334951400757, + "logits/rejected": 0.6341312527656555, + "logps/chosen": -1.0692224502563477, + "logps/rejected": -8.939923286437988, + "loss": 3.4355, + "nll_loss": 3.4145524501800537, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10692223906517029, + "rewards/margins": 0.7870701551437378, + "rewards/rejected": -0.8939923644065857, + "step": 717 + }, + { + "epoch": 0.44665629860031103, + "grad_norm": 0.44750717282295227, + "learning_rate": 3.205e-05, + "log_odds_chosen": 3.406766653060913, + "log_odds_ratio": -0.31649070978164673, + "logits/chosen": 0.3066599369049072, + "logits/rejected": 0.2885739207267761, + "logps/chosen": -1.0535868406295776, + "logps/rejected": -4.101262092590332, + "loss": 3.3799, + "nll_loss": 3.3482348918914795, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10535868257284164, + "rewards/margins": 0.3047674894332886, + "rewards/rejected": -0.4101262092590332, + "step": 718 + }, + { + "epoch": 0.44727838258164854, + "grad_norm": 0.46216386556625366, + "learning_rate": 3.2025e-05, + "log_odds_chosen": 9.778112411499023, + "log_odds_ratio": -0.0030376752838492393, + "logits/chosen": 0.33077576756477356, + "logits/rejected": 0.7403644323348999, + "logps/chosen": -1.0820248126983643, + "logps/rejected": -10.346675872802734, + "loss": 3.2615, + "nll_loss": 3.261215925216675, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1082024872303009, + "rewards/margins": 0.9264651536941528, + "rewards/rejected": -1.034667730331421, + "step": 719 + }, + { + "epoch": 0.447900466562986, + "grad_norm": 0.749276340007782, + "learning_rate": 3.2000000000000005e-05, + "log_odds_chosen": 5.970105171203613, + "log_odds_ratio": -0.3304394483566284, + "logits/chosen": 0.16680698096752167, + "logits/rejected": 0.3060448467731476, + "logps/chosen": -1.435762882232666, + "logps/rejected": -7.024744033813477, + "loss": 2.9031, + "nll_loss": 2.870039463043213, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14357629418373108, + "rewards/margins": 0.5588980913162231, + "rewards/rejected": -0.7024743556976318, + "step": 720 + }, + { + "epoch": 0.4485225505443235, + "grad_norm": 0.539753794670105, + "learning_rate": 3.1975e-05, + "log_odds_chosen": 3.475248098373413, + "log_odds_ratio": -0.529831051826477, + "logits/chosen": 0.2159595638513565, + "logits/rejected": 0.42169877886772156, + "logps/chosen": -1.6044065952301025, + "logps/rejected": -5.004334926605225, + "loss": 3.2259, + "nll_loss": 3.172966241836548, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.16044065356254578, + "rewards/margins": 0.33999279141426086, + "rewards/rejected": -0.5004334449768066, + "step": 721 + }, + { + "epoch": 0.44914463452566095, + "grad_norm": 0.452215313911438, + "learning_rate": 3.1950000000000004e-05, + "log_odds_chosen": 7.956733226776123, + "log_odds_ratio": -0.3105076849460602, + "logits/chosen": 0.16990184783935547, + "logits/rejected": 0.48880094289779663, + "logps/chosen": -1.11954927444458, + "logps/rejected": -8.724953651428223, + "loss": 2.9063, + "nll_loss": 2.8752598762512207, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1119549423456192, + "rewards/margins": 0.7605404257774353, + "rewards/rejected": -0.8724953532218933, + "step": 722 + }, + { + "epoch": 0.44976671850699845, + "grad_norm": 0.5523648262023926, + "learning_rate": 3.1925e-05, + "log_odds_chosen": 6.903494358062744, + "log_odds_ratio": -0.2759135067462921, + "logits/chosen": 0.0760892778635025, + "logits/rejected": 0.32330164313316345, + "logps/chosen": -1.1168400049209595, + "logps/rejected": -7.690395832061768, + "loss": 2.3726, + "nll_loss": 2.344987392425537, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11168399453163147, + "rewards/margins": 0.6573556065559387, + "rewards/rejected": -0.7690396308898926, + "step": 723 + }, + { + "epoch": 0.4503888024883359, + "grad_norm": 0.4526844322681427, + "learning_rate": 3.19e-05, + "log_odds_chosen": 7.047250270843506, + "log_odds_ratio": -0.1251072883605957, + "logits/chosen": 0.1379421055316925, + "logits/rejected": 0.5924164652824402, + "logps/chosen": -1.2250146865844727, + "logps/rejected": -7.931028842926025, + "loss": 2.6565, + "nll_loss": 2.643982172012329, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12250147014856339, + "rewards/margins": 0.6706014275550842, + "rewards/rejected": -0.7931028604507446, + "step": 724 + }, + { + "epoch": 0.4510108864696734, + "grad_norm": 0.5021094083786011, + "learning_rate": 3.1875e-05, + "log_odds_chosen": 9.610315322875977, + "log_odds_ratio": -0.24255721271038055, + "logits/chosen": 0.3092350363731384, + "logits/rejected": 0.9811382293701172, + "logps/chosen": -1.3039846420288086, + "logps/rejected": -10.703886032104492, + "loss": 2.5964, + "nll_loss": 2.572134256362915, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1303984671831131, + "rewards/margins": 0.9399901628494263, + "rewards/rejected": -1.0703885555267334, + "step": 725 + }, + { + "epoch": 0.45163297045101086, + "grad_norm": 6.453852653503418, + "learning_rate": 3.185e-05, + "log_odds_chosen": 9.646251678466797, + "log_odds_ratio": -0.11725477129220963, + "logits/chosen": 0.49717283248901367, + "logits/rejected": 0.8251327276229858, + "logps/chosen": -1.5012346506118774, + "logps/rejected": -10.704265594482422, + "loss": 3.4432, + "nll_loss": 3.431502103805542, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1501234769821167, + "rewards/margins": 0.9203031659126282, + "rewards/rejected": -1.0704265832901, + "step": 726 + }, + { + "epoch": 0.45225505443234837, + "grad_norm": 0.407627135515213, + "learning_rate": 3.1825e-05, + "log_odds_chosen": 6.178986549377441, + "log_odds_ratio": -0.25438594818115234, + "logits/chosen": 0.1292475461959839, + "logits/rejected": 0.43548843264579773, + "logps/chosen": -0.9622341990470886, + "logps/rejected": -6.761863708496094, + "loss": 2.5614, + "nll_loss": 2.5359420776367188, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09622342884540558, + "rewards/margins": 0.5799629092216492, + "rewards/rejected": -0.6761863231658936, + "step": 727 + }, + { + "epoch": 0.4528771384136858, + "grad_norm": 0.5671166777610779, + "learning_rate": 3.18e-05, + "log_odds_chosen": 9.908763885498047, + "log_odds_ratio": -0.15785574913024902, + "logits/chosen": 0.35942667722702026, + "logits/rejected": 0.8982104063034058, + "logps/chosen": -1.2879177331924438, + "logps/rejected": -10.947426795959473, + "loss": 3.2627, + "nll_loss": 3.2469372749328613, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12879177927970886, + "rewards/margins": 0.9659509062767029, + "rewards/rejected": -1.094742774963379, + "step": 728 + }, + { + "epoch": 0.4534992223950233, + "grad_norm": 0.4187334179878235, + "learning_rate": 3.1775e-05, + "log_odds_chosen": 6.857314586639404, + "log_odds_ratio": -0.12401160597801208, + "logits/chosen": 0.29477638006210327, + "logits/rejected": 0.567139744758606, + "logps/chosen": -0.97846519947052, + "logps/rejected": -7.214424133300781, + "loss": 3.2737, + "nll_loss": 3.2612991333007812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09784651547670364, + "rewards/margins": 0.6235959529876709, + "rewards/rejected": -0.721442461013794, + "step": 729 + }, + { + "epoch": 0.45412130637636083, + "grad_norm": 0.43191951513290405, + "learning_rate": 3.175e-05, + "log_odds_chosen": 11.218259811401367, + "log_odds_ratio": -0.068722665309906, + "logits/chosen": 0.43568962812423706, + "logits/rejected": 1.0645880699157715, + "logps/chosen": -1.2674078941345215, + "logps/rejected": -12.176996231079102, + "loss": 3.3798, + "nll_loss": 3.372969388961792, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12674078345298767, + "rewards/margins": 1.090958833694458, + "rewards/rejected": -1.217699646949768, + "step": 730 + }, + { + "epoch": 0.4547433903576983, + "grad_norm": 1.9741532802581787, + "learning_rate": 3.1725e-05, + "log_odds_chosen": 7.206732273101807, + "log_odds_ratio": -0.13450881838798523, + "logits/chosen": 0.30000701546669006, + "logits/rejected": 0.480673611164093, + "logps/chosen": -1.4034446477890015, + "logps/rejected": -8.346330642700195, + "loss": 3.3633, + "nll_loss": 3.3498318195343018, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14034447073936462, + "rewards/margins": 0.6942886114120483, + "rewards/rejected": -0.8346331119537354, + "step": 731 + }, + { + "epoch": 0.4553654743390358, + "grad_norm": 0.3933872878551483, + "learning_rate": 3.1700000000000005e-05, + "log_odds_chosen": 9.562494277954102, + "log_odds_ratio": -0.07410041987895966, + "logits/chosen": 0.34438636898994446, + "logits/rejected": 0.6179068088531494, + "logps/chosen": -0.7376449704170227, + "logps/rejected": -9.597999572753906, + "loss": 3.4239, + "nll_loss": 3.4165120124816895, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07376450300216675, + "rewards/margins": 0.8860355615615845, + "rewards/rejected": -0.9598000049591064, + "step": 732 + }, + { + "epoch": 0.45598755832037324, + "grad_norm": 0.4037986695766449, + "learning_rate": 3.1675e-05, + "log_odds_chosen": 7.081986427307129, + "log_odds_ratio": -0.10163398832082748, + "logits/chosen": 0.18077480792999268, + "logits/rejected": 0.4592246115207672, + "logps/chosen": -0.9602686166763306, + "logps/rejected": -7.555391311645508, + "loss": 2.9457, + "nll_loss": 2.935537576675415, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09602686762809753, + "rewards/margins": 0.6595123410224915, + "rewards/rejected": -0.7555391788482666, + "step": 733 + }, + { + "epoch": 0.45660964230171075, + "grad_norm": 0.5620434284210205, + "learning_rate": 3.1650000000000004e-05, + "log_odds_chosen": 5.531484603881836, + "log_odds_ratio": -0.32697606086730957, + "logits/chosen": 0.1813567876815796, + "logits/rejected": 0.3070491850376129, + "logps/chosen": -1.2444071769714355, + "logps/rejected": -6.4295549392700195, + "loss": 3.2722, + "nll_loss": 3.239509105682373, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12444071471691132, + "rewards/margins": 0.5185148119926453, + "rewards/rejected": -0.642955482006073, + "step": 734 + }, + { + "epoch": 0.4572317262830482, + "grad_norm": 0.4302683174610138, + "learning_rate": 3.1624999999999996e-05, + "log_odds_chosen": 6.523910999298096, + "log_odds_ratio": -0.14284959435462952, + "logits/chosen": 0.04424915090203285, + "logits/rejected": 0.13490360975265503, + "logps/chosen": -0.9167971611022949, + "logps/rejected": -6.748834133148193, + "loss": 3.1965, + "nll_loss": 3.1821680068969727, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09167972207069397, + "rewards/margins": 0.5832037329673767, + "rewards/rejected": -0.6748834848403931, + "step": 735 + }, + { + "epoch": 0.4578538102643857, + "grad_norm": 0.6089425086975098, + "learning_rate": 3.16e-05, + "log_odds_chosen": 5.9218363761901855, + "log_odds_ratio": -0.2695227563381195, + "logits/chosen": 0.2000441998243332, + "logits/rejected": 0.4127127230167389, + "logps/chosen": -1.3729795217514038, + "logps/rejected": -7.12251091003418, + "loss": 3.1966, + "nll_loss": 3.1696414947509766, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13729795813560486, + "rewards/margins": 0.5749530792236328, + "rewards/rejected": -0.7122510671615601, + "step": 736 + }, + { + "epoch": 0.45847589424572316, + "grad_norm": 0.5367254018783569, + "learning_rate": 3.1575e-05, + "log_odds_chosen": 6.432736396789551, + "log_odds_ratio": -0.16896378993988037, + "logits/chosen": 0.06318940222263336, + "logits/rejected": 0.297978013753891, + "logps/chosen": -1.222300410270691, + "logps/rejected": -7.110202312469482, + "loss": 3.0173, + "nll_loss": 3.0003702640533447, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12223003804683685, + "rewards/margins": 0.5887902975082397, + "rewards/rejected": -0.7110202312469482, + "step": 737 + }, + { + "epoch": 0.45909797822706067, + "grad_norm": 4.201266288757324, + "learning_rate": 3.155e-05, + "log_odds_chosen": 5.493893623352051, + "log_odds_ratio": -0.45646530389785767, + "logits/chosen": 0.21096059679985046, + "logits/rejected": 0.23016388714313507, + "logps/chosen": -1.8003606796264648, + "logps/rejected": -7.092996597290039, + "loss": 3.7453, + "nll_loss": 3.6996266841888428, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18003606796264648, + "rewards/margins": 0.5292636156082153, + "rewards/rejected": -0.7092996835708618, + "step": 738 + }, + { + "epoch": 0.4597200622083981, + "grad_norm": 0.41682955622673035, + "learning_rate": 3.1525e-05, + "log_odds_chosen": 3.3369433879852295, + "log_odds_ratio": -0.2148256152868271, + "logits/chosen": 0.12079880386590958, + "logits/rejected": 0.19746747612953186, + "logps/chosen": -1.0625683069229126, + "logps/rejected": -3.9425437450408936, + "loss": 3.5171, + "nll_loss": 3.4956541061401367, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10625684261322021, + "rewards/margins": 0.2879975736141205, + "rewards/rejected": -0.3942543864250183, + "step": 739 + }, + { + "epoch": 0.4603421461897356, + "grad_norm": 0.589731752872467, + "learning_rate": 3.15e-05, + "log_odds_chosen": 3.7177412509918213, + "log_odds_ratio": -0.39108750224113464, + "logits/chosen": -0.020437847822904587, + "logits/rejected": 0.1704711765050888, + "logps/chosen": -1.1689900159835815, + "logps/rejected": -4.5871758460998535, + "loss": 2.4873, + "nll_loss": 2.4481828212738037, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11689899861812592, + "rewards/margins": 0.34181857109069824, + "rewards/rejected": -0.45871755480766296, + "step": 740 + }, + { + "epoch": 0.4609642301710731, + "grad_norm": 0.6688488721847534, + "learning_rate": 3.1475e-05, + "log_odds_chosen": 4.912108421325684, + "log_odds_ratio": -0.13216131925582886, + "logits/chosen": 0.22601565718650818, + "logits/rejected": 0.3431988060474396, + "logps/chosen": -1.2438921928405762, + "logps/rejected": -5.821355819702148, + "loss": 3.6307, + "nll_loss": 3.617436408996582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12438922375440598, + "rewards/margins": 0.45774632692337036, + "rewards/rejected": -0.5821355581283569, + "step": 741 + }, + { + "epoch": 0.4615863141524106, + "grad_norm": 0.433036744594574, + "learning_rate": 3.145e-05, + "log_odds_chosen": 3.096634864807129, + "log_odds_ratio": -0.2476002722978592, + "logits/chosen": 0.043339017778635025, + "logits/rejected": -0.03985293209552765, + "logps/chosen": -0.9964567422866821, + "logps/rejected": -3.488633155822754, + "loss": 3.2937, + "nll_loss": 3.26889705657959, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09964566677808762, + "rewards/margins": 0.2492176592350006, + "rewards/rejected": -0.34886330366134644, + "step": 742 + }, + { + "epoch": 0.46220839813374803, + "grad_norm": 0.6005847454071045, + "learning_rate": 3.1425e-05, + "log_odds_chosen": 6.896453857421875, + "log_odds_ratio": -0.1344524621963501, + "logits/chosen": 0.08107919245958328, + "logits/rejected": 0.26698002219200134, + "logps/chosen": -0.9853373169898987, + "logps/rejected": -7.432549953460693, + "loss": 2.531, + "nll_loss": 2.5175111293792725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09853372722864151, + "rewards/margins": 0.644721269607544, + "rewards/rejected": -0.7432550191879272, + "step": 743 + }, + { + "epoch": 0.46283048211508554, + "grad_norm": 0.5875562429428101, + "learning_rate": 3.1400000000000004e-05, + "log_odds_chosen": 6.2522687911987305, + "log_odds_ratio": -0.14717459678649902, + "logits/chosen": 0.0802212506532669, + "logits/rejected": 0.36846601963043213, + "logps/chosen": -1.2279167175292969, + "logps/rejected": -7.11676025390625, + "loss": 2.9711, + "nll_loss": 2.9563703536987305, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12279167771339417, + "rewards/margins": 0.5888843536376953, + "rewards/rejected": -0.7116760015487671, + "step": 744 + }, + { + "epoch": 0.463452566096423, + "grad_norm": 0.6242512464523315, + "learning_rate": 3.1375e-05, + "log_odds_chosen": 8.841582298278809, + "log_odds_ratio": -0.2243223786354065, + "logits/chosen": 0.14888420701026917, + "logits/rejected": 0.574272096157074, + "logps/chosen": -1.1364891529083252, + "logps/rejected": -9.670944213867188, + "loss": 2.973, + "nll_loss": 2.9505813121795654, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1136489063501358, + "rewards/margins": 0.8534455299377441, + "rewards/rejected": -0.9670944213867188, + "step": 745 + }, + { + "epoch": 0.4640746500777605, + "grad_norm": 0.39740875363349915, + "learning_rate": 3.135e-05, + "log_odds_chosen": 6.4931817054748535, + "log_odds_ratio": -0.07528342306613922, + "logits/chosen": 0.23848633468151093, + "logits/rejected": 0.4403890371322632, + "logps/chosen": -1.1312795877456665, + "logps/rejected": -7.221208572387695, + "loss": 3.546, + "nll_loss": 3.5384349822998047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11312796920537949, + "rewards/margins": 0.608992874622345, + "rewards/rejected": -0.7221208214759827, + "step": 746 + }, + { + "epoch": 0.464696734059098, + "grad_norm": 0.5737400650978088, + "learning_rate": 3.1324999999999996e-05, + "log_odds_chosen": 7.452863693237305, + "log_odds_ratio": -0.15671104192733765, + "logits/chosen": 0.03789973258972168, + "logits/rejected": 0.1605812907218933, + "logps/chosen": -0.8898376226425171, + "logps/rejected": -7.745999336242676, + "loss": 3.0129, + "nll_loss": 2.997223377227783, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08898375928401947, + "rewards/margins": 0.6856162548065186, + "rewards/rejected": -0.7745999097824097, + "step": 747 + }, + { + "epoch": 0.46531881804043546, + "grad_norm": 0.4806496500968933, + "learning_rate": 3.13e-05, + "log_odds_chosen": 5.818041801452637, + "log_odds_ratio": -0.1867186278104782, + "logits/chosen": 0.1712789386510849, + "logits/rejected": 0.29497987031936646, + "logps/chosen": -0.8955323696136475, + "logps/rejected": -6.180495262145996, + "loss": 3.3477, + "nll_loss": 3.3290135860443115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08955323696136475, + "rewards/margins": 0.5284963846206665, + "rewards/rejected": -0.6180496215820312, + "step": 748 + }, + { + "epoch": 0.46594090202177296, + "grad_norm": 0.6791718006134033, + "learning_rate": 3.1275e-05, + "log_odds_chosen": 5.693668842315674, + "log_odds_ratio": -0.15988114476203918, + "logits/chosen": 0.11964002251625061, + "logits/rejected": 0.40675055980682373, + "logps/chosen": -0.9922550916671753, + "logps/rejected": -6.27172327041626, + "loss": 2.7005, + "nll_loss": 2.68455171585083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09922550618648529, + "rewards/margins": 0.5279468297958374, + "rewards/rejected": -0.6271723508834839, + "step": 749 + }, + { + "epoch": 0.4665629860031104, + "grad_norm": 0.6238471865653992, + "learning_rate": 3.125e-05, + "log_odds_chosen": 3.5400168895721436, + "log_odds_ratio": -0.42935463786125183, + "logits/chosen": 0.0866260677576065, + "logits/rejected": 0.37089765071868896, + "logps/chosen": -1.1119719743728638, + "logps/rejected": -4.440646171569824, + "loss": 2.7092, + "nll_loss": 2.666236400604248, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11119719594717026, + "rewards/margins": 0.33286744356155396, + "rewards/rejected": -0.4440646469593048, + "step": 750 + }, + { + "epoch": 0.4671850699844479, + "grad_norm": 0.5068716406822205, + "learning_rate": 3.122500000000001e-05, + "log_odds_chosen": 10.899909019470215, + "log_odds_ratio": -0.00871206633746624, + "logits/chosen": -0.01668960601091385, + "logits/rejected": 0.5333446264266968, + "logps/chosen": -1.2172091007232666, + "logps/rejected": -11.635497093200684, + "loss": 2.5222, + "nll_loss": 2.521331310272217, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12172091007232666, + "rewards/margins": 1.0418287515640259, + "rewards/rejected": -1.1635496616363525, + "step": 751 + }, + { + "epoch": 0.46780715396578537, + "grad_norm": 1.516399621963501, + "learning_rate": 3.12e-05, + "log_odds_chosen": 7.541528701782227, + "log_odds_ratio": -0.1825413852930069, + "logits/chosen": 0.006808616686612368, + "logits/rejected": 0.20838546752929688, + "logps/chosen": -1.2133069038391113, + "logps/rejected": -8.299160957336426, + "loss": 2.7663, + "nll_loss": 2.7480356693267822, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12133070081472397, + "rewards/margins": 0.7085853815078735, + "rewards/rejected": -0.8299161195755005, + "step": 752 + }, + { + "epoch": 0.4684292379471229, + "grad_norm": 0.43352144956588745, + "learning_rate": 3.1175000000000006e-05, + "log_odds_chosen": 7.610816955566406, + "log_odds_ratio": -0.33424389362335205, + "logits/chosen": 0.17765414714813232, + "logits/rejected": 0.3236091732978821, + "logps/chosen": -1.1976127624511719, + "logps/rejected": -8.585079193115234, + "loss": 3.4346, + "nll_loss": 3.4012205600738525, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11976128816604614, + "rewards/margins": 0.738746702671051, + "rewards/rejected": -0.8585079908370972, + "step": 753 + }, + { + "epoch": 0.46905132192846033, + "grad_norm": 0.4248764216899872, + "learning_rate": 3.115e-05, + "log_odds_chosen": 8.949850082397461, + "log_odds_ratio": -0.18056482076644897, + "logits/chosen": 0.1752690225839615, + "logits/rejected": 0.3496873676776886, + "logps/chosen": -0.8166213035583496, + "logps/rejected": -9.080907821655273, + "loss": 3.2551, + "nll_loss": 3.23703932762146, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0816621258854866, + "rewards/margins": 0.8264287710189819, + "rewards/rejected": -0.9080908894538879, + "step": 754 + }, + { + "epoch": 0.46967340590979784, + "grad_norm": 0.5243033170700073, + "learning_rate": 3.1125000000000004e-05, + "log_odds_chosen": 12.745326042175293, + "log_odds_ratio": -0.11361424624919891, + "logits/chosen": 0.1282683163881302, + "logits/rejected": 0.7375682592391968, + "logps/chosen": -1.205803394317627, + "logps/rejected": -13.61036205291748, + "loss": 2.9496, + "nll_loss": 2.938230276107788, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12058034539222717, + "rewards/margins": 1.2404558658599854, + "rewards/rejected": -1.3610363006591797, + "step": 755 + }, + { + "epoch": 0.4702954898911353, + "grad_norm": 0.5427597761154175, + "learning_rate": 3.1100000000000004e-05, + "log_odds_chosen": 10.578163146972656, + "log_odds_ratio": -0.34553229808807373, + "logits/chosen": 0.16474664211273193, + "logits/rejected": 0.526168704032898, + "logps/chosen": -1.3434865474700928, + "logps/rejected": -11.748912811279297, + "loss": 3.4079, + "nll_loss": 3.3733925819396973, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13434866070747375, + "rewards/margins": 1.040542721748352, + "rewards/rejected": -1.1748913526535034, + "step": 756 + }, + { + "epoch": 0.4709175738724728, + "grad_norm": 0.47395461797714233, + "learning_rate": 3.1075e-05, + "log_odds_chosen": 4.500313758850098, + "log_odds_ratio": -0.33959078788757324, + "logits/chosen": 0.07310894876718521, + "logits/rejected": 0.23603522777557373, + "logps/chosen": -1.0726346969604492, + "logps/rejected": -5.353082656860352, + "loss": 2.9118, + "nll_loss": 2.8778131008148193, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1072634682059288, + "rewards/margins": 0.428044855594635, + "rewards/rejected": -0.535308301448822, + "step": 757 + }, + { + "epoch": 0.47153965785381025, + "grad_norm": 0.5754133462905884, + "learning_rate": 3.105e-05, + "log_odds_chosen": 9.807731628417969, + "log_odds_ratio": -0.16157642006874084, + "logits/chosen": 0.05956118553876877, + "logits/rejected": 0.5986432433128357, + "logps/chosen": -1.3279142379760742, + "logps/rejected": -10.884170532226562, + "loss": 3.1166, + "nll_loss": 3.100430488586426, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1327914297580719, + "rewards/margins": 0.9556256532669067, + "rewards/rejected": -1.0884170532226562, + "step": 758 + }, + { + "epoch": 0.47216174183514775, + "grad_norm": 0.48073136806488037, + "learning_rate": 3.1025e-05, + "log_odds_chosen": 7.494086265563965, + "log_odds_ratio": -0.2150120735168457, + "logits/chosen": 0.2095496952533722, + "logits/rejected": 0.4400617778301239, + "logps/chosen": -1.2367106676101685, + "logps/rejected": -8.398553848266602, + "loss": 3.5941, + "nll_loss": 3.5726468563079834, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12367106974124908, + "rewards/margins": 0.7161843776702881, + "rewards/rejected": -0.839855432510376, + "step": 759 + }, + { + "epoch": 0.4727838258164852, + "grad_norm": 0.5509316325187683, + "learning_rate": 3.1e-05, + "log_odds_chosen": 7.781867504119873, + "log_odds_ratio": -0.1630517691373825, + "logits/chosen": 0.12625464797019958, + "logits/rejected": 0.6091369390487671, + "logps/chosen": -1.2734575271606445, + "logps/rejected": -8.757940292358398, + "loss": 2.8018, + "nll_loss": 2.7855429649353027, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1273457556962967, + "rewards/margins": 0.748448371887207, + "rewards/rejected": -0.8757941126823425, + "step": 760 + }, + { + "epoch": 0.4734059097978227, + "grad_norm": 0.5899271368980408, + "learning_rate": 3.0975e-05, + "log_odds_chosen": 2.6958911418914795, + "log_odds_ratio": -0.5016304850578308, + "logits/chosen": 0.13957029581069946, + "logits/rejected": 0.23938274383544922, + "logps/chosen": -1.2764240503311157, + "logps/rejected": -3.8734121322631836, + "loss": 3.2298, + "nll_loss": 3.1796460151672363, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1276424080133438, + "rewards/margins": 0.2596988081932068, + "rewards/rejected": -0.3873412013053894, + "step": 761 + }, + { + "epoch": 0.47402799377916016, + "grad_norm": 0.5521773099899292, + "learning_rate": 3.095e-05, + "log_odds_chosen": 3.7203664779663086, + "log_odds_ratio": -0.439362496137619, + "logits/chosen": -0.036664508283138275, + "logits/rejected": 0.10708197951316833, + "logps/chosen": -0.9352907538414001, + "logps/rejected": -4.322984218597412, + "loss": 2.5678, + "nll_loss": 2.52388596534729, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09352907538414001, + "rewards/margins": 0.3387693762779236, + "rewards/rejected": -0.4322984218597412, + "step": 762 + }, + { + "epoch": 0.47465007776049767, + "grad_norm": 0.5061217546463013, + "learning_rate": 3.0925000000000006e-05, + "log_odds_chosen": 12.82931137084961, + "log_odds_ratio": -0.07551414519548416, + "logits/chosen": 0.04354046285152435, + "logits/rejected": 0.9819294810295105, + "logps/chosen": -1.4580919742584229, + "logps/rejected": -13.935245513916016, + "loss": 2.7455, + "nll_loss": 2.7379651069641113, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14580918848514557, + "rewards/margins": 1.2477153539657593, + "rewards/rejected": -1.393524408340454, + "step": 763 + }, + { + "epoch": 0.4752721617418352, + "grad_norm": 0.6883053183555603, + "learning_rate": 3.09e-05, + "log_odds_chosen": 7.964182376861572, + "log_odds_ratio": -0.46369829773902893, + "logits/chosen": 0.12046404182910919, + "logits/rejected": 0.6274876594543457, + "logps/chosen": -1.3564209938049316, + "logps/rejected": -9.131847381591797, + "loss": 2.7681, + "nll_loss": 2.7217345237731934, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13564209640026093, + "rewards/margins": 0.7775427103042603, + "rewards/rejected": -0.9131847620010376, + "step": 764 + }, + { + "epoch": 0.4758942457231726, + "grad_norm": 0.44783955812454224, + "learning_rate": 3.0875000000000005e-05, + "log_odds_chosen": 9.665321350097656, + "log_odds_ratio": -0.1914052665233612, + "logits/chosen": 0.05238525569438934, + "logits/rejected": 0.6160183548927307, + "logps/chosen": -1.1813290119171143, + "logps/rejected": -10.583022117614746, + "loss": 2.9187, + "nll_loss": 2.8995184898376465, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11813290417194366, + "rewards/margins": 0.9401693940162659, + "rewards/rejected": -1.0583022832870483, + "step": 765 + }, + { + "epoch": 0.47651632970451013, + "grad_norm": 0.48133954405784607, + "learning_rate": 3.0850000000000004e-05, + "log_odds_chosen": 10.398935317993164, + "log_odds_ratio": -0.2939707934856415, + "logits/chosen": 0.196578711271286, + "logits/rejected": 0.7910341024398804, + "logps/chosen": -1.453446626663208, + "logps/rejected": -11.644519805908203, + "loss": 3.0305, + "nll_loss": 3.0011115074157715, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14534467458724976, + "rewards/margins": 1.0191072225570679, + "rewards/rejected": -1.1644519567489624, + "step": 766 + }, + { + "epoch": 0.4771384136858476, + "grad_norm": 0.7429364323616028, + "learning_rate": 3.0825000000000004e-05, + "log_odds_chosen": 8.819571495056152, + "log_odds_ratio": -0.17902621626853943, + "logits/chosen": 0.0832553580403328, + "logits/rejected": 0.9124725461006165, + "logps/chosen": -1.139229416847229, + "logps/rejected": -9.641944885253906, + "loss": 2.4093, + "nll_loss": 2.391360282897949, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11392293870449066, + "rewards/margins": 0.8502715826034546, + "rewards/rejected": -0.9641945958137512, + "step": 767 + }, + { + "epoch": 0.4777604976671851, + "grad_norm": 0.45216140151023865, + "learning_rate": 3.08e-05, + "log_odds_chosen": 10.36209774017334, + "log_odds_ratio": -0.033273592591285706, + "logits/chosen": 0.0013059796765446663, + "logits/rejected": 0.4774538576602936, + "logps/chosen": -0.9916945695877075, + "logps/rejected": -10.634331703186035, + "loss": 2.6309, + "nll_loss": 2.627547264099121, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09916945546865463, + "rewards/margins": 0.9642637372016907, + "rewards/rejected": -1.063433289527893, + "step": 768 + }, + { + "epoch": 0.47838258164852254, + "grad_norm": 0.6611552834510803, + "learning_rate": 3.0775e-05, + "log_odds_chosen": 13.858925819396973, + "log_odds_ratio": -0.059990376234054565, + "logits/chosen": 0.07334032654762268, + "logits/rejected": 0.49123767018318176, + "logps/chosen": -0.9776344299316406, + "logps/rejected": -14.378206253051758, + "loss": 3.101, + "nll_loss": 3.0949764251708984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09776344150304794, + "rewards/margins": 1.340057134628296, + "rewards/rejected": -1.437820553779602, + "step": 769 + }, + { + "epoch": 0.47900466562986005, + "grad_norm": 2.7856853008270264, + "learning_rate": 3.075e-05, + "log_odds_chosen": 11.939106941223145, + "log_odds_ratio": -0.11589374393224716, + "logits/chosen": 0.24869629740715027, + "logits/rejected": 0.7966226935386658, + "logps/chosen": -1.8126963376998901, + "logps/rejected": -13.446033477783203, + "loss": 3.6146, + "nll_loss": 3.603039264678955, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.18126963078975677, + "rewards/margins": 1.163333773612976, + "rewards/rejected": -1.3446035385131836, + "step": 770 + }, + { + "epoch": 0.4796267496111975, + "grad_norm": 0.4278852641582489, + "learning_rate": 3.0725e-05, + "log_odds_chosen": 16.7786865234375, + "log_odds_ratio": -0.15743397176265717, + "logits/chosen": 0.27345868945121765, + "logits/rejected": 0.9987517595291138, + "logps/chosen": -0.9886462092399597, + "logps/rejected": -17.29168128967285, + "loss": 3.3687, + "nll_loss": 3.3529460430145264, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09886462986469269, + "rewards/margins": 1.6303035020828247, + "rewards/rejected": -1.729168176651001, + "step": 771 + }, + { + "epoch": 0.480248833592535, + "grad_norm": 0.4929329454898834, + "learning_rate": 3.07e-05, + "log_odds_chosen": 13.330595970153809, + "log_odds_ratio": -0.08029691874980927, + "logits/chosen": 0.1175004094839096, + "logits/rejected": 0.8302413821220398, + "logps/chosen": -1.2270184755325317, + "logps/rejected": -14.165221214294434, + "loss": 2.9649, + "nll_loss": 2.956918478012085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12270184606313705, + "rewards/margins": 1.2938203811645508, + "rewards/rejected": -1.4165221452713013, + "step": 772 + }, + { + "epoch": 0.48087091757387246, + "grad_norm": 0.4613707661628723, + "learning_rate": 3.067500000000001e-05, + "log_odds_chosen": 17.801786422729492, + "log_odds_ratio": -0.0008926771115511656, + "logits/chosen": 0.22919896245002747, + "logits/rejected": 1.450141429901123, + "logps/chosen": -1.3054852485656738, + "logps/rejected": -18.71816635131836, + "loss": 2.5083, + "nll_loss": 2.5082015991210938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13054853677749634, + "rewards/margins": 1.7412681579589844, + "rewards/rejected": -1.8718167543411255, + "step": 773 + }, + { + "epoch": 0.48149300155520997, + "grad_norm": 0.4349910020828247, + "learning_rate": 3.065e-05, + "log_odds_chosen": 16.952882766723633, + "log_odds_ratio": -0.07804927229881287, + "logits/chosen": -0.0008221510797739029, + "logits/rejected": 1.0307400226593018, + "logps/chosen": -1.0544698238372803, + "logps/rejected": -17.315820693969727, + "loss": 2.7886, + "nll_loss": 2.7808361053466797, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10544697940349579, + "rewards/margins": 1.6261351108551025, + "rewards/rejected": -1.7315820455551147, + "step": 774 + }, + { + "epoch": 0.4821150855365474, + "grad_norm": 0.45079338550567627, + "learning_rate": 3.0625000000000006e-05, + "log_odds_chosen": 6.942625045776367, + "log_odds_ratio": -0.2755284011363983, + "logits/chosen": 0.1854054182767868, + "logits/rejected": 0.424145370721817, + "logps/chosen": -1.04248046875, + "logps/rejected": -7.667983055114746, + "loss": 3.4672, + "nll_loss": 3.4396426677703857, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1042480394244194, + "rewards/margins": 0.6625502705574036, + "rewards/rejected": -0.7667983174324036, + "step": 775 + }, + { + "epoch": 0.4827371695178849, + "grad_norm": 1.2325921058654785, + "learning_rate": 3.06e-05, + "log_odds_chosen": 17.07767105102539, + "log_odds_ratio": -0.07737934589385986, + "logits/chosen": 0.209869846701622, + "logits/rejected": 1.219171404838562, + "logps/chosen": -1.1802769899368286, + "logps/rejected": -17.847057342529297, + "loss": 2.9043, + "nll_loss": 2.89656138420105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1180276945233345, + "rewards/margins": 1.6666781902313232, + "rewards/rejected": -1.7847058773040771, + "step": 776 + }, + { + "epoch": 0.4833592534992224, + "grad_norm": 0.4120030999183655, + "learning_rate": 3.0575000000000005e-05, + "log_odds_chosen": 13.921056747436523, + "log_odds_ratio": -0.22028662264347076, + "logits/chosen": 0.2784203588962555, + "logits/rejected": 1.1734328269958496, + "logps/chosen": -1.1785310506820679, + "logps/rejected": -14.79477596282959, + "loss": 3.0951, + "nll_loss": 3.0730538368225098, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11785309761762619, + "rewards/margins": 1.3616244792938232, + "rewards/rejected": -1.4794776439666748, + "step": 777 + }, + { + "epoch": 0.4839813374805599, + "grad_norm": 0.5162467956542969, + "learning_rate": 3.0550000000000004e-05, + "log_odds_chosen": 12.977121353149414, + "log_odds_ratio": -0.20910266041755676, + "logits/chosen": 0.2095002979040146, + "logits/rejected": 0.5365079641342163, + "logps/chosen": -1.314969539642334, + "logps/rejected": -14.050061225891113, + "loss": 3.1444, + "nll_loss": 3.123497486114502, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13149695098400116, + "rewards/margins": 1.2735092639923096, + "rewards/rejected": -1.4050061702728271, + "step": 778 + }, + { + "epoch": 0.48460342146189733, + "grad_norm": 0.4763834774494171, + "learning_rate": 3.0525e-05, + "log_odds_chosen": 20.971172332763672, + "log_odds_ratio": -2.3841880647523794e-07, + "logits/chosen": 0.3068065345287323, + "logits/rejected": 1.4703824520111084, + "logps/chosen": -1.2722759246826172, + "logps/rejected": -21.902528762817383, + "loss": 3.1538, + "nll_loss": 3.153752088546753, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12722758948802948, + "rewards/margins": 2.0630252361297607, + "rewards/rejected": -2.1902527809143066, + "step": 779 + }, + { + "epoch": 0.48522550544323484, + "grad_norm": 0.45172974467277527, + "learning_rate": 3.05e-05, + "log_odds_chosen": 11.323856353759766, + "log_odds_ratio": -0.1366121768951416, + "logits/chosen": 0.18574324250221252, + "logits/rejected": 0.7099617719650269, + "logps/chosen": -0.6922950744628906, + "logps/rejected": -11.323362350463867, + "loss": 3.2448, + "nll_loss": 3.2311220169067383, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.06922950595617294, + "rewards/margins": 1.0631067752838135, + "rewards/rejected": -1.1323362588882446, + "step": 780 + }, + { + "epoch": 0.4858475894245723, + "grad_norm": 0.32547134160995483, + "learning_rate": 3.0475000000000002e-05, + "log_odds_chosen": 13.120780944824219, + "log_odds_ratio": -0.2054150402545929, + "logits/chosen": 0.3427540361881256, + "logits/rejected": 0.6512297987937927, + "logps/chosen": -1.148818016052246, + "logps/rejected": -13.904681205749512, + "loss": 3.6809, + "nll_loss": 3.6603195667266846, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11488181352615356, + "rewards/margins": 1.2755863666534424, + "rewards/rejected": -1.3904681205749512, + "step": 781 + }, + { + "epoch": 0.4864696734059098, + "grad_norm": 0.4283800721168518, + "learning_rate": 3.045e-05, + "log_odds_chosen": 15.075250625610352, + "log_odds_ratio": -0.05576830729842186, + "logits/chosen": 0.07458168268203735, + "logits/rejected": 0.8056328296661377, + "logps/chosen": -1.0403079986572266, + "logps/rejected": -15.61276912689209, + "loss": 2.6876, + "nll_loss": 2.682063341140747, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1040308028459549, + "rewards/margins": 1.4572460651397705, + "rewards/rejected": -1.561276912689209, + "step": 782 + }, + { + "epoch": 0.4870917573872473, + "grad_norm": 7.5606160163879395, + "learning_rate": 3.0425000000000004e-05, + "log_odds_chosen": 15.987709999084473, + "log_odds_ratio": -0.09875954687595367, + "logits/chosen": 0.29279085993766785, + "logits/rejected": 1.0353281497955322, + "logps/chosen": -1.5241639614105225, + "logps/rejected": -17.19584846496582, + "loss": 3.2454, + "nll_loss": 3.2354772090911865, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15241639316082, + "rewards/margins": 1.5671684741973877, + "rewards/rejected": -1.7195848226547241, + "step": 783 + }, + { + "epoch": 0.48771384136858476, + "grad_norm": 0.6330939531326294, + "learning_rate": 3.04e-05, + "log_odds_chosen": 7.304858684539795, + "log_odds_ratio": -0.5831460952758789, + "logits/chosen": 0.21207121014595032, + "logits/rejected": 0.46470144391059875, + "logps/chosen": -1.3569834232330322, + "logps/rejected": -8.61102294921875, + "loss": 3.3432, + "nll_loss": 3.284919261932373, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1356983482837677, + "rewards/margins": 0.725403904914856, + "rewards/rejected": -0.8611023426055908, + "step": 784 + }, + { + "epoch": 0.48833592534992226, + "grad_norm": 0.43546679615974426, + "learning_rate": 3.0375000000000003e-05, + "log_odds_chosen": 7.85859489440918, + "log_odds_ratio": -0.39412838220596313, + "logits/chosen": 0.12173283845186234, + "logits/rejected": 0.5120882391929626, + "logps/chosen": -1.2129175662994385, + "logps/rejected": -8.616832733154297, + "loss": 3.2616, + "nll_loss": 3.2221570014953613, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12129175662994385, + "rewards/margins": 0.7403914928436279, + "rewards/rejected": -0.861683189868927, + "step": 785 + }, + { + "epoch": 0.4889580093312597, + "grad_norm": 0.5060476064682007, + "learning_rate": 3.035e-05, + "log_odds_chosen": 8.088102340698242, + "log_odds_ratio": -0.1689227670431137, + "logits/chosen": 0.18627770245075226, + "logits/rejected": 0.8996896743774414, + "logps/chosen": -1.2733261585235596, + "logps/rejected": -9.087573051452637, + "loss": 3.05, + "nll_loss": 3.0331501960754395, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12733261287212372, + "rewards/margins": 0.7814247012138367, + "rewards/rejected": -0.908757209777832, + "step": 786 + }, + { + "epoch": 0.4895800933125972, + "grad_norm": 0.5102691054344177, + "learning_rate": 3.0325000000000002e-05, + "log_odds_chosen": 9.025958061218262, + "log_odds_ratio": -0.18867120146751404, + "logits/chosen": 0.2037818729877472, + "logits/rejected": 0.6893376708030701, + "logps/chosen": -1.0546302795410156, + "logps/rejected": -9.737627983093262, + "loss": 2.8651, + "nll_loss": 2.84627366065979, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10546302795410156, + "rewards/margins": 0.8682998418807983, + "rewards/rejected": -0.9737628698348999, + "step": 787 + }, + { + "epoch": 0.49020217729393467, + "grad_norm": 0.43984538316726685, + "learning_rate": 3.03e-05, + "log_odds_chosen": 10.870232582092285, + "log_odds_ratio": -0.10683636367321014, + "logits/chosen": 0.13067655265331268, + "logits/rejected": 0.9502900838851929, + "logps/chosen": -0.9612005352973938, + "logps/rejected": -11.30851936340332, + "loss": 2.3156, + "nll_loss": 2.304933547973633, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09612004458904266, + "rewards/margins": 1.0347318649291992, + "rewards/rejected": -1.1308519840240479, + "step": 788 + }, + { + "epoch": 0.4908242612752722, + "grad_norm": 0.41433024406433105, + "learning_rate": 3.0275000000000004e-05, + "log_odds_chosen": 7.59851598739624, + "log_odds_ratio": -0.12431486696004868, + "logits/chosen": 0.08263087272644043, + "logits/rejected": 0.3450954556465149, + "logps/chosen": -1.0177639722824097, + "logps/rejected": -8.221351623535156, + "loss": 3.0721, + "nll_loss": 3.0597083568573, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10177639871835709, + "rewards/margins": 0.7203587293624878, + "rewards/rejected": -0.8221350908279419, + "step": 789 + }, + { + "epoch": 0.49144634525660963, + "grad_norm": 0.480323851108551, + "learning_rate": 3.025e-05, + "log_odds_chosen": 3.6505227088928223, + "log_odds_ratio": -0.4678669273853302, + "logits/chosen": 0.12035234272480011, + "logits/rejected": 0.24431809782981873, + "logps/chosen": -1.2138129472732544, + "logps/rejected": -4.56633996963501, + "loss": 2.912, + "nll_loss": 2.865224599838257, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12138129770755768, + "rewards/margins": 0.33525270223617554, + "rewards/rejected": -0.456633985042572, + "step": 790 + }, + { + "epoch": 0.49206842923794714, + "grad_norm": 0.5126906037330627, + "learning_rate": 3.0225000000000003e-05, + "log_odds_chosen": 14.473326683044434, + "log_odds_ratio": -7.871995330788195e-05, + "logits/chosen": 0.212377667427063, + "logits/rejected": 1.0169252157211304, + "logps/chosen": -1.15001380443573, + "logps/rejected": -15.20506763458252, + "loss": 3.0556, + "nll_loss": 3.0555496215820312, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.115001380443573, + "rewards/margins": 1.4055054187774658, + "rewards/rejected": -1.520506739616394, + "step": 791 + }, + { + "epoch": 0.4926905132192846, + "grad_norm": 0.5260676145553589, + "learning_rate": 3.02e-05, + "log_odds_chosen": 9.164556503295898, + "log_odds_ratio": -0.20497088134288788, + "logits/chosen": 0.14021986722946167, + "logits/rejected": 0.6322394609451294, + "logps/chosen": -1.1137261390686035, + "logps/rejected": -9.867830276489258, + "loss": 3.3855, + "nll_loss": 3.3650379180908203, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11137261986732483, + "rewards/margins": 0.8754103183746338, + "rewards/rejected": -0.986782968044281, + "step": 792 + }, + { + "epoch": 0.4933125972006221, + "grad_norm": 1.3812450170516968, + "learning_rate": 3.0175e-05, + "log_odds_chosen": 7.82905912399292, + "log_odds_ratio": -0.5109982490539551, + "logits/chosen": 0.2852308452129364, + "logits/rejected": 0.6695021390914917, + "logps/chosen": -1.8619670867919922, + "logps/rejected": -9.53437614440918, + "loss": 3.489, + "nll_loss": 3.4378857612609863, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1861967146396637, + "rewards/margins": 0.7672408819198608, + "rewards/rejected": -0.9534375667572021, + "step": 793 + }, + { + "epoch": 0.49393468118195955, + "grad_norm": 0.47631436586380005, + "learning_rate": 3.015e-05, + "log_odds_chosen": 10.75469970703125, + "log_odds_ratio": -0.24386245012283325, + "logits/chosen": 0.14563864469528198, + "logits/rejected": 0.9450300335884094, + "logps/chosen": -1.1033132076263428, + "logps/rejected": -11.575775146484375, + "loss": 2.6249, + "nll_loss": 2.6005167961120605, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11033132672309875, + "rewards/margins": 1.0472462177276611, + "rewards/rejected": -1.1575775146484375, + "step": 794 + }, + { + "epoch": 0.49455676516329705, + "grad_norm": 0.4538606107234955, + "learning_rate": 3.0125000000000004e-05, + "log_odds_chosen": 8.877276420593262, + "log_odds_ratio": -0.19892564415931702, + "logits/chosen": 0.2505427896976471, + "logits/rejected": 0.9329760670661926, + "logps/chosen": -1.0743852853775024, + "logps/rejected": -9.534322738647461, + "loss": 2.7392, + "nll_loss": 2.7193334102630615, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10743853449821472, + "rewards/margins": 0.8459938764572144, + "rewards/rejected": -0.9534323215484619, + "step": 795 + }, + { + "epoch": 0.4951788491446345, + "grad_norm": 0.5034440755844116, + "learning_rate": 3.01e-05, + "log_odds_chosen": 6.5391740798950195, + "log_odds_ratio": -0.3063565492630005, + "logits/chosen": 0.1824992597103119, + "logits/rejected": 0.32490694522857666, + "logps/chosen": -1.5990245342254639, + "logps/rejected": -7.915644645690918, + "loss": 3.681, + "nll_loss": 3.6503705978393555, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1599024385213852, + "rewards/margins": 0.6316620111465454, + "rewards/rejected": -0.7915644645690918, + "step": 796 + }, + { + "epoch": 0.495800933125972, + "grad_norm": 0.4987301528453827, + "learning_rate": 3.0075000000000003e-05, + "log_odds_chosen": 13.378631591796875, + "log_odds_ratio": -0.15653789043426514, + "logits/chosen": 0.19882658123970032, + "logits/rejected": 1.0811190605163574, + "logps/chosen": -0.9882148504257202, + "logps/rejected": -13.973001480102539, + "loss": 2.8765, + "nll_loss": 2.860856056213379, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0988214910030365, + "rewards/margins": 1.2984788417816162, + "rewards/rejected": -1.3973002433776855, + "step": 797 + }, + { + "epoch": 0.49642301710730946, + "grad_norm": 0.35390564799308777, + "learning_rate": 3.0050000000000002e-05, + "log_odds_chosen": 11.163237571716309, + "log_odds_ratio": -0.11993933469057083, + "logits/chosen": 0.24885781109333038, + "logits/rejected": 0.7615518569946289, + "logps/chosen": -1.0514477491378784, + "logps/rejected": -11.754963874816895, + "loss": 3.3127, + "nll_loss": 3.3006787300109863, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10514476895332336, + "rewards/margins": 1.0703516006469727, + "rewards/rejected": -1.1754963397979736, + "step": 798 + }, + { + "epoch": 0.49704510108864697, + "grad_norm": 0.46855729818344116, + "learning_rate": 3.0025000000000005e-05, + "log_odds_chosen": 14.509871482849121, + "log_odds_ratio": -0.06569898873567581, + "logits/chosen": 0.13805624842643738, + "logits/rejected": 0.8150652050971985, + "logps/chosen": -1.496653437614441, + "logps/rejected": -15.731345176696777, + "loss": 3.0396, + "nll_loss": 3.0330357551574707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14966535568237305, + "rewards/margins": 1.4234691858291626, + "rewards/rejected": -1.573134422302246, + "step": 799 + }, + { + "epoch": 0.4976671850699845, + "grad_norm": 0.418714702129364, + "learning_rate": 3e-05, + "log_odds_chosen": 6.016261577606201, + "log_odds_ratio": -0.35451096296310425, + "logits/chosen": 0.012198593467473984, + "logits/rejected": 0.27333053946495056, + "logps/chosen": -1.07492995262146, + "logps/rejected": -6.799544334411621, + "loss": 2.815, + "nll_loss": 2.7795400619506836, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10749299824237823, + "rewards/margins": 0.5724614262580872, + "rewards/rejected": -0.6799544095993042, + "step": 800 + }, + { + "epoch": 0.4982892690513219, + "grad_norm": 0.45310264825820923, + "learning_rate": 2.9975000000000004e-05, + "log_odds_chosen": 12.761698722839355, + "log_odds_ratio": -0.23440547287464142, + "logits/chosen": 0.07036770135164261, + "logits/rejected": 0.8573406338691711, + "logps/chosen": -1.2407668828964233, + "logps/rejected": -13.764123916625977, + "loss": 2.4648, + "nll_loss": 2.4413938522338867, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.124076709151268, + "rewards/margins": 1.252335786819458, + "rewards/rejected": -1.3764123916625977, + "step": 801 + }, + { + "epoch": 0.49891135303265943, + "grad_norm": 0.47506943345069885, + "learning_rate": 2.995e-05, + "log_odds_chosen": 9.560124397277832, + "log_odds_ratio": -0.45071908831596375, + "logits/chosen": 0.16752222180366516, + "logits/rejected": 0.6287195086479187, + "logps/chosen": -1.2660199403762817, + "logps/rejected": -10.690040588378906, + "loss": 2.8457, + "nll_loss": 2.8006627559661865, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12660199403762817, + "rewards/margins": 0.942402184009552, + "rewards/rejected": -1.0690041780471802, + "step": 802 + }, + { + "epoch": 0.4995334370139969, + "grad_norm": 0.47123369574546814, + "learning_rate": 2.9925000000000002e-05, + "log_odds_chosen": 7.347402572631836, + "log_odds_ratio": -0.20708920061588287, + "logits/chosen": 0.1796862781047821, + "logits/rejected": 0.46771129965782166, + "logps/chosen": -0.9793645739555359, + "logps/rejected": -7.834238052368164, + "loss": 3.2493, + "nll_loss": 3.2286105155944824, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09793645888566971, + "rewards/margins": 0.6854873895645142, + "rewards/rejected": -0.7834238409996033, + "step": 803 + }, + { + "epoch": 0.5001555209953343, + "grad_norm": 0.4224500358104706, + "learning_rate": 2.9900000000000002e-05, + "log_odds_chosen": 9.55636215209961, + "log_odds_ratio": -0.20841999351978302, + "logits/chosen": 0.25128045678138733, + "logits/rejected": 0.9328861832618713, + "logps/chosen": -1.126585602760315, + "logps/rejected": -10.326457977294922, + "loss": 3.3699, + "nll_loss": 3.3491053581237793, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11265856772661209, + "rewards/margins": 0.9199872612953186, + "rewards/rejected": -1.03264582157135, + "step": 804 + }, + { + "epoch": 0.5007776049766719, + "grad_norm": 0.6790135502815247, + "learning_rate": 2.9875000000000004e-05, + "log_odds_chosen": 10.101418495178223, + "log_odds_ratio": -0.3796658217906952, + "logits/chosen": 0.1424853503704071, + "logits/rejected": 0.5358273386955261, + "logps/chosen": -1.3166006803512573, + "logps/rejected": -11.287535667419434, + "loss": 2.6253, + "nll_loss": 2.5873122215270996, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1316600739955902, + "rewards/margins": 0.9970934391021729, + "rewards/rejected": -1.1287535429000854, + "step": 805 + }, + { + "epoch": 0.5013996889580093, + "grad_norm": 0.5378172993659973, + "learning_rate": 2.985e-05, + "log_odds_chosen": 12.793437957763672, + "log_odds_ratio": -0.16563086211681366, + "logits/chosen": 0.2660048007965088, + "logits/rejected": 1.0401332378387451, + "logps/chosen": -1.2829506397247314, + "logps/rejected": -13.792176246643066, + "loss": 2.8793, + "nll_loss": 2.8627707958221436, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12829506397247314, + "rewards/margins": 1.250922679901123, + "rewards/rejected": -1.3792178630828857, + "step": 806 + }, + { + "epoch": 0.5020217729393468, + "grad_norm": 0.44759759306907654, + "learning_rate": 2.9825000000000003e-05, + "log_odds_chosen": 8.082733154296875, + "log_odds_ratio": -0.29205119609832764, + "logits/chosen": 0.08207601308822632, + "logits/rejected": 0.7699632048606873, + "logps/chosen": -1.0160481929779053, + "logps/rejected": -8.517202377319336, + "loss": 2.4715, + "nll_loss": 2.442284107208252, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10160481929779053, + "rewards/margins": 0.7501154541969299, + "rewards/rejected": -0.8517202734947205, + "step": 807 + }, + { + "epoch": 0.5026438569206843, + "grad_norm": 12.664875030517578, + "learning_rate": 2.98e-05, + "log_odds_chosen": 9.370549201965332, + "log_odds_ratio": -0.25263848900794983, + "logits/chosen": 0.18772029876708984, + "logits/rejected": 0.5844571590423584, + "logps/chosen": -1.6902470588684082, + "logps/rejected": -10.821871757507324, + "loss": 3.2462, + "nll_loss": 3.220900774002075, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16902472078800201, + "rewards/margins": 0.9131625294685364, + "rewards/rejected": -1.0821871757507324, + "step": 808 + }, + { + "epoch": 0.5032659409020218, + "grad_norm": 0.785666823387146, + "learning_rate": 2.9775000000000002e-05, + "log_odds_chosen": 11.042867660522461, + "log_odds_ratio": -0.21754157543182373, + "logits/chosen": 0.30363738536834717, + "logits/rejected": 0.7481663227081299, + "logps/chosen": -1.3917642831802368, + "logps/rejected": -12.1473388671875, + "loss": 3.4199, + "nll_loss": 3.398167133331299, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13917642831802368, + "rewards/margins": 1.0755574703216553, + "rewards/rejected": -1.2147338390350342, + "step": 809 + }, + { + "epoch": 0.5038880248833593, + "grad_norm": 0.6624091267585754, + "learning_rate": 2.975e-05, + "log_odds_chosen": 12.086601257324219, + "log_odds_ratio": -0.16491734981536865, + "logits/chosen": 0.1394706517457962, + "logits/rejected": 0.6047402620315552, + "logps/chosen": -1.2884442806243896, + "logps/rejected": -13.085152626037598, + "loss": 2.5779, + "nll_loss": 2.561450481414795, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12884442508220673, + "rewards/margins": 1.179670810699463, + "rewards/rejected": -1.3085153102874756, + "step": 810 + }, + { + "epoch": 0.5045101088646967, + "grad_norm": 0.35120460391044617, + "learning_rate": 2.9725000000000004e-05, + "log_odds_chosen": 6.397805690765381, + "log_odds_ratio": -0.39501500129699707, + "logits/chosen": 0.25109362602233887, + "logits/rejected": 0.48641854524612427, + "logps/chosen": -1.0249440670013428, + "logps/rejected": -7.021490097045898, + "loss": 3.4616, + "nll_loss": 3.4220848083496094, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10249441117048264, + "rewards/margins": 0.5996546149253845, + "rewards/rejected": -0.7021490335464478, + "step": 811 + }, + { + "epoch": 0.5051321928460342, + "grad_norm": 0.5609656572341919, + "learning_rate": 2.97e-05, + "log_odds_chosen": 15.698429107666016, + "log_odds_ratio": -0.017468160018324852, + "logits/chosen": 0.3717779815196991, + "logits/rejected": 1.3647180795669556, + "logps/chosen": -1.1899399757385254, + "logps/rejected": -16.393325805664062, + "loss": 3.1869, + "nll_loss": 3.1851115226745605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11899399757385254, + "rewards/margins": 1.520338773727417, + "rewards/rejected": -1.63933265209198, + "step": 812 + }, + { + "epoch": 0.5057542768273717, + "grad_norm": 0.436008095741272, + "learning_rate": 2.9675000000000003e-05, + "log_odds_chosen": 17.379844665527344, + "log_odds_ratio": -0.048989683389663696, + "logits/chosen": 0.2973853647708893, + "logits/rejected": 1.2079492807388306, + "logps/chosen": -1.2121381759643555, + "logps/rejected": -18.229488372802734, + "loss": 3.0875, + "nll_loss": 3.0826025009155273, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12121382355690002, + "rewards/margins": 1.7017349004745483, + "rewards/rejected": -1.822948694229126, + "step": 813 + }, + { + "epoch": 0.5063763608087092, + "grad_norm": 0.4556023180484772, + "learning_rate": 2.965e-05, + "log_odds_chosen": 17.50226402282715, + "log_odds_ratio": -0.00011185341281816363, + "logits/chosen": 0.230424702167511, + "logits/rejected": 0.8504374027252197, + "logps/chosen": -1.146848440170288, + "logps/rejected": -18.064584732055664, + "loss": 3.1448, + "nll_loss": 3.1448068618774414, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11468484997749329, + "rewards/margins": 1.6917736530303955, + "rewards/rejected": -1.8064584732055664, + "step": 814 + }, + { + "epoch": 0.5069984447900466, + "grad_norm": 0.3557608723640442, + "learning_rate": 2.9625000000000002e-05, + "log_odds_chosen": 9.623047828674316, + "log_odds_ratio": -0.2141369879245758, + "logits/chosen": 0.22592505812644958, + "logits/rejected": 0.46705248951911926, + "logps/chosen": -1.2357347011566162, + "logps/rejected": -10.610895156860352, + "loss": 3.3321, + "nll_loss": 3.3106772899627686, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12357348203659058, + "rewards/margins": 0.9375160336494446, + "rewards/rejected": -1.0610895156860352, + "step": 815 + }, + { + "epoch": 0.5076205287713841, + "grad_norm": 0.4675641655921936, + "learning_rate": 2.96e-05, + "log_odds_chosen": 14.062185287475586, + "log_odds_ratio": -0.10161672532558441, + "logits/chosen": 0.18339157104492188, + "logits/rejected": 1.1679893732070923, + "logps/chosen": -1.142388105392456, + "logps/rejected": -14.841217994689941, + "loss": 2.7793, + "nll_loss": 2.769094705581665, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11423881351947784, + "rewards/margins": 1.3698830604553223, + "rewards/rejected": -1.4841217994689941, + "step": 816 + }, + { + "epoch": 0.5082426127527216, + "grad_norm": 0.3435609042644501, + "learning_rate": 2.9575000000000004e-05, + "log_odds_chosen": 11.574292182922363, + "log_odds_ratio": -0.1866357922554016, + "logits/chosen": 0.3857420086860657, + "logits/rejected": 1.0843833684921265, + "logps/chosen": -1.4344037771224976, + "logps/rejected": -12.822274208068848, + "loss": 3.5625, + "nll_loss": 3.543877124786377, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1434403955936432, + "rewards/margins": 1.1387871503829956, + "rewards/rejected": -1.2822275161743164, + "step": 817 + }, + { + "epoch": 0.5088646967340591, + "grad_norm": 0.463178813457489, + "learning_rate": 2.955e-05, + "log_odds_chosen": 11.81800651550293, + "log_odds_ratio": -0.18851415812969208, + "logits/chosen": 0.285900741815567, + "logits/rejected": 1.0203773975372314, + "logps/chosen": -1.2619681358337402, + "logps/rejected": -12.845138549804688, + "loss": 3.0084, + "nll_loss": 2.9895620346069336, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12619681656360626, + "rewards/margins": 1.1583170890808105, + "rewards/rejected": -1.2845139503479004, + "step": 818 + }, + { + "epoch": 0.5094867807153965, + "grad_norm": 0.38956987857818604, + "learning_rate": 2.9525000000000003e-05, + "log_odds_chosen": 14.569768905639648, + "log_odds_ratio": -0.0029117946978658438, + "logits/chosen": 0.19263651967048645, + "logits/rejected": 1.1464442014694214, + "logps/chosen": -0.9850395917892456, + "logps/rejected": -15.035454750061035, + "loss": 2.6472, + "nll_loss": 2.646864891052246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0985039621591568, + "rewards/margins": 1.4050414562225342, + "rewards/rejected": -1.5035455226898193, + "step": 819 + }, + { + "epoch": 0.5101088646967341, + "grad_norm": 0.3614479601383209, + "learning_rate": 2.95e-05, + "log_odds_chosen": 13.27166748046875, + "log_odds_ratio": -0.0006157811731100082, + "logits/chosen": 0.2117461860179901, + "logits/rejected": 1.3962442874908447, + "logps/chosen": -1.2849613428115845, + "logps/rejected": -14.16645622253418, + "loss": 3.0077, + "nll_loss": 3.0076475143432617, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12849614024162292, + "rewards/margins": 1.2881494760513306, + "rewards/rejected": -1.4166457653045654, + "step": 820 + }, + { + "epoch": 0.5107309486780716, + "grad_norm": 0.4717678129673004, + "learning_rate": 2.9475e-05, + "log_odds_chosen": 10.178583145141602, + "log_odds_ratio": -0.13750000298023224, + "logits/chosen": 0.20223943889141083, + "logits/rejected": 1.0256826877593994, + "logps/chosen": -0.9736385345458984, + "logps/rejected": -10.707265853881836, + "loss": 2.6019, + "nll_loss": 2.5881075859069824, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09736385941505432, + "rewards/margins": 0.9733626842498779, + "rewards/rejected": -1.0707266330718994, + "step": 821 + }, + { + "epoch": 0.511353032659409, + "grad_norm": 0.37724214792251587, + "learning_rate": 2.945e-05, + "log_odds_chosen": 15.004103660583496, + "log_odds_ratio": -0.0004430253757163882, + "logits/chosen": 0.3318445086479187, + "logits/rejected": 0.974646806716919, + "logps/chosen": -1.1180328130722046, + "logps/rejected": -15.67713451385498, + "loss": 3.4453, + "nll_loss": 3.4452362060546875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11180329322814941, + "rewards/margins": 1.4559102058410645, + "rewards/rejected": -1.5677136182785034, + "step": 822 + }, + { + "epoch": 0.5119751166407465, + "grad_norm": 0.4549922049045563, + "learning_rate": 2.9425000000000004e-05, + "log_odds_chosen": 15.360553741455078, + "log_odds_ratio": -0.050239816308021545, + "logits/chosen": 0.44666236639022827, + "logits/rejected": 1.4149678945541382, + "logps/chosen": -1.1860952377319336, + "logps/rejected": -15.948168754577637, + "loss": 3.2089, + "nll_loss": 3.203843593597412, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11860951781272888, + "rewards/margins": 1.4762073755264282, + "rewards/rejected": -1.5948166847229004, + "step": 823 + }, + { + "epoch": 0.512597200622084, + "grad_norm": 0.535690426826477, + "learning_rate": 2.94e-05, + "log_odds_chosen": 8.157598495483398, + "log_odds_ratio": -0.32862338423728943, + "logits/chosen": 0.19047969579696655, + "logits/rejected": 0.8336578011512756, + "logps/chosen": -1.151352882385254, + "logps/rejected": -8.862951278686523, + "loss": 2.6731, + "nll_loss": 2.6401925086975098, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11513528972864151, + "rewards/margins": 0.771159827709198, + "rewards/rejected": -0.8862950801849365, + "step": 824 + }, + { + "epoch": 0.5132192846034215, + "grad_norm": 0.38767126202583313, + "learning_rate": 2.9375000000000003e-05, + "log_odds_chosen": 8.195636749267578, + "log_odds_ratio": -0.2015450894832611, + "logits/chosen": 0.21387703716754913, + "logits/rejected": 0.6429730653762817, + "logps/chosen": -1.1645689010620117, + "logps/rejected": -8.997748374938965, + "loss": 3.2326, + "nll_loss": 3.212407112121582, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11645688116550446, + "rewards/margins": 0.7833179831504822, + "rewards/rejected": -0.8997748494148254, + "step": 825 + }, + { + "epoch": 0.5138413685847589, + "grad_norm": 0.5092079043388367, + "learning_rate": 2.935e-05, + "log_odds_chosen": 5.449245929718018, + "log_odds_ratio": -0.11578687280416489, + "logits/chosen": 0.24035300314426422, + "logits/rejected": 0.5837621092796326, + "logps/chosen": -1.1935652494430542, + "logps/rejected": -6.307559490203857, + "loss": 3.0066, + "nll_loss": 2.9950246810913086, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11935652047395706, + "rewards/margins": 0.5113994479179382, + "rewards/rejected": -0.6307559013366699, + "step": 826 + }, + { + "epoch": 0.5144634525660964, + "grad_norm": 0.6121360063552856, + "learning_rate": 2.9325e-05, + "log_odds_chosen": 8.71769905090332, + "log_odds_ratio": -0.22162342071533203, + "logits/chosen": 0.2978130578994751, + "logits/rejected": 0.6275981664657593, + "logps/chosen": -1.168921709060669, + "logps/rejected": -9.59075927734375, + "loss": 3.1936, + "nll_loss": 3.171393871307373, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11689215898513794, + "rewards/margins": 0.8421837687492371, + "rewards/rejected": -0.959075927734375, + "step": 827 + }, + { + "epoch": 0.5150855365474339, + "grad_norm": 0.4831790626049042, + "learning_rate": 2.93e-05, + "log_odds_chosen": 14.293399810791016, + "log_odds_ratio": -0.09568853676319122, + "logits/chosen": 0.3437972664833069, + "logits/rejected": 1.154407262802124, + "logps/chosen": -1.0440468788146973, + "logps/rejected": -14.955587387084961, + "loss": 3.0785, + "nll_loss": 3.068910837173462, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10440470278263092, + "rewards/margins": 1.3911540508270264, + "rewards/rejected": -1.495558500289917, + "step": 828 + }, + { + "epoch": 0.5157076205287714, + "grad_norm": 0.8993392586708069, + "learning_rate": 2.9275000000000003e-05, + "log_odds_chosen": 12.711183547973633, + "log_odds_ratio": -0.09046018123626709, + "logits/chosen": 0.41627803444862366, + "logits/rejected": 1.3701362609863281, + "logps/chosen": -1.0129324197769165, + "logps/rejected": -13.203245162963867, + "loss": 2.6354, + "nll_loss": 2.626333236694336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10129325091838837, + "rewards/margins": 1.2190312147140503, + "rewards/rejected": -1.3203246593475342, + "step": 829 + }, + { + "epoch": 0.5163297045101088, + "grad_norm": 0.5971187949180603, + "learning_rate": 2.925e-05, + "log_odds_chosen": 7.836894989013672, + "log_odds_ratio": -0.3798048496246338, + "logits/chosen": 0.3004685938358307, + "logits/rejected": 0.724410355091095, + "logps/chosen": -1.366215705871582, + "logps/rejected": -9.068084716796875, + "loss": 3.2621, + "nll_loss": 3.224073886871338, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13662157952785492, + "rewards/margins": 0.7701869606971741, + "rewards/rejected": -0.9068085551261902, + "step": 830 + }, + { + "epoch": 0.5169517884914463, + "grad_norm": 0.36452698707580566, + "learning_rate": 2.9225000000000002e-05, + "log_odds_chosen": 15.304241180419922, + "log_odds_ratio": -0.0003950538521166891, + "logits/chosen": 0.3968985080718994, + "logits/rejected": 1.0415889024734497, + "logps/chosen": -1.0593630075454712, + "logps/rejected": -15.823348999023438, + "loss": 3.9988, + "nll_loss": 3.9988064765930176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10593629628419876, + "rewards/margins": 1.4763985872268677, + "rewards/rejected": -1.5823347568511963, + "step": 831 + }, + { + "epoch": 0.5175738724727839, + "grad_norm": 1.9738948345184326, + "learning_rate": 2.9199999999999998e-05, + "log_odds_chosen": 12.84214973449707, + "log_odds_ratio": -0.1361134946346283, + "logits/chosen": 0.08501138538122177, + "logits/rejected": 0.4218248426914215, + "logps/chosen": -1.936566710472107, + "logps/rejected": -14.241930961608887, + "loss": 3.0152, + "nll_loss": 3.0015807151794434, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19365663826465607, + "rewards/margins": 1.2305364608764648, + "rewards/rejected": -1.4241931438446045, + "step": 832 + }, + { + "epoch": 0.5181959564541213, + "grad_norm": 0.4944245219230652, + "learning_rate": 2.9175e-05, + "log_odds_chosen": 20.146743774414062, + "log_odds_ratio": -9.030426554090809e-06, + "logits/chosen": 0.2090873271226883, + "logits/rejected": 1.6611508131027222, + "logps/chosen": -1.001164197921753, + "logps/rejected": -20.59023094177246, + "loss": 2.5188, + "nll_loss": 2.5187838077545166, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10011641681194305, + "rewards/margins": 1.958906650543213, + "rewards/rejected": -2.059023141860962, + "step": 833 + }, + { + "epoch": 0.5188180404354588, + "grad_norm": 1.0386806726455688, + "learning_rate": 2.915e-05, + "log_odds_chosen": 10.240373611450195, + "log_odds_ratio": -0.17523886263370514, + "logits/chosen": 0.2517136335372925, + "logits/rejected": 0.601931095123291, + "logps/chosen": -1.1842358112335205, + "logps/rejected": -11.045554161071777, + "loss": 3.3807, + "nll_loss": 3.363182544708252, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11842358112335205, + "rewards/margins": 0.9861319661140442, + "rewards/rejected": -1.1045554876327515, + "step": 834 + }, + { + "epoch": 0.5194401244167963, + "grad_norm": 4.311429023742676, + "learning_rate": 2.9125000000000003e-05, + "log_odds_chosen": 12.021062850952148, + "log_odds_ratio": -0.011027473025023937, + "logits/chosen": 0.22837401926517487, + "logits/rejected": 0.7856446504592896, + "logps/chosen": -1.6762373447418213, + "logps/rejected": -13.32023811340332, + "loss": 2.9735, + "nll_loss": 2.9724273681640625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16762374341487885, + "rewards/margins": 1.1644001007080078, + "rewards/rejected": -1.3320238590240479, + "step": 835 + }, + { + "epoch": 0.5200622083981338, + "grad_norm": 1.8414102792739868, + "learning_rate": 2.91e-05, + "log_odds_chosen": 13.802828788757324, + "log_odds_ratio": -0.06723160296678543, + "logits/chosen": 0.19973208010196686, + "logits/rejected": 1.1031876802444458, + "logps/chosen": -1.8014975786209106, + "logps/rejected": -15.22963809967041, + "loss": 2.9892, + "nll_loss": 2.982433319091797, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18014976382255554, + "rewards/margins": 1.3428139686584473, + "rewards/rejected": -1.5229637622833252, + "step": 836 + }, + { + "epoch": 0.5206842923794712, + "grad_norm": 0.47048988938331604, + "learning_rate": 2.9075000000000002e-05, + "log_odds_chosen": 16.698774337768555, + "log_odds_ratio": -1.1175909548910568e-06, + "logits/chosen": 0.33147627115249634, + "logits/rejected": 1.3666659593582153, + "logps/chosen": -1.1629831790924072, + "logps/rejected": -17.404766082763672, + "loss": 3.2272, + "nll_loss": 3.2272276878356934, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11629832535982132, + "rewards/margins": 1.624178171157837, + "rewards/rejected": -1.7404766082763672, + "step": 837 + }, + { + "epoch": 0.5213063763608087, + "grad_norm": 1.6325106620788574, + "learning_rate": 2.9049999999999998e-05, + "log_odds_chosen": 4.975150108337402, + "log_odds_ratio": -0.32594966888427734, + "logits/chosen": 0.16253390908241272, + "logits/rejected": 0.3543585538864136, + "logps/chosen": -1.0561513900756836, + "logps/rejected": -5.589508056640625, + "loss": 3.1341, + "nll_loss": 3.1014583110809326, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10561515390872955, + "rewards/margins": 0.453335702419281, + "rewards/rejected": -0.5589507818222046, + "step": 838 + }, + { + "epoch": 0.5219284603421462, + "grad_norm": 0.4523632228374481, + "learning_rate": 2.9025e-05, + "log_odds_chosen": 13.034244537353516, + "log_odds_ratio": -0.0004373805713839829, + "logits/chosen": 0.23802269995212555, + "logits/rejected": 1.02839994430542, + "logps/chosen": -1.0669258832931519, + "logps/rejected": -13.587167739868164, + "loss": 2.9686, + "nll_loss": 2.968569755554199, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1066925898194313, + "rewards/margins": 1.2520242929458618, + "rewards/rejected": -1.3587168455123901, + "step": 839 + }, + { + "epoch": 0.5225505443234837, + "grad_norm": 0.46523481607437134, + "learning_rate": 2.9e-05, + "log_odds_chosen": 5.310853958129883, + "log_odds_ratio": -0.19879606366157532, + "logits/chosen": 0.12425953149795532, + "logits/rejected": 0.24234220385551453, + "logps/chosen": -0.9893929362297058, + "logps/rejected": -5.892968654632568, + "loss": 3.0965, + "nll_loss": 3.0765981674194336, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09893929213285446, + "rewards/margins": 0.49035757780075073, + "rewards/rejected": -0.5892968773841858, + "step": 840 + }, + { + "epoch": 0.5231726283048211, + "grad_norm": 0.43103811144828796, + "learning_rate": 2.8975000000000003e-05, + "log_odds_chosen": 7.345240592956543, + "log_odds_ratio": -0.3413081169128418, + "logits/chosen": 0.015453029423952103, + "logits/rejected": 0.28171202540397644, + "logps/chosen": -0.8644816279411316, + "logps/rejected": -7.795204162597656, + "loss": 2.733, + "nll_loss": 2.6988906860351562, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08644817024469376, + "rewards/margins": 0.6930723190307617, + "rewards/rejected": -0.7795204520225525, + "step": 841 + }, + { + "epoch": 0.5237947122861586, + "grad_norm": 0.4897030293941498, + "learning_rate": 2.895e-05, + "log_odds_chosen": 4.109506607055664, + "log_odds_ratio": -0.5368872284889221, + "logits/chosen": 0.2146613895893097, + "logits/rejected": 0.4014909565448761, + "logps/chosen": -1.3654534816741943, + "logps/rejected": -5.355642318725586, + "loss": 3.0509, + "nll_loss": 2.997218132019043, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1365453451871872, + "rewards/margins": 0.39901888370513916, + "rewards/rejected": -0.5355641841888428, + "step": 842 + }, + { + "epoch": 0.5244167962674962, + "grad_norm": 0.47838544845581055, + "learning_rate": 2.8925000000000002e-05, + "log_odds_chosen": 2.614036798477173, + "log_odds_ratio": -0.4196464419364929, + "logits/chosen": 0.04910755902528763, + "logits/rejected": 0.2252475917339325, + "logps/chosen": -1.306127667427063, + "logps/rejected": -3.7415530681610107, + "loss": 2.6839, + "nll_loss": 2.641903877258301, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13061276078224182, + "rewards/margins": 0.24354252219200134, + "rewards/rejected": -0.37415528297424316, + "step": 843 + }, + { + "epoch": 0.5250388802488336, + "grad_norm": 0.329008549451828, + "learning_rate": 2.8899999999999998e-05, + "log_odds_chosen": 4.632389068603516, + "log_odds_ratio": -0.3140389621257782, + "logits/chosen": 0.2015816867351532, + "logits/rejected": 0.3459731340408325, + "logps/chosen": -1.283074975013733, + "logps/rejected": -5.729971408843994, + "loss": 3.4623, + "nll_loss": 3.430915355682373, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1283074915409088, + "rewards/margins": 0.44468966126441956, + "rewards/rejected": -0.5729971528053284, + "step": 844 + }, + { + "epoch": 0.5256609642301711, + "grad_norm": 0.43985334038734436, + "learning_rate": 2.8875e-05, + "log_odds_chosen": 1.6101486682891846, + "log_odds_ratio": -0.4912552237510681, + "logits/chosen": 0.13369233906269073, + "logits/rejected": 0.1671290248632431, + "logps/chosen": -1.0167124271392822, + "logps/rejected": -2.167614698410034, + "loss": 2.8231, + "nll_loss": 2.7740092277526855, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1016712337732315, + "rewards/margins": 0.11509023606777191, + "rewards/rejected": -0.2167614847421646, + "step": 845 + }, + { + "epoch": 0.5262830482115085, + "grad_norm": 0.4271375834941864, + "learning_rate": 2.885e-05, + "log_odds_chosen": 7.256542205810547, + "log_odds_ratio": -0.037149470299482346, + "logits/chosen": 0.24337813258171082, + "logits/rejected": 0.6470229625701904, + "logps/chosen": -0.8154711723327637, + "logps/rejected": -7.411262512207031, + "loss": 3.2228, + "nll_loss": 3.219062328338623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08154712617397308, + "rewards/margins": 0.6595790386199951, + "rewards/rejected": -0.7411262392997742, + "step": 846 + }, + { + "epoch": 0.5269051321928461, + "grad_norm": 1.1752288341522217, + "learning_rate": 2.8825000000000003e-05, + "log_odds_chosen": 5.8892974853515625, + "log_odds_ratio": -0.18083639442920685, + "logits/chosen": 0.22460246086120605, + "logits/rejected": 0.6375922560691833, + "logps/chosen": -1.37664794921875, + "logps/rejected": -7.0137481689453125, + "loss": 3.0231, + "nll_loss": 3.0050535202026367, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1376648098230362, + "rewards/margins": 0.5637100338935852, + "rewards/rejected": -0.701374888420105, + "step": 847 + }, + { + "epoch": 0.5275272161741835, + "grad_norm": 0.3938504457473755, + "learning_rate": 2.88e-05, + "log_odds_chosen": 6.437436580657959, + "log_odds_ratio": -0.1518944650888443, + "logits/chosen": 0.2168705314397812, + "logits/rejected": 0.40676745772361755, + "logps/chosen": -1.264991283416748, + "logps/rejected": -7.434615135192871, + "loss": 3.5069, + "nll_loss": 3.4916627407073975, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12649914622306824, + "rewards/margins": 0.6169624328613281, + "rewards/rejected": -0.7434616088867188, + "step": 848 + }, + { + "epoch": 0.528149300155521, + "grad_norm": 0.36738061904907227, + "learning_rate": 2.8775e-05, + "log_odds_chosen": 4.217384338378906, + "log_odds_ratio": -0.19647136330604553, + "logits/chosen": 0.1279819905757904, + "logits/rejected": 0.30928248167037964, + "logps/chosen": -1.111844539642334, + "logps/rejected": -4.994659423828125, + "loss": 3.0897, + "nll_loss": 3.0701029300689697, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11118445545434952, + "rewards/margins": 0.3882814049720764, + "rewards/rejected": -0.4994658827781677, + "step": 849 + }, + { + "epoch": 0.5287713841368584, + "grad_norm": 0.49180299043655396, + "learning_rate": 2.8749999999999997e-05, + "log_odds_chosen": 3.4940242767333984, + "log_odds_ratio": -0.29053792357444763, + "logits/chosen": 0.3198105990886688, + "logits/rejected": 0.5212329030036926, + "logps/chosen": -1.107250452041626, + "logps/rejected": -4.281674385070801, + "loss": 3.3457, + "nll_loss": 3.3166162967681885, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1107250452041626, + "rewards/margins": 0.317442387342453, + "rewards/rejected": -0.4281674325466156, + "step": 850 + }, + { + "epoch": 0.529393468118196, + "grad_norm": 0.5136086940765381, + "learning_rate": 2.8725e-05, + "log_odds_chosen": 4.076441764831543, + "log_odds_ratio": -0.2946397066116333, + "logits/chosen": -0.006016634404659271, + "logits/rejected": 0.35069453716278076, + "logps/chosen": -1.2343401908874512, + "logps/rejected": -5.004025936126709, + "loss": 2.3004, + "nll_loss": 2.270923376083374, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12343402206897736, + "rewards/margins": 0.3769685626029968, + "rewards/rejected": -0.500402569770813, + "step": 851 + }, + { + "epoch": 0.5300155520995334, + "grad_norm": 0.42883893847465515, + "learning_rate": 2.87e-05, + "log_odds_chosen": 2.978586196899414, + "log_odds_ratio": -0.30816155672073364, + "logits/chosen": 0.10813058167695999, + "logits/rejected": 0.12170670181512833, + "logps/chosen": -0.927642822265625, + "logps/rejected": -3.484469413757324, + "loss": 3.1087, + "nll_loss": 3.0778818130493164, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09276428818702698, + "rewards/margins": 0.25568264722824097, + "rewards/rejected": -0.34844693541526794, + "step": 852 + }, + { + "epoch": 0.5306376360808709, + "grad_norm": 0.4703337550163269, + "learning_rate": 2.8675000000000002e-05, + "log_odds_chosen": 4.520643711090088, + "log_odds_ratio": -0.3384997248649597, + "logits/chosen": 0.1783015877008438, + "logits/rejected": 0.42561283707618713, + "logps/chosen": -1.0047498941421509, + "logps/rejected": -5.196669101715088, + "loss": 2.9576, + "nll_loss": 2.923743486404419, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10047499090433121, + "rewards/margins": 0.41919195652008057, + "rewards/rejected": -0.5196669101715088, + "step": 853 + }, + { + "epoch": 0.5312597200622085, + "grad_norm": 0.5501247644424438, + "learning_rate": 2.865e-05, + "log_odds_chosen": 3.5761749744415283, + "log_odds_ratio": -0.1852397471666336, + "logits/chosen": 0.2851634919643402, + "logits/rejected": 0.3833601772785187, + "logps/chosen": -1.2000057697296143, + "logps/rejected": -4.437448024749756, + "loss": 3.2604, + "nll_loss": 3.241886615753174, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12000058591365814, + "rewards/margins": 0.3237442076206207, + "rewards/rejected": -0.44374483823776245, + "step": 854 + }, + { + "epoch": 0.5318818040435459, + "grad_norm": 0.4883931577205658, + "learning_rate": 2.8625e-05, + "log_odds_chosen": 4.188264846801758, + "log_odds_ratio": -0.2911498546600342, + "logits/chosen": 0.11234842240810394, + "logits/rejected": 0.4352606236934662, + "logps/chosen": -1.0508315563201904, + "logps/rejected": -4.962338447570801, + "loss": 2.8563, + "nll_loss": 2.827171564102173, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1050831526517868, + "rewards/margins": 0.39115068316459656, + "rewards/rejected": -0.49623382091522217, + "step": 855 + }, + { + "epoch": 0.5325038880248834, + "grad_norm": 0.523665189743042, + "learning_rate": 2.86e-05, + "log_odds_chosen": 5.883430004119873, + "log_odds_ratio": -0.3124341666698456, + "logits/chosen": 0.18846668303012848, + "logits/rejected": 0.35798412561416626, + "logps/chosen": -0.9777987003326416, + "logps/rejected": -6.380369663238525, + "loss": 3.1017, + "nll_loss": 3.070420026779175, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09777987003326416, + "rewards/margins": 0.5402571558952332, + "rewards/rejected": -0.6380370259284973, + "step": 856 + }, + { + "epoch": 0.5331259720062208, + "grad_norm": 0.4752536714076996, + "learning_rate": 2.8575000000000003e-05, + "log_odds_chosen": 4.861764907836914, + "log_odds_ratio": -0.13584004342556, + "logits/chosen": 0.29713207483291626, + "logits/rejected": 0.5332407355308533, + "logps/chosen": -1.4573677778244019, + "logps/rejected": -6.071244239807129, + "loss": 3.228, + "nll_loss": 3.2143874168395996, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14573678374290466, + "rewards/margins": 0.46138763427734375, + "rewards/rejected": -0.6071244478225708, + "step": 857 + }, + { + "epoch": 0.5337480559875584, + "grad_norm": 0.3933459222316742, + "learning_rate": 2.855e-05, + "log_odds_chosen": 5.766116142272949, + "log_odds_ratio": -0.2710520327091217, + "logits/chosen": 0.2104707956314087, + "logits/rejected": 0.3261168599128723, + "logps/chosen": -0.9775829315185547, + "logps/rejected": -6.416999816894531, + "loss": 3.2929, + "nll_loss": 3.2657809257507324, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09775829315185547, + "rewards/margins": 0.5439417362213135, + "rewards/rejected": -0.6416999697685242, + "step": 858 + }, + { + "epoch": 0.5343701399688958, + "grad_norm": 0.5813342332839966, + "learning_rate": 2.8525000000000002e-05, + "log_odds_chosen": 4.442496299743652, + "log_odds_ratio": -0.17784219980239868, + "logits/chosen": 0.24876517057418823, + "logits/rejected": 0.58121657371521, + "logps/chosen": -1.5985615253448486, + "logps/rejected": -5.860179901123047, + "loss": 3.1737, + "nll_loss": 3.155898094177246, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1598561555147171, + "rewards/margins": 0.42616188526153564, + "rewards/rejected": -0.5860180258750916, + "step": 859 + }, + { + "epoch": 0.5349922239502333, + "grad_norm": 0.4372238218784332, + "learning_rate": 2.8499999999999998e-05, + "log_odds_chosen": 5.981117248535156, + "log_odds_ratio": -0.1759411096572876, + "logits/chosen": 0.1712474822998047, + "logits/rejected": 0.4489181637763977, + "logps/chosen": -1.0640121698379517, + "logps/rejected": -6.628488540649414, + "loss": 3.1263, + "nll_loss": 3.1087098121643066, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.106401227414608, + "rewards/margins": 0.5564476251602173, + "rewards/rejected": -0.6628488302230835, + "step": 860 + }, + { + "epoch": 0.5356143079315707, + "grad_norm": 0.5723143815994263, + "learning_rate": 2.8475e-05, + "log_odds_chosen": 3.2769057750701904, + "log_odds_ratio": -0.3183494210243225, + "logits/chosen": 0.15005794167518616, + "logits/rejected": 0.4674699604511261, + "logps/chosen": -1.1343958377838135, + "logps/rejected": -4.155270576477051, + "loss": 2.7241, + "nll_loss": 2.6922266483306885, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11343959718942642, + "rewards/margins": 0.3020874857902527, + "rewards/rejected": -0.4155270755290985, + "step": 861 + }, + { + "epoch": 0.5362363919129083, + "grad_norm": 0.3801248073577881, + "learning_rate": 2.845e-05, + "log_odds_chosen": 5.401886940002441, + "log_odds_ratio": -0.1325007528066635, + "logits/chosen": 0.2797131836414337, + "logits/rejected": 0.7725595235824585, + "logps/chosen": -1.2554233074188232, + "logps/rejected": -6.341432094573975, + "loss": 3.4174, + "nll_loss": 3.4041857719421387, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12554234266281128, + "rewards/margins": 0.5086008906364441, + "rewards/rejected": -0.6341432332992554, + "step": 862 + }, + { + "epoch": 0.5368584758942457, + "grad_norm": 0.45833510160446167, + "learning_rate": 2.8425000000000003e-05, + "log_odds_chosen": 7.4684834480285645, + "log_odds_ratio": -0.2111528515815735, + "logits/chosen": 0.3720274269580841, + "logits/rejected": 1.0443079471588135, + "logps/chosen": -1.074413776397705, + "logps/rejected": -8.263851165771484, + "loss": 3.4695, + "nll_loss": 3.448345184326172, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10744138062000275, + "rewards/margins": 0.7189437747001648, + "rewards/rejected": -0.8263851404190063, + "step": 863 + }, + { + "epoch": 0.5374805598755832, + "grad_norm": 0.5950579047203064, + "learning_rate": 2.84e-05, + "log_odds_chosen": 6.653730392456055, + "log_odds_ratio": -0.2213490754365921, + "logits/chosen": 0.1977083534002304, + "logits/rejected": 0.6113790273666382, + "logps/chosen": -1.0264054536819458, + "logps/rejected": -7.271251678466797, + "loss": 2.6901, + "nll_loss": 2.667936086654663, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1026405543088913, + "rewards/margins": 0.624484658241272, + "rewards/rejected": -0.7271252274513245, + "step": 864 + }, + { + "epoch": 0.5381026438569206, + "grad_norm": 0.5189838409423828, + "learning_rate": 2.8375000000000002e-05, + "log_odds_chosen": 4.811558246612549, + "log_odds_ratio": -0.20530752837657928, + "logits/chosen": 0.29856374859809875, + "logits/rejected": 0.7025430202484131, + "logps/chosen": -1.3187425136566162, + "logps/rejected": -5.89030122756958, + "loss": 3.147, + "nll_loss": 3.1264843940734863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13187426328659058, + "rewards/margins": 0.45715591311454773, + "rewards/rejected": -0.5890301465988159, + "step": 865 + }, + { + "epoch": 0.5387247278382582, + "grad_norm": 0.5219860076904297, + "learning_rate": 2.8349999999999998e-05, + "log_odds_chosen": 10.06355094909668, + "log_odds_ratio": -0.06661475449800491, + "logits/chosen": 0.16469168663024902, + "logits/rejected": 0.5855368971824646, + "logps/chosen": -0.9745451807975769, + "logps/rejected": -10.555267333984375, + "loss": 3.0044, + "nll_loss": 2.997767448425293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09745452553033829, + "rewards/margins": 0.9580721855163574, + "rewards/rejected": -1.0555267333984375, + "step": 866 + }, + { + "epoch": 0.5393468118195957, + "grad_norm": 0.40929824113845825, + "learning_rate": 2.8325e-05, + "log_odds_chosen": 11.310032844543457, + "log_odds_ratio": -0.1569271832704544, + "logits/chosen": 0.28702273964881897, + "logits/rejected": 0.9445208311080933, + "logps/chosen": -1.1780142784118652, + "logps/rejected": -12.161791801452637, + "loss": 3.249, + "nll_loss": 3.2333545684814453, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11780144274234772, + "rewards/margins": 1.0983777046203613, + "rewards/rejected": -1.2161791324615479, + "step": 867 + }, + { + "epoch": 0.5399688958009331, + "grad_norm": 0.5718206763267517, + "learning_rate": 2.83e-05, + "log_odds_chosen": 9.48852252960205, + "log_odds_ratio": -0.23974888026714325, + "logits/chosen": 0.20256325602531433, + "logits/rejected": 0.7612900733947754, + "logps/chosen": -1.1071330308914185, + "logps/rejected": -10.291450500488281, + "loss": 2.7763, + "nll_loss": 2.752324104309082, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11071330308914185, + "rewards/margins": 0.91843181848526, + "rewards/rejected": -1.0291451215744019, + "step": 868 + }, + { + "epoch": 0.5405909797822706, + "grad_norm": 0.5120429396629333, + "learning_rate": 2.8275000000000003e-05, + "log_odds_chosen": 7.586127281188965, + "log_odds_ratio": -0.3463227152824402, + "logits/chosen": 0.20187543332576752, + "logits/rejected": 0.6514379382133484, + "logps/chosen": -1.0916359424591064, + "logps/rejected": -8.361661911010742, + "loss": 3.122, + "nll_loss": 3.087322235107422, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10916360467672348, + "rewards/margins": 0.7270025014877319, + "rewards/rejected": -0.8361661434173584, + "step": 869 + }, + { + "epoch": 0.5412130637636081, + "grad_norm": 0.5298638939857483, + "learning_rate": 2.825e-05, + "log_odds_chosen": 10.231219291687012, + "log_odds_ratio": -0.11733568459749222, + "logits/chosen": 0.16062955558300018, + "logits/rejected": 0.6653136610984802, + "logps/chosen": -1.0218132734298706, + "logps/rejected": -10.77658748626709, + "loss": 2.9773, + "nll_loss": 2.9655981063842773, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1021813303232193, + "rewards/margins": 0.9754774570465088, + "rewards/rejected": -1.0776586532592773, + "step": 870 + }, + { + "epoch": 0.5418351477449456, + "grad_norm": 0.629223644733429, + "learning_rate": 2.8225e-05, + "log_odds_chosen": 12.822935104370117, + "log_odds_ratio": -0.16422484815120697, + "logits/chosen": 0.18336540460586548, + "logits/rejected": 1.1963590383529663, + "logps/chosen": -1.1354261636734009, + "logps/rejected": -13.514695167541504, + "loss": 2.4868, + "nll_loss": 2.4704039096832275, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11354262381792068, + "rewards/margins": 1.2379268407821655, + "rewards/rejected": -1.3514695167541504, + "step": 871 + }, + { + "epoch": 0.542457231726283, + "grad_norm": 1.4472790956497192, + "learning_rate": 2.8199999999999998e-05, + "log_odds_chosen": 15.860309600830078, + "log_odds_ratio": -0.14200741052627563, + "logits/chosen": 0.1716628074645996, + "logits/rejected": 0.962594211101532, + "logps/chosen": -1.1267319917678833, + "logps/rejected": -16.692829132080078, + "loss": 2.6449, + "nll_loss": 2.630686044692993, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11267320811748505, + "rewards/margins": 1.5566097497940063, + "rewards/rejected": -1.6692829132080078, + "step": 872 + }, + { + "epoch": 0.5430793157076206, + "grad_norm": 0.4921049475669861, + "learning_rate": 2.8175e-05, + "log_odds_chosen": 16.460073471069336, + "log_odds_ratio": -0.06028711423277855, + "logits/chosen": 0.33112722635269165, + "logits/rejected": 1.0481317043304443, + "logps/chosen": -1.0758942365646362, + "logps/rejected": -16.93272590637207, + "loss": 3.3084, + "nll_loss": 3.30238676071167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10758942365646362, + "rewards/margins": 1.5856831073760986, + "rewards/rejected": -1.693272590637207, + "step": 873 + }, + { + "epoch": 0.543701399688958, + "grad_norm": 0.47205042839050293, + "learning_rate": 2.815e-05, + "log_odds_chosen": 10.865059852600098, + "log_odds_ratio": -0.231788769364357, + "logits/chosen": 0.28823232650756836, + "logits/rejected": 1.0439519882202148, + "logps/chosen": -1.0718377828598022, + "logps/rejected": -11.658307075500488, + "loss": 3.0113, + "nll_loss": 2.988163948059082, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1071837842464447, + "rewards/margins": 1.0586470365524292, + "rewards/rejected": -1.1658308506011963, + "step": 874 + }, + { + "epoch": 0.5443234836702955, + "grad_norm": 0.370347797870636, + "learning_rate": 2.8125000000000003e-05, + "log_odds_chosen": 10.927844047546387, + "log_odds_ratio": -0.1950116902589798, + "logits/chosen": 0.17625781893730164, + "logits/rejected": 0.5121840238571167, + "logps/chosen": -0.975417971611023, + "logps/rejected": -11.16118049621582, + "loss": 3.09, + "nll_loss": 3.0705158710479736, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09754179418087006, + "rewards/margins": 1.0185761451721191, + "rewards/rejected": -1.1161179542541504, + "step": 875 + }, + { + "epoch": 0.5449455676516329, + "grad_norm": 0.4597725570201874, + "learning_rate": 2.8100000000000005e-05, + "log_odds_chosen": 9.441105842590332, + "log_odds_ratio": -0.18627074360847473, + "logits/chosen": 0.2629444897174835, + "logits/rejected": 0.7703899145126343, + "logps/chosen": -1.3209762573242188, + "logps/rejected": -10.505077362060547, + "loss": 3.265, + "nll_loss": 3.2463271617889404, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13209763169288635, + "rewards/margins": 0.9184101223945618, + "rewards/rejected": -1.0505077838897705, + "step": 876 + }, + { + "epoch": 0.5455676516329705, + "grad_norm": 0.3739025890827179, + "learning_rate": 2.8075e-05, + "log_odds_chosen": 11.734682083129883, + "log_odds_ratio": -0.07055188715457916, + "logits/chosen": 0.22402915358543396, + "logits/rejected": 0.826654314994812, + "logps/chosen": -1.6761119365692139, + "logps/rejected": -13.128270149230957, + "loss": 3.2389, + "nll_loss": 3.2317991256713867, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16761119663715363, + "rewards/margins": 1.1452157497406006, + "rewards/rejected": -1.3128271102905273, + "step": 877 + }, + { + "epoch": 0.546189735614308, + "grad_norm": 0.4357503354549408, + "learning_rate": 2.8050000000000004e-05, + "log_odds_chosen": 18.032630920410156, + "log_odds_ratio": -6.927158392500132e-05, + "logits/chosen": 0.4070179760456085, + "logits/rejected": 1.3222649097442627, + "logps/chosen": -1.4783521890640259, + "logps/rejected": -19.151775360107422, + "loss": 3.4047, + "nll_loss": 3.404698133468628, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14783522486686707, + "rewards/margins": 1.7673425674438477, + "rewards/rejected": -1.915177822113037, + "step": 878 + }, + { + "epoch": 0.5468118195956454, + "grad_norm": 0.4408838748931885, + "learning_rate": 2.8025e-05, + "log_odds_chosen": 10.542194366455078, + "log_odds_ratio": -0.28352150321006775, + "logits/chosen": 0.25072023272514343, + "logits/rejected": 1.1159553527832031, + "logps/chosen": -1.1630507707595825, + "logps/rejected": -11.427528381347656, + "loss": 2.7491, + "nll_loss": 2.7207608222961426, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11630507558584213, + "rewards/margins": 1.0264477729797363, + "rewards/rejected": -1.1427528858184814, + "step": 879 + }, + { + "epoch": 0.5474339035769828, + "grad_norm": 0.5349311828613281, + "learning_rate": 2.8000000000000003e-05, + "log_odds_chosen": 14.291757583618164, + "log_odds_ratio": -0.251430481672287, + "logits/chosen": 0.32711905241012573, + "logits/rejected": 0.9382522702217102, + "logps/chosen": -1.1399996280670166, + "logps/rejected": -15.159124374389648, + "loss": 2.9631, + "nll_loss": 2.937985420227051, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11399996280670166, + "rewards/margins": 1.4019126892089844, + "rewards/rejected": -1.5159125328063965, + "step": 880 + }, + { + "epoch": 0.5480559875583204, + "grad_norm": 0.4650278091430664, + "learning_rate": 2.7975000000000002e-05, + "log_odds_chosen": 14.79272747039795, + "log_odds_ratio": -0.06423240154981613, + "logits/chosen": 0.3779009282588959, + "logits/rejected": 1.0615414381027222, + "logps/chosen": -1.2400293350219727, + "logps/rejected": -15.624247550964355, + "loss": 3.1322, + "nll_loss": 3.1258246898651123, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12400293350219727, + "rewards/margins": 1.4384218454360962, + "rewards/rejected": -1.562424898147583, + "step": 881 + }, + { + "epoch": 0.5486780715396579, + "grad_norm": 0.4733143746852875, + "learning_rate": 2.7950000000000005e-05, + "log_odds_chosen": 6.083606719970703, + "log_odds_ratio": -0.33826759457588196, + "logits/chosen": 0.2951495349407196, + "logits/rejected": 0.5478895902633667, + "logps/chosen": -1.0794161558151245, + "logps/rejected": -6.8474345207214355, + "loss": 2.9234, + "nll_loss": 2.889528751373291, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10794162005186081, + "rewards/margins": 0.5768018364906311, + "rewards/rejected": -0.6847434639930725, + "step": 882 + }, + { + "epoch": 0.5493001555209953, + "grad_norm": 0.42593225836753845, + "learning_rate": 2.7925e-05, + "log_odds_chosen": 12.911933898925781, + "log_odds_ratio": -0.3391897976398468, + "logits/chosen": 0.31484758853912354, + "logits/rejected": 1.1003917455673218, + "logps/chosen": -1.1400558948516846, + "logps/rejected": -13.681197166442871, + "loss": 2.7707, + "nll_loss": 2.7367684841156006, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11400558799505234, + "rewards/margins": 1.2541141510009766, + "rewards/rejected": -1.368119716644287, + "step": 883 + }, + { + "epoch": 0.5499222395023328, + "grad_norm": 0.44411978125572205, + "learning_rate": 2.7900000000000004e-05, + "log_odds_chosen": 15.06024169921875, + "log_odds_ratio": -0.13867679238319397, + "logits/chosen": 0.2806948125362396, + "logits/rejected": 1.4216599464416504, + "logps/chosen": -1.1676044464111328, + "logps/rejected": -15.850728034973145, + "loss": 2.7267, + "nll_loss": 2.7128076553344727, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11676044762134552, + "rewards/margins": 1.468312382698059, + "rewards/rejected": -1.5850728750228882, + "step": 884 + }, + { + "epoch": 0.5505443234836703, + "grad_norm": 1.44214928150177, + "learning_rate": 2.7875e-05, + "log_odds_chosen": 12.520509719848633, + "log_odds_ratio": -0.18546557426452637, + "logits/chosen": 0.16061872243881226, + "logits/rejected": 0.9799090027809143, + "logps/chosen": -1.3326960802078247, + "logps/rejected": -13.587608337402344, + "loss": 2.9357, + "nll_loss": 2.917177677154541, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13326960802078247, + "rewards/margins": 1.2254914045333862, + "rewards/rejected": -1.3587608337402344, + "step": 885 + }, + { + "epoch": 0.5511664074650078, + "grad_norm": 0.4010365605354309, + "learning_rate": 2.7850000000000003e-05, + "log_odds_chosen": 17.20555877685547, + "log_odds_ratio": -8.836646884446964e-06, + "logits/chosen": 0.32361698150634766, + "logits/rejected": 1.3668720722198486, + "logps/chosen": -1.1861801147460938, + "logps/rejected": -17.992464065551758, + "loss": 3.2253, + "nll_loss": 3.2252705097198486, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11861801147460938, + "rewards/margins": 1.6806282997131348, + "rewards/rejected": -1.7992461919784546, + "step": 886 + }, + { + "epoch": 0.5517884914463452, + "grad_norm": 0.5873351693153381, + "learning_rate": 2.7825000000000002e-05, + "log_odds_chosen": 8.508808135986328, + "log_odds_ratio": -0.3058502972126007, + "logits/chosen": 0.2773277759552002, + "logits/rejected": 0.7884806394577026, + "logps/chosen": -1.3308159112930298, + "logps/rejected": -9.67809009552002, + "loss": 2.7651, + "nll_loss": 2.7345235347747803, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1330816000699997, + "rewards/margins": 0.83472740650177, + "rewards/rejected": -0.9678090214729309, + "step": 887 + }, + { + "epoch": 0.5524105754276827, + "grad_norm": 22.026321411132812, + "learning_rate": 2.7800000000000005e-05, + "log_odds_chosen": 13.509963035583496, + "log_odds_ratio": -0.002110434928908944, + "logits/chosen": 0.3400443196296692, + "logits/rejected": 1.213066577911377, + "logps/chosen": -1.1196520328521729, + "logps/rejected": -13.974618911743164, + "loss": 2.9615, + "nll_loss": 2.961289882659912, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11196519434452057, + "rewards/margins": 1.2854965925216675, + "rewards/rejected": -1.3974618911743164, + "step": 888 + }, + { + "epoch": 0.5530326594090202, + "grad_norm": 3.287785291671753, + "learning_rate": 2.7775e-05, + "log_odds_chosen": 10.978760719299316, + "log_odds_ratio": -0.5777610540390015, + "logits/chosen": 0.35481563210487366, + "logits/rejected": 0.9291172027587891, + "logps/chosen": -2.050435781478882, + "logps/rejected": -12.973872184753418, + "loss": 3.2977, + "nll_loss": 3.2398900985717773, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.20504358410835266, + "rewards/margins": 1.0923435688018799, + "rewards/rejected": -1.2973871231079102, + "step": 889 + }, + { + "epoch": 0.5536547433903577, + "grad_norm": 0.43079885840415955, + "learning_rate": 2.7750000000000004e-05, + "log_odds_chosen": 13.530800819396973, + "log_odds_ratio": -0.01729394681751728, + "logits/chosen": 0.22219036519527435, + "logits/rejected": 0.8625215888023376, + "logps/chosen": -1.6603683233261108, + "logps/rejected": -14.933816909790039, + "loss": 3.3577, + "nll_loss": 3.3560125827789307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16603684425354004, + "rewards/margins": 1.3273450136184692, + "rewards/rejected": -1.4933817386627197, + "step": 890 + }, + { + "epoch": 0.5542768273716951, + "grad_norm": 0.4820476770401001, + "learning_rate": 2.7725e-05, + "log_odds_chosen": 7.27010440826416, + "log_odds_ratio": -0.21835504472255707, + "logits/chosen": 0.2366558313369751, + "logits/rejected": 0.615390419960022, + "logps/chosen": -1.1921238899230957, + "logps/rejected": -8.159709930419922, + "loss": 3.1871, + "nll_loss": 3.1652331352233887, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11921238899230957, + "rewards/margins": 0.6967586278915405, + "rewards/rejected": -0.8159710168838501, + "step": 891 + }, + { + "epoch": 0.5548989113530327, + "grad_norm": 0.4553963840007782, + "learning_rate": 2.7700000000000002e-05, + "log_odds_chosen": 9.985837936401367, + "log_odds_ratio": -0.2571437954902649, + "logits/chosen": 0.23121318221092224, + "logits/rejected": 0.8296804428100586, + "logps/chosen": -1.2417988777160645, + "logps/rejected": -11.013862609863281, + "loss": 2.8727, + "nll_loss": 2.846977710723877, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1241798847913742, + "rewards/margins": 0.977206289768219, + "rewards/rejected": -1.1013861894607544, + "step": 892 + }, + { + "epoch": 0.5555209953343702, + "grad_norm": 0.5255298018455505, + "learning_rate": 2.7675000000000002e-05, + "log_odds_chosen": 13.86887264251709, + "log_odds_ratio": -0.19372327625751495, + "logits/chosen": 0.30388227105140686, + "logits/rejected": 1.4697858095169067, + "logps/chosen": -1.3105946779251099, + "logps/rejected": -14.92329216003418, + "loss": 2.4972, + "nll_loss": 2.47778058052063, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.131059467792511, + "rewards/margins": 1.3612698316574097, + "rewards/rejected": -1.4923293590545654, + "step": 893 + }, + { + "epoch": 0.5561430793157076, + "grad_norm": 0.5121700763702393, + "learning_rate": 2.7650000000000005e-05, + "log_odds_chosen": 3.2167305946350098, + "log_odds_ratio": -0.32042694091796875, + "logits/chosen": 0.25697678327560425, + "logits/rejected": 0.38786739110946655, + "logps/chosen": -1.0599229335784912, + "logps/rejected": -3.9423530101776123, + "loss": 2.8183, + "nll_loss": 2.786297082901001, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10599227994680405, + "rewards/margins": 0.28824299573898315, + "rewards/rejected": -0.3942352831363678, + "step": 894 + }, + { + "epoch": 0.5567651632970451, + "grad_norm": 5.089456558227539, + "learning_rate": 2.7625e-05, + "log_odds_chosen": 8.74205493927002, + "log_odds_ratio": -0.15869277715682983, + "logits/chosen": 0.32882875204086304, + "logits/rejected": 1.0555585622787476, + "logps/chosen": -1.5479052066802979, + "logps/rejected": -9.963598251342773, + "loss": 2.7022, + "nll_loss": 2.686356782913208, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15479052066802979, + "rewards/margins": 0.8415693044662476, + "rewards/rejected": -0.9963598847389221, + "step": 895 + }, + { + "epoch": 0.5573872472783826, + "grad_norm": 0.523443341255188, + "learning_rate": 2.7600000000000003e-05, + "log_odds_chosen": 15.575971603393555, + "log_odds_ratio": -0.08751137554645538, + "logits/chosen": 0.38994699716567993, + "logits/rejected": 1.6484946012496948, + "logps/chosen": -1.126055121421814, + "logps/rejected": -16.346158981323242, + "loss": 2.8432, + "nll_loss": 2.834432363510132, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1126055121421814, + "rewards/margins": 1.5220104455947876, + "rewards/rejected": -1.6346158981323242, + "step": 896 + }, + { + "epoch": 0.5580093312597201, + "grad_norm": 0.43732067942619324, + "learning_rate": 2.7575e-05, + "log_odds_chosen": 12.99947452545166, + "log_odds_ratio": -0.22496089339256287, + "logits/chosen": 0.3272874653339386, + "logits/rejected": 0.942588210105896, + "logps/chosen": -1.0477204322814941, + "logps/rejected": -13.669449806213379, + "loss": 3.2135, + "nll_loss": 3.191025495529175, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10477204620838165, + "rewards/margins": 1.2621729373931885, + "rewards/rejected": -1.3669450283050537, + "step": 897 + }, + { + "epoch": 0.5586314152410575, + "grad_norm": 0.6751424670219421, + "learning_rate": 2.7550000000000002e-05, + "log_odds_chosen": 8.845113754272461, + "log_odds_ratio": -0.1459217518568039, + "logits/chosen": 0.12097790837287903, + "logits/rejected": 0.7129504084587097, + "logps/chosen": -0.9098332524299622, + "logps/rejected": -9.02649974822998, + "loss": 2.4552, + "nll_loss": 2.440615653991699, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0909833312034607, + "rewards/margins": 0.81166672706604, + "rewards/rejected": -0.9026500582695007, + "step": 898 + }, + { + "epoch": 0.559253499222395, + "grad_norm": 0.5536679029464722, + "learning_rate": 2.7525e-05, + "log_odds_chosen": 10.608327865600586, + "log_odds_ratio": -0.01897694170475006, + "logits/chosen": 0.22658635675907135, + "logits/rejected": 0.7577848434448242, + "logps/chosen": -1.0816148519515991, + "logps/rejected": -11.110265731811523, + "loss": 2.8688, + "nll_loss": 2.866927146911621, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10816149413585663, + "rewards/margins": 1.0028650760650635, + "rewards/rejected": -1.111026644706726, + "step": 899 + }, + { + "epoch": 0.5598755832037325, + "grad_norm": 0.43533676862716675, + "learning_rate": 2.7500000000000004e-05, + "log_odds_chosen": 7.546494483947754, + "log_odds_ratio": -0.1429726481437683, + "logits/chosen": 0.20976758003234863, + "logits/rejected": 0.4699644446372986, + "logps/chosen": -1.0662420988082886, + "logps/rejected": -8.181591033935547, + "loss": 3.1895, + "nll_loss": 3.1751976013183594, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10662420839071274, + "rewards/margins": 0.7115349769592285, + "rewards/rejected": -0.8181591629981995, + "step": 900 + }, + { + "epoch": 0.56049766718507, + "grad_norm": 0.5101000666618347, + "learning_rate": 2.7475e-05, + "log_odds_chosen": 9.346063613891602, + "log_odds_ratio": -0.16607795655727386, + "logits/chosen": 0.22395896911621094, + "logits/rejected": 0.6142349243164062, + "logps/chosen": -1.1865246295928955, + "logps/rejected": -9.847481727600098, + "loss": 3.1556, + "nll_loss": 3.1389565467834473, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11865246295928955, + "rewards/margins": 0.8660956621170044, + "rewards/rejected": -0.9847480654716492, + "step": 901 + }, + { + "epoch": 0.5611197511664074, + "grad_norm": 0.44190743565559387, + "learning_rate": 2.7450000000000003e-05, + "log_odds_chosen": 7.044838905334473, + "log_odds_ratio": -0.14595672488212585, + "logits/chosen": 0.1050267368555069, + "logits/rejected": 0.3443432152271271, + "logps/chosen": -1.0475255250930786, + "logps/rejected": -7.505648612976074, + "loss": 3.0451, + "nll_loss": 3.030492067337036, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1047525554895401, + "rewards/margins": 0.6458122730255127, + "rewards/rejected": -0.7505648136138916, + "step": 902 + }, + { + "epoch": 0.5617418351477449, + "grad_norm": 0.3499910235404968, + "learning_rate": 2.7425e-05, + "log_odds_chosen": 7.8855743408203125, + "log_odds_ratio": -0.2606319189071655, + "logits/chosen": 0.29135677218437195, + "logits/rejected": 0.577876091003418, + "logps/chosen": -0.9390688538551331, + "logps/rejected": -8.381467819213867, + "loss": 3.7401, + "nll_loss": 3.7140378952026367, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09390688687562943, + "rewards/margins": 0.7442399263381958, + "rewards/rejected": -0.8381468057632446, + "step": 903 + }, + { + "epoch": 0.5623639191290825, + "grad_norm": 0.5649048089981079, + "learning_rate": 2.7400000000000002e-05, + "log_odds_chosen": 8.967251777648926, + "log_odds_ratio": -0.03768826648592949, + "logits/chosen": 0.2890816330909729, + "logits/rejected": 0.7596768140792847, + "logps/chosen": -0.9239016175270081, + "logps/rejected": -9.305893898010254, + "loss": 3.2545, + "nll_loss": 3.2506906986236572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09239016473293304, + "rewards/margins": 0.838199257850647, + "rewards/rejected": -0.9305894374847412, + "step": 904 + }, + { + "epoch": 0.5629860031104199, + "grad_norm": 0.45023804903030396, + "learning_rate": 2.7375e-05, + "log_odds_chosen": 6.16334342956543, + "log_odds_ratio": -0.2813335359096527, + "logits/chosen": 0.16055354475975037, + "logits/rejected": 0.49607011675834656, + "logps/chosen": -1.1517943143844604, + "logps/rejected": -6.91542911529541, + "loss": 2.9652, + "nll_loss": 2.9370510578155518, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11517943441867828, + "rewards/margins": 0.5763634443283081, + "rewards/rejected": -0.6915428638458252, + "step": 905 + }, + { + "epoch": 0.5636080870917574, + "grad_norm": 0.4151860475540161, + "learning_rate": 2.7350000000000004e-05, + "log_odds_chosen": 5.84874153137207, + "log_odds_ratio": -0.30869588255882263, + "logits/chosen": 0.17200124263763428, + "logits/rejected": 0.48454806208610535, + "logps/chosen": -1.2069495916366577, + "logps/rejected": -6.622710227966309, + "loss": 3.2825, + "nll_loss": 3.2516021728515625, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12069497257471085, + "rewards/margins": 0.5415760278701782, + "rewards/rejected": -0.6622710227966309, + "step": 906 + }, + { + "epoch": 0.5642301710730949, + "grad_norm": 0.3901362717151642, + "learning_rate": 2.7325e-05, + "log_odds_chosen": 8.91002368927002, + "log_odds_ratio": -0.09111232310533524, + "logits/chosen": 0.22789613902568817, + "logits/rejected": 0.7879712581634521, + "logps/chosen": -1.3444089889526367, + "logps/rejected": -9.875727653503418, + "loss": 3.2262, + "nll_loss": 3.2170474529266357, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13444089889526367, + "rewards/margins": 0.8531317710876465, + "rewards/rejected": -0.9875727295875549, + "step": 907 + }, + { + "epoch": 0.5648522550544324, + "grad_norm": 0.4596397280693054, + "learning_rate": 2.7300000000000003e-05, + "log_odds_chosen": 6.89910888671875, + "log_odds_ratio": -0.14412494003772736, + "logits/chosen": 0.2593741714954376, + "logits/rejected": 0.8184172511100769, + "logps/chosen": -1.1923887729644775, + "logps/rejected": -7.755105018615723, + "loss": 2.9366, + "nll_loss": 2.9221863746643066, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11923889070749283, + "rewards/margins": 0.6562716960906982, + "rewards/rejected": -0.7755105495452881, + "step": 908 + }, + { + "epoch": 0.5654743390357698, + "grad_norm": 0.5999178290367126, + "learning_rate": 2.7275e-05, + "log_odds_chosen": 11.099242210388184, + "log_odds_ratio": -0.17223107814788818, + "logits/chosen": 0.16461758315563202, + "logits/rejected": 0.345947265625, + "logps/chosen": -1.1326367855072021, + "logps/rejected": -11.848851203918457, + "loss": 3.0254, + "nll_loss": 3.008146047592163, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11326368153095245, + "rewards/margins": 1.0716214179992676, + "rewards/rejected": -1.1848851442337036, + "step": 909 + }, + { + "epoch": 0.5660964230171073, + "grad_norm": 0.37448394298553467, + "learning_rate": 2.725e-05, + "log_odds_chosen": 5.122501850128174, + "log_odds_ratio": -0.26253485679626465, + "logits/chosen": 0.1458125114440918, + "logits/rejected": 0.38186800479888916, + "logps/chosen": -0.9692260026931763, + "logps/rejected": -5.624997138977051, + "loss": 2.946, + "nll_loss": 2.9197349548339844, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09692259877920151, + "rewards/margins": 0.4655771255493164, + "rewards/rejected": -0.5624997019767761, + "step": 910 + }, + { + "epoch": 0.5667185069984448, + "grad_norm": 0.46044209599494934, + "learning_rate": 2.7225e-05, + "log_odds_chosen": 6.9234795570373535, + "log_odds_ratio": -0.21885745227336884, + "logits/chosen": 0.09006873518228531, + "logits/rejected": 0.40548866987228394, + "logps/chosen": -0.9413871169090271, + "logps/rejected": -7.406796455383301, + "loss": 2.6326, + "nll_loss": 2.6107070446014404, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09413871169090271, + "rewards/margins": 0.6465408802032471, + "rewards/rejected": -0.7406796216964722, + "step": 911 + }, + { + "epoch": 0.5673405909797823, + "grad_norm": 0.48035767674446106, + "learning_rate": 2.7200000000000004e-05, + "log_odds_chosen": 9.26119613647461, + "log_odds_ratio": -0.17187556624412537, + "logits/chosen": 0.13499784469604492, + "logits/rejected": 0.6166836619377136, + "logps/chosen": -1.251449704170227, + "logps/rejected": -10.169729232788086, + "loss": 2.6754, + "nll_loss": 2.658219814300537, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12514497339725494, + "rewards/margins": 0.8918280005455017, + "rewards/rejected": -1.0169728994369507, + "step": 912 + }, + { + "epoch": 0.5679626749611197, + "grad_norm": 0.47506648302078247, + "learning_rate": 2.7175e-05, + "log_odds_chosen": 10.130144119262695, + "log_odds_ratio": -0.04994361102581024, + "logits/chosen": 0.17378772795200348, + "logits/rejected": 0.629872739315033, + "logps/chosen": -0.772241473197937, + "logps/rejected": -10.029999732971191, + "loss": 2.8842, + "nll_loss": 2.8792366981506348, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07722415030002594, + "rewards/margins": 0.9257758855819702, + "rewards/rejected": -1.003000020980835, + "step": 913 + }, + { + "epoch": 0.5685847589424572, + "grad_norm": 0.48764681816101074, + "learning_rate": 2.7150000000000003e-05, + "log_odds_chosen": 5.916272163391113, + "log_odds_ratio": -0.25303128361701965, + "logits/chosen": 0.1644158661365509, + "logits/rejected": 0.30059051513671875, + "logps/chosen": -1.1256078481674194, + "logps/rejected": -6.709453105926514, + "loss": 3.0249, + "nll_loss": 2.9995951652526855, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11256079375743866, + "rewards/margins": 0.5583845376968384, + "rewards/rejected": -0.6709453463554382, + "step": 914 + }, + { + "epoch": 0.5692068429237948, + "grad_norm": 0.5090453028678894, + "learning_rate": 2.7125000000000002e-05, + "log_odds_chosen": 9.453378677368164, + "log_odds_ratio": -0.11234141886234283, + "logits/chosen": 0.2241164743900299, + "logits/rejected": 0.9943174719810486, + "logps/chosen": -1.0483677387237549, + "logps/rejected": -9.902132987976074, + "loss": 2.8647, + "nll_loss": 2.8534181118011475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10483676195144653, + "rewards/margins": 0.8853765726089478, + "rewards/rejected": -0.9902133941650391, + "step": 915 + }, + { + "epoch": 0.5698289269051322, + "grad_norm": 0.3839872181415558, + "learning_rate": 2.7100000000000005e-05, + "log_odds_chosen": 12.735099792480469, + "log_odds_ratio": -0.07314764708280563, + "logits/chosen": 0.3284153938293457, + "logits/rejected": 1.1309449672698975, + "logps/chosen": -0.8413907289505005, + "logps/rejected": -12.652066230773926, + "loss": 3.1403, + "nll_loss": 3.132990837097168, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08413907140493393, + "rewards/margins": 1.1810675859451294, + "rewards/rejected": -1.2652066946029663, + "step": 916 + }, + { + "epoch": 0.5704510108864697, + "grad_norm": 0.7262453436851501, + "learning_rate": 2.7075e-05, + "log_odds_chosen": 10.773283958435059, + "log_odds_ratio": -0.17366476356983185, + "logits/chosen": 0.27300554513931274, + "logits/rejected": 0.6795448064804077, + "logps/chosen": -1.1927194595336914, + "logps/rejected": -11.570327758789062, + "loss": 2.9818, + "nll_loss": 2.964430570602417, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11927194148302078, + "rewards/margins": 1.037760853767395, + "rewards/rejected": -1.15703284740448, + "step": 917 + }, + { + "epoch": 0.5710730948678071, + "grad_norm": 0.4268437922000885, + "learning_rate": 2.7050000000000004e-05, + "log_odds_chosen": 12.669416427612305, + "log_odds_ratio": -0.18929001688957214, + "logits/chosen": 0.23210585117340088, + "logits/rejected": 0.7328687906265259, + "logps/chosen": -0.9498953223228455, + "logps/rejected": -13.114092826843262, + "loss": 3.6412, + "nll_loss": 3.622225761413574, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09498953074216843, + "rewards/margins": 1.2164198160171509, + "rewards/rejected": -1.3114093542099, + "step": 918 + }, + { + "epoch": 0.5716951788491447, + "grad_norm": 0.4433814585208893, + "learning_rate": 2.7025e-05, + "log_odds_chosen": 10.184727668762207, + "log_odds_ratio": -0.025781212374567986, + "logits/chosen": 0.17551684379577637, + "logits/rejected": 1.0827550888061523, + "logps/chosen": -1.2214159965515137, + "logps/rejected": -11.030315399169922, + "loss": 2.7723, + "nll_loss": 2.769714832305908, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12214161455631256, + "rewards/margins": 0.9808899760246277, + "rewards/rejected": -1.1030315160751343, + "step": 919 + }, + { + "epoch": 0.5723172628304821, + "grad_norm": 0.48769259452819824, + "learning_rate": 2.7000000000000002e-05, + "log_odds_chosen": 9.125656127929688, + "log_odds_ratio": -0.26961836218833923, + "logits/chosen": 0.1965629905462265, + "logits/rejected": 0.3319164514541626, + "logps/chosen": -1.3702173233032227, + "logps/rejected": -10.158577919006348, + "loss": 3.5997, + "nll_loss": 3.5727407932281494, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1370217353105545, + "rewards/margins": 0.8788360357284546, + "rewards/rejected": -1.0158576965332031, + "step": 920 + }, + { + "epoch": 0.5729393468118196, + "grad_norm": 0.48434844613075256, + "learning_rate": 2.6975000000000002e-05, + "log_odds_chosen": 13.56881332397461, + "log_odds_ratio": -0.0026917767245322466, + "logits/chosen": 0.2141491025686264, + "logits/rejected": 1.3020198345184326, + "logps/chosen": -1.3336665630340576, + "logps/rejected": -14.552571296691895, + "loss": 2.7108, + "nll_loss": 2.7105112075805664, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1333666741847992, + "rewards/margins": 1.3218905925750732, + "rewards/rejected": -1.4552571773529053, + "step": 921 + }, + { + "epoch": 0.573561430793157, + "grad_norm": 0.5728157162666321, + "learning_rate": 2.6950000000000005e-05, + "log_odds_chosen": 10.488691329956055, + "log_odds_ratio": -0.23412871360778809, + "logits/chosen": 0.2155309021472931, + "logits/rejected": 0.6558471322059631, + "logps/chosen": -1.1757211685180664, + "logps/rejected": -11.354267120361328, + "loss": 3.3212, + "nll_loss": 3.2977969646453857, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.117572121322155, + "rewards/margins": 1.0178545713424683, + "rewards/rejected": -1.135426640510559, + "step": 922 + }, + { + "epoch": 0.5741835147744946, + "grad_norm": 0.472119003534317, + "learning_rate": 2.6925e-05, + "log_odds_chosen": 8.171377182006836, + "log_odds_ratio": -0.22905880212783813, + "logits/chosen": 0.1736389398574829, + "logits/rejected": 0.8877742290496826, + "logps/chosen": -1.1164681911468506, + "logps/rejected": -8.963059425354004, + "loss": 2.8084, + "nll_loss": 2.785543203353882, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11164682358503342, + "rewards/margins": 0.7846590876579285, + "rewards/rejected": -0.8963059186935425, + "step": 923 + }, + { + "epoch": 0.574805598755832, + "grad_norm": 1.098946213722229, + "learning_rate": 2.6900000000000003e-05, + "log_odds_chosen": 8.532747268676758, + "log_odds_ratio": -0.16291804611682892, + "logits/chosen": 0.26898065209388733, + "logits/rejected": 0.7751161456108093, + "logps/chosen": -1.2293330430984497, + "logps/rejected": -9.393357276916504, + "loss": 3.3784, + "nll_loss": 3.3621580600738525, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12293331325054169, + "rewards/margins": 0.8164024353027344, + "rewards/rejected": -0.9393357634544373, + "step": 924 + }, + { + "epoch": 0.5754276827371695, + "grad_norm": 0.661605715751648, + "learning_rate": 2.6875e-05, + "log_odds_chosen": 16.35256576538086, + "log_odds_ratio": -0.1963188201189041, + "logits/chosen": 0.1324017345905304, + "logits/rejected": 0.8213398456573486, + "logps/chosen": -1.7915053367614746, + "logps/rejected": -17.91004180908203, + "loss": 3.0766, + "nll_loss": 3.0570147037506104, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1791505366563797, + "rewards/margins": 1.611853837966919, + "rewards/rejected": -1.7910041809082031, + "step": 925 + }, + { + "epoch": 0.576049766718507, + "grad_norm": 0.3944947123527527, + "learning_rate": 2.6850000000000002e-05, + "log_odds_chosen": 19.438589096069336, + "log_odds_ratio": -0.09257218986749649, + "logits/chosen": 0.11398239433765411, + "logits/rejected": 0.9922657608985901, + "logps/chosen": -0.9387992024421692, + "logps/rejected": -19.83526039123535, + "loss": 2.8709, + "nll_loss": 2.8616538047790527, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09387992322444916, + "rewards/margins": 1.889646291732788, + "rewards/rejected": -1.9835262298583984, + "step": 926 + }, + { + "epoch": 0.5766718506998445, + "grad_norm": 0.3849738538265228, + "learning_rate": 2.6825e-05, + "log_odds_chosen": 16.61699867248535, + "log_odds_ratio": -0.03179116174578667, + "logits/chosen": 0.2330043613910675, + "logits/rejected": 1.0248134136199951, + "logps/chosen": -1.3797637224197388, + "logps/rejected": -17.69917106628418, + "loss": 3.4806, + "nll_loss": 3.4774537086486816, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13797636330127716, + "rewards/margins": 1.6319406032562256, + "rewards/rejected": -1.7699170112609863, + "step": 927 + }, + { + "epoch": 0.577293934681182, + "grad_norm": 0.5158620476722717, + "learning_rate": 2.6800000000000004e-05, + "log_odds_chosen": 20.5090274810791, + "log_odds_ratio": -0.0825430378317833, + "logits/chosen": 0.11498251557350159, + "logits/rejected": 0.9717357754707336, + "logps/chosen": -0.903712272644043, + "logps/rejected": -20.81587028503418, + "loss": 2.7439, + "nll_loss": 2.735637664794922, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09037123620510101, + "rewards/margins": 1.9912158250808716, + "rewards/rejected": -2.081587314605713, + "step": 928 + }, + { + "epoch": 0.5779160186625194, + "grad_norm": 0.4215962886810303, + "learning_rate": 2.6775e-05, + "log_odds_chosen": 22.170780181884766, + "log_odds_ratio": 0.0, + "logits/chosen": 0.3199015259742737, + "logits/rejected": 1.854710340499878, + "logps/chosen": -1.3474838733673096, + "logps/rejected": -23.19628143310547, + "loss": 3.4444, + "nll_loss": 3.444409132003784, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13474836945533752, + "rewards/margins": 2.184879779815674, + "rewards/rejected": -2.3196280002593994, + "step": 929 + }, + { + "epoch": 0.578538102643857, + "grad_norm": 0.7233343720436096, + "learning_rate": 2.6750000000000003e-05, + "log_odds_chosen": 13.190216064453125, + "log_odds_ratio": -0.1239352896809578, + "logits/chosen": -0.007729051634669304, + "logits/rejected": 0.6695624589920044, + "logps/chosen": -1.3063056468963623, + "logps/rejected": -14.166091918945312, + "loss": 2.7662, + "nll_loss": 2.7538347244262695, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13063055276870728, + "rewards/margins": 1.2859786748886108, + "rewards/rejected": -1.416609287261963, + "step": 930 + }, + { + "epoch": 0.5791601866251944, + "grad_norm": 0.5069626569747925, + "learning_rate": 2.6725e-05, + "log_odds_chosen": 10.079081535339355, + "log_odds_ratio": -0.2044445276260376, + "logits/chosen": 0.21780771017074585, + "logits/rejected": 0.6870952844619751, + "logps/chosen": -1.3345181941986084, + "logps/rejected": -11.053346633911133, + "loss": 3.3541, + "nll_loss": 3.333698272705078, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13345181941986084, + "rewards/margins": 0.9718827605247498, + "rewards/rejected": -1.1053345203399658, + "step": 931 + }, + { + "epoch": 0.5797822706065319, + "grad_norm": 0.7521727681159973, + "learning_rate": 2.6700000000000002e-05, + "log_odds_chosen": 10.476853370666504, + "log_odds_ratio": -0.2420932799577713, + "logits/chosen": 0.17987507581710815, + "logits/rejected": 0.8727846741676331, + "logps/chosen": -1.2537364959716797, + "logps/rejected": -11.50350570678711, + "loss": 3.1104, + "nll_loss": 3.0861494541168213, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12537366151809692, + "rewards/margins": 1.0249770879745483, + "rewards/rejected": -1.1503506898880005, + "step": 932 + }, + { + "epoch": 0.5804043545878693, + "grad_norm": 0.5356715321540833, + "learning_rate": 2.6675e-05, + "log_odds_chosen": 14.187935829162598, + "log_odds_ratio": -0.24278056621551514, + "logits/chosen": 0.17469918727874756, + "logits/rejected": 1.1708053350448608, + "logps/chosen": -1.0131219625473022, + "logps/rejected": -14.83353328704834, + "loss": 2.2884, + "nll_loss": 2.26407527923584, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10131219774484634, + "rewards/margins": 1.3820412158966064, + "rewards/rejected": -1.4833533763885498, + "step": 933 + }, + { + "epoch": 0.5810264385692069, + "grad_norm": 0.5571019649505615, + "learning_rate": 2.6650000000000004e-05, + "log_odds_chosen": 9.943033218383789, + "log_odds_ratio": -0.26776123046875, + "logits/chosen": 0.265704870223999, + "logits/rejected": 0.9763750433921814, + "logps/chosen": -1.1288625001907349, + "logps/rejected": -10.797094345092773, + "loss": 3.5617, + "nll_loss": 3.534961700439453, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11288625001907349, + "rewards/margins": 0.966823160648346, + "rewards/rejected": -1.0797094106674194, + "step": 934 + }, + { + "epoch": 0.5816485225505443, + "grad_norm": 0.45613187551498413, + "learning_rate": 2.6625e-05, + "log_odds_chosen": 8.923871040344238, + "log_odds_ratio": -0.25769421458244324, + "logits/chosen": 0.07586418092250824, + "logits/rejected": 0.34221017360687256, + "logps/chosen": -1.0070983171463013, + "logps/rejected": -9.35234260559082, + "loss": 3.1174, + "nll_loss": 3.0916061401367188, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10070984065532684, + "rewards/margins": 0.8345244526863098, + "rewards/rejected": -0.9352343082427979, + "step": 935 + }, + { + "epoch": 0.5822706065318818, + "grad_norm": 0.44240835309028625, + "learning_rate": 2.6600000000000003e-05, + "log_odds_chosen": 20.093387603759766, + "log_odds_ratio": -0.11705049872398376, + "logits/chosen": 0.2263740599155426, + "logits/rejected": 1.405846357345581, + "logps/chosen": -1.076794147491455, + "logps/rejected": -20.625396728515625, + "loss": 2.8483, + "nll_loss": 2.8366036415100098, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10767941921949387, + "rewards/margins": 1.9548602104187012, + "rewards/rejected": -2.062539577484131, + "step": 936 + }, + { + "epoch": 0.5828926905132192, + "grad_norm": 0.39377760887145996, + "learning_rate": 2.6575e-05, + "log_odds_chosen": 14.986661911010742, + "log_odds_ratio": -0.27674102783203125, + "logits/chosen": 0.1652100682258606, + "logits/rejected": 0.9994176626205444, + "logps/chosen": -1.0255934000015259, + "logps/rejected": -15.587301254272461, + "loss": 2.8296, + "nll_loss": 2.801974296569824, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10255934298038483, + "rewards/margins": 1.4561707973480225, + "rewards/rejected": -1.558730125427246, + "step": 937 + }, + { + "epoch": 0.5835147744945568, + "grad_norm": 4.184680938720703, + "learning_rate": 2.655e-05, + "log_odds_chosen": 11.676606178283691, + "log_odds_ratio": -0.12282869964838028, + "logits/chosen": 0.21672847867012024, + "logits/rejected": 0.7535300254821777, + "logps/chosen": -1.5664196014404297, + "logps/rejected": -12.69096851348877, + "loss": 3.4853, + "nll_loss": 3.4730653762817383, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15664197504520416, + "rewards/margins": 1.112454891204834, + "rewards/rejected": -1.269096851348877, + "step": 938 + }, + { + "epoch": 0.5841368584758942, + "grad_norm": 0.5880439877510071, + "learning_rate": 2.6525e-05, + "log_odds_chosen": 10.81521224975586, + "log_odds_ratio": -0.47919604182243347, + "logits/chosen": 0.1771206557750702, + "logits/rejected": 0.8535869121551514, + "logps/chosen": -1.2247461080551147, + "logps/rejected": -11.720307350158691, + "loss": 2.8229, + "nll_loss": 2.774963855743408, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12247461825609207, + "rewards/margins": 1.0495561361312866, + "rewards/rejected": -1.1720308065414429, + "step": 939 + }, + { + "epoch": 0.5847589424572317, + "grad_norm": 2.263173818588257, + "learning_rate": 2.6500000000000004e-05, + "log_odds_chosen": 14.880687713623047, + "log_odds_ratio": -0.04369397461414337, + "logits/chosen": 0.17089928686618805, + "logits/rejected": 0.7093451023101807, + "logps/chosen": -1.288264274597168, + "logps/rejected": -15.811230659484863, + "loss": 3.2519, + "nll_loss": 3.247570276260376, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12882643938064575, + "rewards/margins": 1.4522968530654907, + "rewards/rejected": -1.5811231136322021, + "step": 940 + }, + { + "epoch": 0.5853810264385692, + "grad_norm": 0.6679919362068176, + "learning_rate": 2.6475e-05, + "log_odds_chosen": 13.88052749633789, + "log_odds_ratio": -0.17339672148227692, + "logits/chosen": 0.2502378821372986, + "logits/rejected": 1.128913402557373, + "logps/chosen": -1.2840638160705566, + "logps/rejected": -14.904745101928711, + "loss": 2.6963, + "nll_loss": 2.67893648147583, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1284063756465912, + "rewards/margins": 1.3620681762695312, + "rewards/rejected": -1.4904745817184448, + "step": 941 + }, + { + "epoch": 0.5860031104199067, + "grad_norm": 1.2314687967300415, + "learning_rate": 2.6450000000000003e-05, + "log_odds_chosen": 18.171859741210938, + "log_odds_ratio": -0.1047465056180954, + "logits/chosen": 0.36809712648391724, + "logits/rejected": 1.2636301517486572, + "logps/chosen": -1.0214661359786987, + "logps/rejected": -18.68871307373047, + "loss": 3.5026, + "nll_loss": 3.492130756378174, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10214661061763763, + "rewards/margins": 1.7667248249053955, + "rewards/rejected": -1.8688714504241943, + "step": 942 + }, + { + "epoch": 0.5866251944012442, + "grad_norm": 0.5520533919334412, + "learning_rate": 2.6425e-05, + "log_odds_chosen": 9.532363891601562, + "log_odds_ratio": -0.033491168171167374, + "logits/chosen": 0.3422502279281616, + "logits/rejected": 0.8718456029891968, + "logps/chosen": -1.18587327003479, + "logps/rejected": -10.302650451660156, + "loss": 3.2041, + "nll_loss": 3.20070743560791, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11858732998371124, + "rewards/margins": 0.9116777181625366, + "rewards/rejected": -1.0302650928497314, + "step": 943 + }, + { + "epoch": 0.5872472783825816, + "grad_norm": 0.5288044810295105, + "learning_rate": 2.64e-05, + "log_odds_chosen": 15.668525695800781, + "log_odds_ratio": -0.27726009488105774, + "logits/chosen": 0.2326730489730835, + "logits/rejected": 0.898745059967041, + "logps/chosen": -1.0137486457824707, + "logps/rejected": -16.33362579345703, + "loss": 2.6635, + "nll_loss": 2.6358118057250977, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10137484967708588, + "rewards/margins": 1.5319877862930298, + "rewards/rejected": -1.6333625316619873, + "step": 944 + }, + { + "epoch": 0.5878693623639192, + "grad_norm": 0.46301695704460144, + "learning_rate": 2.6375e-05, + "log_odds_chosen": 17.152551651000977, + "log_odds_ratio": -0.07121972739696503, + "logits/chosen": 0.4616335928440094, + "logits/rejected": 1.5514743328094482, + "logps/chosen": -1.638116478919983, + "logps/rejected": -18.526134490966797, + "loss": 3.2308, + "nll_loss": 3.22369647026062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16381165385246277, + "rewards/margins": 1.6888020038604736, + "rewards/rejected": -1.8526135683059692, + "step": 945 + }, + { + "epoch": 0.5884914463452566, + "grad_norm": 0.4805068075656891, + "learning_rate": 2.6350000000000004e-05, + "log_odds_chosen": 10.249034881591797, + "log_odds_ratio": -0.19067715108394623, + "logits/chosen": 0.2742692232131958, + "logits/rejected": 0.5140730142593384, + "logps/chosen": -1.0531798601150513, + "logps/rejected": -10.836760520935059, + "loss": 2.7838, + "nll_loss": 2.764707565307617, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10531798750162125, + "rewards/margins": 0.9783580899238586, + "rewards/rejected": -1.0836760997772217, + "step": 946 + }, + { + "epoch": 0.5891135303265941, + "grad_norm": 0.4442595839500427, + "learning_rate": 2.6325e-05, + "log_odds_chosen": 16.763885498046875, + "log_odds_ratio": -0.14290139079093933, + "logits/chosen": 0.3244515061378479, + "logits/rejected": 1.0541630983352661, + "logps/chosen": -0.9952157735824585, + "logps/rejected": -17.375211715698242, + "loss": 3.0868, + "nll_loss": 3.072525978088379, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09952157735824585, + "rewards/margins": 1.6379995346069336, + "rewards/rejected": -1.7375211715698242, + "step": 947 + }, + { + "epoch": 0.5897356143079315, + "grad_norm": 0.3361284136772156, + "learning_rate": 2.6300000000000002e-05, + "log_odds_chosen": 19.245708465576172, + "log_odds_ratio": -0.07403115928173065, + "logits/chosen": 0.31128478050231934, + "logits/rejected": 0.9080635905265808, + "logps/chosen": -0.8605557680130005, + "logps/rejected": -19.487812042236328, + "loss": 3.7087, + "nll_loss": 3.7012686729431152, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08605558425188065, + "rewards/margins": 1.8627257347106934, + "rewards/rejected": -1.9487813711166382, + "step": 948 + }, + { + "epoch": 0.5903576982892691, + "grad_norm": 0.5872551798820496, + "learning_rate": 2.6275e-05, + "log_odds_chosen": 8.34841251373291, + "log_odds_ratio": -0.3960355222225189, + "logits/chosen": 0.1396239548921585, + "logits/rejected": 0.6412820219993591, + "logps/chosen": -1.0153343677520752, + "logps/rejected": -9.043679237365723, + "loss": 2.4457, + "nll_loss": 2.4060676097869873, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.101533442735672, + "rewards/margins": 0.8028345108032227, + "rewards/rejected": -0.9043679237365723, + "step": 949 + }, + { + "epoch": 0.5909797822706065, + "grad_norm": 0.5597155094146729, + "learning_rate": 2.625e-05, + "log_odds_chosen": 17.4451847076416, + "log_odds_ratio": -0.0725177526473999, + "logits/chosen": 0.110418900847435, + "logits/rejected": 0.8833154439926147, + "logps/chosen": -0.8898366093635559, + "logps/rejected": -17.72234535217285, + "loss": 2.631, + "nll_loss": 2.623771905899048, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08898366987705231, + "rewards/margins": 1.683250904083252, + "rewards/rejected": -1.7722344398498535, + "step": 950 + }, + { + "epoch": 0.591601866251944, + "grad_norm": 0.7085988521575928, + "learning_rate": 2.6225e-05, + "log_odds_chosen": 8.395122528076172, + "log_odds_ratio": -0.36476755142211914, + "logits/chosen": 0.26892557740211487, + "logits/rejected": 0.6090400218963623, + "logps/chosen": -1.258937954902649, + "logps/rejected": -9.453425407409668, + "loss": 3.015, + "nll_loss": 2.9785232543945312, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12589380145072937, + "rewards/margins": 0.8194488286972046, + "rewards/rejected": -0.9453425407409668, + "step": 951 + }, + { + "epoch": 0.5922239502332814, + "grad_norm": 0.7648904323577881, + "learning_rate": 2.6200000000000003e-05, + "log_odds_chosen": 8.385675430297852, + "log_odds_ratio": -0.21214787662029266, + "logits/chosen": 0.18494370579719543, + "logits/rejected": 0.8852918148040771, + "logps/chosen": -1.3512593507766724, + "logps/rejected": -9.508600234985352, + "loss": 2.5537, + "nll_loss": 2.532467842102051, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13512593507766724, + "rewards/margins": 0.8157341480255127, + "rewards/rejected": -0.9508601427078247, + "step": 952 + }, + { + "epoch": 0.592846034214619, + "grad_norm": 0.4409056305885315, + "learning_rate": 2.6175e-05, + "log_odds_chosen": 19.01350212097168, + "log_odds_ratio": -7.912763976491988e-06, + "logits/chosen": 0.24696475267410278, + "logits/rejected": 1.9544768333435059, + "logps/chosen": -1.246018648147583, + "logps/rejected": -19.90366554260254, + "loss": 2.4758, + "nll_loss": 2.4758312702178955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12460187077522278, + "rewards/margins": 1.8657647371292114, + "rewards/rejected": -1.9903665781021118, + "step": 953 + }, + { + "epoch": 0.5934681181959565, + "grad_norm": 0.5151073336601257, + "learning_rate": 2.6150000000000002e-05, + "log_odds_chosen": 10.778078079223633, + "log_odds_ratio": -0.29022905230522156, + "logits/chosen": 0.13743232190608978, + "logits/rejected": 0.6315884590148926, + "logps/chosen": -1.1233041286468506, + "logps/rejected": -11.37158203125, + "loss": 2.9005, + "nll_loss": 2.871509313583374, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11233042180538177, + "rewards/margins": 1.0248278379440308, + "rewards/rejected": -1.1371582746505737, + "step": 954 + }, + { + "epoch": 0.5940902021772939, + "grad_norm": 0.3522619307041168, + "learning_rate": 2.6124999999999998e-05, + "log_odds_chosen": 11.523155212402344, + "log_odds_ratio": -0.2107788622379303, + "logits/chosen": 0.3925516903400421, + "logits/rejected": 1.0518916845321655, + "logps/chosen": -1.0370612144470215, + "logps/rejected": -12.201189994812012, + "loss": 3.6713, + "nll_loss": 3.650195360183716, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10370612144470215, + "rewards/margins": 1.116412878036499, + "rewards/rejected": -1.2201189994812012, + "step": 955 + }, + { + "epoch": 0.5947122861586314, + "grad_norm": 0.5082216262817383, + "learning_rate": 2.61e-05, + "log_odds_chosen": 16.058555603027344, + "log_odds_ratio": -0.06356058269739151, + "logits/chosen": 0.17834536731243134, + "logits/rejected": 0.8007286190986633, + "logps/chosen": -1.3070101737976074, + "logps/rejected": -17.062992095947266, + "loss": 3.2413, + "nll_loss": 3.234963893890381, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13070102035999298, + "rewards/margins": 1.5755982398986816, + "rewards/rejected": -1.7062993049621582, + "step": 956 + }, + { + "epoch": 0.5953343701399689, + "grad_norm": 0.5702728629112244, + "learning_rate": 2.6075e-05, + "log_odds_chosen": 24.80580711364746, + "log_odds_ratio": -0.021674897521734238, + "logits/chosen": 0.2285652458667755, + "logits/rejected": 1.1431223154067993, + "logps/chosen": -1.0820637941360474, + "logps/rejected": -25.419315338134766, + "loss": 2.9305, + "nll_loss": 2.9282896518707275, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1082063838839531, + "rewards/margins": 2.433724880218506, + "rewards/rejected": -2.541931390762329, + "step": 957 + }, + { + "epoch": 0.5959564541213064, + "grad_norm": 1.0141100883483887, + "learning_rate": 2.6050000000000003e-05, + "log_odds_chosen": 16.26883316040039, + "log_odds_ratio": -0.3170120120048523, + "logits/chosen": 0.3057654798030853, + "logits/rejected": 1.2227212190628052, + "logps/chosen": -1.0155651569366455, + "logps/rejected": -16.994293212890625, + "loss": 2.2965, + "nll_loss": 2.264829158782959, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10155650973320007, + "rewards/margins": 1.5978727340698242, + "rewards/rejected": -1.6994292736053467, + "step": 958 + }, + { + "epoch": 0.5965785381026438, + "grad_norm": 0.489462673664093, + "learning_rate": 2.6025e-05, + "log_odds_chosen": 19.681560516357422, + "log_odds_ratio": -0.023856129497289658, + "logits/chosen": 0.2779539227485657, + "logits/rejected": 1.4224390983581543, + "logps/chosen": -1.0721073150634766, + "logps/rejected": -20.223628997802734, + "loss": 2.8642, + "nll_loss": 2.861823558807373, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10721074789762497, + "rewards/margins": 1.9151520729064941, + "rewards/rejected": -2.0223629474639893, + "step": 959 + }, + { + "epoch": 0.5972006220839814, + "grad_norm": 0.5690145492553711, + "learning_rate": 2.6000000000000002e-05, + "log_odds_chosen": 16.70014190673828, + "log_odds_ratio": -0.10952571779489517, + "logits/chosen": 0.26871487498283386, + "logits/rejected": 1.045303463935852, + "logps/chosen": -1.7916289567947388, + "logps/rejected": -18.297130584716797, + "loss": 2.8808, + "nll_loss": 2.869889736175537, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1791629046201706, + "rewards/margins": 1.650550127029419, + "rewards/rejected": -1.8297131061553955, + "step": 960 + }, + { + "epoch": 0.5978227060653188, + "grad_norm": 0.37688279151916504, + "learning_rate": 2.5974999999999998e-05, + "log_odds_chosen": 11.515750885009766, + "log_odds_ratio": -0.12808285653591156, + "logits/chosen": 0.4044106602668762, + "logits/rejected": 1.100311517715454, + "logps/chosen": -1.228320837020874, + "logps/rejected": -12.13768482208252, + "loss": 3.5135, + "nll_loss": 3.500669002532959, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12283208966255188, + "rewards/margins": 1.0909364223480225, + "rewards/rejected": -1.213768482208252, + "step": 961 + }, + { + "epoch": 0.5984447900466563, + "grad_norm": 0.5325865745544434, + "learning_rate": 2.595e-05, + "log_odds_chosen": 21.426223754882812, + "log_odds_ratio": -0.13188225030899048, + "logits/chosen": 0.30785199999809265, + "logits/rejected": 1.3725229501724243, + "logps/chosen": -1.0754218101501465, + "logps/rejected": -21.913288116455078, + "loss": 2.9752, + "nll_loss": 2.9620540142059326, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10754218697547913, + "rewards/margins": 2.0837864875793457, + "rewards/rejected": -2.191328525543213, + "step": 962 + }, + { + "epoch": 0.5990668740279937, + "grad_norm": 0.5490668416023254, + "learning_rate": 2.5925e-05, + "log_odds_chosen": 9.045169830322266, + "log_odds_ratio": -0.29519692063331604, + "logits/chosen": 0.2381560206413269, + "logits/rejected": 0.8757392764091492, + "logps/chosen": -1.1203395128250122, + "logps/rejected": -9.771276473999023, + "loss": 2.7598, + "nll_loss": 2.730231761932373, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11203394830226898, + "rewards/margins": 0.8650937676429749, + "rewards/rejected": -0.977127730846405, + "step": 963 + }, + { + "epoch": 0.5996889580093313, + "grad_norm": 0.5232042074203491, + "learning_rate": 2.5900000000000003e-05, + "log_odds_chosen": 16.675994873046875, + "log_odds_ratio": -0.07509205490350723, + "logits/chosen": 0.24147437512874603, + "logits/rejected": 1.0761547088623047, + "logps/chosen": -0.8001296520233154, + "logps/rejected": -16.835155487060547, + "loss": 3.0022, + "nll_loss": 2.9946541786193848, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0800129622220993, + "rewards/margins": 1.6035027503967285, + "rewards/rejected": -1.6835157871246338, + "step": 964 + }, + { + "epoch": 0.6003110419906688, + "grad_norm": 0.36291661858558655, + "learning_rate": 2.5875e-05, + "log_odds_chosen": 19.8973445892334, + "log_odds_ratio": -0.022971786558628082, + "logits/chosen": 0.07175733894109726, + "logits/rejected": 0.8581687211990356, + "logps/chosen": -0.9344284534454346, + "logps/rejected": -20.3033390045166, + "loss": 2.6604, + "nll_loss": 2.658095359802246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09344284236431122, + "rewards/margins": 1.9368910789489746, + "rewards/rejected": -2.030333995819092, + "step": 965 + }, + { + "epoch": 0.6009331259720062, + "grad_norm": 0.8470042943954468, + "learning_rate": 2.585e-05, + "log_odds_chosen": 16.980083465576172, + "log_odds_ratio": -0.2519052028656006, + "logits/chosen": 0.22023792564868927, + "logits/rejected": 1.116905689239502, + "logps/chosen": -1.0762019157409668, + "logps/rejected": -17.686002731323242, + "loss": 3.0433, + "nll_loss": 3.0181312561035156, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10762019455432892, + "rewards/margins": 1.6609801054000854, + "rewards/rejected": -1.7686002254486084, + "step": 966 + }, + { + "epoch": 0.6015552099533437, + "grad_norm": 0.6046644449234009, + "learning_rate": 2.5824999999999998e-05, + "log_odds_chosen": 12.115790367126465, + "log_odds_ratio": -0.22894859313964844, + "logits/chosen": 0.21607916057109833, + "logits/rejected": 0.9050865173339844, + "logps/chosen": -1.0518709421157837, + "logps/rejected": -12.8179292678833, + "loss": 2.7021, + "nll_loss": 2.6792078018188477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10518710315227509, + "rewards/margins": 1.1766059398651123, + "rewards/rejected": -1.2817929983139038, + "step": 967 + }, + { + "epoch": 0.6021772939346812, + "grad_norm": 0.5602324604988098, + "learning_rate": 2.58e-05, + "log_odds_chosen": 16.60848617553711, + "log_odds_ratio": -0.16328565776348114, + "logits/chosen": 0.2715965807437897, + "logits/rejected": 1.2223488092422485, + "logps/chosen": -1.2314372062683105, + "logps/rejected": -17.56035804748535, + "loss": 3.1499, + "nll_loss": 3.1335926055908203, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12314371764659882, + "rewards/margins": 1.63289213180542, + "rewards/rejected": -1.7560360431671143, + "step": 968 + }, + { + "epoch": 0.6027993779160187, + "grad_norm": 0.499213308095932, + "learning_rate": 2.5775e-05, + "log_odds_chosen": 9.463824272155762, + "log_odds_ratio": -0.2594992518424988, + "logits/chosen": 0.13768236339092255, + "logits/rejected": 0.3776164650917053, + "logps/chosen": -1.2077715396881104, + "logps/rejected": -10.380818367004395, + "loss": 3.2398, + "nll_loss": 3.2138986587524414, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12077715992927551, + "rewards/margins": 0.9173046350479126, + "rewards/rejected": -1.0380817651748657, + "step": 969 + }, + { + "epoch": 0.6034214618973561, + "grad_norm": 0.44655492901802063, + "learning_rate": 2.5750000000000002e-05, + "log_odds_chosen": 19.190210342407227, + "log_odds_ratio": -0.10930891335010529, + "logits/chosen": 0.1695220023393631, + "logits/rejected": 1.1123127937316895, + "logps/chosen": -1.1357357501983643, + "logps/rejected": -19.973918914794922, + "loss": 2.8969, + "nll_loss": 2.885981321334839, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1135735735297203, + "rewards/margins": 1.8838183879852295, + "rewards/rejected": -1.997391939163208, + "step": 970 + }, + { + "epoch": 0.6040435458786936, + "grad_norm": 0.539529025554657, + "learning_rate": 2.5725e-05, + "log_odds_chosen": 18.83353042602539, + "log_odds_ratio": -0.09447011351585388, + "logits/chosen": 0.15006932616233826, + "logits/rejected": 0.7248711585998535, + "logps/chosen": -1.026084303855896, + "logps/rejected": -19.30118179321289, + "loss": 3.159, + "nll_loss": 3.1495561599731445, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10260843485593796, + "rewards/margins": 1.827509880065918, + "rewards/rejected": -1.9301183223724365, + "step": 971 + }, + { + "epoch": 0.6046656298600311, + "grad_norm": 5.969109058380127, + "learning_rate": 2.57e-05, + "log_odds_chosen": 14.402713775634766, + "log_odds_ratio": -0.3919801414012909, + "logits/chosen": 0.18785786628723145, + "logits/rejected": 0.6937410235404968, + "logps/chosen": -1.815807819366455, + "logps/rejected": -15.869197845458984, + "loss": 3.7518, + "nll_loss": 3.712613821029663, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1815807819366455, + "rewards/margins": 1.4053387641906738, + "rewards/rejected": -1.5869196653366089, + "step": 972 + }, + { + "epoch": 0.6052877138413686, + "grad_norm": 0.48145902156829834, + "learning_rate": 2.5675e-05, + "log_odds_chosen": 18.73621940612793, + "log_odds_ratio": -0.11193342506885529, + "logits/chosen": 0.20852616429328918, + "logits/rejected": 1.1668469905853271, + "logps/chosen": -1.0225214958190918, + "logps/rejected": -19.24210548400879, + "loss": 2.9252, + "nll_loss": 2.9140021800994873, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10225215554237366, + "rewards/margins": 1.8219584226608276, + "rewards/rejected": -1.924210548400879, + "step": 973 + }, + { + "epoch": 0.605909797822706, + "grad_norm": 0.3980204463005066, + "learning_rate": 2.5650000000000003e-05, + "log_odds_chosen": 11.081676483154297, + "log_odds_ratio": -0.08179325610399246, + "logits/chosen": 0.26339149475097656, + "logits/rejected": 0.9369683861732483, + "logps/chosen": -0.9269077777862549, + "logps/rejected": -11.361970901489258, + "loss": 3.488, + "nll_loss": 3.479806900024414, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09269078075885773, + "rewards/margins": 1.043506383895874, + "rewards/rejected": -1.1361970901489258, + "step": 974 + }, + { + "epoch": 0.6065318818040435, + "grad_norm": 0.5839635729789734, + "learning_rate": 2.5625e-05, + "log_odds_chosen": 13.500631332397461, + "log_odds_ratio": -0.0822688415646553, + "logits/chosen": 0.28204748034477234, + "logits/rejected": 1.2430696487426758, + "logps/chosen": -1.311071753501892, + "logps/rejected": -14.472478866577148, + "loss": 3.6005, + "nll_loss": 3.59232234954834, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1311071664094925, + "rewards/margins": 1.3161406517028809, + "rewards/rejected": -1.4472477436065674, + "step": 975 + }, + { + "epoch": 0.6071539657853811, + "grad_norm": 0.44382327795028687, + "learning_rate": 2.5600000000000002e-05, + "log_odds_chosen": 18.092775344848633, + "log_odds_ratio": -0.11017350852489471, + "logits/chosen": 0.06789172440767288, + "logits/rejected": 0.7348443865776062, + "logps/chosen": -0.9244815111160278, + "logps/rejected": -18.366724014282227, + "loss": 3.2554, + "nll_loss": 3.2444264888763428, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0924481526017189, + "rewards/margins": 1.744224190711975, + "rewards/rejected": -1.8366724252700806, + "step": 976 + }, + { + "epoch": 0.6077760497667185, + "grad_norm": 0.6069784164428711, + "learning_rate": 2.5574999999999998e-05, + "log_odds_chosen": 10.627729415893555, + "log_odds_ratio": -0.12660586833953857, + "logits/chosen": 0.0689058005809784, + "logits/rejected": 0.5917812585830688, + "logps/chosen": -0.9631859064102173, + "logps/rejected": -11.07681655883789, + "loss": 2.7384, + "nll_loss": 2.7257091999053955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09631859511137009, + "rewards/margins": 1.0113630294799805, + "rewards/rejected": -1.1076816320419312, + "step": 977 + }, + { + "epoch": 0.608398133748056, + "grad_norm": 0.45217904448509216, + "learning_rate": 2.555e-05, + "log_odds_chosen": 15.359528541564941, + "log_odds_ratio": -0.21051090955734253, + "logits/chosen": 0.065369613468647, + "logits/rejected": 0.8507799506187439, + "logps/chosen": -1.0331388711929321, + "logps/rejected": -15.877238273620605, + "loss": 2.6868, + "nll_loss": 2.6657238006591797, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10331389307975769, + "rewards/margins": 1.4844098091125488, + "rewards/rejected": -1.587723731994629, + "step": 978 + }, + { + "epoch": 0.6090202177293935, + "grad_norm": 0.5865066051483154, + "learning_rate": 2.5525e-05, + "log_odds_chosen": 17.618770599365234, + "log_odds_ratio": -0.10912881791591644, + "logits/chosen": 0.110966756939888, + "logits/rejected": 1.0559561252593994, + "logps/chosen": -1.1738271713256836, + "logps/rejected": -18.374874114990234, + "loss": 2.8958, + "nll_loss": 2.8849360942840576, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1173827201128006, + "rewards/margins": 1.7201048135757446, + "rewards/rejected": -1.8374874591827393, + "step": 979 + }, + { + "epoch": 0.609642301710731, + "grad_norm": 0.4005695879459381, + "learning_rate": 2.5500000000000003e-05, + "log_odds_chosen": 15.089963912963867, + "log_odds_ratio": -0.10706076771020889, + "logits/chosen": 0.19660604000091553, + "logits/rejected": 0.7421777844429016, + "logps/chosen": -1.6507558822631836, + "logps/rejected": -16.388992309570312, + "loss": 3.6645, + "nll_loss": 3.6537725925445557, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16507560014724731, + "rewards/margins": 1.4738237857818604, + "rewards/rejected": -1.638899326324463, + "step": 980 + }, + { + "epoch": 0.6102643856920684, + "grad_norm": 0.49785563349723816, + "learning_rate": 2.5475e-05, + "log_odds_chosen": 11.623516082763672, + "log_odds_ratio": -0.26292070746421814, + "logits/chosen": 0.11350183188915253, + "logits/rejected": 0.8784139752388, + "logps/chosen": -1.347922682762146, + "logps/rejected": -12.797706604003906, + "loss": 2.894, + "nll_loss": 2.867668390274048, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1347922682762146, + "rewards/margins": 1.144978404045105, + "rewards/rejected": -1.2797706127166748, + "step": 981 + }, + { + "epoch": 0.6108864696734059, + "grad_norm": 0.4649457037448883, + "learning_rate": 2.5450000000000002e-05, + "log_odds_chosen": 20.90005874633789, + "log_odds_ratio": -8.940698137394065e-08, + "logits/chosen": 0.21456483006477356, + "logits/rejected": 1.568105697631836, + "logps/chosen": -1.1345700025558472, + "logps/rejected": -21.52691650390625, + "loss": 3.2944, + "nll_loss": 3.2943665981292725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11345699429512024, + "rewards/margins": 2.0392348766326904, + "rewards/rejected": -2.1526918411254883, + "step": 982 + }, + { + "epoch": 0.6115085536547434, + "grad_norm": 0.5988860130310059, + "learning_rate": 2.5424999999999998e-05, + "log_odds_chosen": 10.66419792175293, + "log_odds_ratio": -0.3058769106864929, + "logits/chosen": 0.05050152167677879, + "logits/rejected": 0.2465941309928894, + "logps/chosen": -0.8942004442214966, + "logps/rejected": -11.015356063842773, + "loss": 3.0743, + "nll_loss": 3.0436720848083496, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08942003548145294, + "rewards/margins": 1.012115716934204, + "rewards/rejected": -1.101535677909851, + "step": 983 + }, + { + "epoch": 0.6121306376360809, + "grad_norm": 0.6072388887405396, + "learning_rate": 2.54e-05, + "log_odds_chosen": 16.32722282409668, + "log_odds_ratio": -0.11603593826293945, + "logits/chosen": 0.12268135696649551, + "logits/rejected": 1.0790374279022217, + "logps/chosen": -1.3971768617630005, + "logps/rejected": -17.381330490112305, + "loss": 2.7476, + "nll_loss": 2.735957384109497, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1397176831960678, + "rewards/margins": 1.5984153747558594, + "rewards/rejected": -1.7381330728530884, + "step": 984 + }, + { + "epoch": 0.6127527216174183, + "grad_norm": 0.4394105076789856, + "learning_rate": 2.5375e-05, + "log_odds_chosen": 20.815372467041016, + "log_odds_ratio": -0.11913128942251205, + "logits/chosen": 0.2849823832511902, + "logits/rejected": 1.7651139497756958, + "logps/chosen": -1.3435211181640625, + "logps/rejected": -21.902217864990234, + "loss": 3.1595, + "nll_loss": 3.1476199626922607, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13435211777687073, + "rewards/margins": 2.0558695793151855, + "rewards/rejected": -2.1902217864990234, + "step": 985 + }, + { + "epoch": 0.6133748055987558, + "grad_norm": 0.4176173210144043, + "learning_rate": 2.5350000000000003e-05, + "log_odds_chosen": 20.118194580078125, + "log_odds_ratio": -0.041579604148864746, + "logits/chosen": 0.2213955968618393, + "logits/rejected": 1.2255231142044067, + "logps/chosen": -1.022838830947876, + "logps/rejected": -20.671480178833008, + "loss": 3.4104, + "nll_loss": 3.406257390975952, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10228388756513596, + "rewards/margins": 1.9648641347885132, + "rewards/rejected": -2.067148208618164, + "step": 986 + }, + { + "epoch": 0.6139968895800934, + "grad_norm": 0.5290238261222839, + "learning_rate": 2.5325e-05, + "log_odds_chosen": 15.454238891601562, + "log_odds_ratio": -0.09638696908950806, + "logits/chosen": 0.2696585953235626, + "logits/rejected": 1.1312285661697388, + "logps/chosen": -1.2690268754959106, + "logps/rejected": -16.407495498657227, + "loss": 3.1839, + "nll_loss": 3.174257755279541, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12690268456935883, + "rewards/margins": 1.51384699344635, + "rewards/rejected": -1.6407495737075806, + "step": 987 + }, + { + "epoch": 0.6146189735614308, + "grad_norm": 0.44811832904815674, + "learning_rate": 2.5300000000000002e-05, + "log_odds_chosen": 18.196022033691406, + "log_odds_ratio": -0.0027530835941433907, + "logits/chosen": 0.306151807308197, + "logits/rejected": 1.4259248971939087, + "logps/chosen": -1.3465137481689453, + "logps/rejected": -19.213729858398438, + "loss": 3.4642, + "nll_loss": 3.463940143585205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13465136289596558, + "rewards/margins": 1.7867215871810913, + "rewards/rejected": -1.9213731288909912, + "step": 988 + }, + { + "epoch": 0.6152410575427683, + "grad_norm": 0.5820187330245972, + "learning_rate": 2.5274999999999998e-05, + "log_odds_chosen": 13.781078338623047, + "log_odds_ratio": -0.19668269157409668, + "logits/chosen": -0.0342557318508625, + "logits/rejected": 0.30851006507873535, + "logps/chosen": -0.8840237855911255, + "logps/rejected": -14.013053894042969, + "loss": 2.5317, + "nll_loss": 2.5119993686676025, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08840238302946091, + "rewards/margins": 1.3129030466079712, + "rewards/rejected": -1.4013054370880127, + "step": 989 + }, + { + "epoch": 0.6158631415241057, + "grad_norm": 0.5693361163139343, + "learning_rate": 2.525e-05, + "log_odds_chosen": 9.703099250793457, + "log_odds_ratio": -0.2681022882461548, + "logits/chosen": 0.00022461498156189919, + "logits/rejected": 0.3866625428199768, + "logps/chosen": -1.0230138301849365, + "logps/rejected": -10.349815368652344, + "loss": 2.8994, + "nll_loss": 2.8725709915161133, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10230138897895813, + "rewards/margins": 0.9326801300048828, + "rewards/rejected": -1.0349814891815186, + "step": 990 + }, + { + "epoch": 0.6164852255054433, + "grad_norm": 0.5749667882919312, + "learning_rate": 2.5225e-05, + "log_odds_chosen": 10.241193771362305, + "log_odds_ratio": -0.3188455104827881, + "logits/chosen": 0.07506673783063889, + "logits/rejected": 0.46898865699768066, + "logps/chosen": -1.2021185159683228, + "logps/rejected": -11.231522560119629, + "loss": 2.8334, + "nll_loss": 2.8015153408050537, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12021185457706451, + "rewards/margins": 1.0029405355453491, + "rewards/rejected": -1.1231523752212524, + "step": 991 + }, + { + "epoch": 0.6171073094867807, + "grad_norm": 0.6073411107063293, + "learning_rate": 2.5200000000000003e-05, + "log_odds_chosen": 11.625310897827148, + "log_odds_ratio": -0.19626720249652863, + "logits/chosen": 0.2134128212928772, + "logits/rejected": 0.8630800247192383, + "logps/chosen": -0.7396443486213684, + "logps/rejected": -11.635083198547363, + "loss": 3.168, + "nll_loss": 3.148395538330078, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0739644318819046, + "rewards/margins": 1.0895440578460693, + "rewards/rejected": -1.163508415222168, + "step": 992 + }, + { + "epoch": 0.6177293934681182, + "grad_norm": 0.5637009143829346, + "learning_rate": 2.5175e-05, + "log_odds_chosen": 8.45470142364502, + "log_odds_ratio": -0.38158631324768066, + "logits/chosen": 0.02333865687251091, + "logits/rejected": 0.4010867476463318, + "logps/chosen": -1.1170642375946045, + "logps/rejected": -9.288888931274414, + "loss": 2.8206, + "nll_loss": 2.7824807167053223, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11170642077922821, + "rewards/margins": 0.8171824216842651, + "rewards/rejected": -0.9288888573646545, + "step": 993 + }, + { + "epoch": 0.6183514774494556, + "grad_norm": 0.5053258538246155, + "learning_rate": 2.515e-05, + "log_odds_chosen": 21.61244773864746, + "log_odds_ratio": -0.11922389268875122, + "logits/chosen": 0.281334787607193, + "logits/rejected": 1.7507303953170776, + "logps/chosen": -1.2235077619552612, + "logps/rejected": -22.507673263549805, + "loss": 2.9003, + "nll_loss": 2.88834810256958, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1223507821559906, + "rewards/margins": 2.1284167766571045, + "rewards/rejected": -2.250767469406128, + "step": 994 + }, + { + "epoch": 0.6189735614307932, + "grad_norm": 0.4218065142631531, + "learning_rate": 2.5124999999999997e-05, + "log_odds_chosen": 13.609810829162598, + "log_odds_ratio": -0.17529192566871643, + "logits/chosen": 0.20690587162971497, + "logits/rejected": 0.9557357430458069, + "logps/chosen": -1.2656549215316772, + "logps/rejected": -14.553084373474121, + "loss": 3.604, + "nll_loss": 3.5864503383636475, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12656548619270325, + "rewards/margins": 1.3287428617477417, + "rewards/rejected": -1.455308437347412, + "step": 995 + }, + { + "epoch": 0.6195956454121306, + "grad_norm": 0.6388038396835327, + "learning_rate": 2.51e-05, + "log_odds_chosen": 13.46223258972168, + "log_odds_ratio": -0.28370407223701477, + "logits/chosen": 0.19488823413848877, + "logits/rejected": 0.7138241529464722, + "logps/chosen": -1.0961161851882935, + "logps/rejected": -14.22940444946289, + "loss": 3.1086, + "nll_loss": 3.0802266597747803, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1096116155385971, + "rewards/margins": 1.3133288621902466, + "rewards/rejected": -1.4229404926300049, + "step": 996 + }, + { + "epoch": 0.6202177293934681, + "grad_norm": 0.49505603313446045, + "learning_rate": 2.5075e-05, + "log_odds_chosen": 9.735605239868164, + "log_odds_ratio": -0.21508948504924774, + "logits/chosen": 0.14490671455860138, + "logits/rejected": 0.45840179920196533, + "logps/chosen": -1.259823203086853, + "logps/rejected": -10.751209259033203, + "loss": 3.2765, + "nll_loss": 3.254956007003784, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12598231434822083, + "rewards/margins": 0.9491385221481323, + "rewards/rejected": -1.0751209259033203, + "step": 997 + }, + { + "epoch": 0.6208398133748056, + "grad_norm": 0.5221970677375793, + "learning_rate": 2.5050000000000002e-05, + "log_odds_chosen": 17.351388931274414, + "log_odds_ratio": -0.0899883359670639, + "logits/chosen": 0.22275736927986145, + "logits/rejected": 0.9023792743682861, + "logps/chosen": -1.104635238647461, + "logps/rejected": -18.011194229125977, + "loss": 3.4768, + "nll_loss": 3.467817783355713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11046352982521057, + "rewards/margins": 1.690656065940857, + "rewards/rejected": -1.8011195659637451, + "step": 998 + }, + { + "epoch": 0.6214618973561431, + "grad_norm": 0.5278105735778809, + "learning_rate": 2.5025e-05, + "log_odds_chosen": 19.447450637817383, + "log_odds_ratio": -0.1714281141757965, + "logits/chosen": 0.2100244164466858, + "logits/rejected": 1.0468909740447998, + "logps/chosen": -0.9524563550949097, + "logps/rejected": -19.773094177246094, + "loss": 3.0782, + "nll_loss": 3.06109619140625, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09524564445018768, + "rewards/margins": 1.882063865661621, + "rewards/rejected": -1.9773094654083252, + "step": 999 + }, + { + "epoch": 0.6220839813374806, + "grad_norm": 0.7394985556602478, + "learning_rate": 2.5e-05, + "log_odds_chosen": 24.217151641845703, + "log_odds_ratio": -0.11016383767127991, + "logits/chosen": 0.09682357311248779, + "logits/rejected": 1.188527226448059, + "logps/chosen": -1.3372222185134888, + "logps/rejected": -25.23487091064453, + "loss": 2.2574, + "nll_loss": 2.2463905811309814, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1337222158908844, + "rewards/margins": 2.3897650241851807, + "rewards/rejected": -2.5234873294830322, + "step": 1000 + } + ], + "logging_steps": 1, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}