{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 530, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0037735849056603774, "grad_norm": 1.4419530630111694, "learning_rate": 4.9905660377358493e-05, "log_odds_chosen": 0.4804525375366211, "log_odds_ratio": -0.6413240432739258, "logits/chosen": 0.3143516182899475, "logits/rejected": -1.3078216314315796, "logps/chosen": -1.7236464023590088, "logps/rejected": -2.0679845809936523, "loss": 2.2212, "nll_loss": 2.1570305824279785, "rewards/accuracies": 0.6875, "rewards/chosen": -0.17236465215682983, "rewards/margins": 0.03443381190299988, "rewards/rejected": -0.2067984640598297, "step": 1 }, { "epoch": 0.007547169811320755, "grad_norm": 1.0689440965652466, "learning_rate": 4.9811320754716985e-05, "log_odds_chosen": -0.09131823480129242, "log_odds_ratio": -0.8326093554496765, "logits/chosen": -0.3561238646507263, "logits/rejected": -1.7676267623901367, "logps/chosen": -1.9550193548202515, "logps/rejected": -1.842592477798462, "loss": 2.1604, "nll_loss": 2.077115297317505, "rewards/accuracies": 0.375, "rewards/chosen": -0.19550195336341858, "rewards/margins": -0.011242689564824104, "rewards/rejected": -0.18425926566123962, "step": 2 }, { "epoch": 0.011320754716981131, "grad_norm": 1.0649460554122925, "learning_rate": 4.9716981132075476e-05, "log_odds_chosen": 0.6475443840026855, "log_odds_ratio": -0.5298449993133545, "logits/chosen": 0.7758256196975708, "logits/rejected": -1.0398880243301392, "logps/chosen": -1.5218658447265625, "logps/rejected": -1.9791918992996216, "loss": 1.9187, "nll_loss": 1.8656777143478394, "rewards/accuracies": 0.75, "rewards/chosen": -0.1521865725517273, "rewards/margins": 0.04573261737823486, "rewards/rejected": -0.19791918992996216, "step": 3 }, { "epoch": 0.01509433962264151, "grad_norm": 0.8160853981971741, "learning_rate": 4.962264150943397e-05, "log_odds_chosen": 0.4427639842033386, "log_odds_ratio": -0.6302679777145386, "logits/chosen": -1.3039603233337402, "logits/rejected": -2.81551456451416, "logps/chosen": -1.551461100578308, "logps/rejected": -1.879504919052124, "loss": 1.6384, "nll_loss": 1.575362205505371, "rewards/accuracies": 0.6875, "rewards/chosen": -0.15514612197875977, "rewards/margins": 0.032804377377033234, "rewards/rejected": -0.1879504919052124, "step": 4 }, { "epoch": 0.018867924528301886, "grad_norm": 0.8768725991249084, "learning_rate": 4.952830188679246e-05, "log_odds_chosen": 0.1845681071281433, "log_odds_ratio": -0.7325757741928101, "logits/chosen": -0.4272328019142151, "logits/rejected": -1.7803056240081787, "logps/chosen": -1.8399394750595093, "logps/rejected": -1.96200692653656, "loss": 2.0367, "nll_loss": 1.9634612798690796, "rewards/accuracies": 0.5, "rewards/chosen": -0.18399396538734436, "rewards/margins": 0.012206735089421272, "rewards/rejected": -0.19620069861412048, "step": 5 }, { "epoch": 0.022641509433962263, "grad_norm": 3.759185314178467, "learning_rate": 4.943396226415095e-05, "log_odds_chosen": 0.4950372576713562, "log_odds_ratio": -0.5588759183883667, "logits/chosen": -0.019302427768707275, "logits/rejected": -1.1816325187683105, "logps/chosen": -1.6534287929534912, "logps/rejected": -2.0343308448791504, "loss": 1.9177, "nll_loss": 1.8618510961532593, "rewards/accuracies": 0.75, "rewards/chosen": -0.16534289717674255, "rewards/margins": 0.038090191781520844, "rewards/rejected": -0.2034330666065216, "step": 6 }, { "epoch": 0.026415094339622643, "grad_norm": 0.9324076175689697, "learning_rate": 4.933962264150943e-05, "log_odds_chosen": 0.12158789485692978, "log_odds_ratio": -0.7240866422653198, "logits/chosen": -0.2951589822769165, "logits/rejected": -2.193943500518799, "logps/chosen": -1.9935435056686401, "logps/rejected": -2.0926284790039062, "loss": 1.9309, "nll_loss": 1.8584506511688232, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1993543654680252, "rewards/margins": 0.009908500127494335, "rewards/rejected": -0.20926286280155182, "step": 7 }, { "epoch": 0.03018867924528302, "grad_norm": 0.7746831178665161, "learning_rate": 4.9245283018867924e-05, "log_odds_chosen": 1.0372132062911987, "log_odds_ratio": -0.44096675515174866, "logits/chosen": -0.7707222700119019, "logits/rejected": -2.369938850402832, "logps/chosen": -1.2799935340881348, "logps/rejected": -2.0016367435455322, "loss": 1.2616, "nll_loss": 1.2174644470214844, "rewards/accuracies": 0.8125, "rewards/chosen": -0.12799936532974243, "rewards/margins": 0.07216434180736542, "rewards/rejected": -0.20016369223594666, "step": 8 }, { "epoch": 0.033962264150943396, "grad_norm": 0.7088687419891357, "learning_rate": 4.9150943396226415e-05, "log_odds_chosen": 0.1920507550239563, "log_odds_ratio": -0.729371964931488, "logits/chosen": -0.6697689294815063, "logits/rejected": -3.048678398132324, "logps/chosen": -1.8056210279464722, "logps/rejected": -1.9406684637069702, "loss": 1.89, "nll_loss": 1.8170198202133179, "rewards/accuracies": 0.4375, "rewards/chosen": -0.1805620938539505, "rewards/margins": 0.01350475661456585, "rewards/rejected": -0.1940668523311615, "step": 9 }, { "epoch": 0.03773584905660377, "grad_norm": 0.7092457413673401, "learning_rate": 4.9056603773584906e-05, "log_odds_chosen": 0.8096474409103394, "log_odds_ratio": -0.4400815963745117, "logits/chosen": -0.08921952545642853, "logits/rejected": -2.1929593086242676, "logps/chosen": -1.4596810340881348, "logps/rejected": -2.0628461837768555, "loss": 1.6278, "nll_loss": 1.583770751953125, "rewards/accuracies": 0.875, "rewards/chosen": -0.14596810936927795, "rewards/margins": 0.060316506773233414, "rewards/rejected": -0.20628461241722107, "step": 10 }, { "epoch": 0.04150943396226415, "grad_norm": 0.7152612209320068, "learning_rate": 4.89622641509434e-05, "log_odds_chosen": 0.5647962689399719, "log_odds_ratio": -0.5348465442657471, "logits/chosen": -0.022822290658950806, "logits/rejected": -0.9105501174926758, "logps/chosen": -1.738325595855713, "logps/rejected": -2.1731629371643066, "loss": 1.715, "nll_loss": 1.6615500450134277, "rewards/accuracies": 0.75, "rewards/chosen": -0.17383255064487457, "rewards/margins": 0.043483734130859375, "rewards/rejected": -0.21731628477573395, "step": 11 }, { "epoch": 0.045283018867924525, "grad_norm": 0.49451878666877747, "learning_rate": 4.886792452830189e-05, "log_odds_chosen": 0.8111759424209595, "log_odds_ratio": -0.5735799670219421, "logits/chosen": -0.1950460523366928, "logits/rejected": -1.8716578483581543, "logps/chosen": -1.6253588199615479, "logps/rejected": -2.223177671432495, "loss": 1.6836, "nll_loss": 1.6262810230255127, "rewards/accuracies": 0.6875, "rewards/chosen": -0.1625358760356903, "rewards/margins": 0.05978189408779144, "rewards/rejected": -0.22231778502464294, "step": 12 }, { "epoch": 0.04905660377358491, "grad_norm": 0.49509674310684204, "learning_rate": 4.877358490566038e-05, "log_odds_chosen": 1.2461881637573242, "log_odds_ratio": -0.3561839163303375, "logits/chosen": -0.05543512478470802, "logits/rejected": -1.6817309856414795, "logps/chosen": -1.4552483558654785, "logps/rejected": -2.393232583999634, "loss": 1.6718, "nll_loss": 1.6361980438232422, "rewards/accuracies": 0.875, "rewards/chosen": -0.14552482962608337, "rewards/margins": 0.09379842877388, "rewards/rejected": -0.23932327330112457, "step": 13 }, { "epoch": 0.052830188679245285, "grad_norm": 0.5647757649421692, "learning_rate": 4.867924528301887e-05, "log_odds_chosen": 0.9501932263374329, "log_odds_ratio": -0.4295998215675354, "logits/chosen": -0.2805134057998657, "logits/rejected": -2.1965882778167725, "logps/chosen": -1.7489657402038574, "logps/rejected": -2.5288243293762207, "loss": 1.7557, "nll_loss": 1.7127894163131714, "rewards/accuracies": 0.875, "rewards/chosen": -0.17489658296108246, "rewards/margins": 0.07798586785793304, "rewards/rejected": -0.2528824508190155, "step": 14 }, { "epoch": 0.05660377358490566, "grad_norm": 0.46898186206817627, "learning_rate": 4.858490566037736e-05, "log_odds_chosen": 1.263291597366333, "log_odds_ratio": -0.4335542321205139, "logits/chosen": -0.10565178096294403, "logits/rejected": -1.839094638824463, "logps/chosen": -1.3841191530227661, "logps/rejected": -2.2567789554595947, "loss": 1.4752, "nll_loss": 1.431824803352356, "rewards/accuracies": 0.75, "rewards/chosen": -0.13841190934181213, "rewards/margins": 0.0872659832239151, "rewards/rejected": -0.22567789256572723, "step": 15 }, { "epoch": 0.06037735849056604, "grad_norm": 0.43158116936683655, "learning_rate": 4.849056603773585e-05, "log_odds_chosen": 0.6095455288887024, "log_odds_ratio": -0.5792121887207031, "logits/chosen": -0.17698495090007782, "logits/rejected": -2.8387160301208496, "logps/chosen": -2.014704942703247, "logps/rejected": -2.574517250061035, "loss": 1.9717, "nll_loss": 1.9137517213821411, "rewards/accuracies": 0.75, "rewards/chosen": -0.2014704942703247, "rewards/margins": 0.05598121136426926, "rewards/rejected": -0.25745171308517456, "step": 16 }, { "epoch": 0.06415094339622641, "grad_norm": 0.4689409136772156, "learning_rate": 4.8396226415094344e-05, "log_odds_chosen": 1.2785823345184326, "log_odds_ratio": -0.3914850652217865, "logits/chosen": -0.17786982655525208, "logits/rejected": -1.9287208318710327, "logps/chosen": -1.5383832454681396, "logps/rejected": -2.564413547515869, "loss": 1.6308, "nll_loss": 1.5916929244995117, "rewards/accuracies": 0.8125, "rewards/chosen": -0.15383832156658173, "rewards/margins": 0.10260303318500519, "rewards/rejected": -0.2564413547515869, "step": 17 }, { "epoch": 0.06792452830188679, "grad_norm": 0.4632433354854584, "learning_rate": 4.8301886792452835e-05, "log_odds_chosen": 1.8498305082321167, "log_odds_ratio": -0.2724185287952423, "logits/chosen": 0.00803515687584877, "logits/rejected": -1.558810830116272, "logps/chosen": -1.5008854866027832, "logps/rejected": -3.094841957092285, "loss": 1.3575, "nll_loss": 1.3302488327026367, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15008854866027832, "rewards/margins": 0.15939566493034363, "rewards/rejected": -0.30948421359062195, "step": 18 }, { "epoch": 0.07169811320754717, "grad_norm": 0.35216155648231506, "learning_rate": 4.8207547169811326e-05, "log_odds_chosen": 2.0445966720581055, "log_odds_ratio": -0.22769802808761597, "logits/chosen": -1.055422306060791, "logits/rejected": -2.8562209606170654, "logps/chosen": -1.4856079816818237, "logps/rejected": -3.2529525756835938, "loss": 1.6494, "nll_loss": 1.626587152481079, "rewards/accuracies": 0.875, "rewards/chosen": -0.1485608071088791, "rewards/margins": 0.17673446238040924, "rewards/rejected": -0.32529526948928833, "step": 19 }, { "epoch": 0.07547169811320754, "grad_norm": 0.3028462827205658, "learning_rate": 4.811320754716982e-05, "log_odds_chosen": 1.1802749633789062, "log_odds_ratio": -0.4493004083633423, "logits/chosen": -1.202689290046692, "logits/rejected": -3.1590261459350586, "logps/chosen": -1.8469630479812622, "logps/rejected": -2.9175806045532227, "loss": 1.6889, "nll_loss": 1.6439603567123413, "rewards/accuracies": 0.6875, "rewards/chosen": -0.18469631671905518, "rewards/margins": 0.10706175863742828, "rewards/rejected": -0.29175806045532227, "step": 20 }, { "epoch": 0.07924528301886792, "grad_norm": 0.28935325145721436, "learning_rate": 4.80188679245283e-05, "log_odds_chosen": 2.2533977031707764, "log_odds_ratio": -0.21968787908554077, "logits/chosen": -0.12538594007492065, "logits/rejected": -1.9605629444122314, "logps/chosen": -1.555745244026184, "logps/rejected": -3.573531150817871, "loss": 1.479, "nll_loss": 1.4570300579071045, "rewards/accuracies": 0.875, "rewards/chosen": -0.15557453036308289, "rewards/margins": 0.2017785757780075, "rewards/rejected": -0.3573530912399292, "step": 21 }, { "epoch": 0.0830188679245283, "grad_norm": 0.39032241702079773, "learning_rate": 4.792452830188679e-05, "log_odds_chosen": 1.7034038305282593, "log_odds_ratio": -0.41050586104393005, "logits/chosen": -0.2803301215171814, "logits/rejected": -2.3801870346069336, "logps/chosen": -1.699625015258789, "logps/rejected": -3.263505458831787, "loss": 1.7973, "nll_loss": 1.7562379837036133, "rewards/accuracies": 0.75, "rewards/chosen": -0.16996252536773682, "rewards/margins": 0.1563880443572998, "rewards/rejected": -0.3263505697250366, "step": 22 }, { "epoch": 0.08679245283018867, "grad_norm": 0.2868567705154419, "learning_rate": 4.7830188679245284e-05, "log_odds_chosen": 2.2137861251831055, "log_odds_ratio": -0.3145076334476471, "logits/chosen": -1.0021281242370605, "logits/rejected": -2.4567623138427734, "logps/chosen": -1.621106743812561, "logps/rejected": -3.6742091178894043, "loss": 1.5856, "nll_loss": 1.5541696548461914, "rewards/accuracies": 0.8125, "rewards/chosen": -0.16211068630218506, "rewards/margins": 0.20531021058559418, "rewards/rejected": -0.36742085218429565, "step": 23 }, { "epoch": 0.09056603773584905, "grad_norm": 0.3213948607444763, "learning_rate": 4.7735849056603775e-05, "log_odds_chosen": 3.161144256591797, "log_odds_ratio": -0.2373344600200653, "logits/chosen": -0.881264865398407, "logits/rejected": -2.189188241958618, "logps/chosen": -1.3406716585159302, "logps/rejected": -3.9409539699554443, "loss": 1.5735, "nll_loss": 1.5498067140579224, "rewards/accuracies": 0.875, "rewards/chosen": -0.13406717777252197, "rewards/margins": 0.2600281834602356, "rewards/rejected": -0.39409539103507996, "step": 24 }, { "epoch": 0.09433962264150944, "grad_norm": 0.3009367287158966, "learning_rate": 4.7641509433962266e-05, "log_odds_chosen": 2.8621065616607666, "log_odds_ratio": -0.2253154218196869, "logits/chosen": -0.2625047266483307, "logits/rejected": -2.8666999340057373, "logps/chosen": -1.7110705375671387, "logps/rejected": -4.400345325469971, "loss": 1.7058, "nll_loss": 1.683306097984314, "rewards/accuracies": 0.8125, "rewards/chosen": -0.17110705375671387, "rewards/margins": 0.2689274847507477, "rewards/rejected": -0.44003453850746155, "step": 25 }, { "epoch": 0.09811320754716982, "grad_norm": 0.3267085552215576, "learning_rate": 4.754716981132076e-05, "log_odds_chosen": 2.815523386001587, "log_odds_ratio": -0.2813441753387451, "logits/chosen": -1.2153959274291992, "logits/rejected": -3.1709775924682617, "logps/chosen": -1.6071617603302002, "logps/rejected": -4.192075729370117, "loss": 1.642, "nll_loss": 1.6138246059417725, "rewards/accuracies": 0.8125, "rewards/chosen": -0.16071617603302002, "rewards/margins": 0.2584913671016693, "rewards/rejected": -0.41920754313468933, "step": 26 }, { "epoch": 0.1018867924528302, "grad_norm": 0.4692343771457672, "learning_rate": 4.745283018867925e-05, "log_odds_chosen": 3.2350544929504395, "log_odds_ratio": -0.22966182231903076, "logits/chosen": -0.6714242696762085, "logits/rejected": -3.4772703647613525, "logps/chosen": -1.573210597038269, "logps/rejected": -4.57033634185791, "loss": 1.5627, "nll_loss": 1.5397582054138184, "rewards/accuracies": 0.8125, "rewards/chosen": -0.15732106566429138, "rewards/margins": 0.29971253871917725, "rewards/rejected": -0.45703360438346863, "step": 27 }, { "epoch": 0.10566037735849057, "grad_norm": 0.31730151176452637, "learning_rate": 4.735849056603774e-05, "log_odds_chosen": 3.0557804107666016, "log_odds_ratio": -0.19463254511356354, "logits/chosen": -0.8169313669204712, "logits/rejected": -1.874739646911621, "logps/chosen": -1.7243003845214844, "logps/rejected": -4.595163345336914, "loss": 1.5311, "nll_loss": 1.5116479396820068, "rewards/accuracies": 0.875, "rewards/chosen": -0.17243005335330963, "rewards/margins": 0.2870863080024719, "rewards/rejected": -0.45951637625694275, "step": 28 }, { "epoch": 0.10943396226415095, "grad_norm": 0.25958406925201416, "learning_rate": 4.726415094339623e-05, "log_odds_chosen": 4.204806327819824, "log_odds_ratio": -0.06744246184825897, "logits/chosen": -0.7214003801345825, "logits/rejected": -3.8571434020996094, "logps/chosen": -1.5588371753692627, "logps/rejected": -5.471315383911133, "loss": 1.5991, "nll_loss": 1.5923649072647095, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15588372945785522, "rewards/margins": 0.39124780893325806, "rewards/rejected": -0.5471315383911133, "step": 29 }, { "epoch": 0.11320754716981132, "grad_norm": 0.2962592542171478, "learning_rate": 4.716981132075472e-05, "log_odds_chosen": 2.306314468383789, "log_odds_ratio": -0.5868710279464722, "logits/chosen": -0.9562402963638306, "logits/rejected": -4.068589687347412, "logps/chosen": -1.8381496667861938, "logps/rejected": -4.1114959716796875, "loss": 1.7932, "nll_loss": 1.7344965934753418, "rewards/accuracies": 0.5625, "rewards/chosen": -0.18381497263908386, "rewards/margins": 0.2273346334695816, "rewards/rejected": -0.41114962100982666, "step": 30 }, { "epoch": 0.1169811320754717, "grad_norm": 0.3253747224807739, "learning_rate": 4.707547169811321e-05, "log_odds_chosen": 3.867879867553711, "log_odds_ratio": -0.2663135528564453, "logits/chosen": -1.6323354244232178, "logits/rejected": -2.8868775367736816, "logps/chosen": -1.3807705640792847, "logps/rejected": -4.98048210144043, "loss": 1.3989, "nll_loss": 1.3722314834594727, "rewards/accuracies": 0.8125, "rewards/chosen": -0.13807706534862518, "rewards/margins": 0.35997116565704346, "rewards/rejected": -0.49804821610450745, "step": 31 }, { "epoch": 0.12075471698113208, "grad_norm": 0.25523456931114197, "learning_rate": 4.6981132075471704e-05, "log_odds_chosen": 4.133279800415039, "log_odds_ratio": -0.20295441150665283, "logits/chosen": -0.9551544785499573, "logits/rejected": -4.06210470199585, "logps/chosen": -1.683975338935852, "logps/rejected": -5.5400261878967285, "loss": 1.7012, "nll_loss": 1.6808902025222778, "rewards/accuracies": 0.875, "rewards/chosen": -0.16839754581451416, "rewards/margins": 0.3856050968170166, "rewards/rejected": -0.5540026426315308, "step": 32 }, { "epoch": 0.12452830188679245, "grad_norm": 0.2824070155620575, "learning_rate": 4.6886792452830195e-05, "log_odds_chosen": 4.0810699462890625, "log_odds_ratio": -0.24137283861637115, "logits/chosen": -0.1275748610496521, "logits/rejected": -1.968299150466919, "logps/chosen": -1.4146742820739746, "logps/rejected": -5.19221830368042, "loss": 1.4468, "nll_loss": 1.422704815864563, "rewards/accuracies": 0.875, "rewards/chosen": -0.14146743714809418, "rewards/margins": 0.37775442004203796, "rewards/rejected": -0.519221842288971, "step": 33 }, { "epoch": 0.12830188679245283, "grad_norm": 0.3047614097595215, "learning_rate": 4.679245283018868e-05, "log_odds_chosen": 4.300058364868164, "log_odds_ratio": -0.2853168845176697, "logits/chosen": -2.589682102203369, "logits/rejected": -3.8946940898895264, "logps/chosen": -1.4936325550079346, "logps/rejected": -5.401619911193848, "loss": 1.6643, "nll_loss": 1.6357437372207642, "rewards/accuracies": 0.8125, "rewards/chosen": -0.14936324954032898, "rewards/margins": 0.3907987177371979, "rewards/rejected": -0.5401619672775269, "step": 34 }, { "epoch": 0.1320754716981132, "grad_norm": 0.298910528421402, "learning_rate": 4.669811320754717e-05, "log_odds_chosen": 6.439607620239258, "log_odds_ratio": -0.03047255240380764, "logits/chosen": -1.3439738750457764, "logits/rejected": -3.9568533897399902, "logps/chosen": -1.3088639974594116, "logps/rejected": -7.318572044372559, "loss": 1.6855, "nll_loss": 1.6824290752410889, "rewards/accuracies": 1.0, "rewards/chosen": -0.13088640570640564, "rewards/margins": 0.6009708642959595, "rewards/rejected": -0.7318572402000427, "step": 35 }, { "epoch": 0.13584905660377358, "grad_norm": 0.4024220407009125, "learning_rate": 4.660377358490566e-05, "log_odds_chosen": 5.031867980957031, "log_odds_ratio": -0.17651405930519104, "logits/chosen": -0.4531555771827698, "logits/rejected": -3.1221275329589844, "logps/chosen": -1.6552730798721313, "logps/rejected": -6.324914932250977, "loss": 1.7674, "nll_loss": 1.749765157699585, "rewards/accuracies": 0.875, "rewards/chosen": -0.16552734375, "rewards/margins": 0.46696415543556213, "rewards/rejected": -0.6324914693832397, "step": 36 }, { "epoch": 0.13962264150943396, "grad_norm": 0.2770419418811798, "learning_rate": 4.650943396226415e-05, "log_odds_chosen": 5.099140167236328, "log_odds_ratio": -0.25729480385780334, "logits/chosen": -1.506839632987976, "logits/rejected": -3.454437732696533, "logps/chosen": -1.6088337898254395, "logps/rejected": -6.49648904800415, "loss": 1.5488, "nll_loss": 1.5230274200439453, "rewards/accuracies": 0.8125, "rewards/chosen": -0.160883367061615, "rewards/margins": 0.48876553773880005, "rewards/rejected": -0.649648904800415, "step": 37 }, { "epoch": 0.14339622641509434, "grad_norm": 0.31073063611984253, "learning_rate": 4.641509433962264e-05, "log_odds_chosen": 5.445122241973877, "log_odds_ratio": -0.18372483551502228, "logits/chosen": -1.1133687496185303, "logits/rejected": -3.4876227378845215, "logps/chosen": -1.4217114448547363, "logps/rejected": -6.529870986938477, "loss": 1.4282, "nll_loss": 1.4098114967346191, "rewards/accuracies": 0.875, "rewards/chosen": -0.14217115938663483, "rewards/margins": 0.5108159184455872, "rewards/rejected": -0.6529870629310608, "step": 38 }, { "epoch": 0.1471698113207547, "grad_norm": 0.29280173778533936, "learning_rate": 4.6320754716981134e-05, "log_odds_chosen": 6.195460319519043, "log_odds_ratio": -0.035549942404031754, "logits/chosen": -1.361371636390686, "logits/rejected": -4.5884246826171875, "logps/chosen": -1.7262309789657593, "logps/rejected": -7.716752529144287, "loss": 1.5956, "nll_loss": 1.5920307636260986, "rewards/accuracies": 1.0, "rewards/chosen": -0.17262309789657593, "rewards/margins": 0.5990520715713501, "rewards/rejected": -0.7716752290725708, "step": 39 }, { "epoch": 0.1509433962264151, "grad_norm": 0.28057682514190674, "learning_rate": 4.6226415094339625e-05, "log_odds_chosen": 6.020131587982178, "log_odds_ratio": -0.03772380203008652, "logits/chosen": -1.2496304512023926, "logits/rejected": -4.043735504150391, "logps/chosen": -1.566927433013916, "logps/rejected": -7.254069805145264, "loss": 1.4067, "nll_loss": 1.402917742729187, "rewards/accuracies": 1.0, "rewards/chosen": -0.1566927433013916, "rewards/margins": 0.5687142610549927, "rewards/rejected": -0.7254070043563843, "step": 40 }, { "epoch": 0.15471698113207547, "grad_norm": 0.2948552072048187, "learning_rate": 4.6132075471698117e-05, "log_odds_chosen": 4.575827598571777, "log_odds_ratio": -0.2891075015068054, "logits/chosen": -0.34096649289131165, "logits/rejected": -2.514559507369995, "logps/chosen": -1.75924813747406, "logps/rejected": -6.152600288391113, "loss": 1.6047, "nll_loss": 1.575812816619873, "rewards/accuracies": 0.8125, "rewards/chosen": -0.17592480778694153, "rewards/margins": 0.4393352270126343, "rewards/rejected": -0.6152600049972534, "step": 41 }, { "epoch": 0.15849056603773584, "grad_norm": 0.3365479111671448, "learning_rate": 4.603773584905661e-05, "log_odds_chosen": 6.562654972076416, "log_odds_ratio": -0.16193996369838715, "logits/chosen": -1.5872305631637573, "logits/rejected": -3.203104019165039, "logps/chosen": -1.6002566814422607, "logps/rejected": -7.7676682472229, "loss": 1.7498, "nll_loss": 1.7336182594299316, "rewards/accuracies": 0.875, "rewards/chosen": -0.1600256711244583, "rewards/margins": 0.6167411208152771, "rewards/rejected": -0.7767667770385742, "step": 42 }, { "epoch": 0.16226415094339622, "grad_norm": 0.28234297037124634, "learning_rate": 4.59433962264151e-05, "log_odds_chosen": 5.429837226867676, "log_odds_ratio": -0.21709167957305908, "logits/chosen": 0.13629187643527985, "logits/rejected": -3.1108312606811523, "logps/chosen": -1.7401251792907715, "logps/rejected": -7.000548362731934, "loss": 1.5603, "nll_loss": 1.538613200187683, "rewards/accuracies": 0.875, "rewards/chosen": -0.17401251196861267, "rewards/margins": 0.5260423421859741, "rewards/rejected": -0.7000548243522644, "step": 43 }, { "epoch": 0.1660377358490566, "grad_norm": 0.30918800830841064, "learning_rate": 4.584905660377359e-05, "log_odds_chosen": 5.783122539520264, "log_odds_ratio": -0.11934540420770645, "logits/chosen": -0.9094568490982056, "logits/rejected": -3.6608901023864746, "logps/chosen": -1.6641435623168945, "logps/rejected": -7.227668285369873, "loss": 1.5404, "nll_loss": 1.5284289121627808, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16641436517238617, "rewards/margins": 0.5563524961471558, "rewards/rejected": -0.7227668762207031, "step": 44 }, { "epoch": 0.16981132075471697, "grad_norm": 0.3235601782798767, "learning_rate": 4.575471698113208e-05, "log_odds_chosen": 6.502612113952637, "log_odds_ratio": -0.10429678857326508, "logits/chosen": -1.07891047000885, "logits/rejected": -3.1915483474731445, "logps/chosen": -1.5495665073394775, "logps/rejected": -7.755338668823242, "loss": 1.548, "nll_loss": 1.5375723838806152, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1549566686153412, "rewards/margins": 0.6205772161483765, "rewards/rejected": -0.7755338549613953, "step": 45 }, { "epoch": 0.17358490566037735, "grad_norm": 0.26763808727264404, "learning_rate": 4.566037735849057e-05, "log_odds_chosen": 6.199982166290283, "log_odds_ratio": -0.07396085560321808, "logits/chosen": -1.4093493223190308, "logits/rejected": -5.298598289489746, "logps/chosen": -1.7217578887939453, "logps/rejected": -7.687525749206543, "loss": 1.5789, "nll_loss": 1.571506381034851, "rewards/accuracies": 1.0, "rewards/chosen": -0.172175794839859, "rewards/margins": 0.5965768098831177, "rewards/rejected": -0.7687525749206543, "step": 46 }, { "epoch": 0.17735849056603772, "grad_norm": 0.28478890657424927, "learning_rate": 4.556603773584906e-05, "log_odds_chosen": 5.49038028717041, "log_odds_ratio": -0.17011021077632904, "logits/chosen": -0.664116621017456, "logits/rejected": -3.217937469482422, "logps/chosen": -2.0000357627868652, "logps/rejected": -7.373146057128906, "loss": 1.7461, "nll_loss": 1.7290459871292114, "rewards/accuracies": 0.9375, "rewards/chosen": -0.20000356435775757, "rewards/margins": 0.5373110771179199, "rewards/rejected": -0.7373145818710327, "step": 47 }, { "epoch": 0.1811320754716981, "grad_norm": 0.29675430059432983, "learning_rate": 4.547169811320755e-05, "log_odds_chosen": 6.941376209259033, "log_odds_ratio": -0.07056228816509247, "logits/chosen": -0.9245410561561584, "logits/rejected": -3.209742546081543, "logps/chosen": -1.5620588064193726, "logps/rejected": -8.255936622619629, "loss": 1.6204, "nll_loss": 1.6133677959442139, "rewards/accuracies": 1.0, "rewards/chosen": -0.1562058925628662, "rewards/margins": 0.6693878173828125, "rewards/rejected": -0.8255936503410339, "step": 48 }, { "epoch": 0.18490566037735848, "grad_norm": 0.3024120330810547, "learning_rate": 4.537735849056604e-05, "log_odds_chosen": 7.8667497634887695, "log_odds_ratio": -0.11265160888433456, "logits/chosen": -1.589120864868164, "logits/rejected": -4.3788957595825195, "logps/chosen": -1.7432844638824463, "logps/rejected": -9.341209411621094, "loss": 1.7561, "nll_loss": 1.7448220252990723, "rewards/accuracies": 0.9375, "rewards/chosen": -0.17432843148708344, "rewards/margins": 0.7597925662994385, "rewards/rejected": -0.9341210126876831, "step": 49 }, { "epoch": 0.18867924528301888, "grad_norm": 0.27790966629981995, "learning_rate": 4.528301886792453e-05, "log_odds_chosen": 9.197233200073242, "log_odds_ratio": -0.0743798017501831, "logits/chosen": -2.399343729019165, "logits/rejected": -5.902744770050049, "logps/chosen": -1.7229493856430054, "logps/rejected": -10.651434898376465, "loss": 1.6465, "nll_loss": 1.6390764713287354, "rewards/accuracies": 0.9375, "rewards/chosen": -0.17229494452476501, "rewards/margins": 0.8928486108779907, "rewards/rejected": -1.0651434659957886, "step": 50 }, { "epoch": 0.19245283018867926, "grad_norm": 0.28730452060699463, "learning_rate": 4.518867924528302e-05, "log_odds_chosen": 7.1606950759887695, "log_odds_ratio": -0.15011747181415558, "logits/chosen": -0.7371699810028076, "logits/rejected": -3.6732337474823, "logps/chosen": -1.4196903705596924, "logps/rejected": -8.275727272033691, "loss": 1.5138, "nll_loss": 1.4987900257110596, "rewards/accuracies": 0.9375, "rewards/chosen": -0.14196905493736267, "rewards/margins": 0.685603678226471, "rewards/rejected": -0.8275727033615112, "step": 51 }, { "epoch": 0.19622641509433963, "grad_norm": 0.3072900176048279, "learning_rate": 4.509433962264151e-05, "log_odds_chosen": 9.726778030395508, "log_odds_ratio": -0.0466623455286026, "logits/chosen": -2.5045127868652344, "logits/rejected": -5.068806171417236, "logps/chosen": -1.6165649890899658, "logps/rejected": -11.054706573486328, "loss": 1.4102, "nll_loss": 1.4055166244506836, "rewards/accuracies": 1.0, "rewards/chosen": -0.16165651381015778, "rewards/margins": 0.9438142776489258, "rewards/rejected": -1.1054707765579224, "step": 52 }, { "epoch": 0.2, "grad_norm": 0.2943893074989319, "learning_rate": 4.5e-05, "log_odds_chosen": 10.128807067871094, "log_odds_ratio": -0.0014914250932633877, "logits/chosen": -1.9018776416778564, "logits/rejected": -4.2667951583862305, "logps/chosen": -1.5886954069137573, "logps/rejected": -11.448442459106445, "loss": 1.6031, "nll_loss": 1.6029250621795654, "rewards/accuracies": 1.0, "rewards/chosen": -0.15886953473091125, "rewards/margins": 0.9859746694564819, "rewards/rejected": -1.1448442935943604, "step": 53 }, { "epoch": 0.2037735849056604, "grad_norm": 0.2680610418319702, "learning_rate": 4.4905660377358494e-05, "log_odds_chosen": 8.853492736816406, "log_odds_ratio": -0.04176551476120949, "logits/chosen": -1.450791835784912, "logits/rejected": -5.139822006225586, "logps/chosen": -1.6527010202407837, "logps/rejected": -10.218703269958496, "loss": 1.6926, "nll_loss": 1.6884214878082275, "rewards/accuracies": 1.0, "rewards/chosen": -0.16527009010314941, "rewards/margins": 0.8566002249717712, "rewards/rejected": -1.0218703746795654, "step": 54 }, { "epoch": 0.20754716981132076, "grad_norm": 0.32189199328422546, "learning_rate": 4.4811320754716985e-05, "log_odds_chosen": 11.52000617980957, "log_odds_ratio": -0.000579544750507921, "logits/chosen": -1.3866709470748901, "logits/rejected": -3.310800790786743, "logps/chosen": -1.6044374704360962, "logps/rejected": -12.8070068359375, "loss": 1.527, "nll_loss": 1.5269696712493896, "rewards/accuracies": 1.0, "rewards/chosen": -0.1604437530040741, "rewards/margins": 1.120257019996643, "rewards/rejected": -1.2807008028030396, "step": 55 }, { "epoch": 0.21132075471698114, "grad_norm": 0.2579837739467621, "learning_rate": 4.4716981132075476e-05, "log_odds_chosen": 8.566587448120117, "log_odds_ratio": -0.025317970663309097, "logits/chosen": -0.9709105491638184, "logits/rejected": -5.532227993011475, "logps/chosen": -1.6428523063659668, "logps/rejected": -9.977346420288086, "loss": 1.5057, "nll_loss": 1.5031633377075195, "rewards/accuracies": 1.0, "rewards/chosen": -0.16428521275520325, "rewards/margins": 0.8334494829177856, "rewards/rejected": -0.9977346658706665, "step": 56 }, { "epoch": 0.21509433962264152, "grad_norm": 0.32639437913894653, "learning_rate": 4.462264150943397e-05, "log_odds_chosen": 10.306001663208008, "log_odds_ratio": -0.03766224905848503, "logits/chosen": -1.1223177909851074, "logits/rejected": -2.9572572708129883, "logps/chosen": -1.6183445453643799, "logps/rejected": -11.599272727966309, "loss": 1.5893, "nll_loss": 1.5855064392089844, "rewards/accuracies": 1.0, "rewards/chosen": -0.1618344485759735, "rewards/margins": 0.998092770576477, "rewards/rejected": -1.159927248954773, "step": 57 }, { "epoch": 0.2188679245283019, "grad_norm": 0.25903022289276123, "learning_rate": 4.452830188679246e-05, "log_odds_chosen": 8.881806373596191, "log_odds_ratio": -0.10347943007946014, "logits/chosen": -0.9591866731643677, "logits/rejected": -4.289064407348633, "logps/chosen": -1.6897010803222656, "logps/rejected": -10.346776962280273, "loss": 1.6166, "nll_loss": 1.606278419494629, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16897010803222656, "rewards/margins": 0.8657075762748718, "rewards/rejected": -1.0346777439117432, "step": 58 }, { "epoch": 0.22264150943396227, "grad_norm": 0.2665920853614807, "learning_rate": 4.443396226415095e-05, "log_odds_chosen": 11.143839836120605, "log_odds_ratio": -0.01865122653543949, "logits/chosen": -2.2705538272857666, "logits/rejected": -4.639202117919922, "logps/chosen": -1.646267056465149, "logps/rejected": -12.43774127960205, "loss": 1.5823, "nll_loss": 1.58046555519104, "rewards/accuracies": 1.0, "rewards/chosen": -0.16462671756744385, "rewards/margins": 1.0791475772857666, "rewards/rejected": -1.243774175643921, "step": 59 }, { "epoch": 0.22641509433962265, "grad_norm": 0.2563314139842987, "learning_rate": 4.433962264150944e-05, "log_odds_chosen": 11.137109756469727, "log_odds_ratio": -0.03539396822452545, "logits/chosen": -0.5501875281333923, "logits/rejected": -4.503236770629883, "logps/chosen": -1.5203689336776733, "logps/rejected": -12.256806373596191, "loss": 1.5345, "nll_loss": 1.5309462547302246, "rewards/accuracies": 1.0, "rewards/chosen": -0.1520369052886963, "rewards/margins": 1.073643684387207, "rewards/rejected": -1.2256807088851929, "step": 60 }, { "epoch": 0.23018867924528302, "grad_norm": 0.3310481011867523, "learning_rate": 4.4245283018867925e-05, "log_odds_chosen": 10.03518295288086, "log_odds_ratio": -0.09628309309482574, "logits/chosen": -1.4740400314331055, "logits/rejected": -4.722970962524414, "logps/chosen": -1.992842435836792, "logps/rejected": -11.883909225463867, "loss": 1.7703, "nll_loss": 1.7606245279312134, "rewards/accuracies": 0.9375, "rewards/chosen": -0.19928425550460815, "rewards/margins": 0.9891066551208496, "rewards/rejected": -1.1883909702301025, "step": 61 }, { "epoch": 0.2339622641509434, "grad_norm": 0.3190458416938782, "learning_rate": 4.4150943396226416e-05, "log_odds_chosen": 9.210914611816406, "log_odds_ratio": -0.040729597210884094, "logits/chosen": -0.7013575434684753, "logits/rejected": -3.3500924110412598, "logps/chosen": -1.950926423072815, "logps/rejected": -10.972383499145508, "loss": 1.752, "nll_loss": 1.7479382753372192, "rewards/accuracies": 1.0, "rewards/chosen": -0.19509264826774597, "rewards/margins": 0.9021456837654114, "rewards/rejected": -1.097238302230835, "step": 62 }, { "epoch": 0.23773584905660378, "grad_norm": 0.3573385179042816, "learning_rate": 4.405660377358491e-05, "log_odds_chosen": 11.01914119720459, "log_odds_ratio": -0.06296153366565704, "logits/chosen": -1.7574726343154907, "logits/rejected": -3.601536989212036, "logps/chosen": -1.648729920387268, "logps/rejected": -12.339969635009766, "loss": 1.528, "nll_loss": 1.5217459201812744, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16487298905849457, "rewards/margins": 1.0691239833831787, "rewards/rejected": -1.233996868133545, "step": 63 }, { "epoch": 0.24150943396226415, "grad_norm": 0.2962454557418823, "learning_rate": 4.39622641509434e-05, "log_odds_chosen": 10.007362365722656, "log_odds_ratio": -0.043272338807582855, "logits/chosen": -1.2760871648788452, "logits/rejected": -4.875994682312012, "logps/chosen": -1.6131874322891235, "logps/rejected": -11.287424087524414, "loss": 1.5696, "nll_loss": 1.5652244091033936, "rewards/accuracies": 1.0, "rewards/chosen": -0.16131874918937683, "rewards/margins": 0.967423677444458, "rewards/rejected": -1.1287423372268677, "step": 64 }, { "epoch": 0.24528301886792453, "grad_norm": 0.3105560541152954, "learning_rate": 4.386792452830189e-05, "log_odds_chosen": 11.872138977050781, "log_odds_ratio": -0.07399442046880722, "logits/chosen": -0.6608960628509521, "logits/rejected": -3.028031349182129, "logps/chosen": -1.554811716079712, "logps/rejected": -13.131336212158203, "loss": 1.3613, "nll_loss": 1.3538590669631958, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15548115968704224, "rewards/margins": 1.1576523780822754, "rewards/rejected": -1.313133716583252, "step": 65 }, { "epoch": 0.2490566037735849, "grad_norm": 0.27170437574386597, "learning_rate": 4.377358490566038e-05, "log_odds_chosen": 9.97773265838623, "log_odds_ratio": -0.05262986570596695, "logits/chosen": -1.7461464405059814, "logits/rejected": -3.9088220596313477, "logps/chosen": -1.6022846698760986, "logps/rejected": -11.324554443359375, "loss": 1.5483, "nll_loss": 1.542998194694519, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16022847592830658, "rewards/margins": 0.9722269177436829, "rewards/rejected": -1.1324553489685059, "step": 66 }, { "epoch": 0.2528301886792453, "grad_norm": 0.34835413098335266, "learning_rate": 4.367924528301887e-05, "log_odds_chosen": 9.539581298828125, "log_odds_ratio": -0.11755736172199249, "logits/chosen": -1.313893437385559, "logits/rejected": -3.734592914581299, "logps/chosen": -1.4113194942474365, "logps/rejected": -10.656464576721191, "loss": 1.3774, "nll_loss": 1.3656796216964722, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1411319375038147, "rewards/margins": 0.9245145320892334, "rewards/rejected": -1.0656465291976929, "step": 67 }, { "epoch": 0.25660377358490566, "grad_norm": 0.2896377444267273, "learning_rate": 4.358490566037736e-05, "log_odds_chosen": 10.555123329162598, "log_odds_ratio": -0.08397966623306274, "logits/chosen": -0.9625846743583679, "logits/rejected": -3.1470422744750977, "logps/chosen": -1.5027086734771729, "logps/rejected": -11.76999282836914, "loss": 1.5365, "nll_loss": 1.5281280279159546, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15027087926864624, "rewards/margins": 1.0267283916473389, "rewards/rejected": -1.1769993305206299, "step": 68 }, { "epoch": 0.26037735849056604, "grad_norm": 0.2656661868095398, "learning_rate": 4.3490566037735853e-05, "log_odds_chosen": 11.847007751464844, "log_odds_ratio": -0.0024143236223608255, "logits/chosen": -2.249497890472412, "logits/rejected": -4.6625542640686035, "logps/chosen": -1.611348271369934, "logps/rejected": -13.128585815429688, "loss": 1.3686, "nll_loss": 1.368332862854004, "rewards/accuracies": 1.0, "rewards/chosen": -0.16113483905792236, "rewards/margins": 1.1517237424850464, "rewards/rejected": -1.3128585815429688, "step": 69 }, { "epoch": 0.2641509433962264, "grad_norm": 0.2848161458969116, "learning_rate": 4.3396226415094345e-05, "log_odds_chosen": 11.41952896118164, "log_odds_ratio": -0.06205465644598007, "logits/chosen": -1.0198203325271606, "logits/rejected": -4.255819320678711, "logps/chosen": -1.6085125207901, "logps/rejected": -12.758248329162598, "loss": 1.5198, "nll_loss": 1.5136139392852783, "rewards/accuracies": 1.0, "rewards/chosen": -0.16085125505924225, "rewards/margins": 1.114973545074463, "rewards/rejected": -1.2758249044418335, "step": 70 }, { "epoch": 0.2679245283018868, "grad_norm": 0.3033360540866852, "learning_rate": 4.3301886792452836e-05, "log_odds_chosen": 8.192865371704102, "log_odds_ratio": -0.06372419744729996, "logits/chosen": -2.1323366165161133, "logits/rejected": -4.94224214553833, "logps/chosen": -1.7140583992004395, "logps/rejected": -9.679903030395508, "loss": 1.6245, "nll_loss": 1.6181257963180542, "rewards/accuracies": 1.0, "rewards/chosen": -0.1714058518409729, "rewards/margins": 0.7965843677520752, "rewards/rejected": -0.9679902791976929, "step": 71 }, { "epoch": 0.27169811320754716, "grad_norm": 0.35890570282936096, "learning_rate": 4.320754716981133e-05, "log_odds_chosen": 9.654875755310059, "log_odds_ratio": -0.13519693911075592, "logits/chosen": -0.6182310581207275, "logits/rejected": -2.691154718399048, "logps/chosen": -1.8398606777191162, "logps/rejected": -11.286558151245117, "loss": 1.5057, "nll_loss": 1.492200255393982, "rewards/accuracies": 0.9375, "rewards/chosen": -0.18398606777191162, "rewards/margins": 0.9446697235107422, "rewards/rejected": -1.1286557912826538, "step": 72 }, { "epoch": 0.27547169811320754, "grad_norm": 1.4705555438995361, "learning_rate": 4.311320754716982e-05, "log_odds_chosen": 13.209151268005371, "log_odds_ratio": -0.0005382616654969752, "logits/chosen": -0.8367394804954529, "logits/rejected": -4.00326681137085, "logps/chosen": -1.536256194114685, "logps/rejected": -14.438907623291016, "loss": 1.4609, "nll_loss": 1.4608838558197021, "rewards/accuracies": 1.0, "rewards/chosen": -0.15362560749053955, "rewards/margins": 1.2902652025222778, "rewards/rejected": -1.4438908100128174, "step": 73 }, { "epoch": 0.2792452830188679, "grad_norm": 0.33669015765190125, "learning_rate": 4.301886792452831e-05, "log_odds_chosen": 11.09627628326416, "log_odds_ratio": -0.09562374651432037, "logits/chosen": -1.0604119300842285, "logits/rejected": -2.2503695487976074, "logps/chosen": -1.7556407451629639, "logps/rejected": -12.555806159973145, "loss": 1.3805, "nll_loss": 1.3709644079208374, "rewards/accuracies": 0.9375, "rewards/chosen": -0.17556408047676086, "rewards/margins": 1.0800164937973022, "rewards/rejected": -1.2555806636810303, "step": 74 }, { "epoch": 0.2830188679245283, "grad_norm": 0.26332709193229675, "learning_rate": 4.292452830188679e-05, "log_odds_chosen": 11.302566528320312, "log_odds_ratio": -0.013619111850857735, "logits/chosen": -1.1802372932434082, "logits/rejected": -4.334959506988525, "logps/chosen": -1.7836018800735474, "logps/rejected": -12.880077362060547, "loss": 1.6039, "nll_loss": 1.6024914979934692, "rewards/accuracies": 1.0, "rewards/chosen": -0.17836017906665802, "rewards/margins": 1.1096476316452026, "rewards/rejected": -1.2880078554153442, "step": 75 }, { "epoch": 0.28679245283018867, "grad_norm": 0.2604648470878601, "learning_rate": 4.2830188679245284e-05, "log_odds_chosen": 11.805458068847656, "log_odds_ratio": -0.09639889001846313, "logits/chosen": -1.4125428199768066, "logits/rejected": -4.383103847503662, "logps/chosen": -1.753788948059082, "logps/rejected": -13.35352897644043, "loss": 1.5627, "nll_loss": 1.5530548095703125, "rewards/accuracies": 0.9375, "rewards/chosen": -0.17537888884544373, "rewards/margins": 1.1599740982055664, "rewards/rejected": -1.3353530168533325, "step": 76 }, { "epoch": 0.29056603773584905, "grad_norm": 0.2621481418609619, "learning_rate": 4.2735849056603775e-05, "log_odds_chosen": 12.079008102416992, "log_odds_ratio": -0.029307007789611816, "logits/chosen": -1.0459429025650024, "logits/rejected": -4.2921833992004395, "logps/chosen": -1.7573282718658447, "logps/rejected": -13.61059856414795, "loss": 1.6019, "nll_loss": 1.5990142822265625, "rewards/accuracies": 1.0, "rewards/chosen": -0.17573282122612, "rewards/margins": 1.1853270530700684, "rewards/rejected": -1.3610599040985107, "step": 77 }, { "epoch": 0.2943396226415094, "grad_norm": 0.23861609399318695, "learning_rate": 4.2641509433962266e-05, "log_odds_chosen": 14.536327362060547, "log_odds_ratio": -0.006748859770596027, "logits/chosen": -1.6867283582687378, "logits/rejected": -4.363768100738525, "logps/chosen": -1.6174168586730957, "logps/rejected": -15.880059242248535, "loss": 1.5851, "nll_loss": 1.5844483375549316, "rewards/accuracies": 1.0, "rewards/chosen": -0.16174167394638062, "rewards/margins": 1.4262642860412598, "rewards/rejected": -1.5880060195922852, "step": 78 }, { "epoch": 0.2981132075471698, "grad_norm": 0.26430046558380127, "learning_rate": 4.254716981132076e-05, "log_odds_chosen": 10.643120765686035, "log_odds_ratio": -0.15290699899196625, "logits/chosen": -1.1397204399108887, "logits/rejected": -4.667537689208984, "logps/chosen": -1.5166621208190918, "logps/rejected": -11.917654991149902, "loss": 1.4211, "nll_loss": 1.4057632684707642, "rewards/accuracies": 0.875, "rewards/chosen": -0.15166620910167694, "rewards/margins": 1.0400993824005127, "rewards/rejected": -1.191765546798706, "step": 79 }, { "epoch": 0.3018867924528302, "grad_norm": 0.26361650228500366, "learning_rate": 4.245283018867925e-05, "log_odds_chosen": 14.004961967468262, "log_odds_ratio": -0.0008121158462017775, "logits/chosen": -1.747410774230957, "logits/rejected": -5.366614818572998, "logps/chosen": -1.7156703472137451, "logps/rejected": -15.486541748046875, "loss": 1.6296, "nll_loss": 1.6295278072357178, "rewards/accuracies": 1.0, "rewards/chosen": -0.17156702280044556, "rewards/margins": 1.377087116241455, "rewards/rejected": -1.5486541986465454, "step": 80 }, { "epoch": 0.30566037735849055, "grad_norm": 0.3431408405303955, "learning_rate": 4.235849056603774e-05, "log_odds_chosen": 9.586076736450195, "log_odds_ratio": -0.07982049137353897, "logits/chosen": -2.0183656215667725, "logits/rejected": -4.584896564483643, "logps/chosen": -1.6326284408569336, "logps/rejected": -10.968765258789062, "loss": 1.3615, "nll_loss": 1.3535064458847046, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16326284408569336, "rewards/margins": 0.9336137175559998, "rewards/rejected": -1.096876621246338, "step": 81 }, { "epoch": 0.30943396226415093, "grad_norm": 0.29136648774147034, "learning_rate": 4.226415094339623e-05, "log_odds_chosen": 12.734142303466797, "log_odds_ratio": -0.004932960495352745, "logits/chosen": -2.464571952819824, "logits/rejected": -4.204010009765625, "logps/chosen": -1.1688156127929688, "logps/rejected": -13.363677978515625, "loss": 1.0781, "nll_loss": 1.0775874853134155, "rewards/accuracies": 1.0, "rewards/chosen": -0.11688156425952911, "rewards/margins": 1.2194862365722656, "rewards/rejected": -1.3363678455352783, "step": 82 }, { "epoch": 0.3132075471698113, "grad_norm": 0.3025481402873993, "learning_rate": 4.216981132075472e-05, "log_odds_chosen": 11.096177101135254, "log_odds_ratio": -0.0837731808423996, "logits/chosen": -0.643512487411499, "logits/rejected": -2.385446071624756, "logps/chosen": -1.6675033569335938, "logps/rejected": -12.507808685302734, "loss": 1.4565, "nll_loss": 1.4480957984924316, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16675035655498505, "rewards/margins": 1.0840305089950562, "rewards/rejected": -1.2507808208465576, "step": 83 }, { "epoch": 0.3169811320754717, "grad_norm": 0.2666255831718445, "learning_rate": 4.207547169811321e-05, "log_odds_chosen": 12.243534088134766, "log_odds_ratio": -0.17083272337913513, "logits/chosen": -1.1914499998092651, "logits/rejected": -4.547676086425781, "logps/chosen": -1.4934356212615967, "logps/rejected": -13.507254600524902, "loss": 1.5673, "nll_loss": 1.5502480268478394, "rewards/accuracies": 0.875, "rewards/chosen": -0.1493435651063919, "rewards/margins": 1.2013819217681885, "rewards/rejected": -1.3507256507873535, "step": 84 }, { "epoch": 0.32075471698113206, "grad_norm": 0.3045007884502411, "learning_rate": 4.1981132075471704e-05, "log_odds_chosen": 12.160042762756348, "log_odds_ratio": -0.08180340379476547, "logits/chosen": -2.2346179485321045, "logits/rejected": -4.7149529457092285, "logps/chosen": -1.7960240840911865, "logps/rejected": -13.734954833984375, "loss": 1.5391, "nll_loss": 1.5308822393417358, "rewards/accuracies": 0.9375, "rewards/chosen": -0.17960241436958313, "rewards/margins": 1.1938930749893188, "rewards/rejected": -1.3734955787658691, "step": 85 }, { "epoch": 0.32452830188679244, "grad_norm": 0.324097216129303, "learning_rate": 4.1886792452830195e-05, "log_odds_chosen": 13.820845603942871, "log_odds_ratio": -0.0012615115847438574, "logits/chosen": -1.7233232259750366, "logits/rejected": -4.733954906463623, "logps/chosen": -1.5087658166885376, "logps/rejected": -15.039979934692383, "loss": 1.4518, "nll_loss": 1.451686143875122, "rewards/accuracies": 1.0, "rewards/chosen": -0.15087658166885376, "rewards/margins": 1.353121280670166, "rewards/rejected": -1.5039979219436646, "step": 86 }, { "epoch": 0.3283018867924528, "grad_norm": 0.31188490986824036, "learning_rate": 4.1792452830188686e-05, "log_odds_chosen": 13.122318267822266, "log_odds_ratio": -0.026753831654787064, "logits/chosen": -1.6933764219284058, "logits/rejected": -3.5955612659454346, "logps/chosen": -1.3663079738616943, "logps/rejected": -14.080284118652344, "loss": 1.3399, "nll_loss": 1.3372132778167725, "rewards/accuracies": 1.0, "rewards/chosen": -0.13663078844547272, "rewards/margins": 1.271397590637207, "rewards/rejected": -1.4080283641815186, "step": 87 }, { "epoch": 0.3320754716981132, "grad_norm": 0.2510411739349365, "learning_rate": 4.169811320754717e-05, "log_odds_chosen": 13.573080062866211, "log_odds_ratio": -0.07667991518974304, "logits/chosen": -1.1544338464736938, "logits/rejected": -5.001407146453857, "logps/chosen": -1.331773042678833, "logps/rejected": -14.553543090820312, "loss": 1.4304, "nll_loss": 1.4226869344711304, "rewards/accuracies": 0.9375, "rewards/chosen": -0.13317731022834778, "rewards/margins": 1.3221771717071533, "rewards/rejected": -1.4553543329238892, "step": 88 }, { "epoch": 0.33584905660377357, "grad_norm": 0.31842777132987976, "learning_rate": 4.160377358490566e-05, "log_odds_chosen": 13.996376037597656, "log_odds_ratio": -0.0015381659613922238, "logits/chosen": -2.5720713138580322, "logits/rejected": -4.779295921325684, "logps/chosen": -1.5352387428283691, "logps/rejected": -15.243785858154297, "loss": 1.6801, "nll_loss": 1.6799163818359375, "rewards/accuracies": 1.0, "rewards/chosen": -0.15352387726306915, "rewards/margins": 1.3708547353744507, "rewards/rejected": -1.524378776550293, "step": 89 }, { "epoch": 0.33962264150943394, "grad_norm": 0.30600160360336304, "learning_rate": 4.150943396226415e-05, "log_odds_chosen": 14.847908973693848, "log_odds_ratio": -0.0009415854001417756, "logits/chosen": -0.8421066999435425, "logits/rejected": -3.2322371006011963, "logps/chosen": -1.3275384902954102, "logps/rejected": -15.784387588500977, "loss": 1.324, "nll_loss": 1.323903203010559, "rewards/accuracies": 1.0, "rewards/chosen": -0.13275384902954102, "rewards/margins": 1.4456850290298462, "rewards/rejected": -1.5784387588500977, "step": 90 }, { "epoch": 0.3433962264150943, "grad_norm": 0.30167537927627563, "learning_rate": 4.1415094339622644e-05, "log_odds_chosen": 12.118502616882324, "log_odds_ratio": -0.08149873465299606, "logits/chosen": -0.8174037337303162, "logits/rejected": -3.398536443710327, "logps/chosen": -1.4228649139404297, "logps/rejected": -13.159191131591797, "loss": 1.3606, "nll_loss": 1.35248601436615, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1422865092754364, "rewards/margins": 1.1736326217651367, "rewards/rejected": -1.3159191608428955, "step": 91 }, { "epoch": 0.3471698113207547, "grad_norm": 0.4155506491661072, "learning_rate": 4.1320754716981135e-05, "log_odds_chosen": 13.490291595458984, "log_odds_ratio": -0.11288166791200638, "logits/chosen": -0.7610112428665161, "logits/rejected": -3.971395492553711, "logps/chosen": -1.609310269355774, "logps/rejected": -14.705669403076172, "loss": 1.6664, "nll_loss": 1.6551594734191895, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16093102097511292, "rewards/margins": 1.3096359968185425, "rewards/rejected": -1.470566987991333, "step": 92 }, { "epoch": 0.35094339622641507, "grad_norm": 0.2882811427116394, "learning_rate": 4.1226415094339626e-05, "log_odds_chosen": 13.462080001831055, "log_odds_ratio": -0.15872977674007416, "logits/chosen": -1.3535653352737427, "logits/rejected": -4.049402236938477, "logps/chosen": -1.4857981204986572, "logps/rejected": -14.58334732055664, "loss": 1.4598, "nll_loss": 1.4439499378204346, "rewards/accuracies": 0.875, "rewards/chosen": -0.14857982099056244, "rewards/margins": 1.3097549676895142, "rewards/rejected": -1.4583346843719482, "step": 93 }, { "epoch": 0.35471698113207545, "grad_norm": 0.3006541430950165, "learning_rate": 4.113207547169812e-05, "log_odds_chosen": 12.660806655883789, "log_odds_ratio": -0.05375822260975838, "logits/chosen": -2.8630645275115967, "logits/rejected": -5.452940464019775, "logps/chosen": -1.785542368888855, "logps/rejected": -14.212843894958496, "loss": 1.7117, "nll_loss": 1.7063469886779785, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1785542368888855, "rewards/margins": 1.2427302598953247, "rewards/rejected": -1.4212844371795654, "step": 94 }, { "epoch": 0.3584905660377358, "grad_norm": 0.2722157835960388, "learning_rate": 4.103773584905661e-05, "log_odds_chosen": 15.298785209655762, "log_odds_ratio": -0.000986977363936603, "logits/chosen": -1.7740228176116943, "logits/rejected": -4.849878311157227, "logps/chosen": -1.6763180494308472, "logps/rejected": -16.691926956176758, "loss": 1.6459, "nll_loss": 1.645756721496582, "rewards/accuracies": 1.0, "rewards/chosen": -0.16763180494308472, "rewards/margins": 1.5015610456466675, "rewards/rejected": -1.6691927909851074, "step": 95 }, { "epoch": 0.3622641509433962, "grad_norm": 0.5683091878890991, "learning_rate": 4.09433962264151e-05, "log_odds_chosen": 13.669389724731445, "log_odds_ratio": -0.0002820601512212306, "logits/chosen": -3.1163902282714844, "logits/rejected": -6.106686115264893, "logps/chosen": -1.557908058166504, "logps/rejected": -14.852912902832031, "loss": 1.4868, "nll_loss": 1.4867894649505615, "rewards/accuracies": 1.0, "rewards/chosen": -0.1557908058166504, "rewards/margins": 1.329500436782837, "rewards/rejected": -1.4852913618087769, "step": 96 }, { "epoch": 0.3660377358490566, "grad_norm": 0.2856326699256897, "learning_rate": 4.084905660377359e-05, "log_odds_chosen": 15.168952941894531, "log_odds_ratio": -0.07142313569784164, "logits/chosen": -1.4473392963409424, "logits/rejected": -4.727592468261719, "logps/chosen": -1.3984313011169434, "logps/rejected": -16.263202667236328, "loss": 1.581, "nll_loss": 1.5738685131072998, "rewards/accuracies": 0.9375, "rewards/chosen": -0.13984312117099762, "rewards/margins": 1.486477255821228, "rewards/rejected": -1.6263203620910645, "step": 97 }, { "epoch": 0.36981132075471695, "grad_norm": 0.29159900546073914, "learning_rate": 4.075471698113208e-05, "log_odds_chosen": 15.751093864440918, "log_odds_ratio": -0.00026286751381121576, "logits/chosen": -3.1162948608398438, "logits/rejected": -5.55817985534668, "logps/chosen": -1.6828699111938477, "logps/rejected": -17.040075302124023, "loss": 1.4492, "nll_loss": 1.4491852521896362, "rewards/accuracies": 1.0, "rewards/chosen": -0.168286994099617, "rewards/margins": 1.5357205867767334, "rewards/rejected": -1.704007625579834, "step": 98 }, { "epoch": 0.37358490566037733, "grad_norm": 0.27507874369621277, "learning_rate": 4.066037735849057e-05, "log_odds_chosen": 13.987431526184082, "log_odds_ratio": -0.07922342419624329, "logits/chosen": -1.512285590171814, "logits/rejected": -6.580392837524414, "logps/chosen": -1.6363471746444702, "logps/rejected": -15.355705261230469, "loss": 1.4966, "nll_loss": 1.4886995553970337, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16363471746444702, "rewards/margins": 1.3719358444213867, "rewards/rejected": -1.535570502281189, "step": 99 }, { "epoch": 0.37735849056603776, "grad_norm": 0.25051409006118774, "learning_rate": 4.0566037735849064e-05, "log_odds_chosen": 15.439388275146484, "log_odds_ratio": -5.675078136846423e-05, "logits/chosen": -1.3515607118606567, "logits/rejected": -5.860863208770752, "logps/chosen": -1.4820207357406616, "logps/rejected": -16.625112533569336, "loss": 1.5335, "nll_loss": 1.5334585905075073, "rewards/accuracies": 1.0, "rewards/chosen": -0.1482020765542984, "rewards/margins": 1.5143091678619385, "rewards/rejected": -1.6625111103057861, "step": 100 }, { "epoch": 0.38113207547169814, "grad_norm": 0.3001623749732971, "learning_rate": 4.047169811320755e-05, "log_odds_chosen": 12.887317657470703, "log_odds_ratio": -0.04469170421361923, "logits/chosen": -2.702786684036255, "logits/rejected": -4.913610458374023, "logps/chosen": -1.787389874458313, "logps/rejected": -14.465229988098145, "loss": 1.5049, "nll_loss": 1.5004353523254395, "rewards/accuracies": 1.0, "rewards/chosen": -0.17873898148536682, "rewards/margins": 1.2677841186523438, "rewards/rejected": -1.4465229511260986, "step": 101 }, { "epoch": 0.3849056603773585, "grad_norm": 0.34954193234443665, "learning_rate": 4.037735849056604e-05, "log_odds_chosen": 13.664055824279785, "log_odds_ratio": -0.1444215476512909, "logits/chosen": -1.608425498008728, "logits/rejected": -4.3469367027282715, "logps/chosen": -1.8433992862701416, "logps/rejected": -15.285165786743164, "loss": 1.6168, "nll_loss": 1.60233736038208, "rewards/accuracies": 0.9375, "rewards/chosen": -0.18433992564678192, "rewards/margins": 1.3441766500473022, "rewards/rejected": -1.5285166501998901, "step": 102 }, { "epoch": 0.3886792452830189, "grad_norm": 0.3157506287097931, "learning_rate": 4.028301886792453e-05, "log_odds_chosen": 11.686209678649902, "log_odds_ratio": -0.10109421610832214, "logits/chosen": -0.873611330986023, "logits/rejected": -3.7620019912719727, "logps/chosen": -1.6527265310287476, "logps/rejected": -13.108765602111816, "loss": 1.5602, "nll_loss": 1.5500413179397583, "rewards/accuracies": 0.875, "rewards/chosen": -0.16527265310287476, "rewards/margins": 1.145603895187378, "rewards/rejected": -1.3108766078948975, "step": 103 }, { "epoch": 0.39245283018867927, "grad_norm": 0.2715020477771759, "learning_rate": 4.018867924528302e-05, "log_odds_chosen": 11.378678321838379, "log_odds_ratio": -0.13346989452838898, "logits/chosen": -1.1194851398468018, "logits/rejected": -4.566447734832764, "logps/chosen": -1.6644248962402344, "logps/rejected": -12.777153015136719, "loss": 1.5005, "nll_loss": 1.487147331237793, "rewards/accuracies": 0.875, "rewards/chosen": -0.16644248366355896, "rewards/margins": 1.1112728118896484, "rewards/rejected": -1.2777153253555298, "step": 104 }, { "epoch": 0.39622641509433965, "grad_norm": 0.29437255859375, "learning_rate": 4.009433962264151e-05, "log_odds_chosen": 15.541433334350586, "log_odds_ratio": -5.782198059023358e-05, "logits/chosen": -1.276192307472229, "logits/rejected": -4.482449054718018, "logps/chosen": -1.7318949699401855, "logps/rejected": -17.009288787841797, "loss": 1.5203, "nll_loss": 1.5202912092208862, "rewards/accuracies": 1.0, "rewards/chosen": -0.17318949103355408, "rewards/margins": 1.5277395248413086, "rewards/rejected": -1.700929045677185, "step": 105 }, { "epoch": 0.4, "grad_norm": 0.28076162934303284, "learning_rate": 4e-05, "log_odds_chosen": 15.423474311828613, "log_odds_ratio": -0.003808586858212948, "logits/chosen": -1.461142659187317, "logits/rejected": -4.638182163238525, "logps/chosen": -1.5100231170654297, "logps/rejected": -16.47011947631836, "loss": 1.497, "nll_loss": 1.4965708255767822, "rewards/accuracies": 1.0, "rewards/chosen": -0.15100233256816864, "rewards/margins": 1.4960097074508667, "rewards/rejected": -1.6470119953155518, "step": 106 }, { "epoch": 0.4037735849056604, "grad_norm": 0.28875720500946045, "learning_rate": 3.9905660377358494e-05, "log_odds_chosen": 12.895769119262695, "log_odds_ratio": -0.060368865728378296, "logits/chosen": -2.2072086334228516, "logits/rejected": -5.86301851272583, "logps/chosen": -1.8704140186309814, "logps/rejected": -14.505237579345703, "loss": 1.5992, "nll_loss": 1.593145489692688, "rewards/accuracies": 0.9375, "rewards/chosen": -0.18704140186309814, "rewards/margins": 1.2634824514389038, "rewards/rejected": -1.4505239725112915, "step": 107 }, { "epoch": 0.4075471698113208, "grad_norm": 0.2719891369342804, "learning_rate": 3.9811320754716985e-05, "log_odds_chosen": 15.212738037109375, "log_odds_ratio": -0.06606274098157883, "logits/chosen": -1.9831483364105225, "logits/rejected": -5.709364414215088, "logps/chosen": -1.6806718111038208, "logps/rejected": -16.673917770385742, "loss": 1.5861, "nll_loss": 1.5794475078582764, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16806718707084656, "rewards/margins": 1.4993247985839844, "rewards/rejected": -1.6673917770385742, "step": 108 }, { "epoch": 0.41132075471698115, "grad_norm": 0.30551427602767944, "learning_rate": 3.9716981132075477e-05, "log_odds_chosen": 13.554816246032715, "log_odds_ratio": -0.17723311483860016, "logits/chosen": -1.8992154598236084, "logits/rejected": -4.842866897583008, "logps/chosen": -1.408470869064331, "logps/rejected": -14.5311861038208, "loss": 1.4911, "nll_loss": 1.4734236001968384, "rewards/accuracies": 0.875, "rewards/chosen": -0.1408470869064331, "rewards/margins": 1.3122715950012207, "rewards/rejected": -1.4531188011169434, "step": 109 }, { "epoch": 0.41509433962264153, "grad_norm": 0.25901442766189575, "learning_rate": 3.962264150943397e-05, "log_odds_chosen": 14.055702209472656, "log_odds_ratio": -0.07455631345510483, "logits/chosen": -1.1471797227859497, "logits/rejected": -4.136468887329102, "logps/chosen": -1.398701786994934, "logps/rejected": -15.004222869873047, "loss": 1.4465, "nll_loss": 1.4390912055969238, "rewards/accuracies": 0.9375, "rewards/chosen": -0.13987018167972565, "rewards/margins": 1.3605520725250244, "rewards/rejected": -1.5004222393035889, "step": 110 }, { "epoch": 0.4188679245283019, "grad_norm": 0.3040740191936493, "learning_rate": 3.952830188679246e-05, "log_odds_chosen": 13.487771987915039, "log_odds_ratio": -0.0007269117631949484, "logits/chosen": -2.4711754322052, "logits/rejected": -5.052443504333496, "logps/chosen": -1.6260043382644653, "logps/rejected": -14.823724746704102, "loss": 1.432, "nll_loss": 1.431890606880188, "rewards/accuracies": 1.0, "rewards/chosen": -0.16260044276714325, "rewards/margins": 1.3197720050811768, "rewards/rejected": -1.482372522354126, "step": 111 }, { "epoch": 0.4226415094339623, "grad_norm": 0.25823676586151123, "learning_rate": 3.943396226415095e-05, "log_odds_chosen": 15.438188552856445, "log_odds_ratio": -0.00011414527398301288, "logits/chosen": -2.096723794937134, "logits/rejected": -6.7537760734558105, "logps/chosen": -1.8138432502746582, "logps/rejected": -16.985055923461914, "loss": 1.7199, "nll_loss": 1.7199323177337646, "rewards/accuracies": 1.0, "rewards/chosen": -0.18138432502746582, "rewards/margins": 1.517121434211731, "rewards/rejected": -1.6985057592391968, "step": 112 }, { "epoch": 0.42641509433962266, "grad_norm": 0.349231094121933, "learning_rate": 3.933962264150944e-05, "log_odds_chosen": 12.445716857910156, "log_odds_ratio": -0.06478449702262878, "logits/chosen": -2.650108575820923, "logits/rejected": -5.185212135314941, "logps/chosen": -1.6985199451446533, "logps/rejected": -13.793228149414062, "loss": 1.746, "nll_loss": 1.739565372467041, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16985198855400085, "rewards/margins": 1.2094708681106567, "rewards/rejected": -1.3793230056762695, "step": 113 }, { "epoch": 0.43018867924528303, "grad_norm": 0.2924027740955353, "learning_rate": 3.924528301886793e-05, "log_odds_chosen": 12.061296463012695, "log_odds_ratio": -0.03779318928718567, "logits/chosen": -0.749549150466919, "logits/rejected": -3.7414801120758057, "logps/chosen": -1.8810656070709229, "logps/rejected": -13.728483200073242, "loss": 1.7387, "nll_loss": 1.7349181175231934, "rewards/accuracies": 1.0, "rewards/chosen": -0.18810656666755676, "rewards/margins": 1.1847418546676636, "rewards/rejected": -1.3728485107421875, "step": 114 }, { "epoch": 0.4339622641509434, "grad_norm": 0.28793764114379883, "learning_rate": 3.9150943396226416e-05, "log_odds_chosen": 16.13731575012207, "log_odds_ratio": -0.054478421807289124, "logits/chosen": -1.7537418603897095, "logits/rejected": -5.8226141929626465, "logps/chosen": -1.4789087772369385, "logps/rejected": -17.265403747558594, "loss": 1.589, "nll_loss": 1.5835916996002197, "rewards/accuracies": 0.9375, "rewards/chosen": -0.14789089560508728, "rewards/margins": 1.5786495208740234, "rewards/rejected": -1.7265403270721436, "step": 115 }, { "epoch": 0.4377358490566038, "grad_norm": 0.31606027483940125, "learning_rate": 3.905660377358491e-05, "log_odds_chosen": 14.270502090454102, "log_odds_ratio": -0.14657607674598694, "logits/chosen": -1.5621134042739868, "logits/rejected": -3.9824719429016113, "logps/chosen": -1.5558480024337769, "logps/rejected": -15.48137092590332, "loss": 1.4384, "nll_loss": 1.4237393140792847, "rewards/accuracies": 0.875, "rewards/chosen": -0.15558481216430664, "rewards/margins": 1.392552375793457, "rewards/rejected": -1.5481371879577637, "step": 116 }, { "epoch": 0.44150943396226416, "grad_norm": 0.6901794672012329, "learning_rate": 3.89622641509434e-05, "log_odds_chosen": 12.610831260681152, "log_odds_ratio": -0.11986835300922394, "logits/chosen": -1.5961331129074097, "logits/rejected": -4.9528584480285645, "logps/chosen": -1.864069938659668, "logps/rejected": -14.308099746704102, "loss": 1.7146, "nll_loss": 1.702580213546753, "rewards/accuracies": 0.9375, "rewards/chosen": -0.18640699982643127, "rewards/margins": 1.2444028854370117, "rewards/rejected": -1.4308099746704102, "step": 117 }, { "epoch": 0.44528301886792454, "grad_norm": 0.28305572271347046, "learning_rate": 3.886792452830189e-05, "log_odds_chosen": 13.709443092346191, "log_odds_ratio": -0.08205895870923996, "logits/chosen": -1.5196449756622314, "logits/rejected": -4.281819820404053, "logps/chosen": -1.5432316064834595, "logps/rejected": -14.945755958557129, "loss": 1.5562, "nll_loss": 1.5479600429534912, "rewards/accuracies": 1.0, "rewards/chosen": -0.15432317554950714, "rewards/margins": 1.34025239944458, "rewards/rejected": -1.4945755004882812, "step": 118 }, { "epoch": 0.4490566037735849, "grad_norm": 0.30540400743484497, "learning_rate": 3.877358490566038e-05, "log_odds_chosen": 14.886051177978516, "log_odds_ratio": -0.0001277975970879197, "logits/chosen": -2.5971062183380127, "logits/rejected": -6.4186625480651855, "logps/chosen": -1.446777105331421, "logps/rejected": -15.858993530273438, "loss": 1.3599, "nll_loss": 1.3598815202713013, "rewards/accuracies": 1.0, "rewards/chosen": -0.14467771351337433, "rewards/margins": 1.4412215948104858, "rewards/rejected": -1.5858993530273438, "step": 119 }, { "epoch": 0.4528301886792453, "grad_norm": 0.279973566532135, "learning_rate": 3.867924528301887e-05, "log_odds_chosen": 12.182138442993164, "log_odds_ratio": -0.007018540520220995, "logits/chosen": -1.7155228853225708, "logits/rejected": -5.174653053283691, "logps/chosen": -1.7579047679901123, "logps/rejected": -13.713508605957031, "loss": 1.6865, "nll_loss": 1.6858264207839966, "rewards/accuracies": 1.0, "rewards/chosen": -0.1757904589176178, "rewards/margins": 1.195560336112976, "rewards/rejected": -1.371350884437561, "step": 120 }, { "epoch": 0.45660377358490567, "grad_norm": 0.2613874077796936, "learning_rate": 3.858490566037736e-05, "log_odds_chosen": 14.095218658447266, "log_odds_ratio": -0.07463585585355759, "logits/chosen": -1.3325459957122803, "logits/rejected": -6.121455192565918, "logps/chosen": -1.5543732643127441, "logps/rejected": -15.366453170776367, "loss": 1.5167, "nll_loss": 1.509216070175171, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15543733537197113, "rewards/margins": 1.3812079429626465, "rewards/rejected": -1.5366454124450684, "step": 121 }, { "epoch": 0.46037735849056605, "grad_norm": 0.30324679613113403, "learning_rate": 3.8490566037735854e-05, "log_odds_chosen": 16.875022888183594, "log_odds_ratio": -0.00017375449533574283, "logits/chosen": -1.8117984533309937, "logits/rejected": -4.163735866546631, "logps/chosen": -1.515238881111145, "logps/rejected": -17.963499069213867, "loss": 1.4792, "nll_loss": 1.4791332483291626, "rewards/accuracies": 1.0, "rewards/chosen": -0.1515238881111145, "rewards/margins": 1.6448261737823486, "rewards/rejected": -1.7963500022888184, "step": 122 }, { "epoch": 0.4641509433962264, "grad_norm": 0.3230510354042053, "learning_rate": 3.8396226415094345e-05, "log_odds_chosen": 17.355361938476562, "log_odds_ratio": -8.270166631518805e-07, "logits/chosen": -0.9121267795562744, "logits/rejected": -4.532161712646484, "logps/chosen": -1.351798415184021, "logps/rejected": -18.30146026611328, "loss": 1.4902, "nll_loss": 1.4902395009994507, "rewards/accuracies": 1.0, "rewards/chosen": -0.13517984747886658, "rewards/margins": 1.6949663162231445, "rewards/rejected": -1.8301459550857544, "step": 123 }, { "epoch": 0.4679245283018868, "grad_norm": 0.27051493525505066, "learning_rate": 3.8301886792452836e-05, "log_odds_chosen": 16.027782440185547, "log_odds_ratio": -0.05309152603149414, "logits/chosen": -2.257577896118164, "logits/rejected": -6.006890296936035, "logps/chosen": -1.417616844177246, "logps/rejected": -17.086490631103516, "loss": 1.439, "nll_loss": 1.4336605072021484, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1417616754770279, "rewards/margins": 1.5668874979019165, "rewards/rejected": -1.7086491584777832, "step": 124 }, { "epoch": 0.4716981132075472, "grad_norm": 0.28948161005973816, "learning_rate": 3.820754716981133e-05, "log_odds_chosen": 13.788773536682129, "log_odds_ratio": -0.057656653225421906, "logits/chosen": -1.0248003005981445, "logits/rejected": -6.2845001220703125, "logps/chosen": -1.651491641998291, "logps/rejected": -15.156482696533203, "loss": 1.6935, "nll_loss": 1.6877108812332153, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16514916718006134, "rewards/margins": 1.350499153137207, "rewards/rejected": -1.515648365020752, "step": 125 }, { "epoch": 0.47547169811320755, "grad_norm": 0.29785606265068054, "learning_rate": 3.811320754716982e-05, "log_odds_chosen": 15.78986930847168, "log_odds_ratio": -8.410341251874343e-05, "logits/chosen": -2.4442198276519775, "logits/rejected": -6.754056453704834, "logps/chosen": -1.6627094745635986, "logps/rejected": -17.13760757446289, "loss": 1.6297, "nll_loss": 1.62972891330719, "rewards/accuracies": 1.0, "rewards/chosen": -0.16627094149589539, "rewards/margins": 1.547489881515503, "rewards/rejected": -1.7137608528137207, "step": 126 }, { "epoch": 0.47924528301886793, "grad_norm": 0.3125203251838684, "learning_rate": 3.801886792452831e-05, "log_odds_chosen": 15.262259483337402, "log_odds_ratio": -0.00025166559498757124, "logits/chosen": -2.318103790283203, "logits/rejected": -4.911985397338867, "logps/chosen": -1.5119843482971191, "logps/rejected": -16.430225372314453, "loss": 1.5521, "nll_loss": 1.5521178245544434, "rewards/accuracies": 1.0, "rewards/chosen": -0.15119843184947968, "rewards/margins": 1.4918241500854492, "rewards/rejected": -1.6430225372314453, "step": 127 }, { "epoch": 0.4830188679245283, "grad_norm": 0.4130852222442627, "learning_rate": 3.7924528301886794e-05, "log_odds_chosen": 13.181659698486328, "log_odds_ratio": -0.02264661341905594, "logits/chosen": -1.5559453964233398, "logits/rejected": -5.835235595703125, "logps/chosen": -1.6575170755386353, "logps/rejected": -14.44080638885498, "loss": 1.531, "nll_loss": 1.5287659168243408, "rewards/accuracies": 1.0, "rewards/chosen": -0.16575171053409576, "rewards/margins": 1.2783288955688477, "rewards/rejected": -1.4440807104110718, "step": 128 }, { "epoch": 0.4867924528301887, "grad_norm": 0.27878594398498535, "learning_rate": 3.7830188679245285e-05, "log_odds_chosen": 15.444690704345703, "log_odds_ratio": -0.024172237142920494, "logits/chosen": -1.8147392272949219, "logits/rejected": -5.994168758392334, "logps/chosen": -1.4637315273284912, "logps/rejected": -16.60024070739746, "loss": 1.41, "nll_loss": 1.4076224565505981, "rewards/accuracies": 1.0, "rewards/chosen": -0.14637315273284912, "rewards/margins": 1.5136507749557495, "rewards/rejected": -1.6600239276885986, "step": 129 }, { "epoch": 0.49056603773584906, "grad_norm": 0.36368459463119507, "learning_rate": 3.7735849056603776e-05, "log_odds_chosen": 15.150455474853516, "log_odds_ratio": -0.06176932156085968, "logits/chosen": -2.638796806335449, "logits/rejected": -6.133980751037598, "logps/chosen": -1.5448698997497559, "logps/rejected": -16.379032135009766, "loss": 1.5112, "nll_loss": 1.505061388015747, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1544869840145111, "rewards/margins": 1.4834163188934326, "rewards/rejected": -1.6379034519195557, "step": 130 }, { "epoch": 0.49433962264150944, "grad_norm": 0.3242456316947937, "learning_rate": 3.764150943396227e-05, "log_odds_chosen": 13.985454559326172, "log_odds_ratio": -4.5815289922757074e-05, "logits/chosen": -1.494691014289856, "logits/rejected": -5.898371696472168, "logps/chosen": -1.9884130954742432, "logps/rejected": -15.798318862915039, "loss": 1.7428, "nll_loss": 1.7428255081176758, "rewards/accuracies": 1.0, "rewards/chosen": -0.19884130358695984, "rewards/margins": 1.3809905052185059, "rewards/rejected": -1.579831838607788, "step": 131 }, { "epoch": 0.4981132075471698, "grad_norm": 0.29855185747146606, "learning_rate": 3.754716981132076e-05, "log_odds_chosen": 12.895346641540527, "log_odds_ratio": -0.0862637311220169, "logits/chosen": -0.5306591391563416, "logits/rejected": -3.3876893520355225, "logps/chosen": -1.6508989334106445, "logps/rejected": -14.312397956848145, "loss": 1.5424, "nll_loss": 1.533748745918274, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1650899052619934, "rewards/margins": 1.2661499977111816, "rewards/rejected": -1.4312398433685303, "step": 132 }, { "epoch": 0.5018867924528302, "grad_norm": 0.3010408282279968, "learning_rate": 3.745283018867924e-05, "log_odds_chosen": 14.359496116638184, "log_odds_ratio": -0.11943729966878891, "logits/chosen": -1.3975955247879028, "logits/rejected": -5.733314037322998, "logps/chosen": -1.5766863822937012, "logps/rejected": -15.659753799438477, "loss": 1.5544, "nll_loss": 1.5425056219100952, "rewards/accuracies": 0.875, "rewards/chosen": -0.15766863524913788, "rewards/margins": 1.4083068370819092, "rewards/rejected": -1.5659754276275635, "step": 133 }, { "epoch": 0.5056603773584906, "grad_norm": 0.28366541862487793, "learning_rate": 3.735849056603773e-05, "log_odds_chosen": 16.42931365966797, "log_odds_ratio": -1.6768943169154227e-05, "logits/chosen": -1.0664993524551392, "logits/rejected": -5.362708568572998, "logps/chosen": -1.4823436737060547, "logps/rejected": -17.60668182373047, "loss": 1.4417, "nll_loss": 1.441743016242981, "rewards/accuracies": 1.0, "rewards/chosen": -0.14823436737060547, "rewards/margins": 1.612433671951294, "rewards/rejected": -1.7606680393218994, "step": 134 }, { "epoch": 0.5094339622641509, "grad_norm": 0.297313928604126, "learning_rate": 3.7264150943396224e-05, "log_odds_chosen": 15.513206481933594, "log_odds_ratio": -0.03894759714603424, "logits/chosen": -1.827707290649414, "logits/rejected": -5.077935218811035, "logps/chosen": -1.5281659364700317, "logps/rejected": -16.74795150756836, "loss": 1.4237, "nll_loss": 1.4197759628295898, "rewards/accuracies": 1.0, "rewards/chosen": -0.15281659364700317, "rewards/margins": 1.5219786167144775, "rewards/rejected": -1.674795150756836, "step": 135 }, { "epoch": 0.5132075471698113, "grad_norm": 0.2611730992794037, "learning_rate": 3.7169811320754716e-05, "log_odds_chosen": 14.942113876342773, "log_odds_ratio": -0.001385436742566526, "logits/chosen": -1.9019191265106201, "logits/rejected": -5.761693000793457, "logps/chosen": -1.4520561695098877, "logps/rejected": -16.059383392333984, "loss": 1.4831, "nll_loss": 1.4829171895980835, "rewards/accuracies": 1.0, "rewards/chosen": -0.14520561695098877, "rewards/margins": 1.4607326984405518, "rewards/rejected": -1.60593843460083, "step": 136 }, { "epoch": 0.5169811320754717, "grad_norm": 0.37624403834342957, "learning_rate": 3.7075471698113207e-05, "log_odds_chosen": 15.124824523925781, "log_odds_ratio": -0.06929050385951996, "logits/chosen": -0.5525396466255188, "logits/rejected": -4.60980749130249, "logps/chosen": -1.61146080493927, "logps/rejected": -16.51461410522461, "loss": 1.4183, "nll_loss": 1.4113963842391968, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16114608943462372, "rewards/margins": 1.4903154373168945, "rewards/rejected": -1.6514614820480347, "step": 137 }, { "epoch": 0.5207547169811321, "grad_norm": 0.3030368387699127, "learning_rate": 3.69811320754717e-05, "log_odds_chosen": 12.055831909179688, "log_odds_ratio": -0.11039459705352783, "logits/chosen": -0.8935803174972534, "logits/rejected": -4.527928352355957, "logps/chosen": -1.668050765991211, "logps/rejected": -13.411079406738281, "loss": 1.5587, "nll_loss": 1.5476906299591064, "rewards/accuracies": 0.875, "rewards/chosen": -0.16680508852005005, "rewards/margins": 1.1743026971817017, "rewards/rejected": -1.3411078453063965, "step": 138 }, { "epoch": 0.5245283018867924, "grad_norm": 0.2544783353805542, "learning_rate": 3.688679245283019e-05, "log_odds_chosen": 15.021339416503906, "log_odds_ratio": -0.06613866984844208, "logits/chosen": -2.3585753440856934, "logits/rejected": -6.415126323699951, "logps/chosen": -1.4854167699813843, "logps/rejected": -16.204166412353516, "loss": 1.3412, "nll_loss": 1.3345497846603394, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1485416740179062, "rewards/margins": 1.4718750715255737, "rewards/rejected": -1.6204167604446411, "step": 139 }, { "epoch": 0.5283018867924528, "grad_norm": 0.3188823461532593, "learning_rate": 3.679245283018868e-05, "log_odds_chosen": 15.247077941894531, "log_odds_ratio": -0.0011911022011190653, "logits/chosen": -2.5056328773498535, "logits/rejected": -5.532197952270508, "logps/chosen": -1.569747805595398, "logps/rejected": -16.510929107666016, "loss": 1.4605, "nll_loss": 1.4603441953659058, "rewards/accuracies": 1.0, "rewards/chosen": -0.15697479248046875, "rewards/margins": 1.494118094444275, "rewards/rejected": -1.6510928869247437, "step": 140 }, { "epoch": 0.5320754716981132, "grad_norm": 0.3175397515296936, "learning_rate": 3.669811320754717e-05, "log_odds_chosen": 13.892709732055664, "log_odds_ratio": -0.0589115172624588, "logits/chosen": -0.8055652379989624, "logits/rejected": -3.4150259494781494, "logps/chosen": -1.5232586860656738, "logps/rejected": -14.959342956542969, "loss": 1.5432, "nll_loss": 1.5373512506484985, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15232588350772858, "rewards/margins": 1.3436083793640137, "rewards/rejected": -1.495934247970581, "step": 141 }, { "epoch": 0.5358490566037736, "grad_norm": 0.27956297993659973, "learning_rate": 3.660377358490566e-05, "log_odds_chosen": 15.927668571472168, "log_odds_ratio": -0.0016179109225049615, "logits/chosen": -1.636272668838501, "logits/rejected": -5.451028347015381, "logps/chosen": -1.640097975730896, "logps/rejected": -17.290578842163086, "loss": 1.5572, "nll_loss": 1.5570058822631836, "rewards/accuracies": 1.0, "rewards/chosen": -0.16400979459285736, "rewards/margins": 1.565048098564148, "rewards/rejected": -1.729057788848877, "step": 142 }, { "epoch": 0.539622641509434, "grad_norm": 0.2930643558502197, "learning_rate": 3.650943396226415e-05, "log_odds_chosen": 16.596500396728516, "log_odds_ratio": -9.716653585201129e-05, "logits/chosen": -1.436676263809204, "logits/rejected": -4.450935363769531, "logps/chosen": -1.3673814535140991, "logps/rejected": -17.530248641967773, "loss": 1.3955, "nll_loss": 1.3954448699951172, "rewards/accuracies": 1.0, "rewards/chosen": -0.1367381513118744, "rewards/margins": 1.6162867546081543, "rewards/rejected": -1.753024935722351, "step": 143 }, { "epoch": 0.5433962264150943, "grad_norm": 0.4236195683479309, "learning_rate": 3.641509433962264e-05, "log_odds_chosen": 14.702760696411133, "log_odds_ratio": -0.0004511699662543833, "logits/chosen": -1.313840389251709, "logits/rejected": -4.356607437133789, "logps/chosen": -1.8211621046066284, "logps/rejected": -16.25528335571289, "loss": 1.547, "nll_loss": 1.5469856262207031, "rewards/accuracies": 1.0, "rewards/chosen": -0.18211621046066284, "rewards/margins": 1.443412184715271, "rewards/rejected": -1.6255284547805786, "step": 144 }, { "epoch": 0.5471698113207547, "grad_norm": 0.33278077840805054, "learning_rate": 3.632075471698113e-05, "log_odds_chosen": 15.362979888916016, "log_odds_ratio": -0.0003507338115014136, "logits/chosen": -1.3937156200408936, "logits/rejected": -5.756740570068359, "logps/chosen": -1.7482523918151855, "logps/rejected": -16.875289916992188, "loss": 1.608, "nll_loss": 1.6079708337783813, "rewards/accuracies": 1.0, "rewards/chosen": -0.1748252511024475, "rewards/margins": 1.5127036571502686, "rewards/rejected": -1.6875288486480713, "step": 145 }, { "epoch": 0.5509433962264151, "grad_norm": 0.32443860173225403, "learning_rate": 3.622641509433962e-05, "log_odds_chosen": 15.61093521118164, "log_odds_ratio": -0.06764474511146545, "logits/chosen": -1.9850184917449951, "logits/rejected": -5.995416164398193, "logps/chosen": -1.7201964855194092, "logps/rejected": -17.044771194458008, "loss": 1.6287, "nll_loss": 1.6218953132629395, "rewards/accuracies": 0.9375, "rewards/chosen": -0.17201966047286987, "rewards/margins": 1.5324573516845703, "rewards/rejected": -1.704477071762085, "step": 146 }, { "epoch": 0.5547169811320755, "grad_norm": 0.2766498029232025, "learning_rate": 3.613207547169811e-05, "log_odds_chosen": 14.549701690673828, "log_odds_ratio": -0.000522086163982749, "logits/chosen": -2.167712688446045, "logits/rejected": -6.221152305603027, "logps/chosen": -1.494560956954956, "logps/rejected": -15.703763961791992, "loss": 1.4152, "nll_loss": 1.4151456356048584, "rewards/accuracies": 1.0, "rewards/chosen": -0.14945609867572784, "rewards/margins": 1.4209203720092773, "rewards/rejected": -1.5703763961791992, "step": 147 }, { "epoch": 0.5584905660377358, "grad_norm": 0.4621659219264984, "learning_rate": 3.60377358490566e-05, "log_odds_chosen": 14.655370712280273, "log_odds_ratio": -0.07225409895181656, "logits/chosen": -0.9679865837097168, "logits/rejected": -3.743302345275879, "logps/chosen": -1.47898268699646, "logps/rejected": -15.858048439025879, "loss": 1.3764, "nll_loss": 1.3691895008087158, "rewards/accuracies": 0.9375, "rewards/chosen": -0.14789827167987823, "rewards/margins": 1.4379065036773682, "rewards/rejected": -1.5858049392700195, "step": 148 }, { "epoch": 0.5622641509433962, "grad_norm": 0.319667249917984, "learning_rate": 3.594339622641509e-05, "log_odds_chosen": 15.044527053833008, "log_odds_ratio": -0.00024553845287300646, "logits/chosen": -1.18473219871521, "logits/rejected": -4.766110420227051, "logps/chosen": -1.5453764200210571, "logps/rejected": -16.291166305541992, "loss": 1.5824, "nll_loss": 1.5824246406555176, "rewards/accuracies": 1.0, "rewards/chosen": -0.1545376479625702, "rewards/margins": 1.474579095840454, "rewards/rejected": -1.6291167736053467, "step": 149 }, { "epoch": 0.5660377358490566, "grad_norm": 0.3204805552959442, "learning_rate": 3.5849056603773584e-05, "log_odds_chosen": 17.485958099365234, "log_odds_ratio": -0.10664539784193039, "logits/chosen": -1.6424400806427002, "logits/rejected": -3.842500686645508, "logps/chosen": -1.4972755908966064, "logps/rejected": -18.57717514038086, "loss": 1.3499, "nll_loss": 1.3392703533172607, "rewards/accuracies": 0.9375, "rewards/chosen": -0.14972755312919617, "rewards/margins": 1.7079898118972778, "rewards/rejected": -1.8577172756195068, "step": 150 }, { "epoch": 0.569811320754717, "grad_norm": 0.26958513259887695, "learning_rate": 3.5754716981132075e-05, "log_odds_chosen": 15.958301544189453, "log_odds_ratio": -0.0007034969748929143, "logits/chosen": -1.0648162364959717, "logits/rejected": -6.030667304992676, "logps/chosen": -1.6111671924591064, "logps/rejected": -17.287628173828125, "loss": 1.6164, "nll_loss": 1.6163535118103027, "rewards/accuracies": 1.0, "rewards/chosen": -0.16111671924591064, "rewards/margins": 1.5676461458206177, "rewards/rejected": -1.7287628650665283, "step": 151 }, { "epoch": 0.5735849056603773, "grad_norm": 0.3305363059043884, "learning_rate": 3.5660377358490566e-05, "log_odds_chosen": 17.14991569519043, "log_odds_ratio": -3.2623302104184404e-05, "logits/chosen": -1.6892523765563965, "logits/rejected": -4.232363224029541, "logps/chosen": -1.5470982789993286, "logps/rejected": -18.353713989257812, "loss": 1.4759, "nll_loss": 1.4759438037872314, "rewards/accuracies": 1.0, "rewards/chosen": -0.1547098308801651, "rewards/margins": 1.6806614398956299, "rewards/rejected": -1.8353712558746338, "step": 152 }, { "epoch": 0.5773584905660377, "grad_norm": 0.3334077000617981, "learning_rate": 3.556603773584906e-05, "log_odds_chosen": 15.461456298828125, "log_odds_ratio": -3.081683098571375e-05, "logits/chosen": -1.5841903686523438, "logits/rejected": -5.0937604904174805, "logps/chosen": -1.774914264678955, "logps/rejected": -16.993589401245117, "loss": 1.5688, "nll_loss": 1.5687741041183472, "rewards/accuracies": 1.0, "rewards/chosen": -0.1774914264678955, "rewards/margins": 1.5218675136566162, "rewards/rejected": -1.6993589401245117, "step": 153 }, { "epoch": 0.5811320754716981, "grad_norm": 0.29705750942230225, "learning_rate": 3.547169811320755e-05, "log_odds_chosen": 14.837601661682129, "log_odds_ratio": -4.0413448004983366e-05, "logits/chosen": -2.566657781600952, "logits/rejected": -6.176463603973389, "logps/chosen": -1.7194881439208984, "logps/rejected": -16.30776023864746, "loss": 1.5701, "nll_loss": 1.5700526237487793, "rewards/accuracies": 1.0, "rewards/chosen": -0.17194882035255432, "rewards/margins": 1.4588273763656616, "rewards/rejected": -1.6307761669158936, "step": 154 }, { "epoch": 0.5849056603773585, "grad_norm": 0.4479868710041046, "learning_rate": 3.537735849056604e-05, "log_odds_chosen": 16.22223472595215, "log_odds_ratio": -5.1474453357513994e-05, "logits/chosen": -1.6525013446807861, "logits/rejected": -5.60921573638916, "logps/chosen": -1.4534053802490234, "logps/rejected": -17.35995101928711, "loss": 1.3998, "nll_loss": 1.3998193740844727, "rewards/accuracies": 1.0, "rewards/chosen": -0.14534053206443787, "rewards/margins": 1.5906546115875244, "rewards/rejected": -1.7359951734542847, "step": 155 }, { "epoch": 0.5886792452830188, "grad_norm": 0.3321553170681, "learning_rate": 3.528301886792453e-05, "log_odds_chosen": 14.45380973815918, "log_odds_ratio": -0.00011496634397190064, "logits/chosen": -1.5721373558044434, "logits/rejected": -5.987687110900879, "logps/chosen": -1.7010905742645264, "logps/rejected": -15.914693832397461, "loss": 1.4444, "nll_loss": 1.4443511962890625, "rewards/accuracies": 1.0, "rewards/chosen": -0.17010906338691711, "rewards/margins": 1.4213604927062988, "rewards/rejected": -1.591469407081604, "step": 156 }, { "epoch": 0.5924528301886792, "grad_norm": 0.32465389370918274, "learning_rate": 3.518867924528302e-05, "log_odds_chosen": 15.948369979858398, "log_odds_ratio": -1.5907631677691825e-05, "logits/chosen": -1.2226861715316772, "logits/rejected": -4.106148719787598, "logps/chosen": -1.8807780742645264, "logps/rejected": -17.614301681518555, "loss": 1.5042, "nll_loss": 1.5042164325714111, "rewards/accuracies": 1.0, "rewards/chosen": -0.18807780742645264, "rewards/margins": 1.5733524560928345, "rewards/rejected": -1.761430263519287, "step": 157 }, { "epoch": 0.5962264150943396, "grad_norm": 0.2754722237586975, "learning_rate": 3.5094339622641506e-05, "log_odds_chosen": 15.541213989257812, "log_odds_ratio": -0.16899409890174866, "logits/chosen": -2.1642675399780273, "logits/rejected": -5.719496250152588, "logps/chosen": -1.5195682048797607, "logps/rejected": -16.678632736206055, "loss": 1.6354, "nll_loss": 1.6184601783752441, "rewards/accuracies": 0.875, "rewards/chosen": -0.15195682644844055, "rewards/margins": 1.5159064531326294, "rewards/rejected": -1.6678632497787476, "step": 158 }, { "epoch": 0.6, "grad_norm": 0.28031638264656067, "learning_rate": 3.5e-05, "log_odds_chosen": 11.228550910949707, "log_odds_ratio": -0.20393189787864685, "logits/chosen": -0.9825246334075928, "logits/rejected": -4.9232587814331055, "logps/chosen": -1.7185983657836914, "logps/rejected": -12.776618003845215, "loss": 1.5923, "nll_loss": 1.5718731880187988, "rewards/accuracies": 0.875, "rewards/chosen": -0.17185983061790466, "rewards/margins": 1.1058019399642944, "rewards/rejected": -1.2776618003845215, "step": 159 }, { "epoch": 0.6037735849056604, "grad_norm": 0.2919527292251587, "learning_rate": 3.490566037735849e-05, "log_odds_chosen": 14.894678115844727, "log_odds_ratio": -0.0009987247176468372, "logits/chosen": -0.6535999178886414, "logits/rejected": -4.017884254455566, "logps/chosen": -1.5091729164123535, "logps/rejected": -16.018918991088867, "loss": 1.403, "nll_loss": 1.40290367603302, "rewards/accuracies": 1.0, "rewards/chosen": -0.15091729164123535, "rewards/margins": 1.450974702835083, "rewards/rejected": -1.6018919944763184, "step": 160 }, { "epoch": 0.6075471698113207, "grad_norm": 0.3071415424346924, "learning_rate": 3.481132075471698e-05, "log_odds_chosen": 16.471067428588867, "log_odds_ratio": -2.074857911793515e-05, "logits/chosen": -0.09290022403001785, "logits/rejected": -3.7179436683654785, "logps/chosen": -1.471862554550171, "logps/rejected": -17.618274688720703, "loss": 1.4793, "nll_loss": 1.4793322086334229, "rewards/accuracies": 1.0, "rewards/chosen": -0.1471862643957138, "rewards/margins": 1.6146413087844849, "rewards/rejected": -1.7618277072906494, "step": 161 }, { "epoch": 0.6113207547169811, "grad_norm": 0.2777910530567169, "learning_rate": 3.471698113207547e-05, "log_odds_chosen": 16.91250991821289, "log_odds_ratio": -0.016157550737261772, "logits/chosen": -1.235945463180542, "logits/rejected": -5.316408157348633, "logps/chosen": -1.6419563293457031, "logps/rejected": -18.302946090698242, "loss": 1.429, "nll_loss": 1.4273896217346191, "rewards/accuracies": 1.0, "rewards/chosen": -0.16419564187526703, "rewards/margins": 1.6660988330841064, "rewards/rejected": -1.8302946090698242, "step": 162 }, { "epoch": 0.6150943396226415, "grad_norm": 0.2712962329387665, "learning_rate": 3.462264150943396e-05, "log_odds_chosen": 16.41084098815918, "log_odds_ratio": -2.9441296646837145e-05, "logits/chosen": -1.025072693824768, "logits/rejected": -5.848681449890137, "logps/chosen": -1.5957037210464478, "logps/rejected": -17.6140193939209, "loss": 1.5493, "nll_loss": 1.5493419170379639, "rewards/accuracies": 1.0, "rewards/chosen": -0.15957039594650269, "rewards/margins": 1.6018316745758057, "rewards/rejected": -1.7614020109176636, "step": 163 }, { "epoch": 0.6188679245283019, "grad_norm": 0.30003705620765686, "learning_rate": 3.452830188679245e-05, "log_odds_chosen": 16.494930267333984, "log_odds_ratio": -8.993155461212154e-06, "logits/chosen": -1.4486334323883057, "logits/rejected": -5.38150691986084, "logps/chosen": -1.4585084915161133, "logps/rejected": -17.579425811767578, "loss": 1.5673, "nll_loss": 1.5673246383666992, "rewards/accuracies": 1.0, "rewards/chosen": -0.14585085213184357, "rewards/margins": 1.6120917797088623, "rewards/rejected": -1.7579425573349, "step": 164 }, { "epoch": 0.6226415094339622, "grad_norm": 0.2940906286239624, "learning_rate": 3.4433962264150943e-05, "log_odds_chosen": 15.412787437438965, "log_odds_ratio": -0.07001832127571106, "logits/chosen": -1.34386146068573, "logits/rejected": -5.122559070587158, "logps/chosen": -1.8467469215393066, "logps/rejected": -17.047271728515625, "loss": 1.6754, "nll_loss": 1.6683518886566162, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1846746802330017, "rewards/margins": 1.5200525522232056, "rewards/rejected": -1.7047271728515625, "step": 165 }, { "epoch": 0.6264150943396226, "grad_norm": 0.3121996521949768, "learning_rate": 3.4339622641509435e-05, "log_odds_chosen": 16.29378318786621, "log_odds_ratio": -0.05672796443104744, "logits/chosen": -2.245677947998047, "logits/rejected": -4.856801986694336, "logps/chosen": -1.4243159294128418, "logps/rejected": -17.36766815185547, "loss": 1.5158, "nll_loss": 1.5101523399353027, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1424316018819809, "rewards/margins": 1.5943353176116943, "rewards/rejected": -1.7367669343948364, "step": 166 }, { "epoch": 0.630188679245283, "grad_norm": 0.28171584010124207, "learning_rate": 3.4245283018867926e-05, "log_odds_chosen": 15.490211486816406, "log_odds_ratio": -0.024965543299913406, "logits/chosen": -0.1043301597237587, "logits/rejected": -4.409641742706299, "logps/chosen": -1.771804690361023, "logps/rejected": -17.02152442932129, "loss": 1.6991, "nll_loss": 1.6965808868408203, "rewards/accuracies": 1.0, "rewards/chosen": -0.1771804690361023, "rewards/margins": 1.5249719619750977, "rewards/rejected": -1.7021524906158447, "step": 167 }, { "epoch": 0.6339622641509434, "grad_norm": 0.27846312522888184, "learning_rate": 3.415094339622642e-05, "log_odds_chosen": 16.138580322265625, "log_odds_ratio": -0.00026864392566494644, "logits/chosen": -1.5777232646942139, "logits/rejected": -6.5474162101745605, "logps/chosen": -1.6822800636291504, "logps/rejected": -17.583065032958984, "loss": 1.5533, "nll_loss": 1.5532336235046387, "rewards/accuracies": 1.0, "rewards/chosen": -0.16822800040245056, "rewards/margins": 1.5900784730911255, "rewards/rejected": -1.7583065032958984, "step": 168 }, { "epoch": 0.6377358490566037, "grad_norm": 0.3010658025741577, "learning_rate": 3.405660377358491e-05, "log_odds_chosen": 15.855169296264648, "log_odds_ratio": -1.3776767445961013e-05, "logits/chosen": -1.5479809045791626, "logits/rejected": -5.985370635986328, "logps/chosen": -1.7337384223937988, "logps/rejected": -17.28085708618164, "loss": 1.6455, "nll_loss": 1.645505666732788, "rewards/accuracies": 1.0, "rewards/chosen": -0.17337384819984436, "rewards/margins": 1.5547118186950684, "rewards/rejected": -1.7280856370925903, "step": 169 }, { "epoch": 0.6415094339622641, "grad_norm": 0.3382321000099182, "learning_rate": 3.39622641509434e-05, "log_odds_chosen": 15.886013984680176, "log_odds_ratio": -8.158626769727562e-06, "logits/chosen": -1.8764511346817017, "logits/rejected": -5.525568962097168, "logps/chosen": -1.3874365091323853, "logps/rejected": -16.957935333251953, "loss": 1.4466, "nll_loss": 1.4466451406478882, "rewards/accuracies": 1.0, "rewards/chosen": -0.13874365389347076, "rewards/margins": 1.5570499897003174, "rewards/rejected": -1.695793628692627, "step": 170 }, { "epoch": 0.6452830188679245, "grad_norm": 0.33056262135505676, "learning_rate": 3.386792452830188e-05, "log_odds_chosen": 15.484598159790039, "log_odds_ratio": -8.27780422696378e-06, "logits/chosen": -1.8570010662078857, "logits/rejected": -4.6085357666015625, "logps/chosen": -1.5708074569702148, "logps/rejected": -16.722640991210938, "loss": 1.5653, "nll_loss": 1.5652695894241333, "rewards/accuracies": 1.0, "rewards/chosen": -0.157080739736557, "rewards/margins": 1.5151833295822144, "rewards/rejected": -1.6722640991210938, "step": 171 }, { "epoch": 0.6490566037735849, "grad_norm": 0.31156399846076965, "learning_rate": 3.3773584905660374e-05, "log_odds_chosen": 15.454925537109375, "log_odds_ratio": -0.01993393711745739, "logits/chosen": -1.1617259979248047, "logits/rejected": -5.1523756980896, "logps/chosen": -1.5937132835388184, "logps/rejected": -16.762189865112305, "loss": 1.3782, "nll_loss": 1.3762309551239014, "rewards/accuracies": 1.0, "rewards/chosen": -0.15937133133411407, "rewards/margins": 1.5168476104736328, "rewards/rejected": -1.6762189865112305, "step": 172 }, { "epoch": 0.6528301886792452, "grad_norm": 0.2925812005996704, "learning_rate": 3.3679245283018865e-05, "log_odds_chosen": 17.620845794677734, "log_odds_ratio": -0.0001716611732263118, "logits/chosen": -1.702080249786377, "logits/rejected": -5.444385051727295, "logps/chosen": -1.224086046218872, "logps/rejected": -18.321523666381836, "loss": 1.324, "nll_loss": 1.3239673376083374, "rewards/accuracies": 1.0, "rewards/chosen": -0.12240861356258392, "rewards/margins": 1.709743857383728, "rewards/rejected": -1.8321523666381836, "step": 173 }, { "epoch": 0.6566037735849056, "grad_norm": 0.3308000862598419, "learning_rate": 3.3584905660377356e-05, "log_odds_chosen": 13.906808853149414, "log_odds_ratio": -0.05518035590648651, "logits/chosen": -1.4705300331115723, "logits/rejected": -4.510253429412842, "logps/chosen": -1.5690919160842896, "logps/rejected": -15.069160461425781, "loss": 1.3463, "nll_loss": 1.3407379388809204, "rewards/accuracies": 1.0, "rewards/chosen": -0.15690919756889343, "rewards/margins": 1.3500069379806519, "rewards/rejected": -1.5069161653518677, "step": 174 }, { "epoch": 0.660377358490566, "grad_norm": 0.2951977550983429, "learning_rate": 3.349056603773585e-05, "log_odds_chosen": 17.145343780517578, "log_odds_ratio": -0.06170666217803955, "logits/chosen": -2.072368860244751, "logits/rejected": -5.481960773468018, "logps/chosen": -1.3195624351501465, "logps/rejected": -18.049631118774414, "loss": 1.3551, "nll_loss": 1.348900556564331, "rewards/accuracies": 0.9375, "rewards/chosen": -0.13195623457431793, "rewards/margins": 1.6730068922042847, "rewards/rejected": -1.8049631118774414, "step": 175 }, { "epoch": 0.6641509433962264, "grad_norm": 0.37980836629867554, "learning_rate": 3.339622641509434e-05, "log_odds_chosen": 14.718152046203613, "log_odds_ratio": -0.13777947425842285, "logits/chosen": -1.4754526615142822, "logits/rejected": -3.541693687438965, "logps/chosen": -1.6201732158660889, "logps/rejected": -15.98199462890625, "loss": 1.4959, "nll_loss": 1.4821652173995972, "rewards/accuracies": 0.875, "rewards/chosen": -0.1620173305273056, "rewards/margins": 1.4361821413040161, "rewards/rejected": -1.598199486732483, "step": 176 }, { "epoch": 0.6679245283018868, "grad_norm": 0.2625764012336731, "learning_rate": 3.330188679245283e-05, "log_odds_chosen": 16.301048278808594, "log_odds_ratio": -0.07389828562736511, "logits/chosen": -1.6500033140182495, "logits/rejected": -5.951912879943848, "logps/chosen": -1.6418194770812988, "logps/rejected": -17.625288009643555, "loss": 1.6667, "nll_loss": 1.659282922744751, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16418196260929108, "rewards/margins": 1.5983469486236572, "rewards/rejected": -1.762528896331787, "step": 177 }, { "epoch": 0.6716981132075471, "grad_norm": 0.33599424362182617, "learning_rate": 3.320754716981132e-05, "log_odds_chosen": 18.26766014099121, "log_odds_ratio": -2.645006361490232e-06, "logits/chosen": -1.5161099433898926, "logits/rejected": -4.741918087005615, "logps/chosen": -1.222088098526001, "logps/rejected": -18.985591888427734, "loss": 1.4988, "nll_loss": 1.4987843036651611, "rewards/accuracies": 1.0, "rewards/chosen": -0.12220881879329681, "rewards/margins": 1.7763502597808838, "rewards/rejected": -1.8985592126846313, "step": 178 }, { "epoch": 0.6754716981132075, "grad_norm": 0.3460422158241272, "learning_rate": 3.311320754716981e-05, "log_odds_chosen": 15.672019958496094, "log_odds_ratio": -0.0003013765381183475, "logits/chosen": -1.8128689527511597, "logits/rejected": -5.0894670486450195, "logps/chosen": -1.912743330001831, "logps/rejected": -17.398412704467773, "loss": 1.4925, "nll_loss": 1.49249267578125, "rewards/accuracies": 1.0, "rewards/chosen": -0.19127434492111206, "rewards/margins": 1.5485669374465942, "rewards/rejected": -1.7398412227630615, "step": 179 }, { "epoch": 0.6792452830188679, "grad_norm": 0.325990229845047, "learning_rate": 3.30188679245283e-05, "log_odds_chosen": 16.67650032043457, "log_odds_ratio": -7.659475159016438e-06, "logits/chosen": -1.7975369691848755, "logits/rejected": -5.100127220153809, "logps/chosen": -1.545456886291504, "logps/rejected": -17.93629264831543, "loss": 1.3418, "nll_loss": 1.3418041467666626, "rewards/accuracies": 1.0, "rewards/chosen": -0.15454569458961487, "rewards/margins": 1.639083743095398, "rewards/rejected": -1.7936294078826904, "step": 180 }, { "epoch": 0.6830188679245283, "grad_norm": 0.31531310081481934, "learning_rate": 3.2924528301886794e-05, "log_odds_chosen": 16.065807342529297, "log_odds_ratio": -0.06276258826255798, "logits/chosen": -1.224330186843872, "logits/rejected": -4.1433610916137695, "logps/chosen": -1.5791137218475342, "logps/rejected": -17.382640838623047, "loss": 1.2837, "nll_loss": 1.277405858039856, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15791137516498566, "rewards/margins": 1.580352783203125, "rewards/rejected": -1.7382642030715942, "step": 181 }, { "epoch": 0.6867924528301886, "grad_norm": 0.3160483241081238, "learning_rate": 3.2830188679245285e-05, "log_odds_chosen": 13.249960899353027, "log_odds_ratio": -0.09639393538236618, "logits/chosen": -1.4797954559326172, "logits/rejected": -5.896228790283203, "logps/chosen": -1.5692899227142334, "logps/rejected": -14.452865600585938, "loss": 1.4959, "nll_loss": 1.4862685203552246, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15692900121212006, "rewards/margins": 1.2883576154708862, "rewards/rejected": -1.4452866315841675, "step": 182 }, { "epoch": 0.690566037735849, "grad_norm": 0.3040304183959961, "learning_rate": 3.2735849056603776e-05, "log_odds_chosen": 18.430316925048828, "log_odds_ratio": -0.00038310332456603646, "logits/chosen": -1.5657317638397217, "logits/rejected": -4.217155933380127, "logps/chosen": -1.4011714458465576, "logps/rejected": -19.451824188232422, "loss": 1.5214, "nll_loss": 1.5213794708251953, "rewards/accuracies": 1.0, "rewards/chosen": -0.14011713862419128, "rewards/margins": 1.805065393447876, "rewards/rejected": -1.9451824426651, "step": 183 }, { "epoch": 0.6943396226415094, "grad_norm": 0.34743577241897583, "learning_rate": 3.264150943396227e-05, "log_odds_chosen": 17.73297882080078, "log_odds_ratio": -2.7628839234239422e-05, "logits/chosen": -0.9867445230484009, "logits/rejected": -3.8493878841400146, "logps/chosen": -1.577934741973877, "logps/rejected": -19.020362854003906, "loss": 1.4107, "nll_loss": 1.4106800556182861, "rewards/accuracies": 1.0, "rewards/chosen": -0.15779347717761993, "rewards/margins": 1.7442429065704346, "rewards/rejected": -1.9020363092422485, "step": 184 }, { "epoch": 0.6981132075471698, "grad_norm": 0.29433855414390564, "learning_rate": 3.254716981132075e-05, "log_odds_chosen": 18.695938110351562, "log_odds_ratio": -2.980303406729945e-06, "logits/chosen": -1.6250673532485962, "logits/rejected": -5.061118125915527, "logps/chosen": -1.1787736415863037, "logps/rejected": -19.337848663330078, "loss": 1.4456, "nll_loss": 1.4456124305725098, "rewards/accuracies": 1.0, "rewards/chosen": -0.11787736415863037, "rewards/margins": 1.8159077167510986, "rewards/rejected": -1.9337849617004395, "step": 185 }, { "epoch": 0.7018867924528301, "grad_norm": 0.33233845233917236, "learning_rate": 3.245283018867924e-05, "log_odds_chosen": 18.042146682739258, "log_odds_ratio": -3.9149457734311e-05, "logits/chosen": -1.7575011253356934, "logits/rejected": -5.358031272888184, "logps/chosen": -1.8146780729293823, "logps/rejected": -19.64087677001953, "loss": 1.5776, "nll_loss": 1.5775460004806519, "rewards/accuracies": 1.0, "rewards/chosen": -0.18146783113479614, "rewards/margins": 1.7826199531555176, "rewards/rejected": -1.964087724685669, "step": 186 }, { "epoch": 0.7056603773584905, "grad_norm": 0.3320876657962799, "learning_rate": 3.2358490566037734e-05, "log_odds_chosen": 17.938766479492188, "log_odds_ratio": -3.449665200605523e-06, "logits/chosen": -1.9206900596618652, "logits/rejected": -4.899381160736084, "logps/chosen": -1.370758295059204, "logps/rejected": -18.916780471801758, "loss": 1.3438, "nll_loss": 1.3438050746917725, "rewards/accuracies": 1.0, "rewards/chosen": -0.13707584142684937, "rewards/margins": 1.754602313041687, "rewards/rejected": -1.8916780948638916, "step": 187 }, { "epoch": 0.7094339622641509, "grad_norm": 0.5529193878173828, "learning_rate": 3.2264150943396225e-05, "log_odds_chosen": 17.36458969116211, "log_odds_ratio": -3.398192711756565e-05, "logits/chosen": -2.5587615966796875, "logits/rejected": -5.520333290100098, "logps/chosen": -1.3868842124938965, "logps/rejected": -18.35110855102539, "loss": 1.3595, "nll_loss": 1.3595402240753174, "rewards/accuracies": 1.0, "rewards/chosen": -0.13868843019008636, "rewards/margins": 1.6964225769042969, "rewards/rejected": -1.8351107835769653, "step": 188 }, { "epoch": 0.7132075471698113, "grad_norm": 0.49584269523620605, "learning_rate": 3.2169811320754716e-05, "log_odds_chosen": 16.835905075073242, "log_odds_ratio": -0.018066758289933205, "logits/chosen": -1.943171739578247, "logits/rejected": -5.371037006378174, "logps/chosen": -1.5242998600006104, "logps/rejected": -18.055362701416016, "loss": 1.6063, "nll_loss": 1.6044623851776123, "rewards/accuracies": 1.0, "rewards/chosen": -0.15242999792099, "rewards/margins": 1.6531062126159668, "rewards/rejected": -1.8055362701416016, "step": 189 }, { "epoch": 0.7169811320754716, "grad_norm": 0.3044775128364563, "learning_rate": 3.207547169811321e-05, "log_odds_chosen": 17.216352462768555, "log_odds_ratio": -1.419415457348805e-05, "logits/chosen": -1.447345495223999, "logits/rejected": -5.178557395935059, "logps/chosen": -1.574653148651123, "logps/rejected": -18.42108726501465, "loss": 1.6679, "nll_loss": 1.6678857803344727, "rewards/accuracies": 1.0, "rewards/chosen": -0.15746530890464783, "rewards/margins": 1.6846433877944946, "rewards/rejected": -1.8421087265014648, "step": 190 }, { "epoch": 0.720754716981132, "grad_norm": 0.33981502056121826, "learning_rate": 3.19811320754717e-05, "log_odds_chosen": 16.49580192565918, "log_odds_ratio": -4.55726585641969e-05, "logits/chosen": -2.627636671066284, "logits/rejected": -5.452616214752197, "logps/chosen": -1.7808005809783936, "logps/rejected": -18.012622833251953, "loss": 1.6983, "nll_loss": 1.6982595920562744, "rewards/accuracies": 1.0, "rewards/chosen": -0.17808005213737488, "rewards/margins": 1.6231824159622192, "rewards/rejected": -1.8012624979019165, "step": 191 }, { "epoch": 0.7245283018867924, "grad_norm": 0.2643303871154785, "learning_rate": 3.188679245283019e-05, "log_odds_chosen": 18.62483024597168, "log_odds_ratio": -2.2575586626771837e-06, "logits/chosen": -0.962692141532898, "logits/rejected": -5.363802433013916, "logps/chosen": -1.6161587238311768, "logps/rejected": -19.921184539794922, "loss": 1.6401, "nll_loss": 1.6400614976882935, "rewards/accuracies": 1.0, "rewards/chosen": -0.16161587834358215, "rewards/margins": 1.8305026292800903, "rewards/rejected": -1.99211847782135, "step": 192 }, { "epoch": 0.7283018867924528, "grad_norm": 0.5224670171737671, "learning_rate": 3.179245283018868e-05, "log_odds_chosen": 17.83526039123535, "log_odds_ratio": -2.4736191335250624e-06, "logits/chosen": -2.249207019805908, "logits/rejected": -5.893120765686035, "logps/chosen": -1.5763590335845947, "logps/rejected": -19.069358825683594, "loss": 1.4385, "nll_loss": 1.438537359237671, "rewards/accuracies": 1.0, "rewards/chosen": -0.157635897397995, "rewards/margins": 1.7493000030517578, "rewards/rejected": -1.9069358110427856, "step": 193 }, { "epoch": 0.7320754716981132, "grad_norm": 0.33392333984375, "learning_rate": 3.169811320754717e-05, "log_odds_chosen": 14.567859649658203, "log_odds_ratio": -0.004244968760758638, "logits/chosen": -1.951322317123413, "logits/rejected": -6.3537211418151855, "logps/chosen": -1.85011887550354, "logps/rejected": -16.212106704711914, "loss": 1.6572, "nll_loss": 1.6567444801330566, "rewards/accuracies": 1.0, "rewards/chosen": -0.18501189351081848, "rewards/margins": 1.4361987113952637, "rewards/rejected": -1.6212105751037598, "step": 194 }, { "epoch": 0.7358490566037735, "grad_norm": 0.33259832859039307, "learning_rate": 3.160377358490566e-05, "log_odds_chosen": 14.361299514770508, "log_odds_ratio": -0.18310227990150452, "logits/chosen": -1.6968700885772705, "logits/rejected": -5.388698577880859, "logps/chosen": -1.7790277004241943, "logps/rejected": -15.95116901397705, "loss": 1.571, "nll_loss": 1.552640438079834, "rewards/accuracies": 0.875, "rewards/chosen": -0.17790275812149048, "rewards/margins": 1.4172141551971436, "rewards/rejected": -1.5951169729232788, "step": 195 }, { "epoch": 0.7396226415094339, "grad_norm": 0.3773075342178345, "learning_rate": 3.1509433962264154e-05, "log_odds_chosen": 13.168961524963379, "log_odds_ratio": -0.06529372930526733, "logits/chosen": -1.13154935836792, "logits/rejected": -4.5446624755859375, "logps/chosen": -1.3777858018875122, "logps/rejected": -14.206029891967773, "loss": 1.3619, "nll_loss": 1.3554112911224365, "rewards/accuracies": 0.9375, "rewards/chosen": -0.13777858018875122, "rewards/margins": 1.2828243970870972, "rewards/rejected": -1.4206030368804932, "step": 196 }, { "epoch": 0.7433962264150943, "grad_norm": 0.3047720789909363, "learning_rate": 3.1415094339622645e-05, "log_odds_chosen": 18.703304290771484, "log_odds_ratio": -3.1293072879634565e-06, "logits/chosen": -1.948151707649231, "logits/rejected": -5.263698101043701, "logps/chosen": -1.3663289546966553, "logps/rejected": -19.713842391967773, "loss": 1.2498, "nll_loss": 1.2497950792312622, "rewards/accuracies": 1.0, "rewards/chosen": -0.13663290441036224, "rewards/margins": 1.8347513675689697, "rewards/rejected": -1.9713842868804932, "step": 197 }, { "epoch": 0.7471698113207547, "grad_norm": 0.3021809458732605, "learning_rate": 3.132075471698113e-05, "log_odds_chosen": 16.85780143737793, "log_odds_ratio": -0.09297717362642288, "logits/chosen": -2.2993781566619873, "logits/rejected": -5.5674147605896, "logps/chosen": -1.532175064086914, "logps/rejected": -17.899972915649414, "loss": 1.4076, "nll_loss": 1.3982605934143066, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15321749448776245, "rewards/margins": 1.63677978515625, "rewards/rejected": -1.7899973392486572, "step": 198 }, { "epoch": 0.7509433962264151, "grad_norm": 0.3545827269554138, "learning_rate": 3.122641509433962e-05, "log_odds_chosen": 16.710426330566406, "log_odds_ratio": -1.721089915918128e-06, "logits/chosen": -2.0543713569641113, "logits/rejected": -6.516955375671387, "logps/chosen": -1.7566440105438232, "logps/rejected": -18.181135177612305, "loss": 1.5498, "nll_loss": 1.5497593879699707, "rewards/accuracies": 1.0, "rewards/chosen": -0.17566442489624023, "rewards/margins": 1.642449140548706, "rewards/rejected": -1.8181135654449463, "step": 199 }, { "epoch": 0.7547169811320755, "grad_norm": 0.34778276085853577, "learning_rate": 3.113207547169811e-05, "log_odds_chosen": 18.424583435058594, "log_odds_ratio": -7.215005462057889e-05, "logits/chosen": -1.3918465375900269, "logits/rejected": -3.4874095916748047, "logps/chosen": -1.4977375268936157, "logps/rejected": -19.452709197998047, "loss": 1.5377, "nll_loss": 1.5376887321472168, "rewards/accuracies": 1.0, "rewards/chosen": -0.1497737616300583, "rewards/margins": 1.795497179031372, "rewards/rejected": -1.9452710151672363, "step": 200 }, { "epoch": 0.7584905660377359, "grad_norm": 0.2991779148578644, "learning_rate": 3.10377358490566e-05, "log_odds_chosen": 17.403640747070312, "log_odds_ratio": -9.71866975305602e-05, "logits/chosen": -1.1479456424713135, "logits/rejected": -5.782166481018066, "logps/chosen": -1.5020239353179932, "logps/rejected": -18.525487899780273, "loss": 1.4808, "nll_loss": 1.4808180332183838, "rewards/accuracies": 1.0, "rewards/chosen": -0.15020239353179932, "rewards/margins": 1.7023463249206543, "rewards/rejected": -1.8525487184524536, "step": 201 }, { "epoch": 0.7622641509433963, "grad_norm": 0.32272449135780334, "learning_rate": 3.094339622641509e-05, "log_odds_chosen": 18.042186737060547, "log_odds_ratio": -1.0580668458715081e-05, "logits/chosen": -1.1393158435821533, "logits/rejected": -5.057635307312012, "logps/chosen": -1.3160855770111084, "logps/rejected": -18.88675308227539, "loss": 1.3602, "nll_loss": 1.3602066040039062, "rewards/accuracies": 1.0, "rewards/chosen": -0.13160854578018188, "rewards/margins": 1.7570668458938599, "rewards/rejected": -1.888675332069397, "step": 202 }, { "epoch": 0.7660377358490567, "grad_norm": 0.32788515090942383, "learning_rate": 3.0849056603773584e-05, "log_odds_chosen": 16.405580520629883, "log_odds_ratio": -0.06243692338466644, "logits/chosen": -1.4627768993377686, "logits/rejected": -5.623252868652344, "logps/chosen": -1.8027026653289795, "logps/rejected": -18.00653839111328, "loss": 1.6046, "nll_loss": 1.5983972549438477, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1802702695131302, "rewards/margins": 1.6203837394714355, "rewards/rejected": -1.8006539344787598, "step": 203 }, { "epoch": 0.769811320754717, "grad_norm": 1.234761118888855, "learning_rate": 3.0754716981132075e-05, "log_odds_chosen": 18.839767456054688, "log_odds_ratio": -4.664214884542162e-06, "logits/chosen": -2.3654236793518066, "logits/rejected": -5.212252140045166, "logps/chosen": -1.4169280529022217, "logps/rejected": -19.78860092163086, "loss": 1.4248, "nll_loss": 1.4247938394546509, "rewards/accuracies": 1.0, "rewards/chosen": -0.14169281721115112, "rewards/margins": 1.8371672630310059, "rewards/rejected": -1.9788599014282227, "step": 204 }, { "epoch": 0.7735849056603774, "grad_norm": 0.31495201587677, "learning_rate": 3.0660377358490567e-05, "log_odds_chosen": 17.49294090270996, "log_odds_ratio": -1.4975751128076809e-06, "logits/chosen": -2.495401382446289, "logits/rejected": -5.783306121826172, "logps/chosen": -1.6663013696670532, "logps/rejected": -18.882904052734375, "loss": 1.684, "nll_loss": 1.683968186378479, "rewards/accuracies": 1.0, "rewards/chosen": -0.16663014888763428, "rewards/margins": 1.7216603755950928, "rewards/rejected": -1.8882904052734375, "step": 205 }, { "epoch": 0.7773584905660378, "grad_norm": 0.4705086350440979, "learning_rate": 3.056603773584906e-05, "log_odds_chosen": 18.32155990600586, "log_odds_ratio": -0.05019821971654892, "logits/chosen": -1.1568958759307861, "logits/rejected": -4.691790580749512, "logps/chosen": -1.4851261377334595, "logps/rejected": -19.43079376220703, "loss": 1.4827, "nll_loss": 1.4777144193649292, "rewards/accuracies": 0.9375, "rewards/chosen": -0.14851261675357819, "rewards/margins": 1.7945667505264282, "rewards/rejected": -1.9430793523788452, "step": 206 }, { "epoch": 0.7811320754716982, "grad_norm": 0.3913361728191376, "learning_rate": 3.047169811320755e-05, "log_odds_chosen": 16.319984436035156, "log_odds_ratio": -0.05193231999874115, "logits/chosen": -2.967703342437744, "logits/rejected": -5.948724269866943, "logps/chosen": -1.7976689338684082, "logps/rejected": -17.946117401123047, "loss": 1.7308, "nll_loss": 1.7255607843399048, "rewards/accuracies": 0.9375, "rewards/chosen": -0.17976689338684082, "rewards/margins": 1.6148450374603271, "rewards/rejected": -1.7946120500564575, "step": 207 }, { "epoch": 0.7849056603773585, "grad_norm": 0.309137761592865, "learning_rate": 3.0377358490566036e-05, "log_odds_chosen": 19.219402313232422, "log_odds_ratio": -7.450602197422995e-07, "logits/chosen": -0.8913941383361816, "logits/rejected": -4.125641822814941, "logps/chosen": -1.470247507095337, "logps/rejected": -20.202777862548828, "loss": 1.4043, "nll_loss": 1.4043306112289429, "rewards/accuracies": 1.0, "rewards/chosen": -0.14702476561069489, "rewards/margins": 1.8732528686523438, "rewards/rejected": -2.020277500152588, "step": 208 }, { "epoch": 0.7886792452830189, "grad_norm": 0.28629574179649353, "learning_rate": 3.0283018867924528e-05, "log_odds_chosen": 16.79374122619629, "log_odds_ratio": -0.00662041874602437, "logits/chosen": -0.6648188829421997, "logits/rejected": -6.581351280212402, "logps/chosen": -1.4513788223266602, "logps/rejected": -17.918479919433594, "loss": 1.4518, "nll_loss": 1.4511276483535767, "rewards/accuracies": 1.0, "rewards/chosen": -0.14513790607452393, "rewards/margins": 1.6467101573944092, "rewards/rejected": -1.7918481826782227, "step": 209 }, { "epoch": 0.7924528301886793, "grad_norm": 0.32280489802360535, "learning_rate": 3.018867924528302e-05, "log_odds_chosen": 17.51369857788086, "log_odds_ratio": -3.799800651904661e-07, "logits/chosen": -1.7065296173095703, "logits/rejected": -5.712586402893066, "logps/chosen": -1.4828323125839233, "logps/rejected": -18.661277770996094, "loss": 1.5052, "nll_loss": 1.5052316188812256, "rewards/accuracies": 1.0, "rewards/chosen": -0.1482832282781601, "rewards/margins": 1.7178447246551514, "rewards/rejected": -1.8661279678344727, "step": 210 }, { "epoch": 0.7962264150943397, "grad_norm": 1.3183481693267822, "learning_rate": 3.009433962264151e-05, "log_odds_chosen": 16.796680450439453, "log_odds_ratio": -1.360561145702377e-05, "logits/chosen": -1.5383379459381104, "logits/rejected": -4.357212543487549, "logps/chosen": -1.435273289680481, "logps/rejected": -17.903779983520508, "loss": 1.4249, "nll_loss": 1.4248584508895874, "rewards/accuracies": 1.0, "rewards/chosen": -0.1435273289680481, "rewards/margins": 1.646850824356079, "rewards/rejected": -1.790378212928772, "step": 211 }, { "epoch": 0.8, "grad_norm": 0.3218695819377899, "learning_rate": 3e-05, "log_odds_chosen": 17.916414260864258, "log_odds_ratio": -1.38259565574117e-05, "logits/chosen": -0.24590548872947693, "logits/rejected": -4.629819869995117, "logps/chosen": -1.4526543617248535, "logps/rejected": -18.942493438720703, "loss": 1.5065, "nll_loss": 1.5065315961837769, "rewards/accuracies": 1.0, "rewards/chosen": -0.14526543021202087, "rewards/margins": 1.7489840984344482, "rewards/rejected": -1.894249439239502, "step": 212 }, { "epoch": 0.8037735849056604, "grad_norm": 0.3358078896999359, "learning_rate": 2.9905660377358492e-05, "log_odds_chosen": 14.796281814575195, "log_odds_ratio": -0.0924949049949646, "logits/chosen": -1.8153976202011108, "logits/rejected": -4.928350448608398, "logps/chosen": -1.4929924011230469, "logps/rejected": -15.970230102539062, "loss": 1.4921, "nll_loss": 1.4828470945358276, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1492992341518402, "rewards/margins": 1.4477238655090332, "rewards/rejected": -1.5970230102539062, "step": 213 }, { "epoch": 0.8075471698113208, "grad_norm": 0.3094753921031952, "learning_rate": 2.9811320754716983e-05, "log_odds_chosen": 15.192641258239746, "log_odds_ratio": -0.02841039001941681, "logits/chosen": -2.0973596572875977, "logits/rejected": -4.845073699951172, "logps/chosen": -1.5211032629013062, "logps/rejected": -16.33423614501953, "loss": 1.5114, "nll_loss": 1.5085326433181763, "rewards/accuracies": 1.0, "rewards/chosen": -0.15211032330989838, "rewards/margins": 1.4813133478164673, "rewards/rejected": -1.6334238052368164, "step": 214 }, { "epoch": 0.8113207547169812, "grad_norm": 0.3006260395050049, "learning_rate": 2.971698113207547e-05, "log_odds_chosen": 16.960983276367188, "log_odds_ratio": -0.05776922032237053, "logits/chosen": -0.9070106744766235, "logits/rejected": -4.620122909545898, "logps/chosen": -1.5111815929412842, "logps/rejected": -18.19387435913086, "loss": 1.4915, "nll_loss": 1.4857306480407715, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15111815929412842, "rewards/margins": 1.6682692766189575, "rewards/rejected": -1.819387435913086, "step": 215 }, { "epoch": 0.8150943396226416, "grad_norm": 0.33227863907814026, "learning_rate": 2.9622641509433962e-05, "log_odds_chosen": 18.719661712646484, "log_odds_ratio": -9.611276254872791e-07, "logits/chosen": -1.1189182996749878, "logits/rejected": -4.95672082901001, "logps/chosen": -1.2981202602386475, "logps/rejected": -19.64653778076172, "loss": 1.3002, "nll_loss": 1.3002173900604248, "rewards/accuracies": 1.0, "rewards/chosen": -0.12981203198432922, "rewards/margins": 1.8348416090011597, "rewards/rejected": -1.9646536111831665, "step": 216 }, { "epoch": 0.8188679245283019, "grad_norm": 0.2781373858451843, "learning_rate": 2.9528301886792453e-05, "log_odds_chosen": 17.168392181396484, "log_odds_ratio": -0.0007286164909601212, "logits/chosen": -2.3207314014434814, "logits/rejected": -6.724469184875488, "logps/chosen": -1.6115381717681885, "logps/rejected": -18.52199935913086, "loss": 1.576, "nll_loss": 1.575928807258606, "rewards/accuracies": 1.0, "rewards/chosen": -0.16115380823612213, "rewards/margins": 1.6910459995269775, "rewards/rejected": -1.8521997928619385, "step": 217 }, { "epoch": 0.8226415094339623, "grad_norm": 0.29558250308036804, "learning_rate": 2.9433962264150944e-05, "log_odds_chosen": 15.084373474121094, "log_odds_ratio": -0.025247111916542053, "logits/chosen": -0.26611119508743286, "logits/rejected": -4.655291557312012, "logps/chosen": -1.8194210529327393, "logps/rejected": -16.723453521728516, "loss": 1.6581, "nll_loss": 1.6555492877960205, "rewards/accuracies": 1.0, "rewards/chosen": -0.1819421350955963, "rewards/margins": 1.490403175354004, "rewards/rejected": -1.6723453998565674, "step": 218 }, { "epoch": 0.8264150943396227, "grad_norm": 0.2880527973175049, "learning_rate": 2.9339622641509435e-05, "log_odds_chosen": 17.260496139526367, "log_odds_ratio": -4.0319591789739206e-05, "logits/chosen": -0.7846205830574036, "logits/rejected": -4.96677827835083, "logps/chosen": -1.624293565750122, "logps/rejected": -18.620319366455078, "loss": 1.5828, "nll_loss": 1.5828019380569458, "rewards/accuracies": 1.0, "rewards/chosen": -0.16242937743663788, "rewards/margins": 1.6996028423309326, "rewards/rejected": -1.8620320558547974, "step": 219 }, { "epoch": 0.8301886792452831, "grad_norm": 0.24706144630908966, "learning_rate": 2.9245283018867926e-05, "log_odds_chosen": 18.422889709472656, "log_odds_ratio": -8.091576091828756e-06, "logits/chosen": -1.6815857887268066, "logits/rejected": -6.923168659210205, "logps/chosen": -1.4026367664337158, "logps/rejected": -19.40614891052246, "loss": 1.4331, "nll_loss": 1.4331367015838623, "rewards/accuracies": 1.0, "rewards/chosen": -0.14026367664337158, "rewards/margins": 1.8003512620925903, "rewards/rejected": -1.940614938735962, "step": 220 }, { "epoch": 0.8339622641509434, "grad_norm": 0.3142763078212738, "learning_rate": 2.9150943396226417e-05, "log_odds_chosen": 17.446882247924805, "log_odds_ratio": -0.07998549938201904, "logits/chosen": -1.7830405235290527, "logits/rejected": -5.290676593780518, "logps/chosen": -1.7186626195907593, "logps/rejected": -18.9097957611084, "loss": 1.672, "nll_loss": 1.6639615297317505, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1718662679195404, "rewards/margins": 1.7191133499145508, "rewards/rejected": -1.890979528427124, "step": 221 }, { "epoch": 0.8377358490566038, "grad_norm": 0.32208096981048584, "learning_rate": 2.9056603773584905e-05, "log_odds_chosen": 18.16156005859375, "log_odds_ratio": -8.31518536870135e-06, "logits/chosen": -2.01678729057312, "logits/rejected": -5.499569892883301, "logps/chosen": -1.758958101272583, "logps/rejected": -19.644182205200195, "loss": 1.5194, "nll_loss": 1.5194087028503418, "rewards/accuracies": 1.0, "rewards/chosen": -0.1758958250284195, "rewards/margins": 1.788522481918335, "rewards/rejected": -1.9644180536270142, "step": 222 }, { "epoch": 0.8415094339622642, "grad_norm": 0.3508431613445282, "learning_rate": 2.8962264150943396e-05, "log_odds_chosen": 16.593107223510742, "log_odds_ratio": -4.483833254198544e-05, "logits/chosen": -1.0808777809143066, "logits/rejected": -5.002264022827148, "logps/chosen": -1.6178133487701416, "logps/rejected": -17.956409454345703, "loss": 1.4168, "nll_loss": 1.4167941808700562, "rewards/accuracies": 1.0, "rewards/chosen": -0.16178134083747864, "rewards/margins": 1.633859395980835, "rewards/rejected": -1.7956409454345703, "step": 223 }, { "epoch": 0.8452830188679246, "grad_norm": 0.3514016270637512, "learning_rate": 2.8867924528301887e-05, "log_odds_chosen": 16.712297439575195, "log_odds_ratio": -3.978675522375852e-06, "logits/chosen": -1.4249446392059326, "logits/rejected": -4.565216064453125, "logps/chosen": -1.8810580968856812, "logps/rejected": -18.380489349365234, "loss": 1.4511, "nll_loss": 1.4511330127716064, "rewards/accuracies": 1.0, "rewards/chosen": -0.18810580670833588, "rewards/margins": 1.6499433517456055, "rewards/rejected": -1.8380491733551025, "step": 224 }, { "epoch": 0.8490566037735849, "grad_norm": 0.3235526978969574, "learning_rate": 2.8773584905660378e-05, "log_odds_chosen": 17.07125473022461, "log_odds_ratio": -4.053184511576546e-06, "logits/chosen": -0.9398770332336426, "logits/rejected": -4.418087482452393, "logps/chosen": -1.5705912113189697, "logps/rejected": -18.33386993408203, "loss": 1.5413, "nll_loss": 1.5412647724151611, "rewards/accuracies": 1.0, "rewards/chosen": -0.15705913305282593, "rewards/margins": 1.6763279438018799, "rewards/rejected": -1.8333872556686401, "step": 225 }, { "epoch": 0.8528301886792453, "grad_norm": 0.3555522561073303, "learning_rate": 2.867924528301887e-05, "log_odds_chosen": 16.7454833984375, "log_odds_ratio": -0.09962432086467743, "logits/chosen": -2.2143068313598633, "logits/rejected": -5.618562698364258, "logps/chosen": -1.8031433820724487, "logps/rejected": -18.335561752319336, "loss": 1.5886, "nll_loss": 1.5786436796188354, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1803143322467804, "rewards/margins": 1.6532416343688965, "rewards/rejected": -1.833556056022644, "step": 226 }, { "epoch": 0.8566037735849057, "grad_norm": 0.30511021614074707, "learning_rate": 2.858490566037736e-05, "log_odds_chosen": 16.435148239135742, "log_odds_ratio": -0.05051492527127266, "logits/chosen": -1.1615848541259766, "logits/rejected": -4.774338722229004, "logps/chosen": -1.4906309843063354, "logps/rejected": -17.58717155456543, "loss": 1.4203, "nll_loss": 1.4152026176452637, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1490630954504013, "rewards/margins": 1.6096539497375488, "rewards/rejected": -1.7587170600891113, "step": 227 }, { "epoch": 0.8603773584905661, "grad_norm": 0.2953304946422577, "learning_rate": 2.8490566037735848e-05, "log_odds_chosen": 17.240137100219727, "log_odds_ratio": -0.00012110001989640296, "logits/chosen": -2.184664726257324, "logits/rejected": -6.46320915222168, "logps/chosen": -1.8011096715927124, "logps/rejected": -18.774145126342773, "loss": 1.5879, "nll_loss": 1.5879219770431519, "rewards/accuracies": 1.0, "rewards/chosen": -0.18011096119880676, "rewards/margins": 1.6973035335540771, "rewards/rejected": -1.8774144649505615, "step": 228 }, { "epoch": 0.8641509433962264, "grad_norm": 0.3585098087787628, "learning_rate": 2.839622641509434e-05, "log_odds_chosen": 15.124954223632812, "log_odds_ratio": -0.00011490716133266687, "logits/chosen": -0.7336174249649048, "logits/rejected": -3.8608899116516113, "logps/chosen": -1.7269798517227173, "logps/rejected": -16.582866668701172, "loss": 1.6302, "nll_loss": 1.6301769018173218, "rewards/accuracies": 1.0, "rewards/chosen": -0.1726979911327362, "rewards/margins": 1.4855889081954956, "rewards/rejected": -1.6582868099212646, "step": 229 }, { "epoch": 0.8679245283018868, "grad_norm": 0.8896408677101135, "learning_rate": 2.830188679245283e-05, "log_odds_chosen": 18.62564468383789, "log_odds_ratio": -1.3411050758804777e-07, "logits/chosen": -0.7562139630317688, "logits/rejected": -6.943488121032715, "logps/chosen": -1.6029720306396484, "logps/rejected": -19.97881507873535, "loss": 1.5316, "nll_loss": 1.531610131263733, "rewards/accuracies": 1.0, "rewards/chosen": -0.1602971851825714, "rewards/margins": 1.837584376335144, "rewards/rejected": -1.997881531715393, "step": 230 }, { "epoch": 0.8716981132075472, "grad_norm": 0.3106488883495331, "learning_rate": 2.820754716981132e-05, "log_odds_chosen": 17.55572509765625, "log_odds_ratio": -0.057248592376708984, "logits/chosen": -1.0949640274047852, "logits/rejected": -3.665689468383789, "logps/chosen": -1.5709400177001953, "logps/rejected": -18.767902374267578, "loss": 1.3768, "nll_loss": 1.3711098432540894, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15709398686885834, "rewards/margins": 1.719696283340454, "rewards/rejected": -1.8767902851104736, "step": 231 }, { "epoch": 0.8754716981132076, "grad_norm": 0.329771488904953, "learning_rate": 2.8113207547169812e-05, "log_odds_chosen": 14.081006050109863, "log_odds_ratio": -0.22504055500030518, "logits/chosen": -1.2311842441558838, "logits/rejected": -3.282421827316284, "logps/chosen": -1.647598385810852, "logps/rejected": -15.423025131225586, "loss": 1.4569, "nll_loss": 1.4343469142913818, "rewards/accuracies": 0.875, "rewards/chosen": -0.16475984454154968, "rewards/margins": 1.3775426149368286, "rewards/rejected": -1.5423026084899902, "step": 232 }, { "epoch": 0.879245283018868, "grad_norm": 0.3315247595310211, "learning_rate": 2.8018867924528303e-05, "log_odds_chosen": 16.6135196685791, "log_odds_ratio": -0.015212212689220905, "logits/chosen": -1.0095354318618774, "logits/rejected": -5.442407608032227, "logps/chosen": -1.6335803270339966, "logps/rejected": -17.933931350708008, "loss": 1.5397, "nll_loss": 1.5382212400436401, "rewards/accuracies": 1.0, "rewards/chosen": -0.16335803270339966, "rewards/margins": 1.6300350427627563, "rewards/rejected": -1.7933931350708008, "step": 233 }, { "epoch": 0.8830188679245283, "grad_norm": 0.2853158116340637, "learning_rate": 2.7924528301886794e-05, "log_odds_chosen": 17.776758193969727, "log_odds_ratio": -0.00014782443759031594, "logits/chosen": -1.9506645202636719, "logits/rejected": -6.754343509674072, "logps/chosen": -1.5541913509368896, "logps/rejected": -19.017948150634766, "loss": 1.5127, "nll_loss": 1.5127075910568237, "rewards/accuracies": 1.0, "rewards/chosen": -0.15541914105415344, "rewards/margins": 1.7463756799697876, "rewards/rejected": -1.9017947912216187, "step": 234 }, { "epoch": 0.8867924528301887, "grad_norm": 0.3323514759540558, "learning_rate": 2.7830188679245282e-05, "log_odds_chosen": 19.43785285949707, "log_odds_ratio": -1.6764031443017302e-06, "logits/chosen": -1.0080933570861816, "logits/rejected": -4.819693565368652, "logps/chosen": -1.4333504438400269, "logps/rejected": -20.516963958740234, "loss": 1.3111, "nll_loss": 1.3111367225646973, "rewards/accuracies": 1.0, "rewards/chosen": -0.14333504438400269, "rewards/margins": 1.9083614349365234, "rewards/rejected": -2.051696538925171, "step": 235 }, { "epoch": 0.8905660377358491, "grad_norm": 0.39661461114883423, "learning_rate": 2.7735849056603773e-05, "log_odds_chosen": 17.063642501831055, "log_odds_ratio": -0.02693340554833412, "logits/chosen": -2.2235286235809326, "logits/rejected": -4.167008876800537, "logps/chosen": -1.677812099456787, "logps/rejected": -18.464336395263672, "loss": 1.5569, "nll_loss": 1.5542023181915283, "rewards/accuracies": 1.0, "rewards/chosen": -0.16778121888637543, "rewards/margins": 1.6786524057388306, "rewards/rejected": -1.8464335203170776, "step": 236 }, { "epoch": 0.8943396226415095, "grad_norm": 0.2811707854270935, "learning_rate": 2.7641509433962264e-05, "log_odds_chosen": 17.162025451660156, "log_odds_ratio": -0.0785740464925766, "logits/chosen": -2.9656736850738525, "logits/rejected": -5.683234214782715, "logps/chosen": -1.4220517873764038, "logps/rejected": -18.229448318481445, "loss": 1.5842, "nll_loss": 1.576366662979126, "rewards/accuracies": 0.9375, "rewards/chosen": -0.14220517873764038, "rewards/margins": 1.6807397603988647, "rewards/rejected": -1.8229451179504395, "step": 237 }, { "epoch": 0.8981132075471698, "grad_norm": 0.29917973279953003, "learning_rate": 2.7547169811320755e-05, "log_odds_chosen": 16.159685134887695, "log_odds_ratio": -0.022400004789233208, "logits/chosen": -0.6307175755500793, "logits/rejected": -5.9625935554504395, "logps/chosen": -1.3831230401992798, "logps/rejected": -17.1973876953125, "loss": 1.3797, "nll_loss": 1.377410888671875, "rewards/accuracies": 1.0, "rewards/chosen": -0.13831230998039246, "rewards/margins": 1.5814265012741089, "rewards/rejected": -1.7197388410568237, "step": 238 }, { "epoch": 0.9018867924528302, "grad_norm": 0.4006063938140869, "learning_rate": 2.7452830188679247e-05, "log_odds_chosen": 17.939586639404297, "log_odds_ratio": -1.753244941937737e-05, "logits/chosen": -0.8297520875930786, "logits/rejected": -5.683241844177246, "logps/chosen": -1.3332468271255493, "logps/rejected": -18.919038772583008, "loss": 1.3098, "nll_loss": 1.3098140954971313, "rewards/accuracies": 1.0, "rewards/chosen": -0.13332468271255493, "rewards/margins": 1.7585792541503906, "rewards/rejected": -1.8919038772583008, "step": 239 }, { "epoch": 0.9056603773584906, "grad_norm": 0.30147290229797363, "learning_rate": 2.7358490566037738e-05, "log_odds_chosen": 15.91531753540039, "log_odds_ratio": -0.03641377389431, "logits/chosen": -0.7411012649536133, "logits/rejected": -4.249468803405762, "logps/chosen": -1.5858731269836426, "logps/rejected": -17.18975830078125, "loss": 1.5853, "nll_loss": 1.5816415548324585, "rewards/accuracies": 1.0, "rewards/chosen": -0.15858730673789978, "rewards/margins": 1.5603885650634766, "rewards/rejected": -1.7189757823944092, "step": 240 }, { "epoch": 0.909433962264151, "grad_norm": 0.31893160939216614, "learning_rate": 2.726415094339623e-05, "log_odds_chosen": 16.919687271118164, "log_odds_ratio": -0.0328671857714653, "logits/chosen": -1.2528479099273682, "logits/rejected": -4.872306823730469, "logps/chosen": -1.2921593189239502, "logps/rejected": -17.771411895751953, "loss": 1.4341, "nll_loss": 1.43081533908844, "rewards/accuracies": 1.0, "rewards/chosen": -0.12921592593193054, "rewards/margins": 1.6479253768920898, "rewards/rejected": -1.7771413326263428, "step": 241 }, { "epoch": 0.9132075471698113, "grad_norm": 0.295743465423584, "learning_rate": 2.7169811320754716e-05, "log_odds_chosen": 17.80528450012207, "log_odds_ratio": -0.00010832020052475855, "logits/chosen": -0.645114541053772, "logits/rejected": -5.190818786621094, "logps/chosen": -1.5482758283615112, "logps/rejected": -19.037681579589844, "loss": 1.4926, "nll_loss": 1.492592215538025, "rewards/accuracies": 1.0, "rewards/chosen": -0.15482759475708008, "rewards/margins": 1.7489407062530518, "rewards/rejected": -1.9037683010101318, "step": 242 }, { "epoch": 0.9169811320754717, "grad_norm": 0.362249493598938, "learning_rate": 2.7075471698113207e-05, "log_odds_chosen": 17.395801544189453, "log_odds_ratio": -0.007926247082650661, "logits/chosen": -2.3562960624694824, "logits/rejected": -6.801301956176758, "logps/chosen": -1.1412396430969238, "logps/rejected": -17.93915557861328, "loss": 1.2173, "nll_loss": 1.2165017127990723, "rewards/accuracies": 1.0, "rewards/chosen": -0.11412396281957626, "rewards/margins": 1.6797915697097778, "rewards/rejected": -1.7939155101776123, "step": 243 }, { "epoch": 0.9207547169811321, "grad_norm": 0.3344380259513855, "learning_rate": 2.69811320754717e-05, "log_odds_chosen": 18.15846824645996, "log_odds_ratio": -1.184646635010722e-06, "logits/chosen": -1.4028515815734863, "logits/rejected": -5.666131019592285, "logps/chosen": -1.6515846252441406, "logps/rejected": -19.494230270385742, "loss": 1.4627, "nll_loss": 1.462727665901184, "rewards/accuracies": 1.0, "rewards/chosen": -0.1651584655046463, "rewards/margins": 1.7842646837234497, "rewards/rejected": -1.9494233131408691, "step": 244 }, { "epoch": 0.9245283018867925, "grad_norm": 0.3528442084789276, "learning_rate": 2.688679245283019e-05, "log_odds_chosen": 16.469255447387695, "log_odds_ratio": -0.16001646220684052, "logits/chosen": -0.5904859304428101, "logits/rejected": -4.443323135375977, "logps/chosen": -1.5586724281311035, "logps/rejected": -17.767322540283203, "loss": 1.3949, "nll_loss": 1.3788522481918335, "rewards/accuracies": 0.875, "rewards/chosen": -0.15586724877357483, "rewards/margins": 1.620864987373352, "rewards/rejected": -1.7767322063446045, "step": 245 }, { "epoch": 0.9283018867924528, "grad_norm": 0.36119183897972107, "learning_rate": 2.679245283018868e-05, "log_odds_chosen": 16.416719436645508, "log_odds_ratio": -0.20440763235092163, "logits/chosen": -1.4841980934143066, "logits/rejected": -4.816326141357422, "logps/chosen": -1.387542724609375, "logps/rejected": -17.60167121887207, "loss": 1.4418, "nll_loss": 1.4213593006134033, "rewards/accuracies": 0.9375, "rewards/chosen": -0.13875426352024078, "rewards/margins": 1.6214128732681274, "rewards/rejected": -1.7601672410964966, "step": 246 }, { "epoch": 0.9320754716981132, "grad_norm": 0.3182571828365326, "learning_rate": 2.6698113207547172e-05, "log_odds_chosen": 16.2152099609375, "log_odds_ratio": -0.03764305263757706, "logits/chosen": -1.0904548168182373, "logits/rejected": -3.9518043994903564, "logps/chosen": -1.4248569011688232, "logps/rejected": -17.271034240722656, "loss": 1.4531, "nll_loss": 1.4493829011917114, "rewards/accuracies": 1.0, "rewards/chosen": -0.14248570799827576, "rewards/margins": 1.5846177339553833, "rewards/rejected": -1.7271034717559814, "step": 247 }, { "epoch": 0.9358490566037736, "grad_norm": 0.321162611246109, "learning_rate": 2.6603773584905663e-05, "log_odds_chosen": 18.773868560791016, "log_odds_ratio": -1.393025740981102e-05, "logits/chosen": -0.8928079009056091, "logits/rejected": -5.256990432739258, "logps/chosen": -1.6134278774261475, "logps/rejected": -20.060321807861328, "loss": 1.5017, "nll_loss": 1.5017071962356567, "rewards/accuracies": 1.0, "rewards/chosen": -0.1613427847623825, "rewards/margins": 1.8446893692016602, "rewards/rejected": -2.0060322284698486, "step": 248 }, { "epoch": 0.939622641509434, "grad_norm": 0.8356024026870728, "learning_rate": 2.650943396226415e-05, "log_odds_chosen": 16.179561614990234, "log_odds_ratio": -0.04995302855968475, "logits/chosen": -1.4648141860961914, "logits/rejected": -4.60720682144165, "logps/chosen": -1.707082748413086, "logps/rejected": -17.653757095336914, "loss": 1.5352, "nll_loss": 1.5301692485809326, "rewards/accuracies": 0.9375, "rewards/chosen": -0.17070826888084412, "rewards/margins": 1.5946673154830933, "rewards/rejected": -1.7653756141662598, "step": 249 }, { "epoch": 0.9433962264150944, "grad_norm": 0.3033410608768463, "learning_rate": 2.641509433962264e-05, "log_odds_chosen": 16.49986457824707, "log_odds_ratio": -0.0050300052389502525, "logits/chosen": -1.6823077201843262, "logits/rejected": -5.105968475341797, "logps/chosen": -1.6065022945404053, "logps/rejected": -17.84182357788086, "loss": 1.5878, "nll_loss": 1.5873433351516724, "rewards/accuracies": 1.0, "rewards/chosen": -0.16065022349357605, "rewards/margins": 1.6235322952270508, "rewards/rejected": -1.7841825485229492, "step": 250 }, { "epoch": 0.9471698113207547, "grad_norm": 0.35018157958984375, "learning_rate": 2.6320754716981133e-05, "log_odds_chosen": 19.928390502929688, "log_odds_ratio": -8.717207720110309e-07, "logits/chosen": -0.29920998215675354, "logits/rejected": -4.818760395050049, "logps/chosen": -1.2851896286010742, "logps/rejected": -20.66608238220215, "loss": 1.2603, "nll_loss": 1.2603236436843872, "rewards/accuracies": 1.0, "rewards/chosen": -0.1285189688205719, "rewards/margins": 1.93808913230896, "rewards/rejected": -2.066608428955078, "step": 251 }, { "epoch": 0.9509433962264151, "grad_norm": 0.3246742784976959, "learning_rate": 2.6226415094339624e-05, "log_odds_chosen": 17.97346305847168, "log_odds_ratio": -1.0215319889539387e-05, "logits/chosen": -1.2597813606262207, "logits/rejected": -5.395329475402832, "logps/chosen": -1.4511744976043701, "logps/rejected": -19.067272186279297, "loss": 1.492, "nll_loss": 1.4920153617858887, "rewards/accuracies": 1.0, "rewards/chosen": -0.14511744678020477, "rewards/margins": 1.761609673500061, "rewards/rejected": -1.9067270755767822, "step": 252 }, { "epoch": 0.9547169811320755, "grad_norm": 0.3272016942501068, "learning_rate": 2.6132075471698115e-05, "log_odds_chosen": 18.806455612182617, "log_odds_ratio": -0.04364859312772751, "logits/chosen": -1.467919945716858, "logits/rejected": -4.545738220214844, "logps/chosen": -1.4638400077819824, "logps/rejected": -19.876646041870117, "loss": 1.3501, "nll_loss": 1.3457329273223877, "rewards/accuracies": 0.9375, "rewards/chosen": -0.14638400077819824, "rewards/margins": 1.8412805795669556, "rewards/rejected": -1.9876646995544434, "step": 253 }, { "epoch": 0.9584905660377359, "grad_norm": 0.6240985989570618, "learning_rate": 2.6037735849056606e-05, "log_odds_chosen": 19.93435287475586, "log_odds_ratio": -7.450581485102248e-09, "logits/chosen": -0.8115564584732056, "logits/rejected": -6.536435127258301, "logps/chosen": -1.6901774406433105, "logps/rejected": -21.379650115966797, "loss": 1.674, "nll_loss": 1.6740447282791138, "rewards/accuracies": 1.0, "rewards/chosen": -0.1690177470445633, "rewards/margins": 1.968947410583496, "rewards/rejected": -2.137965202331543, "step": 254 }, { "epoch": 0.9622641509433962, "grad_norm": 0.3309653401374817, "learning_rate": 2.5943396226415094e-05, "log_odds_chosen": 19.6575927734375, "log_odds_ratio": -0.01984320767223835, "logits/chosen": -0.7965545654296875, "logits/rejected": -3.3540658950805664, "logps/chosen": -1.5116550922393799, "logps/rejected": -20.71581268310547, "loss": 1.5013, "nll_loss": 1.4993512630462646, "rewards/accuracies": 1.0, "rewards/chosen": -0.15116551518440247, "rewards/margins": 1.9204158782958984, "rewards/rejected": -2.0715813636779785, "step": 255 }, { "epoch": 0.9660377358490566, "grad_norm": 0.3074422776699066, "learning_rate": 2.5849056603773585e-05, "log_odds_chosen": 19.469036102294922, "log_odds_ratio": -0.000835958169773221, "logits/chosen": -0.7382791638374329, "logits/rejected": -3.960020065307617, "logps/chosen": -1.3473050594329834, "logps/rejected": -20.31693458557129, "loss": 1.4874, "nll_loss": 1.4873261451721191, "rewards/accuracies": 1.0, "rewards/chosen": -0.1347305178642273, "rewards/margins": 1.896963119506836, "rewards/rejected": -2.031693696975708, "step": 256 }, { "epoch": 0.969811320754717, "grad_norm": 0.2996930480003357, "learning_rate": 2.5754716981132076e-05, "log_odds_chosen": 19.313798904418945, "log_odds_ratio": -3.367738599990844e-06, "logits/chosen": -1.150359869003296, "logits/rejected": -6.183455467224121, "logps/chosen": -1.4993988275527954, "logps/rejected": -20.48924446105957, "loss": 1.4565, "nll_loss": 1.4564992189407349, "rewards/accuracies": 1.0, "rewards/chosen": -0.1499398946762085, "rewards/margins": 1.898984670639038, "rewards/rejected": -2.048924446105957, "step": 257 }, { "epoch": 0.9735849056603774, "grad_norm": 0.3131140470504761, "learning_rate": 2.5660377358490567e-05, "log_odds_chosen": 18.251684188842773, "log_odds_ratio": -4.395888026920147e-06, "logits/chosen": -0.7550607919692993, "logits/rejected": -4.872794151306152, "logps/chosen": -1.650226354598999, "logps/rejected": -19.56356430053711, "loss": 1.5635, "nll_loss": 1.5635182857513428, "rewards/accuracies": 1.0, "rewards/chosen": -0.16502264142036438, "rewards/margins": 1.791333794593811, "rewards/rejected": -1.956356406211853, "step": 258 }, { "epoch": 0.9773584905660377, "grad_norm": 0.31624630093574524, "learning_rate": 2.5566037735849058e-05, "log_odds_chosen": 17.00825309753418, "log_odds_ratio": -0.0031702774576842785, "logits/chosen": -1.8281440734863281, "logits/rejected": -5.850688934326172, "logps/chosen": -1.6439521312713623, "logps/rejected": -18.4215087890625, "loss": 1.4766, "nll_loss": 1.4762651920318604, "rewards/accuracies": 1.0, "rewards/chosen": -0.16439521312713623, "rewards/margins": 1.6777557134628296, "rewards/rejected": -1.842151165008545, "step": 259 }, { "epoch": 0.9811320754716981, "grad_norm": 0.34498074650764465, "learning_rate": 2.547169811320755e-05, "log_odds_chosen": 18.038841247558594, "log_odds_ratio": -0.0025024032220244408, "logits/chosen": -1.4132332801818848, "logits/rejected": -5.574821949005127, "logps/chosen": -1.6830906867980957, "logps/rejected": -19.414941787719727, "loss": 1.4516, "nll_loss": 1.4513163566589355, "rewards/accuracies": 1.0, "rewards/chosen": -0.1683090627193451, "rewards/margins": 1.773185133934021, "rewards/rejected": -1.9414939880371094, "step": 260 }, { "epoch": 0.9849056603773585, "grad_norm": 0.32891684770584106, "learning_rate": 2.537735849056604e-05, "log_odds_chosen": 19.396223068237305, "log_odds_ratio": -9.61128534981981e-07, "logits/chosen": -1.6232181787490845, "logits/rejected": -6.2155914306640625, "logps/chosen": -1.5656075477600098, "logps/rejected": -20.644664764404297, "loss": 1.3423, "nll_loss": 1.3423418998718262, "rewards/accuracies": 1.0, "rewards/chosen": -0.1565607637166977, "rewards/margins": 1.9079058170318604, "rewards/rejected": -2.0644664764404297, "step": 261 }, { "epoch": 0.9886792452830189, "grad_norm": 0.39039233326911926, "learning_rate": 2.5283018867924528e-05, "log_odds_chosen": 15.765559196472168, "log_odds_ratio": -0.0038383540231734514, "logits/chosen": -0.5578839778900146, "logits/rejected": -5.357296466827393, "logps/chosen": -1.6717973947525024, "logps/rejected": -17.1871395111084, "loss": 1.5226, "nll_loss": 1.522261381149292, "rewards/accuracies": 1.0, "rewards/chosen": -0.16717973351478577, "rewards/margins": 1.5515341758728027, "rewards/rejected": -1.7187139987945557, "step": 262 }, { "epoch": 0.9924528301886792, "grad_norm": 0.2830381989479065, "learning_rate": 2.518867924528302e-05, "log_odds_chosen": 21.367420196533203, "log_odds_ratio": -7.450581485102248e-09, "logits/chosen": -2.053356409072876, "logits/rejected": -5.862036228179932, "logps/chosen": -1.3459280729293823, "logps/rejected": -22.330286026000977, "loss": 1.2824, "nll_loss": 1.2823607921600342, "rewards/accuracies": 1.0, "rewards/chosen": -0.13459281623363495, "rewards/margins": 2.098435878753662, "rewards/rejected": -2.2330286502838135, "step": 263 }, { "epoch": 0.9962264150943396, "grad_norm": 0.34556370973587036, "learning_rate": 2.509433962264151e-05, "log_odds_chosen": 17.593475341796875, "log_odds_ratio": -0.051692862063646317, "logits/chosen": -1.9133423566818237, "logits/rejected": -4.535181999206543, "logps/chosen": -1.6120280027389526, "logps/rejected": -18.84408950805664, "loss": 1.5221, "nll_loss": 1.5169554948806763, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1612028032541275, "rewards/margins": 1.7232062816619873, "rewards/rejected": -1.8844091892242432, "step": 264 }, { "epoch": 1.0, "grad_norm": 0.3379838764667511, "learning_rate": 2.5e-05, "log_odds_chosen": 19.729963302612305, "log_odds_ratio": -3.501869741739938e-06, "logits/chosen": -1.0773916244506836, "logits/rejected": -6.408069610595703, "logps/chosen": -1.8509314060211182, "logps/rejected": -21.368595123291016, "loss": 1.8248, "nll_loss": 1.824795126914978, "rewards/accuracies": 1.0, "rewards/chosen": -0.18509314954280853, "rewards/margins": 1.9517664909362793, "rewards/rejected": -2.136859655380249, "step": 265 }, { "epoch": 1.0037735849056604, "grad_norm": 0.32272571325302124, "learning_rate": 2.4905660377358492e-05, "log_odds_chosen": 19.660472869873047, "log_odds_ratio": -5.528574547497556e-06, "logits/chosen": -2.0058681964874268, "logits/rejected": -6.02784538269043, "logps/chosen": -1.462933897972107, "logps/rejected": -20.805423736572266, "loss": 1.434, "nll_loss": 1.43397057056427, "rewards/accuracies": 1.0, "rewards/chosen": -0.14629340171813965, "rewards/margins": 1.9342491626739502, "rewards/rejected": -2.080542802810669, "step": 266 }, { "epoch": 1.0075471698113208, "grad_norm": 0.3159734308719635, "learning_rate": 2.4811320754716983e-05, "log_odds_chosen": 18.871841430664062, "log_odds_ratio": -1.0505355021450669e-06, "logits/chosen": -1.4568092823028564, "logits/rejected": -5.773335933685303, "logps/chosen": -1.4329451322555542, "logps/rejected": -19.894126892089844, "loss": 1.5099, "nll_loss": 1.5098960399627686, "rewards/accuracies": 1.0, "rewards/chosen": -0.14329451322555542, "rewards/margins": 1.8461179733276367, "rewards/rejected": -1.989412546157837, "step": 267 }, { "epoch": 1.0113207547169811, "grad_norm": 0.280902236700058, "learning_rate": 2.4716981132075474e-05, "log_odds_chosen": 20.24521255493164, "log_odds_ratio": -2.2351767370309972e-07, "logits/chosen": -1.0519423484802246, "logits/rejected": -6.738117218017578, "logps/chosen": -1.7242472171783447, "logps/rejected": -21.655092239379883, "loss": 1.678, "nll_loss": 1.67795729637146, "rewards/accuracies": 1.0, "rewards/chosen": -0.17242471873760223, "rewards/margins": 1.9930845499038696, "rewards/rejected": -2.1655092239379883, "step": 268 }, { "epoch": 1.0150943396226415, "grad_norm": 0.6097087264060974, "learning_rate": 2.4622641509433962e-05, "log_odds_chosen": 17.3674373626709, "log_odds_ratio": -0.06029047444462776, "logits/chosen": -2.0133872032165527, "logits/rejected": -5.912678241729736, "logps/chosen": -1.5788557529449463, "logps/rejected": -18.655887603759766, "loss": 1.5696, "nll_loss": 1.5636141300201416, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1578855961561203, "rewards/margins": 1.7077033519744873, "rewards/rejected": -1.8655890226364136, "step": 269 }, { "epoch": 1.0188679245283019, "grad_norm": 0.3251427710056305, "learning_rate": 2.4528301886792453e-05, "log_odds_chosen": 19.473735809326172, "log_odds_ratio": -0.05202309042215347, "logits/chosen": -2.5387845039367676, "logits/rejected": -6.715412616729736, "logps/chosen": -1.5151475667953491, "logps/rejected": -20.52288055419922, "loss": 1.5736, "nll_loss": 1.5684118270874023, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15151476860046387, "rewards/margins": 1.9007731676101685, "rewards/rejected": -2.052288055419922, "step": 270 }, { "epoch": 1.0226415094339623, "grad_norm": 0.28263339400291443, "learning_rate": 2.4433962264150944e-05, "log_odds_chosen": 21.164031982421875, "log_odds_ratio": -8.56822566674964e-07, "logits/chosen": -1.4600391387939453, "logits/rejected": -5.919426441192627, "logps/chosen": -1.4743690490722656, "logps/rejected": -22.336339950561523, "loss": 1.5115, "nll_loss": 1.51149582862854, "rewards/accuracies": 1.0, "rewards/chosen": -0.14743690192699432, "rewards/margins": 2.0861971378326416, "rewards/rejected": -2.2336339950561523, "step": 271 }, { "epoch": 1.0264150943396226, "grad_norm": 0.28004854917526245, "learning_rate": 2.4339622641509435e-05, "log_odds_chosen": 20.22897720336914, "log_odds_ratio": -1.5720835335741867e-06, "logits/chosen": -2.687819004058838, "logits/rejected": -5.694319248199463, "logps/chosen": -1.3161007165908813, "logps/rejected": -21.028356552124023, "loss": 1.1834, "nll_loss": 1.1834136247634888, "rewards/accuracies": 1.0, "rewards/chosen": -0.13161008059978485, "rewards/margins": 1.971225619316101, "rewards/rejected": -2.1028356552124023, "step": 272 }, { "epoch": 1.030188679245283, "grad_norm": 0.3043549358844757, "learning_rate": 2.4245283018867926e-05, "log_odds_chosen": 18.964893341064453, "log_odds_ratio": -0.04836106672883034, "logits/chosen": -1.09200918674469, "logits/rejected": -5.859206199645996, "logps/chosen": -1.600953221321106, "logps/rejected": -20.301076889038086, "loss": 1.5009, "nll_loss": 1.4960479736328125, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16009533405303955, "rewards/margins": 1.8700122833251953, "rewards/rejected": -2.0301077365875244, "step": 273 }, { "epoch": 1.0339622641509434, "grad_norm": 0.29216280579566956, "learning_rate": 2.4150943396226418e-05, "log_odds_chosen": 18.339717864990234, "log_odds_ratio": -0.00336459930986166, "logits/chosen": -2.02860689163208, "logits/rejected": -6.949286460876465, "logps/chosen": -1.6870245933532715, "logps/rejected": -19.78402328491211, "loss": 1.5731, "nll_loss": 1.5727555751800537, "rewards/accuracies": 1.0, "rewards/chosen": -0.16870248317718506, "rewards/margins": 1.8096998929977417, "rewards/rejected": -1.9784023761749268, "step": 274 }, { "epoch": 1.0377358490566038, "grad_norm": 0.2941288352012634, "learning_rate": 2.405660377358491e-05, "log_odds_chosen": 22.276226043701172, "log_odds_ratio": 0.0, "logits/chosen": -2.0723040103912354, "logits/rejected": -5.220292568206787, "logps/chosen": -1.4046821594238281, "logps/rejected": -23.268173217773438, "loss": 1.3712, "nll_loss": 1.371185541152954, "rewards/accuracies": 1.0, "rewards/chosen": -0.14046822488307953, "rewards/margins": 2.1863491535186768, "rewards/rejected": -2.326817512512207, "step": 275 }, { "epoch": 1.0415094339622641, "grad_norm": 0.29630234837532043, "learning_rate": 2.3962264150943396e-05, "log_odds_chosen": 22.004188537597656, "log_odds_ratio": 0.0, "logits/chosen": -1.7940192222595215, "logits/rejected": -7.220149040222168, "logps/chosen": -1.6048816442489624, "logps/rejected": -23.359508514404297, "loss": 1.4889, "nll_loss": 1.4889363050460815, "rewards/accuracies": 1.0, "rewards/chosen": -0.16048815846443176, "rewards/margins": 2.1754627227783203, "rewards/rejected": -2.3359508514404297, "step": 276 }, { "epoch": 1.0452830188679245, "grad_norm": 0.3317105174064636, "learning_rate": 2.3867924528301887e-05, "log_odds_chosen": 17.85032081604004, "log_odds_ratio": -2.0116699488426093e-06, "logits/chosen": -0.9674760699272156, "logits/rejected": -5.794704914093018, "logps/chosen": -1.475320816040039, "logps/rejected": -18.945966720581055, "loss": 1.3938, "nll_loss": 1.3938060998916626, "rewards/accuracies": 1.0, "rewards/chosen": -0.14753207564353943, "rewards/margins": 1.7470645904541016, "rewards/rejected": -1.8945965766906738, "step": 277 }, { "epoch": 1.049056603773585, "grad_norm": 0.30001261830329895, "learning_rate": 2.377358490566038e-05, "log_odds_chosen": 20.2429256439209, "log_odds_ratio": -4.4703490686970326e-08, "logits/chosen": -0.8269245624542236, "logits/rejected": -5.202162265777588, "logps/chosen": -1.4923174381256104, "logps/rejected": -21.42803192138672, "loss": 1.3961, "nll_loss": 1.3961377143859863, "rewards/accuracies": 1.0, "rewards/chosen": -0.14923176169395447, "rewards/margins": 1.9935715198516846, "rewards/rejected": -2.142803192138672, "step": 278 }, { "epoch": 1.0528301886792453, "grad_norm": 0.35864245891571045, "learning_rate": 2.367924528301887e-05, "log_odds_chosen": 18.86318588256836, "log_odds_ratio": -0.06927872449159622, "logits/chosen": -1.760351538658142, "logits/rejected": -5.471229553222656, "logps/chosen": -1.6287931203842163, "logps/rejected": -20.220352172851562, "loss": 1.4988, "nll_loss": 1.4918767213821411, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1628793179988861, "rewards/margins": 1.8591558933258057, "rewards/rejected": -2.0220353603363037, "step": 279 }, { "epoch": 1.0566037735849056, "grad_norm": 0.3526778519153595, "learning_rate": 2.358490566037736e-05, "log_odds_chosen": 21.42916488647461, "log_odds_ratio": -1.3187654985813424e-06, "logits/chosen": -1.282188057899475, "logits/rejected": -4.82998514175415, "logps/chosen": -1.2494144439697266, "logps/rejected": -22.1539306640625, "loss": 1.2298, "nll_loss": 1.2297983169555664, "rewards/accuracies": 1.0, "rewards/chosen": -0.12494143843650818, "rewards/margins": 2.090451240539551, "rewards/rejected": -2.21539306640625, "step": 280 }, { "epoch": 1.060377358490566, "grad_norm": 0.28869158029556274, "learning_rate": 2.3490566037735852e-05, "log_odds_chosen": 20.201156616210938, "log_odds_ratio": -1.1175878711355836e-07, "logits/chosen": -1.7291052341461182, "logits/rejected": -7.483429431915283, "logps/chosen": -1.5107462406158447, "logps/rejected": -21.40848159790039, "loss": 1.4772, "nll_loss": 1.4772439002990723, "rewards/accuracies": 1.0, "rewards/chosen": -0.15107461810112, "rewards/margins": 1.9897735118865967, "rewards/rejected": -2.140848159790039, "step": 281 }, { "epoch": 1.0641509433962264, "grad_norm": 0.31443092226982117, "learning_rate": 2.339622641509434e-05, "log_odds_chosen": 19.611129760742188, "log_odds_ratio": -3.022684541065246e-05, "logits/chosen": -1.9202224016189575, "logits/rejected": -6.25022029876709, "logps/chosen": -1.5923198461532593, "logps/rejected": -20.92377471923828, "loss": 1.5393, "nll_loss": 1.539318561553955, "rewards/accuracies": 1.0, "rewards/chosen": -0.15923196077346802, "rewards/margins": 1.933145523071289, "rewards/rejected": -2.0923774242401123, "step": 282 }, { "epoch": 1.0679245283018868, "grad_norm": 0.33393925428390503, "learning_rate": 2.330188679245283e-05, "log_odds_chosen": 19.191036224365234, "log_odds_ratio": -0.051419854164123535, "logits/chosen": -1.4811030626296997, "logits/rejected": -5.777919769287109, "logps/chosen": -1.5946496725082397, "logps/rejected": -20.5208797454834, "loss": 1.6181, "nll_loss": 1.6130021810531616, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1594649851322174, "rewards/margins": 1.8926230669021606, "rewards/rejected": -2.0520882606506348, "step": 283 }, { "epoch": 1.0716981132075472, "grad_norm": 0.2892889082431793, "learning_rate": 2.320754716981132e-05, "log_odds_chosen": 20.433334350585938, "log_odds_ratio": -2.980237638894323e-07, "logits/chosen": -0.5262770652770996, "logits/rejected": -5.781856536865234, "logps/chosen": -1.421867847442627, "logps/rejected": -21.404203414916992, "loss": 1.409, "nll_loss": 1.408989667892456, "rewards/accuracies": 1.0, "rewards/chosen": -0.14218679070472717, "rewards/margins": 1.998233675956726, "rewards/rejected": -2.140420436859131, "step": 284 }, { "epoch": 1.0754716981132075, "grad_norm": 0.3078497350215912, "learning_rate": 2.3113207547169813e-05, "log_odds_chosen": 18.807418823242188, "log_odds_ratio": -0.0475982129573822, "logits/chosen": -1.0839369297027588, "logits/rejected": -3.862969398498535, "logps/chosen": -1.431301474571228, "logps/rejected": -19.818119049072266, "loss": 1.5687, "nll_loss": 1.563894510269165, "rewards/accuracies": 0.9375, "rewards/chosen": -0.14313015341758728, "rewards/margins": 1.838681936264038, "rewards/rejected": -1.9818120002746582, "step": 285 }, { "epoch": 1.079245283018868, "grad_norm": 0.9563864469528198, "learning_rate": 2.3018867924528304e-05, "log_odds_chosen": 19.715076446533203, "log_odds_ratio": -7.935160283523146e-06, "logits/chosen": -2.1578338146209717, "logits/rejected": -4.625355243682861, "logps/chosen": -1.4262417554855347, "logps/rejected": -20.82624053955078, "loss": 1.4746, "nll_loss": 1.4745495319366455, "rewards/accuracies": 1.0, "rewards/chosen": -0.14262418448925018, "rewards/margins": 1.9399999380111694, "rewards/rejected": -2.0826241970062256, "step": 286 }, { "epoch": 1.0830188679245283, "grad_norm": 0.29822468757629395, "learning_rate": 2.2924528301886795e-05, "log_odds_chosen": 20.520158767700195, "log_odds_ratio": -5.96046660916727e-08, "logits/chosen": -1.959904670715332, "logits/rejected": -5.203014850616455, "logps/chosen": -1.3631935119628906, "logps/rejected": -21.410646438598633, "loss": 1.4288, "nll_loss": 1.4287554025650024, "rewards/accuracies": 1.0, "rewards/chosen": -0.1363193392753601, "rewards/margins": 2.0047454833984375, "rewards/rejected": -2.1410648822784424, "step": 287 }, { "epoch": 1.0867924528301887, "grad_norm": 0.31314578652381897, "learning_rate": 2.2830188679245286e-05, "log_odds_chosen": 20.801912307739258, "log_odds_ratio": 0.0, "logits/chosen": -2.6818971633911133, "logits/rejected": -7.3150763511657715, "logps/chosen": -1.596503496170044, "logps/rejected": -22.121469497680664, "loss": 1.4044, "nll_loss": 1.4043606519699097, "rewards/accuracies": 1.0, "rewards/chosen": -0.15965035557746887, "rewards/margins": 2.0524966716766357, "rewards/rejected": -2.2121472358703613, "step": 288 }, { "epoch": 1.090566037735849, "grad_norm": 0.3501436412334442, "learning_rate": 2.2735849056603774e-05, "log_odds_chosen": 19.989656448364258, "log_odds_ratio": -1.385821519761521e-06, "logits/chosen": -2.4570391178131104, "logits/rejected": -6.362525463104248, "logps/chosen": -1.6581473350524902, "logps/rejected": -21.345970153808594, "loss": 1.5318, "nll_loss": 1.53177011013031, "rewards/accuracies": 1.0, "rewards/chosen": -0.16581472754478455, "rewards/margins": 1.9687824249267578, "rewards/rejected": -2.134597063064575, "step": 289 }, { "epoch": 1.0943396226415094, "grad_norm": 0.33854931592941284, "learning_rate": 2.2641509433962265e-05, "log_odds_chosen": 19.100032806396484, "log_odds_ratio": -5.811464234284358e-07, "logits/chosen": -0.7418297529220581, "logits/rejected": -4.735363960266113, "logps/chosen": -1.7482143640518188, "logps/rejected": -20.59360694885254, "loss": 1.581, "nll_loss": 1.5810353755950928, "rewards/accuracies": 1.0, "rewards/chosen": -0.17482145130634308, "rewards/margins": 1.884539246559143, "rewards/rejected": -2.0593605041503906, "step": 290 }, { "epoch": 1.0981132075471698, "grad_norm": 0.3294488489627838, "learning_rate": 2.2547169811320756e-05, "log_odds_chosen": 17.665861129760742, "log_odds_ratio": -0.08739493787288666, "logits/chosen": -0.7531633377075195, "logits/rejected": -5.106237888336182, "logps/chosen": -1.710965871810913, "logps/rejected": -19.166933059692383, "loss": 1.5588, "nll_loss": 1.5500967502593994, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1710965931415558, "rewards/margins": 1.7455967664718628, "rewards/rejected": -1.9166933298110962, "step": 291 }, { "epoch": 1.1018867924528302, "grad_norm": 0.3396783769130707, "learning_rate": 2.2452830188679247e-05, "log_odds_chosen": 18.452682495117188, "log_odds_ratio": -0.07296016067266464, "logits/chosen": -0.9494550228118896, "logits/rejected": -4.494377613067627, "logps/chosen": -1.316884994506836, "logps/rejected": -19.361968994140625, "loss": 1.1822, "nll_loss": 1.1749296188354492, "rewards/accuracies": 0.9375, "rewards/chosen": -0.13168850541114807, "rewards/margins": 1.8045084476470947, "rewards/rejected": -1.9361971616744995, "step": 292 }, { "epoch": 1.1056603773584905, "grad_norm": 0.3204915523529053, "learning_rate": 2.2358490566037738e-05, "log_odds_chosen": 21.57263946533203, "log_odds_ratio": -3.62108698936936e-06, "logits/chosen": -0.9738727807998657, "logits/rejected": -5.303981304168701, "logps/chosen": -1.5682296752929688, "logps/rejected": -22.808374404907227, "loss": 1.462, "nll_loss": 1.462032437324524, "rewards/accuracies": 1.0, "rewards/chosen": -0.15682296454906464, "rewards/margins": 2.1240146160125732, "rewards/rejected": -2.280837297439575, "step": 293 }, { "epoch": 1.109433962264151, "grad_norm": 0.3384822607040405, "learning_rate": 2.226415094339623e-05, "log_odds_chosen": 18.951013565063477, "log_odds_ratio": -5.208150469115935e-06, "logits/chosen": -1.3006513118743896, "logits/rejected": -4.634444236755371, "logps/chosen": -1.5552959442138672, "logps/rejected": -20.169574737548828, "loss": 1.5539, "nll_loss": 1.5539031028747559, "rewards/accuracies": 1.0, "rewards/chosen": -0.15552960336208344, "rewards/margins": 1.8614277839660645, "rewards/rejected": -2.0169572830200195, "step": 294 }, { "epoch": 1.1132075471698113, "grad_norm": 1.1054335832595825, "learning_rate": 2.216981132075472e-05, "log_odds_chosen": 18.913715362548828, "log_odds_ratio": -4.693871460403898e-07, "logits/chosen": -0.9914897680282593, "logits/rejected": -5.061755180358887, "logps/chosen": -1.4729012250900269, "logps/rejected": -20.089599609375, "loss": 1.3732, "nll_loss": 1.373215913772583, "rewards/accuracies": 1.0, "rewards/chosen": -0.14729014039039612, "rewards/margins": 1.8616697788238525, "rewards/rejected": -2.008960247039795, "step": 295 }, { "epoch": 1.1169811320754717, "grad_norm": 0.41514357924461365, "learning_rate": 2.2075471698113208e-05, "log_odds_chosen": 20.548986434936523, "log_odds_ratio": -5.2154071283894154e-08, "logits/chosen": -1.4249317646026611, "logits/rejected": -5.687395095825195, "logps/chosen": -1.5955243110656738, "logps/rejected": -21.793594360351562, "loss": 1.4593, "nll_loss": 1.4593462944030762, "rewards/accuracies": 1.0, "rewards/chosen": -0.1595524251461029, "rewards/margins": 2.0198073387145996, "rewards/rejected": -2.1793594360351562, "step": 296 }, { "epoch": 1.120754716981132, "grad_norm": 0.33038532733917236, "learning_rate": 2.19811320754717e-05, "log_odds_chosen": 19.50081443786621, "log_odds_ratio": -1.6255449736490846e-05, "logits/chosen": -2.909379482269287, "logits/rejected": -7.22389030456543, "logps/chosen": -1.4944963455200195, "logps/rejected": -20.643218994140625, "loss": 1.3455, "nll_loss": 1.345502257347107, "rewards/accuracies": 1.0, "rewards/chosen": -0.1494496464729309, "rewards/margins": 1.914872169494629, "rewards/rejected": -2.064321756362915, "step": 297 }, { "epoch": 1.1245283018867924, "grad_norm": 0.29738759994506836, "learning_rate": 2.188679245283019e-05, "log_odds_chosen": 20.67082405090332, "log_odds_ratio": -0.0003345193399582058, "logits/chosen": -1.593409776687622, "logits/rejected": -5.576838493347168, "logps/chosen": -1.746903657913208, "logps/rejected": -22.0710506439209, "loss": 1.3993, "nll_loss": 1.3992958068847656, "rewards/accuracies": 1.0, "rewards/chosen": -0.1746903657913208, "rewards/margins": 2.032414674758911, "rewards/rejected": -2.2071051597595215, "step": 298 }, { "epoch": 1.1283018867924528, "grad_norm": 0.4151991605758667, "learning_rate": 2.179245283018868e-05, "log_odds_chosen": 17.427101135253906, "log_odds_ratio": -0.11041603237390518, "logits/chosen": -0.4509122669696808, "logits/rejected": -4.386507034301758, "logps/chosen": -1.5307447910308838, "logps/rejected": -18.620765686035156, "loss": 1.4554, "nll_loss": 1.4443206787109375, "rewards/accuracies": 0.875, "rewards/chosen": -0.1530744880437851, "rewards/margins": 1.7090023756027222, "rewards/rejected": -1.862076759338379, "step": 299 }, { "epoch": 1.1320754716981132, "grad_norm": 0.327581524848938, "learning_rate": 2.1698113207547172e-05, "log_odds_chosen": 17.98784065246582, "log_odds_ratio": -4.783316398970783e-06, "logits/chosen": -1.5300605297088623, "logits/rejected": -6.33521842956543, "logps/chosen": -1.5090255737304688, "logps/rejected": -19.206493377685547, "loss": 1.401, "nll_loss": 1.4010220766067505, "rewards/accuracies": 1.0, "rewards/chosen": -0.15090256929397583, "rewards/margins": 1.7697467803955078, "rewards/rejected": -1.9206492900848389, "step": 300 }, { "epoch": 1.1358490566037736, "grad_norm": 0.33648499846458435, "learning_rate": 2.1603773584905663e-05, "log_odds_chosen": 20.12273597717285, "log_odds_ratio": -5.960480393696344e-07, "logits/chosen": -0.9786512851715088, "logits/rejected": -5.974287986755371, "logps/chosen": -1.504534363746643, "logps/rejected": -21.347797393798828, "loss": 1.4384, "nll_loss": 1.4384446144104004, "rewards/accuracies": 1.0, "rewards/chosen": -0.15045343339443207, "rewards/margins": 1.9843263626098633, "rewards/rejected": -2.134779930114746, "step": 301 }, { "epoch": 1.139622641509434, "grad_norm": 0.4433332681655884, "learning_rate": 2.1509433962264154e-05, "log_odds_chosen": 18.096084594726562, "log_odds_ratio": -0.07683061808347702, "logits/chosen": -1.3693748712539673, "logits/rejected": -5.253786087036133, "logps/chosen": -1.7420886754989624, "logps/rejected": -19.588783264160156, "loss": 1.4441, "nll_loss": 1.4364526271820068, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1742088496685028, "rewards/margins": 1.7846695184707642, "rewards/rejected": -1.9588782787322998, "step": 302 }, { "epoch": 1.1433962264150943, "grad_norm": 0.2902994155883789, "learning_rate": 2.1415094339622642e-05, "log_odds_chosen": 22.471710205078125, "log_odds_ratio": -1.1175880842984043e-07, "logits/chosen": -1.3224496841430664, "logits/rejected": -6.421805381774902, "logps/chosen": -1.4149394035339355, "logps/rejected": -23.561216354370117, "loss": 1.3612, "nll_loss": 1.36124587059021, "rewards/accuracies": 1.0, "rewards/chosen": -0.14149394631385803, "rewards/margins": 2.214627742767334, "rewards/rejected": -2.356121778488159, "step": 303 }, { "epoch": 1.1471698113207547, "grad_norm": 0.35126540064811707, "learning_rate": 2.1320754716981133e-05, "log_odds_chosen": 20.562442779541016, "log_odds_ratio": -9.716237400425598e-06, "logits/chosen": -0.9620351791381836, "logits/rejected": -6.237518787384033, "logps/chosen": -1.6788274049758911, "logps/rejected": -21.986478805541992, "loss": 1.4306, "nll_loss": 1.4306453466415405, "rewards/accuracies": 1.0, "rewards/chosen": -0.1678827404975891, "rewards/margins": 2.0307650566101074, "rewards/rejected": -2.1986477375030518, "step": 304 }, { "epoch": 1.150943396226415, "grad_norm": 0.3252897560596466, "learning_rate": 2.1226415094339624e-05, "log_odds_chosen": 20.130619049072266, "log_odds_ratio": -0.00018702806846704334, "logits/chosen": -1.5204761028289795, "logits/rejected": -4.985358238220215, "logps/chosen": -1.533634901046753, "logps/rejected": -21.230838775634766, "loss": 1.6996, "nll_loss": 1.6996041536331177, "rewards/accuracies": 1.0, "rewards/chosen": -0.15336349606513977, "rewards/margins": 1.9697201251983643, "rewards/rejected": -2.1230835914611816, "step": 305 }, { "epoch": 1.1547169811320754, "grad_norm": 0.4213809072971344, "learning_rate": 2.1132075471698115e-05, "log_odds_chosen": 22.56460952758789, "log_odds_ratio": -1.7881419012155675e-07, "logits/chosen": -1.123805046081543, "logits/rejected": -4.740581035614014, "logps/chosen": -1.4182591438293457, "logps/rejected": -23.586502075195312, "loss": 1.3603, "nll_loss": 1.360290288925171, "rewards/accuracies": 1.0, "rewards/chosen": -0.14182589948177338, "rewards/margins": 2.2168242931365967, "rewards/rejected": -2.3586502075195312, "step": 306 }, { "epoch": 1.1584905660377358, "grad_norm": 0.3281170725822449, "learning_rate": 2.1037735849056606e-05, "log_odds_chosen": 19.17583656311035, "log_odds_ratio": -2.123439571732888e-06, "logits/chosen": -0.20950892567634583, "logits/rejected": -4.772949695587158, "logps/chosen": -1.5309257507324219, "logps/rejected": -20.371936798095703, "loss": 1.4766, "nll_loss": 1.476578950881958, "rewards/accuracies": 1.0, "rewards/chosen": -0.15309256315231323, "rewards/margins": 1.884101390838623, "rewards/rejected": -2.037193775177002, "step": 307 }, { "epoch": 1.1622641509433962, "grad_norm": 0.3546697497367859, "learning_rate": 2.0943396226415098e-05, "log_odds_chosen": 23.225772857666016, "log_odds_ratio": -7.450581485102248e-09, "logits/chosen": -0.4746635854244232, "logits/rejected": -2.8694400787353516, "logps/chosen": -1.2704362869262695, "logps/rejected": -24.101713180541992, "loss": 1.2967, "nll_loss": 1.2966803312301636, "rewards/accuracies": 1.0, "rewards/chosen": -0.12704363465309143, "rewards/margins": 2.283127546310425, "rewards/rejected": -2.4101712703704834, "step": 308 }, { "epoch": 1.1660377358490566, "grad_norm": 1.0174925327301025, "learning_rate": 2.0849056603773585e-05, "log_odds_chosen": 19.03729248046875, "log_odds_ratio": -0.10631541162729263, "logits/chosen": -2.604809522628784, "logits/rejected": -5.2874040603637695, "logps/chosen": -2.420698642730713, "logps/rejected": -21.103818893432617, "loss": 1.6129, "nll_loss": 1.6022355556488037, "rewards/accuracies": 0.9375, "rewards/chosen": -0.24206990003585815, "rewards/margins": 1.8683120012283325, "rewards/rejected": -2.110382080078125, "step": 309 }, { "epoch": 1.169811320754717, "grad_norm": 0.29557734727859497, "learning_rate": 2.0754716981132076e-05, "log_odds_chosen": 16.802288055419922, "log_odds_ratio": -0.02574881538748741, "logits/chosen": -1.028839111328125, "logits/rejected": -4.576976776123047, "logps/chosen": -1.665901780128479, "logps/rejected": -18.113601684570312, "loss": 1.5644, "nll_loss": 1.561858057975769, "rewards/accuracies": 1.0, "rewards/chosen": -0.16659018397331238, "rewards/margins": 1.6447699069976807, "rewards/rejected": -1.8113601207733154, "step": 310 }, { "epoch": 1.1735849056603773, "grad_norm": 0.3357307016849518, "learning_rate": 2.0660377358490567e-05, "log_odds_chosen": 18.195541381835938, "log_odds_ratio": -0.05465611815452576, "logits/chosen": -2.216845989227295, "logits/rejected": -5.583156108856201, "logps/chosen": -1.4862521886825562, "logps/rejected": -19.355636596679688, "loss": 1.4894, "nll_loss": 1.4839730262756348, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1486252397298813, "rewards/margins": 1.7869385480880737, "rewards/rejected": -1.9355638027191162, "step": 311 }, { "epoch": 1.1773584905660377, "grad_norm": 0.43319424986839294, "learning_rate": 2.056603773584906e-05, "log_odds_chosen": 20.049089431762695, "log_odds_ratio": -8.195664804588887e-07, "logits/chosen": -1.8693492412567139, "logits/rejected": -6.614497184753418, "logps/chosen": -1.4651036262512207, "logps/rejected": -21.203433990478516, "loss": 1.3849, "nll_loss": 1.384904384613037, "rewards/accuracies": 1.0, "rewards/chosen": -0.14651036262512207, "rewards/margins": 1.9738330841064453, "rewards/rejected": -2.1203434467315674, "step": 312 }, { "epoch": 1.181132075471698, "grad_norm": 0.3610400855541229, "learning_rate": 2.047169811320755e-05, "log_odds_chosen": 18.725547790527344, "log_odds_ratio": -0.0023514274507761, "logits/chosen": -0.6479529738426208, "logits/rejected": -3.6973776817321777, "logps/chosen": -1.5272626876831055, "logps/rejected": -19.948326110839844, "loss": 1.4189, "nll_loss": 1.4186826944351196, "rewards/accuracies": 1.0, "rewards/chosen": -0.15272627770900726, "rewards/margins": 1.8421063423156738, "rewards/rejected": -1.9948326349258423, "step": 313 }, { "epoch": 1.1849056603773584, "grad_norm": 0.3513402044773102, "learning_rate": 2.037735849056604e-05, "log_odds_chosen": 17.298198699951172, "log_odds_ratio": -0.002320481464266777, "logits/chosen": -1.4968432188034058, "logits/rejected": -5.039670944213867, "logps/chosen": -1.4913229942321777, "logps/rejected": -18.48564910888672, "loss": 1.4084, "nll_loss": 1.4081535339355469, "rewards/accuracies": 1.0, "rewards/chosen": -0.14913231134414673, "rewards/margins": 1.6994324922561646, "rewards/rejected": -1.848564863204956, "step": 314 }, { "epoch": 1.1886792452830188, "grad_norm": 0.3477972447872162, "learning_rate": 2.0283018867924532e-05, "log_odds_chosen": 18.793106079101562, "log_odds_ratio": -8.6095547885634e-05, "logits/chosen": -2.6079301834106445, "logits/rejected": -5.591516494750977, "logps/chosen": -1.5191848278045654, "logps/rejected": -19.985429763793945, "loss": 1.5223, "nll_loss": 1.5223308801651, "rewards/accuracies": 1.0, "rewards/chosen": -0.1519184708595276, "rewards/margins": 1.8466243743896484, "rewards/rejected": -1.9985430240631104, "step": 315 }, { "epoch": 1.1924528301886792, "grad_norm": 0.35549482703208923, "learning_rate": 2.018867924528302e-05, "log_odds_chosen": 18.749046325683594, "log_odds_ratio": -5.386953034758335e-06, "logits/chosen": -0.9891374707221985, "logits/rejected": -5.465826988220215, "logps/chosen": -1.3818237781524658, "logps/rejected": -19.735633850097656, "loss": 1.4975, "nll_loss": 1.4974541664123535, "rewards/accuracies": 1.0, "rewards/chosen": -0.1381823718547821, "rewards/margins": 1.835381031036377, "rewards/rejected": -1.973563313484192, "step": 316 }, { "epoch": 1.1962264150943396, "grad_norm": 0.34496262669563293, "learning_rate": 2.009433962264151e-05, "log_odds_chosen": 15.491250038146973, "log_odds_ratio": -0.20011255145072937, "logits/chosen": -1.0274806022644043, "logits/rejected": -5.168841361999512, "logps/chosen": -1.7780976295471191, "logps/rejected": -17.050220489501953, "loss": 1.6287, "nll_loss": 1.6086933612823486, "rewards/accuracies": 0.875, "rewards/chosen": -0.17780977487564087, "rewards/margins": 1.527212142944336, "rewards/rejected": -1.7050219774246216, "step": 317 }, { "epoch": 1.2, "grad_norm": 0.32769209146499634, "learning_rate": 2e-05, "log_odds_chosen": 16.972900390625, "log_odds_ratio": -0.00024161383043974638, "logits/chosen": -0.6759935617446899, "logits/rejected": -5.338271141052246, "logps/chosen": -1.6053509712219238, "logps/rejected": -18.34259796142578, "loss": 1.4908, "nll_loss": 1.4907457828521729, "rewards/accuracies": 1.0, "rewards/chosen": -0.16053511202335358, "rewards/margins": 1.6737247705459595, "rewards/rejected": -1.8342599868774414, "step": 318 }, { "epoch": 1.2037735849056603, "grad_norm": 0.33330532908439636, "learning_rate": 1.9905660377358493e-05, "log_odds_chosen": 18.765106201171875, "log_odds_ratio": -0.007183433044701815, "logits/chosen": -3.5232436656951904, "logits/rejected": -7.270813465118408, "logps/chosen": -1.494720697402954, "logps/rejected": -19.954343795776367, "loss": 1.4191, "nll_loss": 1.4183340072631836, "rewards/accuracies": 1.0, "rewards/chosen": -0.14947207272052765, "rewards/margins": 1.8459622859954834, "rewards/rejected": -1.9954345226287842, "step": 319 }, { "epoch": 1.2075471698113207, "grad_norm": 0.35828787088394165, "learning_rate": 1.9811320754716984e-05, "log_odds_chosen": 15.424698829650879, "log_odds_ratio": -0.006778358481824398, "logits/chosen": -2.202425956726074, "logits/rejected": -7.088006973266602, "logps/chosen": -1.7607218027114868, "logps/rejected": -16.957534790039062, "loss": 1.6174, "nll_loss": 1.616753339767456, "rewards/accuracies": 1.0, "rewards/chosen": -0.17607218027114868, "rewards/margins": 1.5196813344955444, "rewards/rejected": -1.6957534551620483, "step": 320 }, { "epoch": 1.211320754716981, "grad_norm": 0.34036463499069214, "learning_rate": 1.9716981132075475e-05, "log_odds_chosen": 18.791336059570312, "log_odds_ratio": -3.852009740512585e-06, "logits/chosen": -1.9099091291427612, "logits/rejected": -7.520392417907715, "logps/chosen": -1.561924934387207, "logps/rejected": -19.94056510925293, "loss": 1.471, "nll_loss": 1.4709736108779907, "rewards/accuracies": 1.0, "rewards/chosen": -0.15619248151779175, "rewards/margins": 1.8378640413284302, "rewards/rejected": -1.9940567016601562, "step": 321 }, { "epoch": 1.2150943396226415, "grad_norm": 0.3238532841205597, "learning_rate": 1.9622641509433966e-05, "log_odds_chosen": 17.8562068939209, "log_odds_ratio": -8.6430118244607e-06, "logits/chosen": -0.658406138420105, "logits/rejected": -4.410658836364746, "logps/chosen": -1.2828052043914795, "logps/rejected": -18.753049850463867, "loss": 1.3437, "nll_loss": 1.3436634540557861, "rewards/accuracies": 1.0, "rewards/chosen": -0.12828052043914795, "rewards/margins": 1.7470245361328125, "rewards/rejected": -1.875304937362671, "step": 322 }, { "epoch": 1.2188679245283018, "grad_norm": 0.3303399682044983, "learning_rate": 1.9528301886792454e-05, "log_odds_chosen": 14.796485900878906, "log_odds_ratio": -0.055772680789232254, "logits/chosen": -1.0683139562606812, "logits/rejected": -7.066629409790039, "logps/chosen": -1.8164430856704712, "logps/rejected": -16.421266555786133, "loss": 1.613, "nll_loss": 1.6073920726776123, "rewards/accuracies": 0.9375, "rewards/chosen": -0.18164430558681488, "rewards/margins": 1.4604823589324951, "rewards/rejected": -1.6421265602111816, "step": 323 }, { "epoch": 1.2226415094339622, "grad_norm": 0.35494038462638855, "learning_rate": 1.9433962264150945e-05, "log_odds_chosen": 17.69695472717285, "log_odds_ratio": -0.005540680605918169, "logits/chosen": -1.0188480615615845, "logits/rejected": -4.988126277923584, "logps/chosen": -1.4110499620437622, "logps/rejected": -18.787839889526367, "loss": 1.3428, "nll_loss": 1.3422446250915527, "rewards/accuracies": 1.0, "rewards/chosen": -0.14110499620437622, "rewards/margins": 1.7376790046691895, "rewards/rejected": -1.878783941268921, "step": 324 }, { "epoch": 1.2264150943396226, "grad_norm": 0.35788360238075256, "learning_rate": 1.9339622641509436e-05, "log_odds_chosen": 14.982892990112305, "log_odds_ratio": -0.049817949533462524, "logits/chosen": -1.3258823156356812, "logits/rejected": -4.249111652374268, "logps/chosen": -1.2381442785263062, "logps/rejected": -15.715821266174316, "loss": 1.2877, "nll_loss": 1.2827649116516113, "rewards/accuracies": 0.9375, "rewards/chosen": -0.12381443381309509, "rewards/margins": 1.447767734527588, "rewards/rejected": -1.5715820789337158, "step": 325 }, { "epoch": 1.230188679245283, "grad_norm": 0.39687126874923706, "learning_rate": 1.9245283018867927e-05, "log_odds_chosen": 16.164493560791016, "log_odds_ratio": -0.054912034422159195, "logits/chosen": -1.1859130859375, "logits/rejected": -4.106606960296631, "logps/chosen": -1.3010371923446655, "logps/rejected": -16.917049407958984, "loss": 1.4605, "nll_loss": 1.4549907445907593, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1301037073135376, "rewards/margins": 1.561601161956787, "rewards/rejected": -1.6917049884796143, "step": 326 }, { "epoch": 1.2339622641509433, "grad_norm": 0.3798399865627289, "learning_rate": 1.9150943396226418e-05, "log_odds_chosen": 15.7042875289917, "log_odds_ratio": -0.00472813518717885, "logits/chosen": 0.0541728138923645, "logits/rejected": -4.224884033203125, "logps/chosen": -1.7171664237976074, "logps/rejected": -17.207719802856445, "loss": 1.5367, "nll_loss": 1.5362149477005005, "rewards/accuracies": 1.0, "rewards/chosen": -0.1717166304588318, "rewards/margins": 1.5490553379058838, "rewards/rejected": -1.7207720279693604, "step": 327 }, { "epoch": 1.2377358490566037, "grad_norm": 0.3624114990234375, "learning_rate": 1.905660377358491e-05, "log_odds_chosen": 16.288440704345703, "log_odds_ratio": -0.010537970811128616, "logits/chosen": -0.5711084008216858, "logits/rejected": -4.729043006896973, "logps/chosen": -1.5029557943344116, "logps/rejected": -17.498348236083984, "loss": 1.3411, "nll_loss": 1.340043544769287, "rewards/accuracies": 1.0, "rewards/chosen": -0.15029558539390564, "rewards/margins": 1.5995393991470337, "rewards/rejected": -1.7498348951339722, "step": 328 }, { "epoch": 1.241509433962264, "grad_norm": 0.3725604712963104, "learning_rate": 1.8962264150943397e-05, "log_odds_chosen": 15.354927062988281, "log_odds_ratio": -0.02149188332259655, "logits/chosen": -1.4398235082626343, "logits/rejected": -4.620980739593506, "logps/chosen": -1.4068002700805664, "logps/rejected": -16.40656280517578, "loss": 1.2986, "nll_loss": 1.2964096069335938, "rewards/accuracies": 1.0, "rewards/chosen": -0.14068001508712769, "rewards/margins": 1.4999765157699585, "rewards/rejected": -1.640656590461731, "step": 329 }, { "epoch": 1.2452830188679245, "grad_norm": 0.370463490486145, "learning_rate": 1.8867924528301888e-05, "log_odds_chosen": 14.876998901367188, "log_odds_ratio": -0.05089250206947327, "logits/chosen": -1.1731348037719727, "logits/rejected": -4.278902530670166, "logps/chosen": -1.3039909601211548, "logps/rejected": -15.608766555786133, "loss": 1.4343, "nll_loss": 1.4292408227920532, "rewards/accuracies": 1.0, "rewards/chosen": -0.13039910793304443, "rewards/margins": 1.4304776191711426, "rewards/rejected": -1.5608766078948975, "step": 330 }, { "epoch": 1.2490566037735849, "grad_norm": 0.3387150466442108, "learning_rate": 1.877358490566038e-05, "log_odds_chosen": 15.262411117553711, "log_odds_ratio": -6.615633174078539e-05, "logits/chosen": -2.305337905883789, "logits/rejected": -5.992312431335449, "logps/chosen": -1.5741474628448486, "logps/rejected": -16.528202056884766, "loss": 1.4408, "nll_loss": 1.440836787223816, "rewards/accuracies": 1.0, "rewards/chosen": -0.1574147343635559, "rewards/margins": 1.4954053163528442, "rewards/rejected": -1.652820110321045, "step": 331 }, { "epoch": 1.2528301886792452, "grad_norm": 0.37555763125419617, "learning_rate": 1.8679245283018867e-05, "log_odds_chosen": 18.441608428955078, "log_odds_ratio": -1.5244633686961606e-05, "logits/chosen": -2.3576619625091553, "logits/rejected": -4.850790500640869, "logps/chosen": -1.4474366903305054, "logps/rejected": -19.480161666870117, "loss": 1.397, "nll_loss": 1.3970084190368652, "rewards/accuracies": 1.0, "rewards/chosen": -0.1447436660528183, "rewards/margins": 1.8032724857330322, "rewards/rejected": -1.9480161666870117, "step": 332 }, { "epoch": 1.2566037735849056, "grad_norm": 0.37432780861854553, "learning_rate": 1.8584905660377358e-05, "log_odds_chosen": 16.72200584411621, "log_odds_ratio": -2.3494829292758368e-05, "logits/chosen": -0.13491854071617126, "logits/rejected": -3.951087474822998, "logps/chosen": -1.4685932397842407, "logps/rejected": -17.866477966308594, "loss": 1.5172, "nll_loss": 1.5171722173690796, "rewards/accuracies": 1.0, "rewards/chosen": -0.1468593329191208, "rewards/margins": 1.6397886276245117, "rewards/rejected": -1.7866477966308594, "step": 333 }, { "epoch": 1.260377358490566, "grad_norm": 0.3993586301803589, "learning_rate": 1.849056603773585e-05, "log_odds_chosen": 15.771561622619629, "log_odds_ratio": -3.5672157537192106e-05, "logits/chosen": -1.266805648803711, "logits/rejected": -4.964008808135986, "logps/chosen": -1.478631854057312, "logps/rejected": -16.92867088317871, "loss": 1.5646, "nll_loss": 1.5645837783813477, "rewards/accuracies": 1.0, "rewards/chosen": -0.14786317944526672, "rewards/margins": 1.545003890991211, "rewards/rejected": -1.6928670406341553, "step": 334 }, { "epoch": 1.2641509433962264, "grad_norm": 0.34949737787246704, "learning_rate": 1.839622641509434e-05, "log_odds_chosen": 14.615917205810547, "log_odds_ratio": -0.18080249428749084, "logits/chosen": -2.7243032455444336, "logits/rejected": -6.512376308441162, "logps/chosen": -1.9120750427246094, "logps/rejected": -16.35055160522461, "loss": 1.6374, "nll_loss": 1.6193275451660156, "rewards/accuracies": 0.875, "rewards/chosen": -0.19120751321315765, "rewards/margins": 1.44384765625, "rewards/rejected": -1.6350551843643188, "step": 335 }, { "epoch": 1.2679245283018867, "grad_norm": 0.3900119960308075, "learning_rate": 1.830188679245283e-05, "log_odds_chosen": 15.152379035949707, "log_odds_ratio": -0.07246612012386322, "logits/chosen": -2.0148720741271973, "logits/rejected": -5.148480415344238, "logps/chosen": -1.7054778337478638, "logps/rejected": -16.5804443359375, "loss": 1.4763, "nll_loss": 1.4690625667572021, "rewards/accuracies": 0.9375, "rewards/chosen": -0.17054779827594757, "rewards/margins": 1.4874964952468872, "rewards/rejected": -1.6580443382263184, "step": 336 }, { "epoch": 1.271698113207547, "grad_norm": 0.3045012354850769, "learning_rate": 1.820754716981132e-05, "log_odds_chosen": 14.322192192077637, "log_odds_ratio": -0.0033934221137315035, "logits/chosen": -1.8886581659317017, "logits/rejected": -7.217465877532959, "logps/chosen": -1.5194714069366455, "logps/rejected": -15.524861335754395, "loss": 1.6423, "nll_loss": 1.6419386863708496, "rewards/accuracies": 1.0, "rewards/chosen": -0.15194714069366455, "rewards/margins": 1.4005389213562012, "rewards/rejected": -1.5524861812591553, "step": 337 }, { "epoch": 1.2754716981132075, "grad_norm": 0.27202707529067993, "learning_rate": 1.811320754716981e-05, "log_odds_chosen": 17.09416389465332, "log_odds_ratio": -0.01481825951486826, "logits/chosen": -2.577331066131592, "logits/rejected": -6.886340141296387, "logps/chosen": -1.345436930656433, "logps/rejected": -17.985553741455078, "loss": 1.3331, "nll_loss": 1.3316359519958496, "rewards/accuracies": 1.0, "rewards/chosen": -0.13454370200634003, "rewards/margins": 1.6640119552612305, "rewards/rejected": -1.7985554933547974, "step": 338 }, { "epoch": 1.2792452830188679, "grad_norm": 0.35674479603767395, "learning_rate": 1.80188679245283e-05, "log_odds_chosen": 17.09935760498047, "log_odds_ratio": -0.04254509136080742, "logits/chosen": -1.4332383871078491, "logits/rejected": -6.181375503540039, "logps/chosen": -1.9294726848602295, "logps/rejected": -18.842205047607422, "loss": 1.6018, "nll_loss": 1.597574234008789, "rewards/accuracies": 1.0, "rewards/chosen": -0.19294726848602295, "rewards/margins": 1.6912733316421509, "rewards/rejected": -1.8842206001281738, "step": 339 }, { "epoch": 1.2830188679245282, "grad_norm": 0.33350715041160583, "learning_rate": 1.7924528301886792e-05, "log_odds_chosen": 17.224140167236328, "log_odds_ratio": -2.8834060685767327e-06, "logits/chosen": -1.3092684745788574, "logits/rejected": -7.616753578186035, "logps/chosen": -1.7400907278060913, "logps/rejected": -18.713537216186523, "loss": 1.5776, "nll_loss": 1.5776221752166748, "rewards/accuracies": 1.0, "rewards/chosen": -0.1740090698003769, "rewards/margins": 1.6973445415496826, "rewards/rejected": -1.8713536262512207, "step": 340 }, { "epoch": 1.2867924528301886, "grad_norm": 0.31082576513290405, "learning_rate": 1.7830188679245283e-05, "log_odds_chosen": 16.050865173339844, "log_odds_ratio": -0.00012872874503955245, "logits/chosen": -1.133547067642212, "logits/rejected": -5.591988563537598, "logps/chosen": -1.5776842832565308, "logps/rejected": -17.315185546875, "loss": 1.3479, "nll_loss": 1.347907543182373, "rewards/accuracies": 1.0, "rewards/chosen": -0.15776842832565308, "rewards/margins": 1.5737502574920654, "rewards/rejected": -1.7315186262130737, "step": 341 }, { "epoch": 1.290566037735849, "grad_norm": 0.37589412927627563, "learning_rate": 1.7735849056603774e-05, "log_odds_chosen": 15.594026565551758, "log_odds_ratio": -0.08674325048923492, "logits/chosen": -1.7124016284942627, "logits/rejected": -4.819745063781738, "logps/chosen": -1.672520637512207, "logps/rejected": -17.02138328552246, "loss": 1.4996, "nll_loss": 1.4909459352493286, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1672520637512207, "rewards/margins": 1.534886360168457, "rewards/rejected": -1.7021384239196777, "step": 342 }, { "epoch": 1.2943396226415094, "grad_norm": 0.3507995307445526, "learning_rate": 1.7641509433962265e-05, "log_odds_chosen": 16.04661750793457, "log_odds_ratio": -0.007493563462048769, "logits/chosen": -1.3305596113204956, "logits/rejected": -5.7259111404418945, "logps/chosen": -1.474043846130371, "logps/rejected": -17.168848037719727, "loss": 1.4017, "nll_loss": 1.4009082317352295, "rewards/accuracies": 1.0, "rewards/chosen": -0.14740438759326935, "rewards/margins": 1.569480538368225, "rewards/rejected": -1.7168848514556885, "step": 343 }, { "epoch": 1.2981132075471697, "grad_norm": 0.37658241391181946, "learning_rate": 1.7547169811320753e-05, "log_odds_chosen": 17.842830657958984, "log_odds_ratio": -1.3475509149429854e-05, "logits/chosen": -2.2428817749023438, "logits/rejected": -5.751044273376465, "logps/chosen": -1.5199084281921387, "logps/rejected": -19.044391632080078, "loss": 1.4837, "nll_loss": 1.4836554527282715, "rewards/accuracies": 1.0, "rewards/chosen": -0.1519908457994461, "rewards/margins": 1.752448320388794, "rewards/rejected": -1.9044389724731445, "step": 344 }, { "epoch": 1.3018867924528301, "grad_norm": 0.35167789459228516, "learning_rate": 1.7452830188679244e-05, "log_odds_chosen": 15.047775268554688, "log_odds_ratio": -0.008871389552950859, "logits/chosen": -1.8355504274368286, "logits/rejected": -6.161797046661377, "logps/chosen": -1.6795439720153809, "logps/rejected": -16.46615982055664, "loss": 1.488, "nll_loss": 1.4871084690093994, "rewards/accuracies": 1.0, "rewards/chosen": -0.16795440018177032, "rewards/margins": 1.4786615371704102, "rewards/rejected": -1.646615982055664, "step": 345 }, { "epoch": 1.3056603773584905, "grad_norm": 0.35509005188941956, "learning_rate": 1.7358490566037735e-05, "log_odds_chosen": 13.43126106262207, "log_odds_ratio": -0.05844378471374512, "logits/chosen": -0.840483546257019, "logits/rejected": -5.20952844619751, "logps/chosen": -1.282634973526001, "logps/rejected": -14.154840469360352, "loss": 1.3724, "nll_loss": 1.3665363788604736, "rewards/accuracies": 0.9375, "rewards/chosen": -0.12826348841190338, "rewards/margins": 1.2872204780578613, "rewards/rejected": -1.4154839515686035, "step": 346 }, { "epoch": 1.3094339622641509, "grad_norm": 0.3561452031135559, "learning_rate": 1.7264150943396226e-05, "log_odds_chosen": 18.20964813232422, "log_odds_ratio": -4.6193684966056026e-07, "logits/chosen": -2.4437973499298096, "logits/rejected": -6.393451690673828, "logps/chosen": -1.564558982849121, "logps/rejected": -19.447908401489258, "loss": 1.6323, "nll_loss": 1.6323482990264893, "rewards/accuracies": 1.0, "rewards/chosen": -0.1564558893442154, "rewards/margins": 1.788334846496582, "rewards/rejected": -1.9447907209396362, "step": 347 }, { "epoch": 1.3132075471698113, "grad_norm": 0.2806137204170227, "learning_rate": 1.7169811320754717e-05, "log_odds_chosen": 18.3279972076416, "log_odds_ratio": -1.1324946171953343e-06, "logits/chosen": -1.2502247095108032, "logits/rejected": -6.719517230987549, "logps/chosen": -1.4687795639038086, "logps/rejected": -19.442962646484375, "loss": 1.2923, "nll_loss": 1.2922688722610474, "rewards/accuracies": 1.0, "rewards/chosen": -0.1468779593706131, "rewards/margins": 1.7974183559417725, "rewards/rejected": -1.9442962408065796, "step": 348 }, { "epoch": 1.3169811320754716, "grad_norm": 0.3450721502304077, "learning_rate": 1.707547169811321e-05, "log_odds_chosen": 14.217653274536133, "log_odds_ratio": -0.10679548978805542, "logits/chosen": -1.2525098323822021, "logits/rejected": -4.723179340362549, "logps/chosen": -1.651379942893982, "logps/rejected": -15.504240036010742, "loss": 1.5352, "nll_loss": 1.5244849920272827, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16513800621032715, "rewards/margins": 1.3852860927581787, "rewards/rejected": -1.5504240989685059, "step": 349 }, { "epoch": 1.320754716981132, "grad_norm": 0.33943700790405273, "learning_rate": 1.69811320754717e-05, "log_odds_chosen": 16.3641300201416, "log_odds_ratio": -0.061000462621450424, "logits/chosen": -1.4788792133331299, "logits/rejected": -5.779600143432617, "logps/chosen": -1.4309313297271729, "logps/rejected": -17.400362014770508, "loss": 1.4527, "nll_loss": 1.4466229677200317, "rewards/accuracies": 0.9375, "rewards/chosen": -0.14309315383434296, "rewards/margins": 1.5969431400299072, "rewards/rejected": -1.7400362491607666, "step": 350 }, { "epoch": 1.3245283018867924, "grad_norm": 0.32729095220565796, "learning_rate": 1.6886792452830187e-05, "log_odds_chosen": 15.319132804870605, "log_odds_ratio": -0.056552521884441376, "logits/chosen": -0.8255969285964966, "logits/rejected": -5.052517890930176, "logps/chosen": -1.3465955257415771, "logps/rejected": -16.29346466064453, "loss": 1.4911, "nll_loss": 1.4854259490966797, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1346595585346222, "rewards/margins": 1.4946870803833008, "rewards/rejected": -1.6293466091156006, "step": 351 }, { "epoch": 1.3283018867924528, "grad_norm": 0.3329434394836426, "learning_rate": 1.6792452830188678e-05, "log_odds_chosen": 16.610496520996094, "log_odds_ratio": -3.859699427266605e-05, "logits/chosen": -2.8449089527130127, "logits/rejected": -7.928000450134277, "logps/chosen": -1.596476435661316, "logps/rejected": -17.940351486206055, "loss": 1.4448, "nll_loss": 1.4447648525238037, "rewards/accuracies": 1.0, "rewards/chosen": -0.15964765846729279, "rewards/margins": 1.634387493133545, "rewards/rejected": -1.7940351963043213, "step": 352 }, { "epoch": 1.3320754716981131, "grad_norm": 0.3026202321052551, "learning_rate": 1.669811320754717e-05, "log_odds_chosen": 16.932235717773438, "log_odds_ratio": -0.03685789927840233, "logits/chosen": -1.5450692176818848, "logits/rejected": -4.212255001068115, "logps/chosen": -1.2493813037872314, "logps/rejected": -17.693649291992188, "loss": 1.4363, "nll_loss": 1.4326279163360596, "rewards/accuracies": 1.0, "rewards/chosen": -0.12493812292814255, "rewards/margins": 1.644426941871643, "rewards/rejected": -1.7693649530410767, "step": 353 }, { "epoch": 1.3358490566037735, "grad_norm": 0.2926734685897827, "learning_rate": 1.660377358490566e-05, "log_odds_chosen": 17.678836822509766, "log_odds_ratio": -8.680287464812864e-06, "logits/chosen": -2.915510654449463, "logits/rejected": -7.7731733322143555, "logps/chosen": -1.7902599573135376, "logps/rejected": -19.27121353149414, "loss": 1.5991, "nll_loss": 1.5991185903549194, "rewards/accuracies": 1.0, "rewards/chosen": -0.17902600765228271, "rewards/margins": 1.7480952739715576, "rewards/rejected": -1.9271214008331299, "step": 354 }, { "epoch": 1.3396226415094339, "grad_norm": 0.340572327375412, "learning_rate": 1.650943396226415e-05, "log_odds_chosen": 17.665685653686523, "log_odds_ratio": -0.02697843872010708, "logits/chosen": -1.601323127746582, "logits/rejected": -5.377262592315674, "logps/chosen": -1.6611402034759521, "logps/rejected": -19.03213119506836, "loss": 1.4539, "nll_loss": 1.4511959552764893, "rewards/accuracies": 1.0, "rewards/chosen": -0.16611401736736298, "rewards/margins": 1.7370991706848145, "rewards/rejected": -1.9032131433486938, "step": 355 }, { "epoch": 1.3433962264150943, "grad_norm": 1.6760241985321045, "learning_rate": 1.6415094339622643e-05, "log_odds_chosen": 18.934751510620117, "log_odds_ratio": -1.7136345320523105e-07, "logits/chosen": -2.097600221633911, "logits/rejected": -6.444354057312012, "logps/chosen": -1.4015324115753174, "logps/rejected": -19.90370750427246, "loss": 1.4334, "nll_loss": 1.4333903789520264, "rewards/accuracies": 1.0, "rewards/chosen": -0.14015324413776398, "rewards/margins": 1.850217580795288, "rewards/rejected": -1.990370750427246, "step": 356 }, { "epoch": 1.3471698113207546, "grad_norm": 0.3295755386352539, "learning_rate": 1.6320754716981134e-05, "log_odds_chosen": 19.07089614868164, "log_odds_ratio": -5.215408194203519e-08, "logits/chosen": -0.8209649324417114, "logits/rejected": -4.625163555145264, "logps/chosen": -1.615844964981079, "logps/rejected": -20.342763900756836, "loss": 1.7516, "nll_loss": 1.7515586614608765, "rewards/accuracies": 1.0, "rewards/chosen": -0.1615844964981079, "rewards/margins": 1.8726916313171387, "rewards/rejected": -2.034276247024536, "step": 357 }, { "epoch": 1.350943396226415, "grad_norm": 0.3472559154033661, "learning_rate": 1.622641509433962e-05, "log_odds_chosen": 16.93330955505371, "log_odds_ratio": -0.0761246532201767, "logits/chosen": -1.1531541347503662, "logits/rejected": -4.757190704345703, "logps/chosen": -1.6295312643051147, "logps/rejected": -18.269256591796875, "loss": 1.452, "nll_loss": 1.4443397521972656, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16295313835144043, "rewards/margins": 1.6639723777770996, "rewards/rejected": -1.82692551612854, "step": 358 }, { "epoch": 1.3547169811320754, "grad_norm": 0.3795447051525116, "learning_rate": 1.6132075471698112e-05, "log_odds_chosen": 18.87713623046875, "log_odds_ratio": -2.4379320166190155e-05, "logits/chosen": -3.349454402923584, "logits/rejected": -7.033996105194092, "logps/chosen": -1.5019011497497559, "logps/rejected": -20.02358055114746, "loss": 1.4321, "nll_loss": 1.4321367740631104, "rewards/accuracies": 1.0, "rewards/chosen": -0.15019011497497559, "rewards/margins": 1.8521679639816284, "rewards/rejected": -2.0023581981658936, "step": 359 }, { "epoch": 1.3584905660377358, "grad_norm": 0.40565335750579834, "learning_rate": 1.6037735849056604e-05, "log_odds_chosen": 15.8294095993042, "log_odds_ratio": -0.08895085752010345, "logits/chosen": -1.62678861618042, "logits/rejected": -4.0340166091918945, "logps/chosen": -1.596952199935913, "logps/rejected": -17.054447174072266, "loss": 1.4759, "nll_loss": 1.4669734239578247, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15969522297382355, "rewards/margins": 1.5457494258880615, "rewards/rejected": -1.705444574356079, "step": 360 }, { "epoch": 1.3622641509433961, "grad_norm": 0.363148033618927, "learning_rate": 1.5943396226415095e-05, "log_odds_chosen": 18.350400924682617, "log_odds_ratio": -2.0042255073349224e-06, "logits/chosen": -0.9768545031547546, "logits/rejected": -5.307037353515625, "logps/chosen": -1.459285020828247, "logps/rejected": -19.410093307495117, "loss": 1.3574, "nll_loss": 1.3574293851852417, "rewards/accuracies": 1.0, "rewards/chosen": -0.1459285020828247, "rewards/margins": 1.7950809001922607, "rewards/rejected": -1.941009283065796, "step": 361 }, { "epoch": 1.3660377358490565, "grad_norm": 0.3766542673110962, "learning_rate": 1.5849056603773586e-05, "log_odds_chosen": 16.924793243408203, "log_odds_ratio": -6.675824806734454e-06, "logits/chosen": -2.368605136871338, "logits/rejected": -7.107510566711426, "logps/chosen": -1.487027883529663, "logps/rejected": -18.08290672302246, "loss": 1.4416, "nll_loss": 1.4416375160217285, "rewards/accuracies": 1.0, "rewards/chosen": -0.14870277047157288, "rewards/margins": 1.6595878601074219, "rewards/rejected": -1.8082906007766724, "step": 362 }, { "epoch": 1.369811320754717, "grad_norm": 0.36386638879776, "learning_rate": 1.5754716981132077e-05, "log_odds_chosen": 18.357254028320312, "log_odds_ratio": -3.6732189983013086e-06, "logits/chosen": -1.288124680519104, "logits/rejected": -5.227755546569824, "logps/chosen": -1.0662007331848145, "logps/rejected": -18.76520538330078, "loss": 1.0875, "nll_loss": 1.0875431299209595, "rewards/accuracies": 1.0, "rewards/chosen": -0.10662007331848145, "rewards/margins": 1.7699006795883179, "rewards/rejected": -1.8765207529067993, "step": 363 }, { "epoch": 1.3735849056603773, "grad_norm": 0.3877186179161072, "learning_rate": 1.5660377358490564e-05, "log_odds_chosen": 17.02109718322754, "log_odds_ratio": -9.321229299530387e-06, "logits/chosen": -1.9352566003799438, "logits/rejected": -5.730966567993164, "logps/chosen": -1.414484977722168, "logps/rejected": -18.040632247924805, "loss": 1.3428, "nll_loss": 1.3428069353103638, "rewards/accuracies": 1.0, "rewards/chosen": -0.1414484977722168, "rewards/margins": 1.6626148223876953, "rewards/rejected": -1.804063320159912, "step": 364 }, { "epoch": 1.3773584905660377, "grad_norm": 1.2752561569213867, "learning_rate": 1.5566037735849056e-05, "log_odds_chosen": 16.804956436157227, "log_odds_ratio": -0.0012535701971501112, "logits/chosen": -1.5871500968933105, "logits/rejected": -5.9648027420043945, "logps/chosen": -1.3970187902450562, "logps/rejected": -17.712099075317383, "loss": 1.5305, "nll_loss": 1.5304073095321655, "rewards/accuracies": 1.0, "rewards/chosen": -0.13970187306404114, "rewards/margins": 1.6315079927444458, "rewards/rejected": -1.7712098360061646, "step": 365 }, { "epoch": 1.3811320754716983, "grad_norm": 0.33747872710227966, "learning_rate": 1.5471698113207547e-05, "log_odds_chosen": 18.296247482299805, "log_odds_ratio": -6.034980515323696e-07, "logits/chosen": -0.8064440488815308, "logits/rejected": -5.531889915466309, "logps/chosen": -1.6278749704360962, "logps/rejected": -19.650760650634766, "loss": 1.5717, "nll_loss": 1.5716900825500488, "rewards/accuracies": 1.0, "rewards/chosen": -0.16278749704360962, "rewards/margins": 1.8022886514663696, "rewards/rejected": -1.965076208114624, "step": 366 }, { "epoch": 1.3849056603773584, "grad_norm": 0.4021959900856018, "learning_rate": 1.5377358490566038e-05, "log_odds_chosen": 17.49240493774414, "log_odds_ratio": -0.0005192816024646163, "logits/chosen": -1.245084285736084, "logits/rejected": -5.5846710205078125, "logps/chosen": -1.4274266958236694, "logps/rejected": -18.540454864501953, "loss": 1.3852, "nll_loss": 1.3851439952850342, "rewards/accuracies": 1.0, "rewards/chosen": -0.14274267852306366, "rewards/margins": 1.7113028764724731, "rewards/rejected": -1.8540456295013428, "step": 367 }, { "epoch": 1.388679245283019, "grad_norm": 0.4979332685470581, "learning_rate": 1.528301886792453e-05, "log_odds_chosen": 16.063011169433594, "log_odds_ratio": -0.06267835944890976, "logits/chosen": -1.0869340896606445, "logits/rejected": -4.391514778137207, "logps/chosen": -1.4439592361450195, "logps/rejected": -17.10696029663086, "loss": 1.4131, "nll_loss": 1.406846046447754, "rewards/accuracies": 0.9375, "rewards/chosen": -0.14439593255519867, "rewards/margins": 1.5663002729415894, "rewards/rejected": -1.7106962203979492, "step": 368 }, { "epoch": 1.3924528301886792, "grad_norm": 1.9730045795440674, "learning_rate": 1.5188679245283018e-05, "log_odds_chosen": 15.107033729553223, "log_odds_ratio": -0.06132856011390686, "logits/chosen": -1.2891849279403687, "logits/rejected": -4.657969951629639, "logps/chosen": -1.6434388160705566, "logps/rejected": -16.488523483276367, "loss": 1.4781, "nll_loss": 1.4719549417495728, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16434389352798462, "rewards/margins": 1.4845083951950073, "rewards/rejected": -1.6488523483276367, "step": 369 }, { "epoch": 1.3962264150943398, "grad_norm": 0.3606973886489868, "learning_rate": 1.509433962264151e-05, "log_odds_chosen": 18.26697540283203, "log_odds_ratio": -2.7567176630327594e-07, "logits/chosen": -0.31442150473594666, "logits/rejected": -4.0157999992370605, "logps/chosen": -1.3492426872253418, "logps/rejected": -19.13711929321289, "loss": 1.4501, "nll_loss": 1.4500954151153564, "rewards/accuracies": 1.0, "rewards/chosen": -0.1349242627620697, "rewards/margins": 1.778787612915039, "rewards/rejected": -1.9137119054794312, "step": 370 }, { "epoch": 1.4, "grad_norm": 0.7839713096618652, "learning_rate": 1.5e-05, "log_odds_chosen": 18.16492462158203, "log_odds_ratio": -0.06902754306793213, "logits/chosen": -2.4219627380371094, "logits/rejected": -5.2284698486328125, "logps/chosen": -1.5058531761169434, "logps/rejected": -19.259458541870117, "loss": 1.5453, "nll_loss": 1.5383750200271606, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1505853235721588, "rewards/margins": 1.7753607034683228, "rewards/rejected": -1.9259459972381592, "step": 371 }, { "epoch": 1.4037735849056605, "grad_norm": 0.3838679790496826, "learning_rate": 1.4905660377358491e-05, "log_odds_chosen": 16.6832218170166, "log_odds_ratio": -0.06244910508394241, "logits/chosen": -1.3226251602172852, "logits/rejected": -6.231893539428711, "logps/chosen": -1.6935502290725708, "logps/rejected": -18.128742218017578, "loss": 1.6526, "nll_loss": 1.6463897228240967, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16935503482818604, "rewards/margins": 1.6435191631317139, "rewards/rejected": -1.8128741979599, "step": 372 }, { "epoch": 1.4075471698113207, "grad_norm": 0.38915058970451355, "learning_rate": 1.4811320754716981e-05, "log_odds_chosen": 16.009296417236328, "log_odds_ratio": -1.8557118892204016e-05, "logits/chosen": -1.1935677528381348, "logits/rejected": -5.60360050201416, "logps/chosen": -1.3822619915008545, "logps/rejected": -17.003089904785156, "loss": 1.3057, "nll_loss": 1.305704951286316, "rewards/accuracies": 1.0, "rewards/chosen": -0.1382262110710144, "rewards/margins": 1.5620828866958618, "rewards/rejected": -1.7003090381622314, "step": 373 }, { "epoch": 1.4113207547169813, "grad_norm": 0.31976625323295593, "learning_rate": 1.4716981132075472e-05, "log_odds_chosen": 18.23406982421875, "log_odds_ratio": -0.0007746726041659713, "logits/chosen": -2.0351643562316895, "logits/rejected": -5.288931846618652, "logps/chosen": -1.3592875003814697, "logps/rejected": -19.12387466430664, "loss": 1.4888, "nll_loss": 1.4887058734893799, "rewards/accuracies": 1.0, "rewards/chosen": -0.13592875003814697, "rewards/margins": 1.776458978652954, "rewards/rejected": -1.9123876094818115, "step": 374 }, { "epoch": 1.4150943396226414, "grad_norm": 0.3839375376701355, "learning_rate": 1.4622641509433963e-05, "log_odds_chosen": 16.633087158203125, "log_odds_ratio": -0.18126872181892395, "logits/chosen": -1.4143149852752686, "logits/rejected": -4.575836658477783, "logps/chosen": -1.3354686498641968, "logps/rejected": -17.673622131347656, "loss": 1.4257, "nll_loss": 1.4075796604156494, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1335468739271164, "rewards/margins": 1.6338152885437012, "rewards/rejected": -1.7673622369766235, "step": 375 }, { "epoch": 1.418867924528302, "grad_norm": 0.3899994194507599, "learning_rate": 1.4528301886792452e-05, "log_odds_chosen": 18.611719131469727, "log_odds_ratio": -5.587950795415964e-07, "logits/chosen": -1.2232400178909302, "logits/rejected": -5.466405868530273, "logps/chosen": -1.512157917022705, "logps/rejected": -19.77882194519043, "loss": 1.5621, "nll_loss": 1.5620990991592407, "rewards/accuracies": 1.0, "rewards/chosen": -0.1512157917022705, "rewards/margins": 1.8266665935516357, "rewards/rejected": -1.9778823852539062, "step": 376 }, { "epoch": 1.4226415094339622, "grad_norm": 0.31554582715034485, "learning_rate": 1.4433962264150944e-05, "log_odds_chosen": 17.60826301574707, "log_odds_ratio": -3.2186994758376386e-06, "logits/chosen": -1.3368791341781616, "logits/rejected": -6.721653938293457, "logps/chosen": -1.4264721870422363, "logps/rejected": -18.68645477294922, "loss": 1.37, "nll_loss": 1.3699793815612793, "rewards/accuracies": 1.0, "rewards/chosen": -0.14264723658561707, "rewards/margins": 1.7259981632232666, "rewards/rejected": -1.868645429611206, "step": 377 }, { "epoch": 1.4264150943396228, "grad_norm": 0.3350052535533905, "learning_rate": 1.4339622641509435e-05, "log_odds_chosen": 15.84263801574707, "log_odds_ratio": -0.05588989332318306, "logits/chosen": -2.5817272663116455, "logits/rejected": -7.034815311431885, "logps/chosen": -1.5793471336364746, "logps/rejected": -17.11503791809082, "loss": 1.4305, "nll_loss": 1.4249083995819092, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15793471038341522, "rewards/margins": 1.5535691976547241, "rewards/rejected": -1.7115037441253662, "step": 378 }, { "epoch": 1.430188679245283, "grad_norm": 0.3305111825466156, "learning_rate": 1.4245283018867924e-05, "log_odds_chosen": 17.508865356445312, "log_odds_ratio": -4.04572256229585e-06, "logits/chosen": -1.8427374362945557, "logits/rejected": -7.116032600402832, "logps/chosen": -1.6888805627822876, "logps/rejected": -18.881450653076172, "loss": 1.4913, "nll_loss": 1.491302490234375, "rewards/accuracies": 1.0, "rewards/chosen": -0.16888806223869324, "rewards/margins": 1.719257116317749, "rewards/rejected": -1.888145089149475, "step": 379 }, { "epoch": 1.4339622641509435, "grad_norm": 0.5024957656860352, "learning_rate": 1.4150943396226415e-05, "log_odds_chosen": 16.263195037841797, "log_odds_ratio": -0.0004213673819322139, "logits/chosen": -1.309700608253479, "logits/rejected": -5.588892459869385, "logps/chosen": -1.4294378757476807, "logps/rejected": -17.363752365112305, "loss": 1.3392, "nll_loss": 1.339144229888916, "rewards/accuracies": 1.0, "rewards/chosen": -0.14294379949569702, "rewards/margins": 1.5934314727783203, "rewards/rejected": -1.736375331878662, "step": 380 }, { "epoch": 1.4377358490566037, "grad_norm": 0.3632482588291168, "learning_rate": 1.4056603773584906e-05, "log_odds_chosen": 17.019535064697266, "log_odds_ratio": -5.90098215980106e-06, "logits/chosen": -1.0069024562835693, "logits/rejected": -5.128624439239502, "logps/chosen": -1.4965219497680664, "logps/rejected": -18.188318252563477, "loss": 1.4404, "nll_loss": 1.440355896949768, "rewards/accuracies": 1.0, "rewards/chosen": -0.14965218305587769, "rewards/margins": 1.6691796779632568, "rewards/rejected": -1.8188319206237793, "step": 381 }, { "epoch": 1.4415094339622643, "grad_norm": 0.3996204435825348, "learning_rate": 1.3962264150943397e-05, "log_odds_chosen": 16.09657096862793, "log_odds_ratio": -0.050253380089998245, "logits/chosen": -1.884864330291748, "logits/rejected": -5.274215221405029, "logps/chosen": -1.6420609951019287, "logps/rejected": -17.452035903930664, "loss": 1.556, "nll_loss": 1.5509570837020874, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1642061024904251, "rewards/margins": 1.5809974670410156, "rewards/rejected": -1.7452034950256348, "step": 382 }, { "epoch": 1.4452830188679244, "grad_norm": 0.38239994645118713, "learning_rate": 1.3867924528301887e-05, "log_odds_chosen": 17.446556091308594, "log_odds_ratio": -0.021241456270217896, "logits/chosen": -1.7642720937728882, "logits/rejected": -4.676743507385254, "logps/chosen": -1.4043127298355103, "logps/rejected": -18.422576904296875, "loss": 1.4152, "nll_loss": 1.4130536317825317, "rewards/accuracies": 1.0, "rewards/chosen": -0.14043128490447998, "rewards/margins": 1.7018264532089233, "rewards/rejected": -1.8422577381134033, "step": 383 }, { "epoch": 1.449056603773585, "grad_norm": 0.32839640974998474, "learning_rate": 1.3773584905660378e-05, "log_odds_chosen": 16.299285888671875, "log_odds_ratio": -4.187269496469526e-06, "logits/chosen": -2.137349843978882, "logits/rejected": -8.301512718200684, "logps/chosen": -1.6436606645584106, "logps/rejected": -17.667858123779297, "loss": 1.5086, "nll_loss": 1.5085557699203491, "rewards/accuracies": 1.0, "rewards/chosen": -0.16436606645584106, "rewards/margins": 1.6024198532104492, "rewards/rejected": -1.7667858600616455, "step": 384 }, { "epoch": 1.4528301886792452, "grad_norm": 0.3542121648788452, "learning_rate": 1.3679245283018869e-05, "log_odds_chosen": 15.682723999023438, "log_odds_ratio": -2.753584340098314e-05, "logits/chosen": -1.6206883192062378, "logits/rejected": -5.171188831329346, "logps/chosen": -1.3008809089660645, "logps/rejected": -16.44762420654297, "loss": 1.2725, "nll_loss": 1.2724624872207642, "rewards/accuracies": 1.0, "rewards/chosen": -0.13008809089660645, "rewards/margins": 1.514674425125122, "rewards/rejected": -1.644762396812439, "step": 385 }, { "epoch": 1.4566037735849058, "grad_norm": 0.3344227373600006, "learning_rate": 1.3584905660377358e-05, "log_odds_chosen": 16.07890510559082, "log_odds_ratio": -0.00016838790907058865, "logits/chosen": -1.1785922050476074, "logits/rejected": -5.77630090713501, "logps/chosen": -1.60475492477417, "logps/rejected": -17.42005157470703, "loss": 1.4546, "nll_loss": 1.4545769691467285, "rewards/accuracies": 1.0, "rewards/chosen": -0.160475492477417, "rewards/margins": 1.5815297365188599, "rewards/rejected": -1.7420052289962769, "step": 386 }, { "epoch": 1.460377358490566, "grad_norm": 0.3509989380836487, "learning_rate": 1.349056603773585e-05, "log_odds_chosen": 17.025386810302734, "log_odds_ratio": -4.601534237735905e-05, "logits/chosen": -1.3022335767745972, "logits/rejected": -5.336873531341553, "logps/chosen": -1.3712674379348755, "logps/rejected": -17.987762451171875, "loss": 1.2749, "nll_loss": 1.2749412059783936, "rewards/accuracies": 1.0, "rewards/chosen": -0.13712672889232635, "rewards/margins": 1.6616493463516235, "rewards/rejected": -1.7987761497497559, "step": 387 }, { "epoch": 1.4641509433962265, "grad_norm": 0.35176974534988403, "learning_rate": 1.339622641509434e-05, "log_odds_chosen": 18.190950393676758, "log_odds_ratio": -1.4410341464099474e-05, "logits/chosen": -2.399728298187256, "logits/rejected": -6.07908296585083, "logps/chosen": -1.1239242553710938, "logps/rejected": -18.555187225341797, "loss": 1.3573, "nll_loss": 1.3572728633880615, "rewards/accuracies": 1.0, "rewards/chosen": -0.11239242553710938, "rewards/margins": 1.7431262731552124, "rewards/rejected": -1.8555186986923218, "step": 388 }, { "epoch": 1.4679245283018867, "grad_norm": 0.35888710618019104, "learning_rate": 1.3301886792452831e-05, "log_odds_chosen": 16.425899505615234, "log_odds_ratio": -0.062047071754932404, "logits/chosen": -2.126614570617676, "logits/rejected": -6.522840976715088, "logps/chosen": -1.5465141534805298, "logps/rejected": -17.640609741210938, "loss": 1.4328, "nll_loss": 1.4265483617782593, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15465140342712402, "rewards/margins": 1.6094096899032593, "rewards/rejected": -1.7640612125396729, "step": 389 }, { "epoch": 1.4716981132075473, "grad_norm": 0.4034729301929474, "learning_rate": 1.320754716981132e-05, "log_odds_chosen": 15.568371772766113, "log_odds_ratio": -3.7845486076548696e-05, "logits/chosen": -0.5623266696929932, "logits/rejected": -4.619795799255371, "logps/chosen": -1.4479458332061768, "logps/rejected": -16.667530059814453, "loss": 1.3254, "nll_loss": 1.3253822326660156, "rewards/accuracies": 1.0, "rewards/chosen": -0.14479456841945648, "rewards/margins": 1.5219584703445435, "rewards/rejected": -1.6667530536651611, "step": 390 }, { "epoch": 1.4754716981132074, "grad_norm": 0.3520585596561432, "learning_rate": 1.3113207547169812e-05, "log_odds_chosen": 14.612020492553711, "log_odds_ratio": -0.0023431943263858557, "logits/chosen": -2.2221946716308594, "logits/rejected": -6.539764881134033, "logps/chosen": -1.4834494590759277, "logps/rejected": -15.739931106567383, "loss": 1.4064, "nll_loss": 1.4061977863311768, "rewards/accuracies": 1.0, "rewards/chosen": -0.148344948887825, "rewards/margins": 1.4256480932235718, "rewards/rejected": -1.5739930868148804, "step": 391 }, { "epoch": 1.479245283018868, "grad_norm": 0.36251071095466614, "learning_rate": 1.3018867924528303e-05, "log_odds_chosen": 16.438549041748047, "log_odds_ratio": -0.09116589277982712, "logits/chosen": -2.7520642280578613, "logits/rejected": -7.930850028991699, "logps/chosen": -1.535467267036438, "logps/rejected": -17.66228485107422, "loss": 1.4605, "nll_loss": 1.4513870477676392, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15354672074317932, "rewards/margins": 1.6126817464828491, "rewards/rejected": -1.766228437423706, "step": 392 }, { "epoch": 1.4830188679245282, "grad_norm": 0.34293168783187866, "learning_rate": 1.2924528301886792e-05, "log_odds_chosen": 16.164066314697266, "log_odds_ratio": -0.0003289695887360722, "logits/chosen": -1.1001029014587402, "logits/rejected": -6.432461738586426, "logps/chosen": -1.488477349281311, "logps/rejected": -17.351869583129883, "loss": 1.3264, "nll_loss": 1.3263657093048096, "rewards/accuracies": 1.0, "rewards/chosen": -0.14884772896766663, "rewards/margins": 1.5863392353057861, "rewards/rejected": -1.73518705368042, "step": 393 }, { "epoch": 1.4867924528301888, "grad_norm": 0.3245314359664917, "learning_rate": 1.2830188679245283e-05, "log_odds_chosen": 17.266613006591797, "log_odds_ratio": -0.00029518589144572616, "logits/chosen": -1.3167364597320557, "logits/rejected": -5.948409080505371, "logps/chosen": -1.1578724384307861, "logps/rejected": -17.86254119873047, "loss": 1.2429, "nll_loss": 1.2429192066192627, "rewards/accuracies": 1.0, "rewards/chosen": -0.11578723788261414, "rewards/margins": 1.6704668998718262, "rewards/rejected": -1.7862541675567627, "step": 394 }, { "epoch": 1.490566037735849, "grad_norm": 0.3447877764701843, "learning_rate": 1.2735849056603775e-05, "log_odds_chosen": 15.028793334960938, "log_odds_ratio": -0.1531548947095871, "logits/chosen": -2.7501590251922607, "logits/rejected": -6.11824369430542, "logps/chosen": -1.720342993736267, "logps/rejected": -16.537107467651367, "loss": 1.6015, "nll_loss": 1.5861958265304565, "rewards/accuracies": 0.875, "rewards/chosen": -0.17203430831432343, "rewards/margins": 1.481676459312439, "rewards/rejected": -1.6537107229232788, "step": 395 }, { "epoch": 1.4943396226415095, "grad_norm": 0.3362826704978943, "learning_rate": 1.2641509433962264e-05, "log_odds_chosen": 17.786401748657227, "log_odds_ratio": -3.323038754388108e-06, "logits/chosen": -1.742743730545044, "logits/rejected": -5.260643482208252, "logps/chosen": -1.4009883403778076, "logps/rejected": -18.64501190185547, "loss": 1.4321, "nll_loss": 1.4321409463882446, "rewards/accuracies": 1.0, "rewards/chosen": -0.14009883999824524, "rewards/margins": 1.7244024276733398, "rewards/rejected": -1.8645012378692627, "step": 396 }, { "epoch": 1.4981132075471697, "grad_norm": 0.3690617084503174, "learning_rate": 1.2547169811320755e-05, "log_odds_chosen": 18.10134506225586, "log_odds_ratio": -0.050154730677604675, "logits/chosen": -1.2822532653808594, "logits/rejected": -4.944076061248779, "logps/chosen": -1.3214470148086548, "logps/rejected": -19.015644073486328, "loss": 1.219, "nll_loss": 1.2139716148376465, "rewards/accuracies": 0.9375, "rewards/chosen": -0.13214470446109772, "rewards/margins": 1.7694196701049805, "rewards/rejected": -1.9015644788742065, "step": 397 }, { "epoch": 1.5018867924528303, "grad_norm": 0.3674098551273346, "learning_rate": 1.2452830188679246e-05, "log_odds_chosen": 15.48218059539795, "log_odds_ratio": -0.06490848958492279, "logits/chosen": -1.954714298248291, "logits/rejected": -6.481346130371094, "logps/chosen": -1.3896689414978027, "logps/rejected": -16.480030059814453, "loss": 1.4439, "nll_loss": 1.4373944997787476, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1389668881893158, "rewards/margins": 1.5090359449386597, "rewards/rejected": -1.6480028629302979, "step": 398 }, { "epoch": 1.5056603773584905, "grad_norm": 0.36629459261894226, "learning_rate": 1.2358490566037737e-05, "log_odds_chosen": 16.291677474975586, "log_odds_ratio": -0.08093234151601791, "logits/chosen": -0.5040303468704224, "logits/rejected": -4.219124794006348, "logps/chosen": -1.6667414903640747, "logps/rejected": -17.61298370361328, "loss": 1.6038, "nll_loss": 1.5956670045852661, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1666741520166397, "rewards/margins": 1.594624400138855, "rewards/rejected": -1.761298418045044, "step": 399 }, { "epoch": 1.509433962264151, "grad_norm": 0.4143602252006531, "learning_rate": 1.2264150943396227e-05, "log_odds_chosen": 16.119110107421875, "log_odds_ratio": -0.0009257096680812538, "logits/chosen": -0.8463224172592163, "logits/rejected": -4.586696624755859, "logps/chosen": -1.3971197605133057, "logps/rejected": -17.024677276611328, "loss": 1.4237, "nll_loss": 1.423575758934021, "rewards/accuracies": 1.0, "rewards/chosen": -0.13971199095249176, "rewards/margins": 1.5627559423446655, "rewards/rejected": -1.702467918395996, "step": 400 }, { "epoch": 1.5132075471698112, "grad_norm": 0.3812906742095947, "learning_rate": 1.2169811320754718e-05, "log_odds_chosen": 14.85096549987793, "log_odds_ratio": -0.05318630486726761, "logits/chosen": -0.3447520136833191, "logits/rejected": -4.831925868988037, "logps/chosen": -1.568985939025879, "logps/rejected": -16.13321304321289, "loss": 1.4211, "nll_loss": 1.4157638549804688, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1568985879421234, "rewards/margins": 1.4564226865768433, "rewards/rejected": -1.613321304321289, "step": 401 }, { "epoch": 1.5169811320754718, "grad_norm": 0.3796207010746002, "learning_rate": 1.2075471698113209e-05, "log_odds_chosen": 14.776948928833008, "log_odds_ratio": -0.13772796094417572, "logits/chosen": -2.6085329055786133, "logits/rejected": -5.985441207885742, "logps/chosen": -1.564119815826416, "logps/rejected": -16.096309661865234, "loss": 1.3687, "nll_loss": 1.3549602031707764, "rewards/accuracies": 0.875, "rewards/chosen": -0.15641197562217712, "rewards/margins": 1.4532190561294556, "rewards/rejected": -1.6096309423446655, "step": 402 }, { "epoch": 1.520754716981132, "grad_norm": 0.6695427894592285, "learning_rate": 1.1981132075471698e-05, "log_odds_chosen": 15.497125625610352, "log_odds_ratio": -7.294305760296993e-06, "logits/chosen": -0.9591398239135742, "logits/rejected": -5.608551979064941, "logps/chosen": -1.536940097808838, "logps/rejected": -16.734256744384766, "loss": 1.3521, "nll_loss": 1.3521382808685303, "rewards/accuracies": 1.0, "rewards/chosen": -0.1536940038204193, "rewards/margins": 1.5197317600250244, "rewards/rejected": -1.6734257936477661, "step": 403 }, { "epoch": 1.5245283018867926, "grad_norm": 0.5477868318557739, "learning_rate": 1.188679245283019e-05, "log_odds_chosen": 16.6223087310791, "log_odds_ratio": -0.0006986000225879252, "logits/chosen": -1.419918179512024, "logits/rejected": -4.885513782501221, "logps/chosen": -1.3331007957458496, "logps/rejected": -17.54778480529785, "loss": 1.3093, "nll_loss": 1.3091806173324585, "rewards/accuracies": 1.0, "rewards/chosen": -0.13331007957458496, "rewards/margins": 1.621468424797058, "rewards/rejected": -1.7547786235809326, "step": 404 }, { "epoch": 1.5283018867924527, "grad_norm": 0.333292156457901, "learning_rate": 1.179245283018868e-05, "log_odds_chosen": 18.347660064697266, "log_odds_ratio": -1.3806706192553975e-05, "logits/chosen": -1.4701156616210938, "logits/rejected": -6.684134483337402, "logps/chosen": -1.4482415914535522, "logps/rejected": -19.408504486083984, "loss": 1.3793, "nll_loss": 1.3793381452560425, "rewards/accuracies": 1.0, "rewards/chosen": -0.14482416212558746, "rewards/margins": 1.7960264682769775, "rewards/rejected": -1.9408507347106934, "step": 405 }, { "epoch": 1.5320754716981133, "grad_norm": 0.3505614399909973, "learning_rate": 1.169811320754717e-05, "log_odds_chosen": 18.066612243652344, "log_odds_ratio": -6.847452823421918e-06, "logits/chosen": -2.2876837253570557, "logits/rejected": -6.302707195281982, "logps/chosen": -1.4648969173431396, "logps/rejected": -19.111392974853516, "loss": 1.6152, "nll_loss": 1.6151999235153198, "rewards/accuracies": 1.0, "rewards/chosen": -0.1464896947145462, "rewards/margins": 1.7646493911743164, "rewards/rejected": -1.9111392498016357, "step": 406 }, { "epoch": 1.5358490566037735, "grad_norm": 0.356839120388031, "learning_rate": 1.160377358490566e-05, "log_odds_chosen": 15.59557819366455, "log_odds_ratio": -0.0001072017039405182, "logits/chosen": -0.30015936493873596, "logits/rejected": -6.0860114097595215, "logps/chosen": -1.6691813468933105, "logps/rejected": -17.017396926879883, "loss": 1.4345, "nll_loss": 1.434523344039917, "rewards/accuracies": 1.0, "rewards/chosen": -0.16691814363002777, "rewards/margins": 1.534821629524231, "rewards/rejected": -1.70173978805542, "step": 407 }, { "epoch": 1.539622641509434, "grad_norm": 0.36700817942619324, "learning_rate": 1.1509433962264152e-05, "log_odds_chosen": 17.510787963867188, "log_odds_ratio": -1.4975793192206766e-06, "logits/chosen": -1.014905571937561, "logits/rejected": -5.991207122802734, "logps/chosen": -1.4787532091140747, "logps/rejected": -18.62135887145996, "loss": 1.4582, "nll_loss": 1.4582459926605225, "rewards/accuracies": 1.0, "rewards/chosen": -0.1478753238916397, "rewards/margins": 1.7142606973648071, "rewards/rejected": -1.8621360063552856, "step": 408 }, { "epoch": 1.5433962264150942, "grad_norm": 0.355266273021698, "learning_rate": 1.1415094339622643e-05, "log_odds_chosen": 16.35395050048828, "log_odds_ratio": -7.43008786230348e-05, "logits/chosen": -1.7440268993377686, "logits/rejected": -6.251463890075684, "logps/chosen": -1.4440710544586182, "logps/rejected": -17.370180130004883, "loss": 1.6528, "nll_loss": 1.6527677774429321, "rewards/accuracies": 1.0, "rewards/chosen": -0.14440712332725525, "rewards/margins": 1.5926108360290527, "rewards/rejected": -1.73701810836792, "step": 409 }, { "epoch": 1.5471698113207548, "grad_norm": 0.3182680904865265, "learning_rate": 1.1320754716981132e-05, "log_odds_chosen": 15.64585018157959, "log_odds_ratio": -0.013405690900981426, "logits/chosen": -1.3935030698776245, "logits/rejected": -5.763398170471191, "logps/chosen": -1.366743564605713, "logps/rejected": -16.644275665283203, "loss": 1.4451, "nll_loss": 1.443767786026001, "rewards/accuracies": 1.0, "rewards/chosen": -0.13667435944080353, "rewards/margins": 1.5277531147003174, "rewards/rejected": -1.6644275188446045, "step": 410 }, { "epoch": 1.550943396226415, "grad_norm": 0.3500687777996063, "learning_rate": 1.1226415094339623e-05, "log_odds_chosen": 16.229408264160156, "log_odds_ratio": -5.9309946664143354e-05, "logits/chosen": -2.4652421474456787, "logits/rejected": -6.496399879455566, "logps/chosen": -1.3401199579238892, "logps/rejected": -17.225467681884766, "loss": 1.5363, "nll_loss": 1.5363225936889648, "rewards/accuracies": 1.0, "rewards/chosen": -0.13401198387145996, "rewards/margins": 1.5885348320007324, "rewards/rejected": -1.7225468158721924, "step": 411 }, { "epoch": 1.5547169811320756, "grad_norm": 0.427716463804245, "learning_rate": 1.1132075471698115e-05, "log_odds_chosen": 16.30535888671875, "log_odds_ratio": -0.02768136002123356, "logits/chosen": -1.3295129537582397, "logits/rejected": -5.655584335327148, "logps/chosen": -1.5316812992095947, "logps/rejected": -17.553970336914062, "loss": 1.3546, "nll_loss": 1.3517988920211792, "rewards/accuracies": 1.0, "rewards/chosen": -0.15316811203956604, "rewards/margins": 1.602229118347168, "rewards/rejected": -1.7553972005844116, "step": 412 }, { "epoch": 1.5584905660377357, "grad_norm": 0.3918018639087677, "learning_rate": 1.1037735849056604e-05, "log_odds_chosen": 17.692913055419922, "log_odds_ratio": -9.760309467310435e-07, "logits/chosen": -0.8427505493164062, "logits/rejected": -4.809689521789551, "logps/chosen": -1.7556877136230469, "logps/rejected": -19.217164993286133, "loss": 1.3681, "nll_loss": 1.368148684501648, "rewards/accuracies": 1.0, "rewards/chosen": -0.17556877434253693, "rewards/margins": 1.746147632598877, "rewards/rejected": -1.9217164516448975, "step": 413 }, { "epoch": 1.5622641509433963, "grad_norm": 0.3670799434185028, "learning_rate": 1.0943396226415095e-05, "log_odds_chosen": 17.20905303955078, "log_odds_ratio": -0.020199574530124664, "logits/chosen": -1.5989339351654053, "logits/rejected": -5.965342044830322, "logps/chosen": -1.5377042293548584, "logps/rejected": -18.371013641357422, "loss": 1.5964, "nll_loss": 1.5944006443023682, "rewards/accuracies": 1.0, "rewards/chosen": -0.15377041697502136, "rewards/margins": 1.68333101272583, "rewards/rejected": -1.8371014595031738, "step": 414 }, { "epoch": 1.5660377358490565, "grad_norm": 0.35576295852661133, "learning_rate": 1.0849056603773586e-05, "log_odds_chosen": 15.496426582336426, "log_odds_ratio": -0.049891576170921326, "logits/chosen": -0.8893330097198486, "logits/rejected": -5.53910493850708, "logps/chosen": -1.553781270980835, "logps/rejected": -16.68655776977539, "loss": 1.5559, "nll_loss": 1.5509579181671143, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15537814795970917, "rewards/margins": 1.5132777690887451, "rewards/rejected": -1.6686559915542603, "step": 415 }, { "epoch": 1.569811320754717, "grad_norm": 0.32513803243637085, "learning_rate": 1.0754716981132077e-05, "log_odds_chosen": 17.47341537475586, "log_odds_ratio": -0.0002301803178852424, "logits/chosen": -1.110384225845337, "logits/rejected": -6.549045562744141, "logps/chosen": -1.5312076807022095, "logps/rejected": -18.674665451049805, "loss": 1.5338, "nll_loss": 1.5337331295013428, "rewards/accuracies": 1.0, "rewards/chosen": -0.15312077105045319, "rewards/margins": 1.7143458127975464, "rewards/rejected": -1.867466688156128, "step": 416 }, { "epoch": 1.5735849056603772, "grad_norm": 0.3317815959453583, "learning_rate": 1.0660377358490567e-05, "log_odds_chosen": 17.113929748535156, "log_odds_ratio": -0.00808227900415659, "logits/chosen": -1.527684211730957, "logits/rejected": -6.576870918273926, "logps/chosen": -1.5478432178497314, "logps/rejected": -18.27701187133789, "loss": 1.5447, "nll_loss": 1.5438923835754395, "rewards/accuracies": 1.0, "rewards/chosen": -0.15478432178497314, "rewards/margins": 1.6729168891906738, "rewards/rejected": -1.8277010917663574, "step": 417 }, { "epoch": 1.5773584905660378, "grad_norm": 0.3883110582828522, "learning_rate": 1.0566037735849058e-05, "log_odds_chosen": 16.38488006591797, "log_odds_ratio": -0.01739910989999771, "logits/chosen": -2.0036072731018066, "logits/rejected": -6.386971473693848, "logps/chosen": -1.778263807296753, "logps/rejected": -17.913105010986328, "loss": 1.5031, "nll_loss": 1.5013105869293213, "rewards/accuracies": 1.0, "rewards/chosen": -0.1778264045715332, "rewards/margins": 1.6134843826293945, "rewards/rejected": -1.7913107872009277, "step": 418 }, { "epoch": 1.581132075471698, "grad_norm": 0.3066483736038208, "learning_rate": 1.0471698113207549e-05, "log_odds_chosen": 19.433700561523438, "log_odds_ratio": -1.0430817098949774e-07, "logits/chosen": -1.0177987813949585, "logits/rejected": -6.801183700561523, "logps/chosen": -1.4599207639694214, "logps/rejected": -20.431859970092773, "loss": 1.3882, "nll_loss": 1.3881503343582153, "rewards/accuracies": 1.0, "rewards/chosen": -0.14599208533763885, "rewards/margins": 1.8971937894821167, "rewards/rejected": -2.0431859493255615, "step": 419 }, { "epoch": 1.5849056603773586, "grad_norm": 0.40567484498023987, "learning_rate": 1.0377358490566038e-05, "log_odds_chosen": 13.711181640625, "log_odds_ratio": -0.05775582417845726, "logits/chosen": -0.9496514797210693, "logits/rejected": -4.353882789611816, "logps/chosen": -1.6069025993347168, "logps/rejected": -15.056055068969727, "loss": 1.3888, "nll_loss": 1.3829797506332397, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16069024801254272, "rewards/margins": 1.3449151515960693, "rewards/rejected": -1.5056054592132568, "step": 420 }, { "epoch": 1.5886792452830187, "grad_norm": 0.34252166748046875, "learning_rate": 1.028301886792453e-05, "log_odds_chosen": 19.151784896850586, "log_odds_ratio": -1.4499668395728804e-05, "logits/chosen": -1.0097556114196777, "logits/rejected": -5.88823127746582, "logps/chosen": -1.4318840503692627, "logps/rejected": -20.140625, "loss": 1.4247, "nll_loss": 1.4247350692749023, "rewards/accuracies": 1.0, "rewards/chosen": -0.14318840205669403, "rewards/margins": 1.870874285697937, "rewards/rejected": -2.0140626430511475, "step": 421 }, { "epoch": 1.5924528301886793, "grad_norm": 0.4356919527053833, "learning_rate": 1.018867924528302e-05, "log_odds_chosen": 18.37000274658203, "log_odds_ratio": -4.2022656998597085e-06, "logits/chosen": -2.5176637172698975, "logits/rejected": -5.485119819641113, "logps/chosen": -1.699413776397705, "logps/rejected": -19.841552734375, "loss": 1.4642, "nll_loss": 1.4642114639282227, "rewards/accuracies": 1.0, "rewards/chosen": -0.16994138062000275, "rewards/margins": 1.8142141103744507, "rewards/rejected": -1.9841554164886475, "step": 422 }, { "epoch": 1.5962264150943395, "grad_norm": 0.3340418338775635, "learning_rate": 1.009433962264151e-05, "log_odds_chosen": 17.76876449584961, "log_odds_ratio": -1.0319773537048604e-05, "logits/chosen": -1.0354773998260498, "logits/rejected": -6.589682579040527, "logps/chosen": -1.570968508720398, "logps/rejected": -19.032516479492188, "loss": 1.5785, "nll_loss": 1.578470230102539, "rewards/accuracies": 1.0, "rewards/chosen": -0.15709686279296875, "rewards/margins": 1.74615478515625, "rewards/rejected": -1.9032516479492188, "step": 423 }, { "epoch": 1.6, "grad_norm": 0.3202284574508667, "learning_rate": 1e-05, "log_odds_chosen": 17.717674255371094, "log_odds_ratio": -2.1941097656963393e-05, "logits/chosen": -1.6594294309616089, "logits/rejected": -5.430027484893799, "logps/chosen": -1.4836103916168213, "logps/rejected": -18.895004272460938, "loss": 1.6185, "nll_loss": 1.6185247898101807, "rewards/accuracies": 1.0, "rewards/chosen": -0.14836102724075317, "rewards/margins": 1.7411394119262695, "rewards/rejected": -1.889500379562378, "step": 424 }, { "epoch": 1.6037735849056602, "grad_norm": 0.3735036849975586, "learning_rate": 9.905660377358492e-06, "log_odds_chosen": 17.9097843170166, "log_odds_ratio": -2.078727447951678e-06, "logits/chosen": -1.3803160190582275, "logits/rejected": -5.703221797943115, "logps/chosen": -1.2858457565307617, "logps/rejected": -18.800107955932617, "loss": 1.1804, "nll_loss": 1.1804119348526, "rewards/accuracies": 1.0, "rewards/chosen": -0.1285845786333084, "rewards/margins": 1.751426339149475, "rewards/rejected": -1.880010962486267, "step": 425 }, { "epoch": 1.6075471698113208, "grad_norm": 0.5746561288833618, "learning_rate": 9.811320754716983e-06, "log_odds_chosen": 17.22057342529297, "log_odds_ratio": -3.956294222007273e-06, "logits/chosen": -1.1769520044326782, "logits/rejected": -5.837000846862793, "logps/chosen": -1.6268696784973145, "logps/rejected": -18.550504684448242, "loss": 1.653, "nll_loss": 1.6529629230499268, "rewards/accuracies": 1.0, "rewards/chosen": -0.16268697381019592, "rewards/margins": 1.6923635005950928, "rewards/rejected": -1.8550504446029663, "step": 426 }, { "epoch": 1.611320754716981, "grad_norm": 0.363587349653244, "learning_rate": 9.716981132075472e-06, "log_odds_chosen": 14.084954261779785, "log_odds_ratio": -0.07172351330518723, "logits/chosen": -1.2698285579681396, "logits/rejected": -4.79845666885376, "logps/chosen": -1.500381350517273, "logps/rejected": -15.104291915893555, "loss": 1.4545, "nll_loss": 1.4473347663879395, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15003815293312073, "rewards/margins": 1.3603911399841309, "rewards/rejected": -1.5104291439056396, "step": 427 }, { "epoch": 1.6150943396226416, "grad_norm": 0.3944726884365082, "learning_rate": 9.622641509433963e-06, "log_odds_chosen": 16.034841537475586, "log_odds_ratio": -2.1783518604934216e-05, "logits/chosen": -0.33273932337760925, "logits/rejected": -4.981014251708984, "logps/chosen": -1.6068644523620605, "logps/rejected": -17.390186309814453, "loss": 1.5603, "nll_loss": 1.5603405237197876, "rewards/accuracies": 1.0, "rewards/chosen": -0.1606864631175995, "rewards/margins": 1.5783324241638184, "rewards/rejected": -1.7390189170837402, "step": 428 }, { "epoch": 1.6188679245283017, "grad_norm": 0.3169495463371277, "learning_rate": 9.528301886792455e-06, "log_odds_chosen": 17.651002883911133, "log_odds_ratio": -0.08082529902458191, "logits/chosen": -1.4152789115905762, "logits/rejected": -5.411661148071289, "logps/chosen": -1.3260101079940796, "logps/rejected": -18.322998046875, "loss": 1.4476, "nll_loss": 1.439566731452942, "rewards/accuracies": 0.9375, "rewards/chosen": -0.13260100781917572, "rewards/margins": 1.6996989250183105, "rewards/rejected": -1.8322997093200684, "step": 429 }, { "epoch": 1.6226415094339623, "grad_norm": 0.3842892348766327, "learning_rate": 9.433962264150944e-06, "log_odds_chosen": 18.826631546020508, "log_odds_ratio": -5.513446694749291e-07, "logits/chosen": -1.7618896961212158, "logits/rejected": -6.005032062530518, "logps/chosen": -1.2967678308486938, "logps/rejected": -19.6664981842041, "loss": 1.4705, "nll_loss": 1.4704593420028687, "rewards/accuracies": 1.0, "rewards/chosen": -0.12967678904533386, "rewards/margins": 1.8369731903076172, "rewards/rejected": -1.9666498899459839, "step": 430 }, { "epoch": 1.6264150943396225, "grad_norm": 0.38966941833496094, "learning_rate": 9.339622641509433e-06, "log_odds_chosen": 13.728066444396973, "log_odds_ratio": -0.15279138088226318, "logits/chosen": -1.5197663307189941, "logits/rejected": -5.018875598907471, "logps/chosen": -1.4149266481399536, "logps/rejected": -14.751510620117188, "loss": 1.2838, "nll_loss": 1.2685606479644775, "rewards/accuracies": 0.875, "rewards/chosen": -0.14149266481399536, "rewards/margins": 1.3336584568023682, "rewards/rejected": -1.4751511812210083, "step": 431 }, { "epoch": 1.630188679245283, "grad_norm": 0.3509092330932617, "learning_rate": 9.245283018867924e-06, "log_odds_chosen": 16.643827438354492, "log_odds_ratio": -0.06135103851556778, "logits/chosen": -2.0793323516845703, "logits/rejected": -6.901341915130615, "logps/chosen": -1.5519651174545288, "logps/rejected": -17.924978256225586, "loss": 1.3948, "nll_loss": 1.3886274099349976, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15519651770591736, "rewards/margins": 1.6373014450073242, "rewards/rejected": -1.7924977540969849, "step": 432 }, { "epoch": 1.6339622641509433, "grad_norm": 0.374100923538208, "learning_rate": 9.150943396226416e-06, "log_odds_chosen": 16.738018035888672, "log_odds_ratio": -9.723278708406724e-06, "logits/chosen": -0.8103672862052917, "logits/rejected": -5.695781230926514, "logps/chosen": -1.61466646194458, "logps/rejected": -18.09328269958496, "loss": 1.5884, "nll_loss": 1.5884230136871338, "rewards/accuracies": 1.0, "rewards/chosen": -0.16146664321422577, "rewards/margins": 1.6478617191314697, "rewards/rejected": -1.809328317642212, "step": 433 }, { "epoch": 1.6377358490566039, "grad_norm": 0.348984956741333, "learning_rate": 9.056603773584905e-06, "log_odds_chosen": 15.502889633178711, "log_odds_ratio": -0.0006235900218598545, "logits/chosen": -0.9150213003158569, "logits/rejected": -5.924428939819336, "logps/chosen": -1.5006028413772583, "logps/rejected": -16.678489685058594, "loss": 1.3731, "nll_loss": 1.3730108737945557, "rewards/accuracies": 1.0, "rewards/chosen": -0.15006029605865479, "rewards/margins": 1.5177886486053467, "rewards/rejected": -1.6678489446640015, "step": 434 }, { "epoch": 1.641509433962264, "grad_norm": 0.3935360312461853, "learning_rate": 8.962264150943396e-06, "log_odds_chosen": 15.421186447143555, "log_odds_ratio": -4.3803676817333326e-05, "logits/chosen": -2.2583491802215576, "logits/rejected": -6.483213424682617, "logps/chosen": -1.748963475227356, "logps/rejected": -16.925840377807617, "loss": 1.528, "nll_loss": 1.52796471118927, "rewards/accuracies": 1.0, "rewards/chosen": -0.17489634454250336, "rewards/margins": 1.5176876783370972, "rewards/rejected": -1.6925840377807617, "step": 435 }, { "epoch": 1.6452830188679246, "grad_norm": 0.3647741973400116, "learning_rate": 8.867924528301887e-06, "log_odds_chosen": 18.231021881103516, "log_odds_ratio": -1.4775437193748076e-05, "logits/chosen": -1.7286620140075684, "logits/rejected": -3.7592124938964844, "logps/chosen": -1.3811477422714233, "logps/rejected": -19.279529571533203, "loss": 1.5674, "nll_loss": 1.5673856735229492, "rewards/accuracies": 1.0, "rewards/chosen": -0.1381147801876068, "rewards/margins": 1.7898383140563965, "rewards/rejected": -1.9279532432556152, "step": 436 }, { "epoch": 1.6490566037735848, "grad_norm": 0.3980993330478668, "learning_rate": 8.773584905660376e-06, "log_odds_chosen": 18.004159927368164, "log_odds_ratio": -9.220154606737196e-05, "logits/chosen": -0.7007821202278137, "logits/rejected": -4.726859092712402, "logps/chosen": -1.5681556463241577, "logps/rejected": -19.279773712158203, "loss": 1.4785, "nll_loss": 1.478508472442627, "rewards/accuracies": 1.0, "rewards/chosen": -0.1568155735731125, "rewards/margins": 1.7711620330810547, "rewards/rejected": -1.9279775619506836, "step": 437 }, { "epoch": 1.6528301886792454, "grad_norm": 0.38771992921829224, "learning_rate": 8.679245283018868e-06, "log_odds_chosen": 15.956310272216797, "log_odds_ratio": -0.08997757732868195, "logits/chosen": -2.8503222465515137, "logits/rejected": -6.6901421546936035, "logps/chosen": -1.7365132570266724, "logps/rejected": -17.479639053344727, "loss": 1.4619, "nll_loss": 1.4529235363006592, "rewards/accuracies": 0.9375, "rewards/chosen": -0.173651322722435, "rewards/margins": 1.574312448501587, "rewards/rejected": -1.747963786125183, "step": 438 }, { "epoch": 1.6566037735849055, "grad_norm": 0.40687695145606995, "learning_rate": 8.584905660377359e-06, "log_odds_chosen": 17.718862533569336, "log_odds_ratio": -3.934121923521161e-05, "logits/chosen": -1.7964673042297363, "logits/rejected": -5.379039764404297, "logps/chosen": -1.6427710056304932, "logps/rejected": -19.057796478271484, "loss": 1.3497, "nll_loss": 1.349698781967163, "rewards/accuracies": 1.0, "rewards/chosen": -0.1642771065235138, "rewards/margins": 1.7415024042129517, "rewards/rejected": -1.9057796001434326, "step": 439 }, { "epoch": 1.6603773584905661, "grad_norm": 0.41751906275749207, "learning_rate": 8.49056603773585e-06, "log_odds_chosen": 17.8037109375, "log_odds_ratio": -0.0002321999636478722, "logits/chosen": -1.1646761894226074, "logits/rejected": -5.253706455230713, "logps/chosen": -1.5499441623687744, "logps/rejected": -18.97268295288086, "loss": 1.4037, "nll_loss": 1.403627872467041, "rewards/accuracies": 1.0, "rewards/chosen": -0.1549944281578064, "rewards/margins": 1.7422740459442139, "rewards/rejected": -1.897268533706665, "step": 440 }, { "epoch": 1.6641509433962263, "grad_norm": 0.3891851007938385, "learning_rate": 8.396226415094339e-06, "log_odds_chosen": 15.996545791625977, "log_odds_ratio": -0.0381770022213459, "logits/chosen": -0.9191466569900513, "logits/rejected": -4.70245361328125, "logps/chosen": -1.4315601587295532, "logps/rejected": -17.08255386352539, "loss": 1.4039, "nll_loss": 1.4000357389450073, "rewards/accuracies": 1.0, "rewards/chosen": -0.1431560218334198, "rewards/margins": 1.5650992393493652, "rewards/rejected": -1.7082552909851074, "step": 441 }, { "epoch": 1.6679245283018869, "grad_norm": 0.3916653096675873, "learning_rate": 8.30188679245283e-06, "log_odds_chosen": 16.95732879638672, "log_odds_ratio": -0.040689367800951004, "logits/chosen": -1.9281044006347656, "logits/rejected": -6.584669589996338, "logps/chosen": -1.4779269695281982, "logps/rejected": -18.117380142211914, "loss": 1.3331, "nll_loss": 1.3289951086044312, "rewards/accuracies": 1.0, "rewards/chosen": -0.14779271185398102, "rewards/margins": 1.6639453172683716, "rewards/rejected": -1.8117380142211914, "step": 442 }, { "epoch": 1.671698113207547, "grad_norm": 0.4993988275527954, "learning_rate": 8.207547169811321e-06, "log_odds_chosen": 12.434319496154785, "log_odds_ratio": -0.25445252656936646, "logits/chosen": -1.8510253429412842, "logits/rejected": -4.986669540405273, "logps/chosen": -1.862165927886963, "logps/rejected": -14.14360237121582, "loss": 1.7, "nll_loss": 1.674567461013794, "rewards/accuracies": 0.75, "rewards/chosen": -0.1862165927886963, "rewards/margins": 1.2281436920166016, "rewards/rejected": -1.4143602848052979, "step": 443 }, { "epoch": 1.6754716981132076, "grad_norm": 0.37089723348617554, "learning_rate": 8.11320754716981e-06, "log_odds_chosen": 18.173118591308594, "log_odds_ratio": -1.9893154785677325e-06, "logits/chosen": -2.578376054763794, "logits/rejected": -6.764125823974609, "logps/chosen": -1.4696475267410278, "logps/rejected": -19.300800323486328, "loss": 1.4572, "nll_loss": 1.4572360515594482, "rewards/accuracies": 1.0, "rewards/chosen": -0.14696475863456726, "rewards/margins": 1.783115267753601, "rewards/rejected": -1.9300800561904907, "step": 444 }, { "epoch": 1.6792452830188678, "grad_norm": 0.35598012804985046, "learning_rate": 8.018867924528302e-06, "log_odds_chosen": 16.349313735961914, "log_odds_ratio": -4.495953908190131e-05, "logits/chosen": -1.1268545389175415, "logits/rejected": -6.885201454162598, "logps/chosen": -1.6435253620147705, "logps/rejected": -17.746475219726562, "loss": 1.5433, "nll_loss": 1.5432648658752441, "rewards/accuracies": 1.0, "rewards/chosen": -0.16435253620147705, "rewards/margins": 1.610295057296753, "rewards/rejected": -1.77464759349823, "step": 445 }, { "epoch": 1.6830188679245284, "grad_norm": 0.413074791431427, "learning_rate": 7.924528301886793e-06, "log_odds_chosen": 17.380868911743164, "log_odds_ratio": -0.023090748116374016, "logits/chosen": -0.9112629890441895, "logits/rejected": -4.611825942993164, "logps/chosen": -1.3334120512008667, "logps/rejected": -18.372407913208008, "loss": 1.3489, "nll_loss": 1.3465766906738281, "rewards/accuracies": 1.0, "rewards/chosen": -0.13334119319915771, "rewards/margins": 1.703899621963501, "rewards/rejected": -1.8372408151626587, "step": 446 }, { "epoch": 1.6867924528301885, "grad_norm": 0.3995283246040344, "learning_rate": 7.830188679245282e-06, "log_odds_chosen": 19.585247039794922, "log_odds_ratio": -8.114101547107566e-06, "logits/chosen": -0.4880484342575073, "logits/rejected": -4.196340084075928, "logps/chosen": -1.2747726440429688, "logps/rejected": -20.202425003051758, "loss": 1.3193, "nll_loss": 1.3193085193634033, "rewards/accuracies": 1.0, "rewards/chosen": -0.1274772584438324, "rewards/margins": 1.8927652835845947, "rewards/rejected": -2.020242691040039, "step": 447 }, { "epoch": 1.6905660377358491, "grad_norm": 0.3784193992614746, "learning_rate": 7.735849056603773e-06, "log_odds_chosen": 18.052352905273438, "log_odds_ratio": -3.911668954970082e-06, "logits/chosen": -1.333806037902832, "logits/rejected": -6.3108344078063965, "logps/chosen": -1.7495572566986084, "logps/rejected": -19.464826583862305, "loss": 1.5705, "nll_loss": 1.5704904794692993, "rewards/accuracies": 1.0, "rewards/chosen": -0.17495572566986084, "rewards/margins": 1.7715270519256592, "rewards/rejected": -1.94648277759552, "step": 448 }, { "epoch": 1.6943396226415093, "grad_norm": 0.37193769216537476, "learning_rate": 7.641509433962264e-06, "log_odds_chosen": 15.382280349731445, "log_odds_ratio": -0.00010735265095718205, "logits/chosen": -1.019675374031067, "logits/rejected": -5.034616470336914, "logps/chosen": -1.6141021251678467, "logps/rejected": -16.724472045898438, "loss": 1.5671, "nll_loss": 1.5670870542526245, "rewards/accuracies": 1.0, "rewards/chosen": -0.16141021251678467, "rewards/margins": 1.511036992073059, "rewards/rejected": -1.6724472045898438, "step": 449 }, { "epoch": 1.6981132075471699, "grad_norm": 0.3708736300468445, "learning_rate": 7.547169811320755e-06, "log_odds_chosen": 17.476974487304688, "log_odds_ratio": -0.018685974180698395, "logits/chosen": -1.8253635168075562, "logits/rejected": -5.652876853942871, "logps/chosen": -1.1619367599487305, "logps/rejected": -18.148921966552734, "loss": 1.23, "nll_loss": 1.228140115737915, "rewards/accuracies": 1.0, "rewards/chosen": -0.11619366705417633, "rewards/margins": 1.69869863986969, "rewards/rejected": -1.814892292022705, "step": 450 }, { "epoch": 1.70188679245283, "grad_norm": 0.4171108603477478, "learning_rate": 7.452830188679246e-06, "log_odds_chosen": 18.037120819091797, "log_odds_ratio": -4.321344704294461e-07, "logits/chosen": -2.2186176776885986, "logits/rejected": -7.5618438720703125, "logps/chosen": -1.7550156116485596, "logps/rejected": -19.57200050354004, "loss": 1.4835, "nll_loss": 1.4834994077682495, "rewards/accuracies": 1.0, "rewards/chosen": -0.17550158500671387, "rewards/margins": 1.7816987037658691, "rewards/rejected": -1.957200050354004, "step": 451 }, { "epoch": 1.7056603773584906, "grad_norm": 0.40649089217185974, "learning_rate": 7.358490566037736e-06, "log_odds_chosen": 17.768571853637695, "log_odds_ratio": -3.509271664370317e-06, "logits/chosen": -1.5206046104431152, "logits/rejected": -6.697060585021973, "logps/chosen": -1.5520222187042236, "logps/rejected": -19.050880432128906, "loss": 1.4503, "nll_loss": 1.4502997398376465, "rewards/accuracies": 1.0, "rewards/chosen": -0.1552022248506546, "rewards/margins": 1.7498859167099, "rewards/rejected": -1.9050880670547485, "step": 452 }, { "epoch": 1.7094339622641508, "grad_norm": 0.379649817943573, "learning_rate": 7.264150943396226e-06, "log_odds_chosen": 18.14077377319336, "log_odds_ratio": -0.002329126000404358, "logits/chosen": -1.208418846130371, "logits/rejected": -4.804601669311523, "logps/chosen": -1.673492431640625, "logps/rejected": -19.51561737060547, "loss": 1.5746, "nll_loss": 1.5743178129196167, "rewards/accuracies": 1.0, "rewards/chosen": -0.16734924912452698, "rewards/margins": 1.7842124700546265, "rewards/rejected": -1.9515619277954102, "step": 453 }, { "epoch": 1.7132075471698114, "grad_norm": 0.37834683060646057, "learning_rate": 7.169811320754717e-06, "log_odds_chosen": 17.196395874023438, "log_odds_ratio": -8.45340036903508e-05, "logits/chosen": -1.6690289974212646, "logits/rejected": -5.095861434936523, "logps/chosen": -1.4087400436401367, "logps/rejected": -18.211074829101562, "loss": 1.4106, "nll_loss": 1.410545825958252, "rewards/accuracies": 1.0, "rewards/chosen": -0.1408740133047104, "rewards/margins": 1.6802334785461426, "rewards/rejected": -1.8211076259613037, "step": 454 }, { "epoch": 1.7169811320754715, "grad_norm": 0.3911416530609131, "learning_rate": 7.0754716981132075e-06, "log_odds_chosen": 14.482208251953125, "log_odds_ratio": -0.18219685554504395, "logits/chosen": -1.772078514099121, "logits/rejected": -4.952263832092285, "logps/chosen": -1.774475336074829, "logps/rejected": -16.061283111572266, "loss": 1.6118, "nll_loss": 1.5935897827148438, "rewards/accuracies": 0.875, "rewards/chosen": -0.17744752764701843, "rewards/margins": 1.4286808967590332, "rewards/rejected": -1.6061283349990845, "step": 455 }, { "epoch": 1.7207547169811321, "grad_norm": 0.3706720471382141, "learning_rate": 6.981132075471699e-06, "log_odds_chosen": 15.722382545471191, "log_odds_ratio": -0.003935753367841244, "logits/chosen": -2.7362916469573975, "logits/rejected": -6.182605743408203, "logps/chosen": -1.4580714702606201, "logps/rejected": -16.79517364501953, "loss": 1.4383, "nll_loss": 1.4378933906555176, "rewards/accuracies": 1.0, "rewards/chosen": -0.145807147026062, "rewards/margins": 1.533710241317749, "rewards/rejected": -1.6795175075531006, "step": 456 }, { "epoch": 1.7245283018867923, "grad_norm": 0.40554314851760864, "learning_rate": 6.886792452830189e-06, "log_odds_chosen": 18.29325294494629, "log_odds_ratio": -8.650338713778183e-06, "logits/chosen": -0.5920654535293579, "logits/rejected": -4.844081401824951, "logps/chosen": -1.4798316955566406, "logps/rejected": -19.450267791748047, "loss": 1.2259, "nll_loss": 1.2258515357971191, "rewards/accuracies": 1.0, "rewards/chosen": -0.14798316359519958, "rewards/margins": 1.7970435619354248, "rewards/rejected": -1.9450266361236572, "step": 457 }, { "epoch": 1.728301886792453, "grad_norm": 0.42811644077301025, "learning_rate": 6.792452830188679e-06, "log_odds_chosen": 17.65870475769043, "log_odds_ratio": -1.3024438885622658e-05, "logits/chosen": -1.6862386465072632, "logits/rejected": -4.607665061950684, "logps/chosen": -1.463801383972168, "logps/rejected": -18.766820907592773, "loss": 1.473, "nll_loss": 1.472968578338623, "rewards/accuracies": 1.0, "rewards/chosen": -0.14638014137744904, "rewards/margins": 1.7303019762039185, "rewards/rejected": -1.8766820430755615, "step": 458 }, { "epoch": 1.732075471698113, "grad_norm": 0.34931331872940063, "learning_rate": 6.69811320754717e-06, "log_odds_chosen": 18.853530883789062, "log_odds_ratio": -1.6465862699988065e-06, "logits/chosen": -1.1972850561141968, "logits/rejected": -5.578787326812744, "logps/chosen": -1.192124366760254, "logps/rejected": -19.51374626159668, "loss": 1.2305, "nll_loss": 1.2305028438568115, "rewards/accuracies": 1.0, "rewards/chosen": -0.11921243369579315, "rewards/margins": 1.8321621417999268, "rewards/rejected": -1.9513745307922363, "step": 459 }, { "epoch": 1.7358490566037736, "grad_norm": 0.38165482878685, "learning_rate": 6.60377358490566e-06, "log_odds_chosen": 17.29033851623535, "log_odds_ratio": -0.003331738291308284, "logits/chosen": -0.4877992570400238, "logits/rejected": -6.1247406005859375, "logps/chosen": -1.6295177936553955, "logps/rejected": -18.68689727783203, "loss": 1.559, "nll_loss": 1.5586189031600952, "rewards/accuracies": 1.0, "rewards/chosen": -0.1629517823457718, "rewards/margins": 1.7057380676269531, "rewards/rejected": -1.8686898946762085, "step": 460 }, { "epoch": 1.7396226415094338, "grad_norm": 0.372586727142334, "learning_rate": 6.5094339622641515e-06, "log_odds_chosen": 16.901809692382812, "log_odds_ratio": -0.0549037791788578, "logits/chosen": -0.6773020625114441, "logits/rejected": -4.9583740234375, "logps/chosen": -1.572051763534546, "logps/rejected": -18.240394592285156, "loss": 1.5153, "nll_loss": 1.509831428527832, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15720517933368683, "rewards/margins": 1.6668341159820557, "rewards/rejected": -1.8240394592285156, "step": 461 }, { "epoch": 1.7433962264150944, "grad_norm": 0.3676230013370514, "learning_rate": 6.415094339622642e-06, "log_odds_chosen": 17.052555084228516, "log_odds_ratio": -0.0029449001885950565, "logits/chosen": -1.5178229808807373, "logits/rejected": -6.080841541290283, "logps/chosen": -1.308215618133545, "logps/rejected": -17.94374656677246, "loss": 1.4127, "nll_loss": 1.4124376773834229, "rewards/accuracies": 1.0, "rewards/chosen": -0.1308215707540512, "rewards/margins": 1.663553237915039, "rewards/rejected": -1.794374704360962, "step": 462 }, { "epoch": 1.7471698113207546, "grad_norm": 0.3529452085494995, "learning_rate": 6.320754716981132e-06, "log_odds_chosen": 17.932321548461914, "log_odds_ratio": -0.034649647772312164, "logits/chosen": -0.5845901966094971, "logits/rejected": -5.674325466156006, "logps/chosen": -1.6330788135528564, "logps/rejected": -19.286094665527344, "loss": 1.5689, "nll_loss": 1.5654515027999878, "rewards/accuracies": 1.0, "rewards/chosen": -0.16330787539482117, "rewards/margins": 1.7653017044067383, "rewards/rejected": -1.9286096096038818, "step": 463 }, { "epoch": 1.7509433962264151, "grad_norm": 0.5019151568412781, "learning_rate": 6.226415094339623e-06, "log_odds_chosen": 14.161150932312012, "log_odds_ratio": -0.0002686173829715699, "logits/chosen": -1.4875158071517944, "logits/rejected": -5.882111549377441, "logps/chosen": -1.6882565021514893, "logps/rejected": -15.569530487060547, "loss": 1.6722, "nll_loss": 1.6721980571746826, "rewards/accuracies": 1.0, "rewards/chosen": -0.1688256412744522, "rewards/margins": 1.388127326965332, "rewards/rejected": -1.556952953338623, "step": 464 }, { "epoch": 1.7547169811320755, "grad_norm": 0.32335349917411804, "learning_rate": 6.132075471698113e-06, "log_odds_chosen": 19.670560836791992, "log_odds_ratio": -9.08325964701362e-05, "logits/chosen": -1.4163541793823242, "logits/rejected": -6.159734725952148, "logps/chosen": -1.548288106918335, "logps/rejected": -20.88932991027832, "loss": 1.4723, "nll_loss": 1.4722576141357422, "rewards/accuracies": 1.0, "rewards/chosen": -0.15482881665229797, "rewards/margins": 1.9341042041778564, "rewards/rejected": -2.088933229446411, "step": 465 }, { "epoch": 1.758490566037736, "grad_norm": 0.4526292681694031, "learning_rate": 6.037735849056604e-06, "log_odds_chosen": 15.761448860168457, "log_odds_ratio": -0.07157308608293533, "logits/chosen": -2.5339674949645996, "logits/rejected": -6.691962242126465, "logps/chosen": -1.6193218231201172, "logps/rejected": -17.078277587890625, "loss": 1.5394, "nll_loss": 1.5322078466415405, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16193217039108276, "rewards/margins": 1.5458955764770508, "rewards/rejected": -1.7078275680541992, "step": 466 }, { "epoch": 1.7622641509433963, "grad_norm": 0.3781883120536804, "learning_rate": 5.943396226415095e-06, "log_odds_chosen": 17.7440128326416, "log_odds_ratio": -2.7567359666136326e-06, "logits/chosen": -0.3988521695137024, "logits/rejected": -3.945779800415039, "logps/chosen": -1.4811797142028809, "logps/rejected": -18.924360275268555, "loss": 1.2401, "nll_loss": 1.2400920391082764, "rewards/accuracies": 1.0, "rewards/chosen": -0.14811795949935913, "rewards/margins": 1.7443180084228516, "rewards/rejected": -1.8924360275268555, "step": 467 }, { "epoch": 1.7660377358490567, "grad_norm": 0.33292269706726074, "learning_rate": 5.849056603773585e-06, "log_odds_chosen": 17.42066192626953, "log_odds_ratio": -0.016988694667816162, "logits/chosen": -0.5932101011276245, "logits/rejected": -6.109551429748535, "logps/chosen": -1.484053611755371, "logps/rejected": -18.59859275817871, "loss": 1.4429, "nll_loss": 1.4412463903427124, "rewards/accuracies": 1.0, "rewards/chosen": -0.14840535819530487, "rewards/margins": 1.711454153060913, "rewards/rejected": -1.8598594665527344, "step": 468 }, { "epoch": 1.769811320754717, "grad_norm": 0.33539214730262756, "learning_rate": 5.754716981132076e-06, "log_odds_chosen": 18.23859977722168, "log_odds_ratio": -0.058486614376306534, "logits/chosen": -0.5683273077011108, "logits/rejected": -6.111762046813965, "logps/chosen": -1.6735715866088867, "logps/rejected": -19.64827537536621, "loss": 1.5931, "nll_loss": 1.5872840881347656, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1673571765422821, "rewards/margins": 1.7974703311920166, "rewards/rejected": -1.9648274183273315, "step": 469 }, { "epoch": 1.7735849056603774, "grad_norm": 0.3669726252555847, "learning_rate": 5.660377358490566e-06, "log_odds_chosen": 16.35857391357422, "log_odds_ratio": -0.0035181858111172915, "logits/chosen": -2.055553913116455, "logits/rejected": -6.248283386230469, "logps/chosen": -1.3732891082763672, "logps/rejected": -17.36093521118164, "loss": 1.3191, "nll_loss": 1.318752408027649, "rewards/accuracies": 1.0, "rewards/chosen": -0.13732890784740448, "rewards/margins": 1.5987646579742432, "rewards/rejected": -1.736093521118164, "step": 470 }, { "epoch": 1.7773584905660378, "grad_norm": 0.3617085814476013, "learning_rate": 5.566037735849057e-06, "log_odds_chosen": 16.787891387939453, "log_odds_ratio": -9.142341696133371e-06, "logits/chosen": -1.4523394107818604, "logits/rejected": -7.092175006866455, "logps/chosen": -1.681269884109497, "logps/rejected": -18.210691452026367, "loss": 1.5766, "nll_loss": 1.5765758752822876, "rewards/accuracies": 1.0, "rewards/chosen": -0.16812700033187866, "rewards/margins": 1.652942180633545, "rewards/rejected": -1.8210691213607788, "step": 471 }, { "epoch": 1.7811320754716982, "grad_norm": 0.3479359447956085, "learning_rate": 5.4716981132075475e-06, "log_odds_chosen": 16.96417808532715, "log_odds_ratio": -0.00023999404220376164, "logits/chosen": -1.1192617416381836, "logits/rejected": -5.284356117248535, "logps/chosen": -1.3302626609802246, "logps/rejected": -17.81631088256836, "loss": 1.3527, "nll_loss": 1.3526952266693115, "rewards/accuracies": 1.0, "rewards/chosen": -0.13302627205848694, "rewards/margins": 1.6486049890518188, "rewards/rejected": -1.7816312313079834, "step": 472 }, { "epoch": 1.7849056603773585, "grad_norm": 0.41884541511535645, "learning_rate": 5.377358490566039e-06, "log_odds_chosen": 18.43604278564453, "log_odds_ratio": -3.442222805460915e-06, "logits/chosen": -1.2943966388702393, "logits/rejected": -5.607792377471924, "logps/chosen": -1.663355827331543, "logps/rejected": -19.86046600341797, "loss": 1.4899, "nll_loss": 1.4899128675460815, "rewards/accuracies": 1.0, "rewards/chosen": -0.1663355678319931, "rewards/margins": 1.8197109699249268, "rewards/rejected": -1.986046552658081, "step": 473 }, { "epoch": 1.788679245283019, "grad_norm": 0.39448869228363037, "learning_rate": 5.283018867924529e-06, "log_odds_chosen": 16.650920867919922, "log_odds_ratio": -0.0011353573063388467, "logits/chosen": -3.2340540885925293, "logits/rejected": -6.999566078186035, "logps/chosen": -1.4379664659500122, "logps/rejected": -17.618440628051758, "loss": 1.4977, "nll_loss": 1.4976294040679932, "rewards/accuracies": 1.0, "rewards/chosen": -0.1437966525554657, "rewards/margins": 1.6180473566055298, "rewards/rejected": -1.7618440389633179, "step": 474 }, { "epoch": 1.7924528301886793, "grad_norm": 0.4289974570274353, "learning_rate": 5.188679245283019e-06, "log_odds_chosen": 17.761314392089844, "log_odds_ratio": -0.03405510634183884, "logits/chosen": -1.8382487297058105, "logits/rejected": -6.273689270019531, "logps/chosen": -1.5025469064712524, "logps/rejected": -18.807750701904297, "loss": 1.3993, "nll_loss": 1.3959349393844604, "rewards/accuracies": 1.0, "rewards/chosen": -0.15025469660758972, "rewards/margins": 1.7305206060409546, "rewards/rejected": -1.8807752132415771, "step": 475 }, { "epoch": 1.7962264150943397, "grad_norm": 0.3701404333114624, "learning_rate": 5.09433962264151e-06, "log_odds_chosen": 18.288116455078125, "log_odds_ratio": -0.08911100029945374, "logits/chosen": -2.354094982147217, "logits/rejected": -4.444386959075928, "logps/chosen": -1.5296881198883057, "logps/rejected": -19.461259841918945, "loss": 1.4101, "nll_loss": 1.401210069656372, "rewards/accuracies": 0.875, "rewards/chosen": -0.15296879410743713, "rewards/margins": 1.7931572198867798, "rewards/rejected": -1.946125864982605, "step": 476 }, { "epoch": 1.8, "grad_norm": 0.3996965289115906, "learning_rate": 5e-06, "log_odds_chosen": 18.797988891601562, "log_odds_ratio": -3.426304101594724e-05, "logits/chosen": -0.7937726378440857, "logits/rejected": -4.634369373321533, "logps/chosen": -1.5706990957260132, "logps/rejected": -20.071020126342773, "loss": 1.4157, "nll_loss": 1.4157121181488037, "rewards/accuracies": 1.0, "rewards/chosen": -0.15706990659236908, "rewards/margins": 1.850032091140747, "rewards/rejected": -2.0071020126342773, "step": 477 }, { "epoch": 1.8037735849056604, "grad_norm": 0.35529616475105286, "learning_rate": 4.9056603773584915e-06, "log_odds_chosen": 18.047014236450195, "log_odds_ratio": -0.0028157131746411324, "logits/chosen": -2.256852149963379, "logits/rejected": -5.895351886749268, "logps/chosen": -1.6656599044799805, "logps/rejected": -19.412460327148438, "loss": 1.448, "nll_loss": 1.4476892948150635, "rewards/accuracies": 1.0, "rewards/chosen": -0.16656598448753357, "rewards/margins": 1.7746801376342773, "rewards/rejected": -1.9412461519241333, "step": 478 }, { "epoch": 1.8075471698113208, "grad_norm": 0.4262011647224426, "learning_rate": 4.811320754716982e-06, "log_odds_chosen": 18.934986114501953, "log_odds_ratio": -2.272450956297689e-06, "logits/chosen": -3.147275924682617, "logits/rejected": -7.059483528137207, "logps/chosen": -1.4018633365631104, "logps/rejected": -19.886085510253906, "loss": 1.3952, "nll_loss": 1.395203948020935, "rewards/accuracies": 1.0, "rewards/chosen": -0.14018632471561432, "rewards/margins": 1.8484222888946533, "rewards/rejected": -1.988608479499817, "step": 479 }, { "epoch": 1.8113207547169812, "grad_norm": 0.47693830728530884, "learning_rate": 4.716981132075472e-06, "log_odds_chosen": 19.023094177246094, "log_odds_ratio": -9.76030378296855e-07, "logits/chosen": -2.5737247467041016, "logits/rejected": -6.724173545837402, "logps/chosen": -1.2855720520019531, "logps/rejected": -19.879941940307617, "loss": 1.3567, "nll_loss": 1.3567023277282715, "rewards/accuracies": 1.0, "rewards/chosen": -0.1285572052001953, "rewards/margins": 1.8594369888305664, "rewards/rejected": -1.9879940748214722, "step": 480 }, { "epoch": 1.8150943396226416, "grad_norm": 0.33889421820640564, "learning_rate": 4.622641509433962e-06, "log_odds_chosen": 18.900848388671875, "log_odds_ratio": -2.5331991082566674e-07, "logits/chosen": -1.0899468660354614, "logits/rejected": -6.098740100860596, "logps/chosen": -1.4805465936660767, "logps/rejected": -20.047718048095703, "loss": 1.3192, "nll_loss": 1.3191674947738647, "rewards/accuracies": 1.0, "rewards/chosen": -0.14805465936660767, "rewards/margins": 1.8567172288894653, "rewards/rejected": -2.0047719478607178, "step": 481 }, { "epoch": 1.818867924528302, "grad_norm": 0.4076652526855469, "learning_rate": 4.5283018867924524e-06, "log_odds_chosen": 17.234874725341797, "log_odds_ratio": -2.1215018932707608e-05, "logits/chosen": -0.29048505425453186, "logits/rejected": -5.432765007019043, "logps/chosen": -1.6638712882995605, "logps/rejected": -18.638235092163086, "loss": 1.631, "nll_loss": 1.631001591682434, "rewards/accuracies": 1.0, "rewards/chosen": -0.166387140750885, "rewards/margins": 1.6974364519119263, "rewards/rejected": -1.863823413848877, "step": 482 }, { "epoch": 1.8226415094339623, "grad_norm": 0.35861098766326904, "learning_rate": 4.4339622641509435e-06, "log_odds_chosen": 18.069866180419922, "log_odds_ratio": -2.9603910661535338e-05, "logits/chosen": -1.9246575832366943, "logits/rejected": -4.993512153625488, "logps/chosen": -1.2558917999267578, "logps/rejected": -18.758874893188477, "loss": 1.5623, "nll_loss": 1.5623277425765991, "rewards/accuracies": 1.0, "rewards/chosen": -0.12558917701244354, "rewards/margins": 1.750298261642456, "rewards/rejected": -1.8758875131607056, "step": 483 }, { "epoch": 1.8264150943396227, "grad_norm": 0.374891459941864, "learning_rate": 4.339622641509434e-06, "log_odds_chosen": 17.780948638916016, "log_odds_ratio": -7.957565685501322e-06, "logits/chosen": -2.0383100509643555, "logits/rejected": -5.80636739730835, "logps/chosen": -1.346254825592041, "logps/rejected": -18.59038543701172, "loss": 1.2654, "nll_loss": 1.2653898000717163, "rewards/accuracies": 1.0, "rewards/chosen": -0.13462547957897186, "rewards/margins": 1.7244129180908203, "rewards/rejected": -1.8590385913848877, "step": 484 }, { "epoch": 1.830188679245283, "grad_norm": 0.37461966276168823, "learning_rate": 4.245283018867925e-06, "log_odds_chosen": 16.527099609375, "log_odds_ratio": -0.003529702080413699, "logits/chosen": -3.2636947631835938, "logits/rejected": -5.3443403244018555, "logps/chosen": -1.5424690246582031, "logps/rejected": -17.72702407836914, "loss": 1.532, "nll_loss": 1.5316277742385864, "rewards/accuracies": 1.0, "rewards/chosen": -0.15424689650535583, "rewards/margins": 1.6184556484222412, "rewards/rejected": -1.7727024555206299, "step": 485 }, { "epoch": 1.8339622641509434, "grad_norm": 0.42353299260139465, "learning_rate": 4.150943396226415e-06, "log_odds_chosen": 17.351669311523438, "log_odds_ratio": -0.02633853070437908, "logits/chosen": -1.5371365547180176, "logits/rejected": -6.403836250305176, "logps/chosen": -1.605021357536316, "logps/rejected": -18.7138671875, "loss": 1.4473, "nll_loss": 1.4446651935577393, "rewards/accuracies": 1.0, "rewards/chosen": -0.1605021357536316, "rewards/margins": 1.7108845710754395, "rewards/rejected": -1.8713867664337158, "step": 486 }, { "epoch": 1.8377358490566038, "grad_norm": 0.34952792525291443, "learning_rate": 4.056603773584905e-06, "log_odds_chosen": 15.732725143432617, "log_odds_ratio": -0.01928078942000866, "logits/chosen": -3.1891517639160156, "logits/rejected": -6.666206359863281, "logps/chosen": -1.8744854927062988, "logps/rejected": -17.41217041015625, "loss": 1.664, "nll_loss": 1.6620932817459106, "rewards/accuracies": 1.0, "rewards/chosen": -0.18744854629039764, "rewards/margins": 1.553768515586853, "rewards/rejected": -1.741217017173767, "step": 487 }, { "epoch": 1.8415094339622642, "grad_norm": 0.36892521381378174, "learning_rate": 3.962264150943396e-06, "log_odds_chosen": 19.160884857177734, "log_odds_ratio": -9.086857608053833e-05, "logits/chosen": -3.0095064640045166, "logits/rejected": -5.858531475067139, "logps/chosen": -1.4025964736938477, "logps/rejected": -20.25691032409668, "loss": 1.5139, "nll_loss": 1.513940691947937, "rewards/accuracies": 1.0, "rewards/chosen": -0.14025965332984924, "rewards/margins": 1.8854316473007202, "rewards/rejected": -2.025691270828247, "step": 488 }, { "epoch": 1.8452830188679246, "grad_norm": 0.4228150546550751, "learning_rate": 3.867924528301887e-06, "log_odds_chosen": 12.711286544799805, "log_odds_ratio": -0.03767332434654236, "logits/chosen": -0.7741556763648987, "logits/rejected": -4.395710468292236, "logps/chosen": -1.6399070024490356, "logps/rejected": -14.126274108886719, "loss": 1.4961, "nll_loss": 1.492356538772583, "rewards/accuracies": 1.0, "rewards/chosen": -0.16399070620536804, "rewards/margins": 1.2486367225646973, "rewards/rejected": -1.4126274585723877, "step": 489 }, { "epoch": 1.849056603773585, "grad_norm": 0.3612087666988373, "learning_rate": 3.7735849056603773e-06, "log_odds_chosen": 20.139719009399414, "log_odds_ratio": -2.384188633186568e-07, "logits/chosen": -1.845539927482605, "logits/rejected": -6.139976978302002, "logps/chosen": -1.3304532766342163, "logps/rejected": -21.026758193969727, "loss": 1.3543, "nll_loss": 1.3543174266815186, "rewards/accuracies": 1.0, "rewards/chosen": -0.13304531574249268, "rewards/margins": 1.969630479812622, "rewards/rejected": -2.1026759147644043, "step": 490 }, { "epoch": 1.8528301886792453, "grad_norm": 0.33791664242744446, "learning_rate": 3.679245283018868e-06, "log_odds_chosen": 17.015119552612305, "log_odds_ratio": -0.00011756140156649053, "logits/chosen": -0.6114660501480103, "logits/rejected": -5.934682369232178, "logps/chosen": -1.4735393524169922, "logps/rejected": -18.142919540405273, "loss": 1.493, "nll_loss": 1.4929838180541992, "rewards/accuracies": 1.0, "rewards/chosen": -0.14735394716262817, "rewards/margins": 1.666938066482544, "rewards/rejected": -1.814292073249817, "step": 491 }, { "epoch": 1.8566037735849057, "grad_norm": 0.42551860213279724, "learning_rate": 3.5849056603773586e-06, "log_odds_chosen": 15.526764869689941, "log_odds_ratio": -0.06459427624940872, "logits/chosen": -0.3648894727230072, "logits/rejected": -5.722745418548584, "logps/chosen": -1.61395263671875, "logps/rejected": -16.911762237548828, "loss": 1.4662, "nll_loss": 1.4597229957580566, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16139525175094604, "rewards/margins": 1.5297811031341553, "rewards/rejected": -1.691176414489746, "step": 492 }, { "epoch": 1.860377358490566, "grad_norm": 0.3618764579296112, "learning_rate": 3.4905660377358493e-06, "log_odds_chosen": 17.239789962768555, "log_odds_ratio": -1.4826684946456226e-06, "logits/chosen": -0.21888774633407593, "logits/rejected": -4.190219879150391, "logps/chosen": -1.3131389617919922, "logps/rejected": -18.119144439697266, "loss": 1.271, "nll_loss": 1.270983099937439, "rewards/accuracies": 1.0, "rewards/chosen": -0.13131389021873474, "rewards/margins": 1.6806005239486694, "rewards/rejected": -1.8119144439697266, "step": 493 }, { "epoch": 1.8641509433962264, "grad_norm": 0.4203263223171234, "learning_rate": 3.3962264150943395e-06, "log_odds_chosen": 17.666906356811523, "log_odds_ratio": -4.238808833179064e-05, "logits/chosen": -2.081303119659424, "logits/rejected": -6.445936679840088, "logps/chosen": -1.6142175197601318, "logps/rejected": -18.978389739990234, "loss": 1.6097, "nll_loss": 1.6096843481063843, "rewards/accuracies": 1.0, "rewards/chosen": -0.1614217460155487, "rewards/margins": 1.736417293548584, "rewards/rejected": -1.897839069366455, "step": 494 }, { "epoch": 1.8679245283018868, "grad_norm": 0.32720765471458435, "learning_rate": 3.30188679245283e-06, "log_odds_chosen": 18.920677185058594, "log_odds_ratio": -1.1772006018873071e-06, "logits/chosen": 0.03428873419761658, "logits/rejected": -4.708007335662842, "logps/chosen": -1.2015572786331177, "logps/rejected": -19.660449981689453, "loss": 1.3305, "nll_loss": 1.3305293321609497, "rewards/accuracies": 1.0, "rewards/chosen": -0.12015574425458908, "rewards/margins": 1.8458893299102783, "rewards/rejected": -1.9660451412200928, "step": 495 }, { "epoch": 1.8716981132075472, "grad_norm": 0.4030371308326721, "learning_rate": 3.207547169811321e-06, "log_odds_chosen": 17.407262802124023, "log_odds_ratio": -9.635530295781791e-05, "logits/chosen": -1.2139384746551514, "logits/rejected": -5.052234172821045, "logps/chosen": -1.58628511428833, "logps/rejected": -18.708999633789062, "loss": 1.4232, "nll_loss": 1.4231928586959839, "rewards/accuracies": 1.0, "rewards/chosen": -0.15862850844860077, "rewards/margins": 1.7122715711593628, "rewards/rejected": -1.87090003490448, "step": 496 }, { "epoch": 1.8754716981132076, "grad_norm": 0.3452160954475403, "learning_rate": 3.1132075471698115e-06, "log_odds_chosen": 19.35751724243164, "log_odds_ratio": -2.719517624427681e-06, "logits/chosen": -1.754660725593567, "logits/rejected": -6.674939155578613, "logps/chosen": -1.4923593997955322, "logps/rejected": -20.52996826171875, "loss": 1.3139, "nll_loss": 1.3138580322265625, "rewards/accuracies": 1.0, "rewards/chosen": -0.14923594892024994, "rewards/margins": 1.9037607908248901, "rewards/rejected": -2.0529966354370117, "step": 497 }, { "epoch": 1.879245283018868, "grad_norm": 0.31435737013816833, "learning_rate": 3.018867924528302e-06, "log_odds_chosen": 17.633760452270508, "log_odds_ratio": -0.07248206436634064, "logits/chosen": -1.433829665184021, "logits/rejected": -6.44128942489624, "logps/chosen": -1.4513802528381348, "logps/rejected": -18.749958038330078, "loss": 1.5574, "nll_loss": 1.5501039028167725, "rewards/accuracies": 0.9375, "rewards/chosen": -0.14513802528381348, "rewards/margins": 1.7298578023910522, "rewards/rejected": -1.8749958276748657, "step": 498 }, { "epoch": 1.8830188679245283, "grad_norm": 0.415445476770401, "learning_rate": 2.9245283018867924e-06, "log_odds_chosen": 16.431304931640625, "log_odds_ratio": -0.002142214449122548, "logits/chosen": -0.9190161824226379, "logits/rejected": -5.215124130249023, "logps/chosen": -1.5988054275512695, "logps/rejected": -17.76313018798828, "loss": 1.5031, "nll_loss": 1.5028951168060303, "rewards/accuracies": 1.0, "rewards/chosen": -0.15988054871559143, "rewards/margins": 1.6164326667785645, "rewards/rejected": -1.776313066482544, "step": 499 }, { "epoch": 1.8867924528301887, "grad_norm": 0.34781375527381897, "learning_rate": 2.830188679245283e-06, "log_odds_chosen": 16.619857788085938, "log_odds_ratio": -0.0440763421356678, "logits/chosen": -1.902965784072876, "logits/rejected": -7.038515090942383, "logps/chosen": -1.5125619173049927, "logps/rejected": -17.874229431152344, "loss": 1.45, "nll_loss": 1.4455456733703613, "rewards/accuracies": 1.0, "rewards/chosen": -0.15125618875026703, "rewards/margins": 1.6361666917800903, "rewards/rejected": -1.7874228954315186, "step": 500 }, { "epoch": 1.890566037735849, "grad_norm": 0.4033926725387573, "learning_rate": 2.7358490566037738e-06, "log_odds_chosen": 18.919782638549805, "log_odds_ratio": -0.04001227393746376, "logits/chosen": -1.2150888442993164, "logits/rejected": -4.071748733520508, "logps/chosen": -1.1845972537994385, "logps/rejected": -19.4738712310791, "loss": 1.2712, "nll_loss": 1.2671959400177002, "rewards/accuracies": 1.0, "rewards/chosen": -0.11845972388982773, "rewards/margins": 1.8289273977279663, "rewards/rejected": -1.9473869800567627, "step": 501 }, { "epoch": 1.8943396226415095, "grad_norm": 0.4240841865539551, "learning_rate": 2.6415094339622644e-06, "log_odds_chosen": 18.493478775024414, "log_odds_ratio": -5.438935772872355e-07, "logits/chosen": -1.6389120817184448, "logits/rejected": -5.454832077026367, "logps/chosen": -1.660611629486084, "logps/rejected": -19.89701271057129, "loss": 1.6124, "nll_loss": 1.6123967170715332, "rewards/accuracies": 1.0, "rewards/chosen": -0.1660611629486084, "rewards/margins": 1.8236401081085205, "rewards/rejected": -1.989701271057129, "step": 502 }, { "epoch": 1.8981132075471698, "grad_norm": 0.379930317401886, "learning_rate": 2.547169811320755e-06, "log_odds_chosen": 18.02582550048828, "log_odds_ratio": -5.4391039157053456e-06, "logits/chosen": -0.07657934725284576, "logits/rejected": -4.238829135894775, "logps/chosen": -1.2943615913391113, "logps/rejected": -18.893966674804688, "loss": 1.2938, "nll_loss": 1.2937697172164917, "rewards/accuracies": 1.0, "rewards/chosen": -0.12943615019321442, "rewards/margins": 1.759960412979126, "rewards/rejected": -1.8893966674804688, "step": 503 }, { "epoch": 1.9018867924528302, "grad_norm": 0.4327671527862549, "learning_rate": 2.4528301886792457e-06, "log_odds_chosen": 17.240550994873047, "log_odds_ratio": -0.08509234338998795, "logits/chosen": -0.8051078915596008, "logits/rejected": -4.25758695602417, "logps/chosen": -1.6059653759002686, "logps/rejected": -18.553573608398438, "loss": 1.4932, "nll_loss": 1.4846444129943848, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16059653460979462, "rewards/margins": 1.694760799407959, "rewards/rejected": -1.85535728931427, "step": 504 }, { "epoch": 1.9056603773584906, "grad_norm": 0.3554707169532776, "learning_rate": 2.358490566037736e-06, "log_odds_chosen": 15.833232879638672, "log_odds_ratio": -0.008360692299902439, "logits/chosen": -2.8660082817077637, "logits/rejected": -5.381879806518555, "logps/chosen": -1.5323365926742554, "logps/rejected": -17.071136474609375, "loss": 1.5256, "nll_loss": 1.5248125791549683, "rewards/accuracies": 1.0, "rewards/chosen": -0.15323364734649658, "rewards/margins": 1.553879976272583, "rewards/rejected": -1.70711350440979, "step": 505 }, { "epoch": 1.909433962264151, "grad_norm": 0.40750032663345337, "learning_rate": 2.2641509433962262e-06, "log_odds_chosen": 18.917707443237305, "log_odds_ratio": -1.3187664080760442e-06, "logits/chosen": -2.4501075744628906, "logits/rejected": -4.550923824310303, "logps/chosen": -1.6083648204803467, "logps/rejected": -20.180192947387695, "loss": 1.422, "nll_loss": 1.4219931364059448, "rewards/accuracies": 1.0, "rewards/chosen": -0.16083648800849915, "rewards/margins": 1.8571827411651611, "rewards/rejected": -2.018019199371338, "step": 506 }, { "epoch": 1.9132075471698113, "grad_norm": 0.3912452459335327, "learning_rate": 2.169811320754717e-06, "log_odds_chosen": 16.282840728759766, "log_odds_ratio": -0.0001460638886783272, "logits/chosen": -1.1887915134429932, "logits/rejected": -5.86929988861084, "logps/chosen": -1.3933725357055664, "logps/rejected": -17.320636749267578, "loss": 1.3639, "nll_loss": 1.3638955354690552, "rewards/accuracies": 1.0, "rewards/chosen": -0.13933727145195007, "rewards/margins": 1.5927265882492065, "rewards/rejected": -1.7320637702941895, "step": 507 }, { "epoch": 1.9169811320754717, "grad_norm": 0.3855321705341339, "learning_rate": 2.0754716981132075e-06, "log_odds_chosen": 17.623600006103516, "log_odds_ratio": -4.701381840277463e-06, "logits/chosen": -0.25701072812080383, "logits/rejected": -5.022630214691162, "logps/chosen": -1.5275490283966064, "logps/rejected": -18.818309783935547, "loss": 1.3883, "nll_loss": 1.3883423805236816, "rewards/accuracies": 1.0, "rewards/chosen": -0.15275491774082184, "rewards/margins": 1.7290761470794678, "rewards/rejected": -1.8818310499191284, "step": 508 }, { "epoch": 1.920754716981132, "grad_norm": 0.4456048607826233, "learning_rate": 1.981132075471698e-06, "log_odds_chosen": 19.397144317626953, "log_odds_ratio": -0.08647830039262772, "logits/chosen": -2.1884102821350098, "logits/rejected": -4.864468574523926, "logps/chosen": -1.1446378231048584, "logps/rejected": -19.82569694519043, "loss": 1.1951, "nll_loss": 1.1864542961120605, "rewards/accuracies": 0.9375, "rewards/chosen": -0.11446378380060196, "rewards/margins": 1.8681057691574097, "rewards/rejected": -1.982569694519043, "step": 509 }, { "epoch": 1.9245283018867925, "grad_norm": 0.38309693336486816, "learning_rate": 1.8867924528301887e-06, "log_odds_chosen": 17.333568572998047, "log_odds_ratio": -0.0018029811326414347, "logits/chosen": -1.732748031616211, "logits/rejected": -5.647907733917236, "logps/chosen": -1.6717514991760254, "logps/rejected": -18.71475601196289, "loss": 1.6, "nll_loss": 1.5998656749725342, "rewards/accuracies": 1.0, "rewards/chosen": -0.16717512905597687, "rewards/margins": 1.7043005228042603, "rewards/rejected": -1.8714755773544312, "step": 510 }, { "epoch": 1.9283018867924528, "grad_norm": 0.42033860087394714, "learning_rate": 1.7924528301886793e-06, "log_odds_chosen": 18.408260345458984, "log_odds_ratio": -0.07517862319946289, "logits/chosen": -1.3036704063415527, "logits/rejected": -6.073916435241699, "logps/chosen": -1.500823736190796, "logps/rejected": -19.579364776611328, "loss": 1.3745, "nll_loss": 1.3670070171356201, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15008236467838287, "rewards/margins": 1.807854175567627, "rewards/rejected": -1.957936406135559, "step": 511 }, { "epoch": 1.9320754716981132, "grad_norm": 0.43365299701690674, "learning_rate": 1.6981132075471698e-06, "log_odds_chosen": 16.9204158782959, "log_odds_ratio": -0.010048388503491879, "logits/chosen": -1.4353740215301514, "logits/rejected": -4.844537734985352, "logps/chosen": -1.4958455562591553, "logps/rejected": -18.103038787841797, "loss": 1.2225, "nll_loss": 1.2214566469192505, "rewards/accuracies": 1.0, "rewards/chosen": -0.14958456158638, "rewards/margins": 1.6607191562652588, "rewards/rejected": -1.810303807258606, "step": 512 }, { "epoch": 1.9358490566037736, "grad_norm": 0.41454991698265076, "learning_rate": 1.6037735849056604e-06, "log_odds_chosen": 17.356592178344727, "log_odds_ratio": -0.02275564707815647, "logits/chosen": -2.385758399963379, "logits/rejected": -6.4152421951293945, "logps/chosen": -1.799842119216919, "logps/rejected": -18.904577255249023, "loss": 1.6663, "nll_loss": 1.664039134979248, "rewards/accuracies": 1.0, "rewards/chosen": -0.1799842119216919, "rewards/margins": 1.7104735374450684, "rewards/rejected": -1.8904577493667603, "step": 513 }, { "epoch": 1.939622641509434, "grad_norm": 0.3880457878112793, "learning_rate": 1.509433962264151e-06, "log_odds_chosen": 15.238203048706055, "log_odds_ratio": -0.01757655292749405, "logits/chosen": -1.1916530132293701, "logits/rejected": -5.214702606201172, "logps/chosen": -1.4446446895599365, "logps/rejected": -16.36115264892578, "loss": 1.5477, "nll_loss": 1.5459623336791992, "rewards/accuracies": 1.0, "rewards/chosen": -0.14446447789669037, "rewards/margins": 1.4916508197784424, "rewards/rejected": -1.636115312576294, "step": 514 }, { "epoch": 1.9433962264150944, "grad_norm": 0.47617748379707336, "learning_rate": 1.4150943396226415e-06, "log_odds_chosen": 16.34947967529297, "log_odds_ratio": -0.0004449410189408809, "logits/chosen": -2.4440126419067383, "logits/rejected": -5.788022994995117, "logps/chosen": -1.6996843814849854, "logps/rejected": -17.788955688476562, "loss": 1.4768, "nll_loss": 1.4767930507659912, "rewards/accuracies": 1.0, "rewards/chosen": -0.16996845602989197, "rewards/margins": 1.6089271306991577, "rewards/rejected": -1.778895616531372, "step": 515 }, { "epoch": 1.9471698113207547, "grad_norm": 0.40238529443740845, "learning_rate": 1.3207547169811322e-06, "log_odds_chosen": 18.627809524536133, "log_odds_ratio": -4.4853818508272525e-06, "logits/chosen": -1.0439683198928833, "logits/rejected": -5.652813911437988, "logps/chosen": -1.4960782527923584, "logps/rejected": -19.751792907714844, "loss": 1.3031, "nll_loss": 1.3030521869659424, "rewards/accuracies": 1.0, "rewards/chosen": -0.1496078073978424, "rewards/margins": 1.8255715370178223, "rewards/rejected": -1.9751791954040527, "step": 516 }, { "epoch": 1.950943396226415, "grad_norm": 0.41020315885543823, "learning_rate": 1.2264150943396229e-06, "log_odds_chosen": 17.56089973449707, "log_odds_ratio": -0.049375876784324646, "logits/chosen": -0.404629647731781, "logits/rejected": -5.214582443237305, "logps/chosen": -1.3766056299209595, "logps/rejected": -18.530118942260742, "loss": 1.2775, "nll_loss": 1.2725828886032104, "rewards/accuracies": 0.9375, "rewards/chosen": -0.13766056299209595, "rewards/margins": 1.7153512239456177, "rewards/rejected": -1.8530117273330688, "step": 517 }, { "epoch": 1.9547169811320755, "grad_norm": 0.5101109147071838, "learning_rate": 1.1320754716981131e-06, "log_odds_chosen": 19.110702514648438, "log_odds_ratio": -1.9727249309653416e-05, "logits/chosen": -2.067298412322998, "logits/rejected": -5.900681018829346, "logps/chosen": -1.4211063385009766, "logps/rejected": -20.093542098999023, "loss": 1.5492, "nll_loss": 1.5492104291915894, "rewards/accuracies": 1.0, "rewards/chosen": -0.14211063086986542, "rewards/margins": 1.867243766784668, "rewards/rejected": -2.00935435295105, "step": 518 }, { "epoch": 1.9584905660377359, "grad_norm": 0.37587234377861023, "learning_rate": 1.0377358490566038e-06, "log_odds_chosen": 17.110137939453125, "log_odds_ratio": -0.0024836428929120302, "logits/chosen": -1.4974017143249512, "logits/rejected": -4.80449914932251, "logps/chosen": -1.6387498378753662, "logps/rejected": -18.401721954345703, "loss": 1.3765, "nll_loss": 1.376255989074707, "rewards/accuracies": 1.0, "rewards/chosen": -0.16387496888637543, "rewards/margins": 1.6762971878051758, "rewards/rejected": -1.8401721715927124, "step": 519 }, { "epoch": 1.9622641509433962, "grad_norm": 0.35912734270095825, "learning_rate": 9.433962264150943e-07, "log_odds_chosen": 14.829183578491211, "log_odds_ratio": -0.15696510672569275, "logits/chosen": -1.3042566776275635, "logits/rejected": -6.660241603851318, "logps/chosen": -1.6939544677734375, "logps/rejected": -16.27517318725586, "loss": 1.5792, "nll_loss": 1.5635175704956055, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16939544677734375, "rewards/margins": 1.4581220149993896, "rewards/rejected": -1.6275173425674438, "step": 520 }, { "epoch": 1.9660377358490566, "grad_norm": 0.35668516159057617, "learning_rate": 8.490566037735849e-07, "log_odds_chosen": 18.270919799804688, "log_odds_ratio": -0.09477733820676804, "logits/chosen": -1.378293752670288, "logits/rejected": -4.495629787445068, "logps/chosen": -1.3341569900512695, "logps/rejected": -19.200576782226562, "loss": 1.3796, "nll_loss": 1.3701521158218384, "rewards/accuracies": 0.9375, "rewards/chosen": -0.13341571390628815, "rewards/margins": 1.786642074584961, "rewards/rejected": -1.9200576543807983, "step": 521 }, { "epoch": 1.969811320754717, "grad_norm": 0.40886542201042175, "learning_rate": 7.547169811320755e-07, "log_odds_chosen": 17.687740325927734, "log_odds_ratio": -0.02107013203203678, "logits/chosen": -1.3524720668792725, "logits/rejected": -5.940802574157715, "logps/chosen": -1.4188027381896973, "logps/rejected": -18.724929809570312, "loss": 1.2466, "nll_loss": 1.2445337772369385, "rewards/accuracies": 1.0, "rewards/chosen": -0.14188028872013092, "rewards/margins": 1.730612874031067, "rewards/rejected": -1.8724932670593262, "step": 522 }, { "epoch": 1.9735849056603774, "grad_norm": 0.4124062657356262, "learning_rate": 6.603773584905661e-07, "log_odds_chosen": 15.467195510864258, "log_odds_ratio": -0.046547312289476395, "logits/chosen": -1.658618450164795, "logits/rejected": -6.218527793884277, "logps/chosen": -1.8022215366363525, "logps/rejected": -16.846336364746094, "loss": 1.6649, "nll_loss": 1.660265326499939, "rewards/accuracies": 0.9375, "rewards/chosen": -0.18022215366363525, "rewards/margins": 1.5044115781784058, "rewards/rejected": -1.684633731842041, "step": 523 }, { "epoch": 1.9773584905660377, "grad_norm": 0.38052645325660706, "learning_rate": 5.660377358490566e-07, "log_odds_chosen": 19.262311935424805, "log_odds_ratio": -5.662463422595465e-07, "logits/chosen": -2.7431163787841797, "logits/rejected": -7.38714075088501, "logps/chosen": -1.8242018222808838, "logps/rejected": -20.831439971923828, "loss": 1.4274, "nll_loss": 1.4274204969406128, "rewards/accuracies": 1.0, "rewards/chosen": -0.18242016434669495, "rewards/margins": 1.9007238149642944, "rewards/rejected": -2.083144187927246, "step": 524 }, { "epoch": 1.9811320754716981, "grad_norm": 0.37092652916908264, "learning_rate": 4.7169811320754717e-07, "log_odds_chosen": 18.2442626953125, "log_odds_ratio": -0.06912226229906082, "logits/chosen": -2.5067853927612305, "logits/rejected": -7.028432369232178, "logps/chosen": -1.737499475479126, "logps/rejected": -19.720117568969727, "loss": 1.5818, "nll_loss": 1.5748425722122192, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1737499237060547, "rewards/margins": 1.7982620000839233, "rewards/rejected": -1.9720118045806885, "step": 525 }, { "epoch": 1.9849056603773585, "grad_norm": 0.39420637488365173, "learning_rate": 3.773584905660378e-07, "log_odds_chosen": 16.574909210205078, "log_odds_ratio": -0.07863669842481613, "logits/chosen": -1.2284739017486572, "logits/rejected": -4.368472099304199, "logps/chosen": -1.423435926437378, "logps/rejected": -17.645301818847656, "loss": 1.3318, "nll_loss": 1.323962688446045, "rewards/accuracies": 0.9375, "rewards/chosen": -0.14234358072280884, "rewards/margins": 1.6221866607666016, "rewards/rejected": -1.7645303010940552, "step": 526 }, { "epoch": 1.9886792452830189, "grad_norm": 0.39243245124816895, "learning_rate": 2.830188679245283e-07, "log_odds_chosen": 17.68594741821289, "log_odds_ratio": -3.010060481756227e-06, "logits/chosen": -2.5417604446411133, "logits/rejected": -5.660012722015381, "logps/chosen": -1.545115351676941, "logps/rejected": -18.896028518676758, "loss": 1.6222, "nll_loss": 1.6222350597381592, "rewards/accuracies": 1.0, "rewards/chosen": -0.15451154112815857, "rewards/margins": 1.735091209411621, "rewards/rejected": -1.889602780342102, "step": 527 }, { "epoch": 1.9924528301886792, "grad_norm": 0.37130141258239746, "learning_rate": 1.886792452830189e-07, "log_odds_chosen": 19.309329986572266, "log_odds_ratio": -3.0204251743271016e-05, "logits/chosen": -1.1784321069717407, "logits/rejected": -4.860876083374023, "logps/chosen": -1.4668664932250977, "logps/rejected": -20.424339294433594, "loss": 1.4702, "nll_loss": 1.47021484375, "rewards/accuracies": 1.0, "rewards/chosen": -0.1466866433620453, "rewards/margins": 1.895747184753418, "rewards/rejected": -2.042433977127075, "step": 528 }, { "epoch": 1.9962264150943396, "grad_norm": 0.3702877461910248, "learning_rate": 9.433962264150944e-08, "log_odds_chosen": 20.07143783569336, "log_odds_ratio": -8.866221037351352e-07, "logits/chosen": -0.7111749649047852, "logits/rejected": -4.296523094177246, "logps/chosen": -1.6536825895309448, "logps/rejected": -21.461524963378906, "loss": 1.5899, "nll_loss": 1.5898981094360352, "rewards/accuracies": 1.0, "rewards/chosen": -0.16536825895309448, "rewards/margins": 1.9807841777801514, "rewards/rejected": -2.1461524963378906, "step": 529 }, { "epoch": 2.0, "grad_norm": 0.5285991430282593, "learning_rate": 0.0, "log_odds_chosen": 17.8989315032959, "log_odds_ratio": -0.0035720046143978834, "logits/chosen": -1.8316022157669067, "logits/rejected": -4.168578147888184, "logps/chosen": -1.5039995908737183, "logps/rejected": -19.07382583618164, "loss": 1.3711, "nll_loss": 1.3707059621810913, "rewards/accuracies": 1.0, "rewards/chosen": -0.15039995312690735, "rewards/margins": 1.7569825649261475, "rewards/rejected": -1.9073827266693115, "step": 530 } ], "logging_steps": 1, "max_steps": 530, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }