{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002, "grad_norm": 65.3963394165039, "learning_rate": 9.99999605215876e-07, "log_odds_chosen": -0.8190158009529114, "log_odds_ratio": -1.2814843654632568, "logits/chosen": 4.715331077575684, "logits/rejected": 4.279630184173584, "logps/chosen": -5.136763572692871, "logps/rejected": -4.324328899383545, "loss": 5.2847, "nll_loss": 5.156502723693848, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -0.5136763453483582, "rewards/margins": -0.08124345541000366, "rewards/rejected": -0.4324328899383545, "step": 10 }, { "epoch": 0.004, "grad_norm": 31.458417892456055, "learning_rate": 9.99998420864127e-07, "log_odds_chosen": 2.2933459281921387, "log_odds_ratio": -0.16047514975070953, "logits/chosen": 4.675119876861572, "logits/rejected": 4.148844242095947, "logps/chosen": -2.0288569927215576, "logps/rejected": -4.125270843505859, "loss": 2.1545, "nll_loss": 2.138467311859131, "rewards/accuracies": 1.0, "rewards/chosen": -0.202885702252388, "rewards/margins": 0.20964138209819794, "rewards/rejected": -0.4125271439552307, "step": 20 }, { "epoch": 0.006, "grad_norm": 2.146294355392456, "learning_rate": 9.999964469466235e-07, "log_odds_chosen": 4.871854782104492, "log_odds_ratio": -0.011376792564988136, "logits/chosen": 4.699484825134277, "logits/rejected": 4.198317527770996, "logps/chosen": -0.46535515785217285, "logps/rejected": -4.1831560134887695, "loss": 0.673, "nll_loss": 0.6718279123306274, "rewards/accuracies": 1.0, "rewards/chosen": -0.046535518020391464, "rewards/margins": 0.37178006768226624, "rewards/rejected": -0.418315589427948, "step": 30 }, { "epoch": 0.008, "grad_norm": 1.2165888547897339, "learning_rate": 9.999936834664829e-07, "log_odds_chosen": 5.287127494812012, "log_odds_ratio": -0.012974077835679054, "logits/chosen": 5.084625720977783, "logits/rejected": 4.31998872756958, "logps/chosen": -0.42751750349998474, "logps/rejected": -4.2079010009765625, "loss": 0.581, "nll_loss": 0.5796665549278259, "rewards/accuracies": 1.0, "rewards/chosen": -0.042751748114824295, "rewards/margins": 0.37803831696510315, "rewards/rejected": -0.4207901060581207, "step": 40 }, { "epoch": 0.01, "grad_norm": 1.303725242614746, "learning_rate": 9.999901304280684e-07, "log_odds_chosen": 5.532227993011475, "log_odds_ratio": -0.004792512394487858, "logits/chosen": 5.3110761642456055, "logits/rejected": 4.4331769943237305, "logps/chosen": -0.25468605756759644, "logps/rejected": -4.176350116729736, "loss": 0.4505, "nll_loss": 0.4500355124473572, "rewards/accuracies": 1.0, "rewards/chosen": -0.025468608364462852, "rewards/margins": 0.392166405916214, "rewards/rejected": -0.4176350235939026, "step": 50 }, { "epoch": 0.012, "grad_norm": 1.1405003070831299, "learning_rate": 9.999857878369915e-07, "log_odds_chosen": 5.661147117614746, "log_odds_ratio": -0.0041319518350064754, "logits/chosen": 5.300236701965332, "logits/rejected": 4.518916130065918, "logps/chosen": -0.23303405940532684, "logps/rejected": -4.197211265563965, "loss": 0.4324, "nll_loss": 0.43202918767929077, "rewards/accuracies": 1.0, "rewards/chosen": -0.023303406313061714, "rewards/margins": 0.3964177370071411, "rewards/rejected": -0.4197211265563965, "step": 60 }, { "epoch": 0.014, "grad_norm": 1.0366944074630737, "learning_rate": 9.999806557001091e-07, "log_odds_chosen": 5.830837726593018, "log_odds_ratio": -0.0040290202014148235, "logits/chosen": 5.22878885269165, "logits/rejected": 4.501748085021973, "logps/chosen": -0.2715502679347992, "logps/rejected": -4.354897499084473, "loss": 0.4535, "nll_loss": 0.4530617594718933, "rewards/accuracies": 1.0, "rewards/chosen": -0.02715502679347992, "rewards/margins": 0.40833473205566406, "rewards/rejected": -0.4354897439479828, "step": 70 }, { "epoch": 0.016, "grad_norm": 1.7524036169052124, "learning_rate": 9.999747340255258e-07, "log_odds_chosen": 5.399399757385254, "log_odds_ratio": -0.03860381990671158, "logits/chosen": 5.2602057456970215, "logits/rejected": 4.485162258148193, "logps/chosen": -0.4779418408870697, "logps/rejected": -4.250370979309082, "loss": 0.6027, "nll_loss": 0.5988515615463257, "rewards/accuracies": 1.0, "rewards/chosen": -0.04779418930411339, "rewards/margins": 0.37724289298057556, "rewards/rejected": -0.42503708600997925, "step": 80 }, { "epoch": 0.018, "grad_norm": 1.5966495275497437, "learning_rate": 9.999680228225929e-07, "log_odds_chosen": 5.465554714202881, "log_odds_ratio": -0.0058350348845124245, "logits/chosen": 5.391225814819336, "logits/rejected": 4.655618190765381, "logps/chosen": -0.3645828366279602, "logps/rejected": -4.291352272033691, "loss": 0.5409, "nll_loss": 0.5403406023979187, "rewards/accuracies": 1.0, "rewards/chosen": -0.03645828738808632, "rewards/margins": 0.3926769495010376, "rewards/rejected": -0.4291352331638336, "step": 90 }, { "epoch": 0.02, "grad_norm": 1.346524953842163, "learning_rate": 9.999605221019081e-07, "log_odds_chosen": 5.4150872230529785, "log_odds_ratio": -0.007754945661872625, "logits/chosen": 5.327327728271484, "logits/rejected": 4.481075763702393, "logps/chosen": -0.32950958609580994, "logps/rejected": -4.129395008087158, "loss": 0.523, "nll_loss": 0.5222245454788208, "rewards/accuracies": 1.0, "rewards/chosen": -0.03295096009969711, "rewards/margins": 0.37998852133750916, "rewards/rejected": -0.41293948888778687, "step": 100 }, { "epoch": 0.022, "grad_norm": 1.507949709892273, "learning_rate": 9.99952231875316e-07, "log_odds_chosen": 5.820103168487549, "log_odds_ratio": -0.004413021262735128, "logits/chosen": 5.473015308380127, "logits/rejected": 4.637102127075195, "logps/chosen": -0.23467369377613068, "logps/rejected": -4.182504653930664, "loss": 0.4463, "nll_loss": 0.44582876563072205, "rewards/accuracies": 1.0, "rewards/chosen": -0.023467373102903366, "rewards/margins": 0.3947831392288208, "rewards/rejected": -0.4182504713535309, "step": 110 }, { "epoch": 0.024, "grad_norm": 1.0920977592468262, "learning_rate": 9.99943152155908e-07, "log_odds_chosen": 5.703483581542969, "log_odds_ratio": -0.004920828156173229, "logits/chosen": 5.438079357147217, "logits/rejected": 4.6812543869018555, "logps/chosen": -0.2602264881134033, "logps/rejected": -4.301055908203125, "loss": 0.4553, "nll_loss": 0.4547879099845886, "rewards/accuracies": 1.0, "rewards/chosen": -0.026022648438811302, "rewards/margins": 0.40408292412757874, "rewards/rejected": -0.430105596780777, "step": 120 }, { "epoch": 0.026, "grad_norm": 4.339075088500977, "learning_rate": 9.999332829580226e-07, "log_odds_chosen": 6.10992431640625, "log_odds_ratio": -0.003227031324058771, "logits/chosen": 5.582632541656494, "logits/rejected": 4.824194431304932, "logps/chosen": -0.22963353991508484, "logps/rejected": -4.447937965393066, "loss": 0.3871, "nll_loss": 0.3867906928062439, "rewards/accuracies": 1.0, "rewards/chosen": -0.022963354364037514, "rewards/margins": 0.4218304753303528, "rewards/rejected": -0.44479379057884216, "step": 130 }, { "epoch": 0.028, "grad_norm": 1.552656888961792, "learning_rate": 9.999226242972443e-07, "log_odds_chosen": 5.268128395080566, "log_odds_ratio": -0.0394761897623539, "logits/chosen": 5.4539618492126465, "logits/rejected": 4.794400215148926, "logps/chosen": -0.5745034217834473, "logps/rejected": -4.274801731109619, "loss": 0.6375, "nll_loss": 0.6335927248001099, "rewards/accuracies": 1.0, "rewards/chosen": -0.057450342923402786, "rewards/margins": 0.37002983689308167, "rewards/rejected": -0.42748013138771057, "step": 140 }, { "epoch": 0.03, "grad_norm": 1.268286943435669, "learning_rate": 9.999111761904044e-07, "log_odds_chosen": 5.7115678787231445, "log_odds_ratio": -0.004843422677367926, "logits/chosen": 5.66718864440918, "logits/rejected": 4.800919532775879, "logps/chosen": -0.24551162123680115, "logps/rejected": -4.150035381317139, "loss": 0.4222, "nll_loss": 0.42170292139053345, "rewards/accuracies": 1.0, "rewards/chosen": -0.024551164358854294, "rewards/margins": 0.39045244455337524, "rewards/rejected": -0.41500353813171387, "step": 150 }, { "epoch": 0.032, "grad_norm": 0.9994913339614868, "learning_rate": 9.998989386555814e-07, "log_odds_chosen": 5.558432579040527, "log_odds_ratio": -0.03805621713399887, "logits/chosen": 5.588236331939697, "logits/rejected": 4.655357837677002, "logps/chosen": -0.39581215381622314, "logps/rejected": -4.203229904174805, "loss": 0.564, "nll_loss": 0.5602010488510132, "rewards/accuracies": 1.0, "rewards/chosen": -0.039581213146448135, "rewards/margins": 0.38074177503585815, "rewards/rejected": -0.42032304406166077, "step": 160 }, { "epoch": 0.034, "grad_norm": 1.5232833623886108, "learning_rate": 9.998859117121e-07, "log_odds_chosen": 5.822688579559326, "log_odds_ratio": -0.00494243111461401, "logits/chosen": 5.531490325927734, "logits/rejected": 4.725270748138428, "logps/chosen": -0.30422043800354004, "logps/rejected": -4.311378479003906, "loss": 0.4878, "nll_loss": 0.48729926347732544, "rewards/accuracies": 1.0, "rewards/chosen": -0.030422046780586243, "rewards/margins": 0.40071582794189453, "rewards/rejected": -0.43113788962364197, "step": 170 }, { "epoch": 0.036, "grad_norm": 1.8162505626678467, "learning_rate": 9.99872095380531e-07, "log_odds_chosen": 5.460837364196777, "log_odds_ratio": -0.006293863989412785, "logits/chosen": 5.799867153167725, "logits/rejected": 4.889405250549316, "logps/chosen": -0.4189876616001129, "logps/rejected": -4.497962951660156, "loss": 0.5841, "nll_loss": 0.5834853053092957, "rewards/accuracies": 1.0, "rewards/chosen": -0.041898760944604874, "rewards/margins": 0.4078975319862366, "rewards/rejected": -0.44979625940322876, "step": 180 }, { "epoch": 0.038, "grad_norm": 1.0612788200378418, "learning_rate": 9.99857489682693e-07, "log_odds_chosen": 5.999560356140137, "log_odds_ratio": -0.0036633089184761047, "logits/chosen": 5.7635297775268555, "logits/rejected": 4.811951637268066, "logps/chosen": -0.23803098499774933, "logps/rejected": -4.333648204803467, "loss": 0.405, "nll_loss": 0.4046503007411957, "rewards/accuracies": 1.0, "rewards/chosen": -0.023803099989891052, "rewards/margins": 0.40956172347068787, "rewards/rejected": -0.4333648681640625, "step": 190 }, { "epoch": 0.04, "grad_norm": 1.027740716934204, "learning_rate": 9.998420946416499e-07, "log_odds_chosen": 5.840235710144043, "log_odds_ratio": -0.0032528419978916645, "logits/chosen": 6.116091728210449, "logits/rejected": 4.838078498840332, "logps/chosen": -0.17583955824375153, "logps/rejected": -4.136788368225098, "loss": 0.34, "nll_loss": 0.33965107798576355, "rewards/accuracies": 1.0, "rewards/chosen": -0.017583955079317093, "rewards/margins": 0.3960948884487152, "rewards/rejected": -0.4136788249015808, "step": 200 }, { "epoch": 0.042, "grad_norm": 1.241952657699585, "learning_rate": 9.99825910281713e-07, "log_odds_chosen": 5.606609344482422, "log_odds_ratio": -0.004507771693170071, "logits/chosen": 5.888861179351807, "logits/rejected": 4.8203277587890625, "logps/chosen": -0.3218591511249542, "logps/rejected": -4.385091781616211, "loss": 0.5027, "nll_loss": 0.5022562742233276, "rewards/accuracies": 1.0, "rewards/chosen": -0.03218591958284378, "rewards/margins": 0.40632328391075134, "rewards/rejected": -0.43850916624069214, "step": 210 }, { "epoch": 0.044, "grad_norm": 1.4364992380142212, "learning_rate": 9.998089366284391e-07, "log_odds_chosen": 5.468771934509277, "log_odds_ratio": -0.005312102846801281, "logits/chosen": 6.073598384857178, "logits/rejected": 4.962668418884277, "logps/chosen": -0.26109611988067627, "logps/rejected": -4.004148006439209, "loss": 0.4251, "nll_loss": 0.42458876967430115, "rewards/accuracies": 1.0, "rewards/chosen": -0.026109611615538597, "rewards/margins": 0.37430518865585327, "rewards/rejected": -0.4004148542881012, "step": 220 }, { "epoch": 0.046, "grad_norm": 1.757817268371582, "learning_rate": 9.99791173708632e-07, "log_odds_chosen": 5.703909873962402, "log_odds_ratio": -0.005749993491917849, "logits/chosen": 5.923938751220703, "logits/rejected": 4.9076128005981445, "logps/chosen": -0.303940087556839, "logps/rejected": -4.2231125831604, "loss": 0.502, "nll_loss": 0.5013762712478638, "rewards/accuracies": 1.0, "rewards/chosen": -0.03039400652050972, "rewards/margins": 0.39191722869873047, "rewards/rejected": -0.4223112463951111, "step": 230 }, { "epoch": 0.048, "grad_norm": 1.7104049921035767, "learning_rate": 9.997726215503421e-07, "log_odds_chosen": 5.704580783843994, "log_odds_ratio": -0.0046667782589793205, "logits/chosen": 6.144198894500732, "logits/rejected": 5.00585412979126, "logps/chosen": -0.23261837661266327, "logps/rejected": -4.192623615264893, "loss": 0.406, "nll_loss": 0.4055107533931732, "rewards/accuracies": 1.0, "rewards/chosen": -0.023261837661266327, "rewards/margins": 0.3960005044937134, "rewards/rejected": -0.4192623496055603, "step": 240 }, { "epoch": 0.05, "grad_norm": 1.3610446453094482, "learning_rate": 9.997532801828658e-07, "log_odds_chosen": 5.620147705078125, "log_odds_ratio": -0.006411246955394745, "logits/chosen": 6.17356014251709, "logits/rejected": 4.94666051864624, "logps/chosen": -0.37518149614334106, "logps/rejected": -4.470229625701904, "loss": 0.5417, "nll_loss": 0.541038990020752, "rewards/accuracies": 1.0, "rewards/chosen": -0.03751814737915993, "rewards/margins": 0.40950489044189453, "rewards/rejected": -0.4470230042934418, "step": 250 }, { "epoch": 0.052, "grad_norm": 1.0927062034606934, "learning_rate": 9.997331496367454e-07, "log_odds_chosen": 5.706599235534668, "log_odds_ratio": -0.03751751780509949, "logits/chosen": 5.932498931884766, "logits/rejected": 4.845080375671387, "logps/chosen": -0.372866690158844, "logps/rejected": -4.376112937927246, "loss": 0.511, "nll_loss": 0.5072113275527954, "rewards/accuracies": 1.0, "rewards/chosen": -0.0372866652905941, "rewards/margins": 0.40032467246055603, "rewards/rejected": -0.4376114010810852, "step": 260 }, { "epoch": 0.054, "grad_norm": 1.3025884628295898, "learning_rate": 9.9971222994377e-07, "log_odds_chosen": 5.923962593078613, "log_odds_ratio": -0.00389072741381824, "logits/chosen": 6.127083778381348, "logits/rejected": 5.024117946624756, "logps/chosen": -0.2511846721172333, "logps/rejected": -4.392674446105957, "loss": 0.4287, "nll_loss": 0.4283398687839508, "rewards/accuracies": 1.0, "rewards/chosen": -0.025118466466665268, "rewards/margins": 0.4141489863395691, "rewards/rejected": -0.43926748633384705, "step": 270 }, { "epoch": 0.056, "grad_norm": 1.2036632299423218, "learning_rate": 9.996905211369745e-07, "log_odds_chosen": 5.720652103424072, "log_odds_ratio": -0.004509281367063522, "logits/chosen": 6.336692810058594, "logits/rejected": 4.863389492034912, "logps/chosen": -0.23792394995689392, "logps/rejected": -4.175551414489746, "loss": 0.4007, "nll_loss": 0.40022626519203186, "rewards/accuracies": 1.0, "rewards/chosen": -0.02379239723086357, "rewards/margins": 0.3937627375125885, "rewards/rejected": -0.4175550937652588, "step": 280 }, { "epoch": 0.058, "grad_norm": 1.5273919105529785, "learning_rate": 9.996680232506403e-07, "log_odds_chosen": 5.752829551696777, "log_odds_ratio": -0.0047701271250844, "logits/chosen": 6.426280975341797, "logits/rejected": 5.035511493682861, "logps/chosen": -0.26676952838897705, "logps/rejected": -4.218510150909424, "loss": 0.433, "nll_loss": 0.43251532316207886, "rewards/accuracies": 1.0, "rewards/chosen": -0.026676952838897705, "rewards/margins": 0.3951739966869354, "rewards/rejected": -0.4218510091304779, "step": 290 }, { "epoch": 0.06, "grad_norm": 1.0958389043807983, "learning_rate": 9.996447363202945e-07, "log_odds_chosen": 6.128770351409912, "log_odds_ratio": -0.0030450369231402874, "logits/chosen": 6.263443470001221, "logits/rejected": 4.961173057556152, "logps/chosen": -0.1671239286661148, "logps/rejected": -4.129599571228027, "loss": 0.3439, "nll_loss": 0.3435896039009094, "rewards/accuracies": 1.0, "rewards/chosen": -0.01671239361166954, "rewards/margins": 0.3962475657463074, "rewards/rejected": -0.4129599630832672, "step": 300 }, { "epoch": 0.062, "grad_norm": 1.805482029914856, "learning_rate": 9.996206603827104e-07, "log_odds_chosen": 5.737055778503418, "log_odds_ratio": -0.005403733812272549, "logits/chosen": 6.4715704917907715, "logits/rejected": 5.08980655670166, "logps/chosen": -0.259274959564209, "logps/rejected": -4.267286777496338, "loss": 0.4205, "nll_loss": 0.41998934745788574, "rewards/accuracies": 1.0, "rewards/chosen": -0.02592749521136284, "rewards/margins": 0.4008011221885681, "rewards/rejected": -0.42672863602638245, "step": 310 }, { "epoch": 0.064, "grad_norm": 2.0311777591705322, "learning_rate": 9.995957954759072e-07, "log_odds_chosen": 5.937811851501465, "log_odds_ratio": -0.0035209276247769594, "logits/chosen": 6.572950839996338, "logits/rejected": 5.044772624969482, "logps/chosen": -0.20874032378196716, "logps/rejected": -4.212109565734863, "loss": 0.3839, "nll_loss": 0.38349801301956177, "rewards/accuracies": 1.0, "rewards/chosen": -0.020874034613370895, "rewards/margins": 0.4003369212150574, "rewards/rejected": -0.4212109446525574, "step": 320 }, { "epoch": 0.066, "grad_norm": 0.9871509075164795, "learning_rate": 9.995701416391498e-07, "log_odds_chosen": 5.633445739746094, "log_odds_ratio": -0.006787987891584635, "logits/chosen": 6.359896659851074, "logits/rejected": 5.004020690917969, "logps/chosen": -0.3151080906391144, "logps/rejected": -4.1641435623168945, "loss": 0.4868, "nll_loss": 0.48611459136009216, "rewards/accuracies": 1.0, "rewards/chosen": -0.03151080757379532, "rewards/margins": 0.38490360975265503, "rewards/rejected": -0.41641440987586975, "step": 330 }, { "epoch": 0.068, "grad_norm": 1.1638275384902954, "learning_rate": 9.995436989129493e-07, "log_odds_chosen": 5.565814018249512, "log_odds_ratio": -0.0380808524787426, "logits/chosen": 6.562009334564209, "logits/rejected": 5.072074890136719, "logps/chosen": -0.4138278365135193, "logps/rejected": -4.205318450927734, "loss": 0.5372, "nll_loss": 0.5333948731422424, "rewards/accuracies": 1.0, "rewards/chosen": -0.04138278216123581, "rewards/margins": 0.37914904952049255, "rewards/rejected": -0.42053183913230896, "step": 340 }, { "epoch": 0.07, "grad_norm": 1.259528398513794, "learning_rate": 9.995164673390625e-07, "log_odds_chosen": 5.665590763092041, "log_odds_ratio": -0.004536316730082035, "logits/chosen": 6.59546422958374, "logits/rejected": 5.061656475067139, "logps/chosen": -0.24752482771873474, "logps/rejected": -4.178080081939697, "loss": 0.436, "nll_loss": 0.435528039932251, "rewards/accuracies": 1.0, "rewards/chosen": -0.024752482771873474, "rewards/margins": 0.3930554986000061, "rewards/rejected": -0.41780799627304077, "step": 350 }, { "epoch": 0.072, "grad_norm": 0.9618722200393677, "learning_rate": 9.994884469604912e-07, "log_odds_chosen": 5.650336742401123, "log_odds_ratio": -0.006273099686950445, "logits/chosen": 6.78259801864624, "logits/rejected": 5.114623069763184, "logps/chosen": -0.3190111815929413, "logps/rejected": -4.265050888061523, "loss": 0.492, "nll_loss": 0.49138012528419495, "rewards/accuracies": 1.0, "rewards/chosen": -0.03190112113952637, "rewards/margins": 0.39460399746894836, "rewards/rejected": -0.42650508880615234, "step": 360 }, { "epoch": 0.074, "grad_norm": 1.8128719329833984, "learning_rate": 9.99459637821484e-07, "log_odds_chosen": 5.660717010498047, "log_odds_ratio": -0.00564736919477582, "logits/chosen": 6.645632743835449, "logits/rejected": 5.070769309997559, "logps/chosen": -0.3160380721092224, "logps/rejected": -4.343341827392578, "loss": 0.4867, "nll_loss": 0.48614615201950073, "rewards/accuracies": 1.0, "rewards/chosen": -0.03160380572080612, "rewards/margins": 0.40273040533065796, "rewards/rejected": -0.4343341886997223, "step": 370 }, { "epoch": 0.076, "grad_norm": 0.8544633984565735, "learning_rate": 9.994300399675341e-07, "log_odds_chosen": 5.958064079284668, "log_odds_ratio": -0.0036747027188539505, "logits/chosen": 6.629894256591797, "logits/rejected": 5.200135231018066, "logps/chosen": -0.24141213297843933, "logps/rejected": -4.325425624847412, "loss": 0.3942, "nll_loss": 0.39384061098098755, "rewards/accuracies": 1.0, "rewards/chosen": -0.02414121851325035, "rewards/margins": 0.40840139985084534, "rewards/rejected": -0.432542622089386, "step": 380 }, { "epoch": 0.078, "grad_norm": 1.1105395555496216, "learning_rate": 9.99399653445381e-07, "log_odds_chosen": 5.5304718017578125, "log_odds_ratio": -0.005698531400412321, "logits/chosen": 6.621185302734375, "logits/rejected": 5.155228614807129, "logps/chosen": -0.29173988103866577, "logps/rejected": -4.212221622467041, "loss": 0.4526, "nll_loss": 0.4519975781440735, "rewards/accuracies": 1.0, "rewards/chosen": -0.029173988848924637, "rewards/margins": 0.3920482397079468, "rewards/rejected": -0.4212222099304199, "step": 390 }, { "epoch": 0.08, "grad_norm": 1.6442934274673462, "learning_rate": 9.993684783030088e-07, "log_odds_chosen": 5.126379489898682, "log_odds_ratio": -0.07202430069446564, "logits/chosen": 6.615963935852051, "logits/rejected": 5.162625312805176, "logps/chosen": -0.6974700689315796, "logps/rejected": -4.2643022537231445, "loss": 0.7109, "nll_loss": 0.703654408454895, "rewards/accuracies": 1.0, "rewards/chosen": -0.06974700093269348, "rewards/margins": 0.3566831648349762, "rewards/rejected": -0.4264301657676697, "step": 400 }, { "epoch": 0.082, "grad_norm": 1.3002727031707764, "learning_rate": 9.993365145896472e-07, "log_odds_chosen": 5.979889392852783, "log_odds_ratio": -0.004915163386613131, "logits/chosen": 6.857092380523682, "logits/rejected": 5.179081916809082, "logps/chosen": -0.24974127113819122, "logps/rejected": -4.301809310913086, "loss": 0.4222, "nll_loss": 0.4217225909233093, "rewards/accuracies": 1.0, "rewards/chosen": -0.024974128231406212, "rewards/margins": 0.4052067697048187, "rewards/rejected": -0.43018093705177307, "step": 410 }, { "epoch": 0.084, "grad_norm": 1.097319483757019, "learning_rate": 9.993037623557716e-07, "log_odds_chosen": 5.912144660949707, "log_odds_ratio": -0.0037007895298302174, "logits/chosen": 6.805932521820068, "logits/rejected": 5.209836006164551, "logps/chosen": -0.20354747772216797, "logps/rejected": -4.239697456359863, "loss": 0.367, "nll_loss": 0.3666548430919647, "rewards/accuracies": 1.0, "rewards/chosen": -0.020354747772216797, "rewards/margins": 0.4036150574684143, "rewards/rejected": -0.42396974563598633, "step": 420 }, { "epoch": 0.086, "grad_norm": 1.1259775161743164, "learning_rate": 9.99270221653102e-07, "log_odds_chosen": 5.578958034515381, "log_odds_ratio": -0.004651770927011967, "logits/chosen": 6.696142673492432, "logits/rejected": 5.181495666503906, "logps/chosen": -0.26944500207901, "logps/rejected": -4.172377586364746, "loss": 0.4469, "nll_loss": 0.4464372992515564, "rewards/accuracies": 1.0, "rewards/chosen": -0.026944497600197792, "rewards/margins": 0.39029327034950256, "rewards/rejected": -0.4172378182411194, "step": 430 }, { "epoch": 0.088, "grad_norm": 0.9064623117446899, "learning_rate": 9.992358925346039e-07, "log_odds_chosen": 5.705955505371094, "log_odds_ratio": -0.004217900801450014, "logits/chosen": 6.9355645179748535, "logits/rejected": 5.180816650390625, "logps/chosen": -0.24397726356983185, "logps/rejected": -4.252895355224609, "loss": 0.4094, "nll_loss": 0.4089633822441101, "rewards/accuracies": 1.0, "rewards/chosen": -0.024397725239396095, "rewards/margins": 0.40089184045791626, "rewards/rejected": -0.4252895414829254, "step": 440 }, { "epoch": 0.09, "grad_norm": 0.8521889448165894, "learning_rate": 9.992007750544876e-07, "log_odds_chosen": 5.821618556976318, "log_odds_ratio": -0.004431777633726597, "logits/chosen": 6.8784499168396, "logits/rejected": 5.235538005828857, "logps/chosen": -0.19668449461460114, "logps/rejected": -4.064878940582275, "loss": 0.3568, "nll_loss": 0.35638895630836487, "rewards/accuracies": 1.0, "rewards/chosen": -0.019668448716402054, "rewards/margins": 0.3868194818496704, "rewards/rejected": -0.40648794174194336, "step": 450 }, { "epoch": 0.092, "grad_norm": 1.6359530687332153, "learning_rate": 9.991648692682082e-07, "log_odds_chosen": 5.4888739585876465, "log_odds_ratio": -0.006954997777938843, "logits/chosen": 6.953388214111328, "logits/rejected": 5.251856803894043, "logps/chosen": -0.3691895604133606, "logps/rejected": -4.369284629821777, "loss": 0.5277, "nll_loss": 0.527037501335144, "rewards/accuracies": 1.0, "rewards/chosen": -0.03691895306110382, "rewards/margins": 0.40000948309898376, "rewards/rejected": -0.43692851066589355, "step": 460 }, { "epoch": 0.094, "grad_norm": 0.8356350064277649, "learning_rate": 9.991281752324662e-07, "log_odds_chosen": 5.758913993835449, "log_odds_ratio": -0.009601155295968056, "logits/chosen": 7.113343238830566, "logits/rejected": 5.304739475250244, "logps/chosen": -0.40266579389572144, "logps/rejected": -4.459908485412598, "loss": 0.5367, "nll_loss": 0.5357569456100464, "rewards/accuracies": 1.0, "rewards/chosen": -0.04026658087968826, "rewards/margins": 0.4057241976261139, "rewards/rejected": -0.44599080085754395, "step": 470 }, { "epoch": 0.096, "grad_norm": 1.1993978023529053, "learning_rate": 9.990906930052064e-07, "log_odds_chosen": 5.660733222961426, "log_odds_ratio": -0.0048911115154623985, "logits/chosen": 6.888608455657959, "logits/rejected": 5.173002243041992, "logps/chosen": -0.2543579638004303, "logps/rejected": -4.178557872772217, "loss": 0.4326, "nll_loss": 0.432079941034317, "rewards/accuracies": 1.0, "rewards/chosen": -0.0254357997328043, "rewards/margins": 0.3924200236797333, "rewards/rejected": -0.41785579919815063, "step": 480 }, { "epoch": 0.098, "grad_norm": 1.227656602859497, "learning_rate": 9.990524226456181e-07, "log_odds_chosen": 5.751139163970947, "log_odds_ratio": -0.0039621032774448395, "logits/chosen": 7.126898765563965, "logits/rejected": 5.232128143310547, "logps/chosen": -0.21529486775398254, "logps/rejected": -4.235083103179932, "loss": 0.3931, "nll_loss": 0.3926805853843689, "rewards/accuracies": 1.0, "rewards/chosen": -0.021529484540224075, "rewards/margins": 0.40197882056236267, "rewards/rejected": -0.42350831627845764, "step": 490 }, { "epoch": 0.1, "grad_norm": 1.734540581703186, "learning_rate": 9.990133642141357e-07, "log_odds_chosen": 5.843937873840332, "log_odds_ratio": -0.004580129869282246, "logits/chosen": 6.9687180519104, "logits/rejected": 5.231310844421387, "logps/chosen": -0.26360735297203064, "logps/rejected": -4.283794403076172, "loss": 0.4435, "nll_loss": 0.44299355149269104, "rewards/accuracies": 1.0, "rewards/chosen": -0.026360735297203064, "rewards/margins": 0.4020187258720398, "rewards/rejected": -0.42837944626808167, "step": 500 }, { "epoch": 0.102, "grad_norm": 0.9491193294525146, "learning_rate": 9.989735177724376e-07, "log_odds_chosen": 5.786317348480225, "log_odds_ratio": -0.004183798097074032, "logits/chosen": 7.18112850189209, "logits/rejected": 5.287343502044678, "logps/chosen": -0.22414204478263855, "logps/rejected": -4.21284294128418, "loss": 0.4007, "nll_loss": 0.40025025606155396, "rewards/accuracies": 1.0, "rewards/chosen": -0.022414203733205795, "rewards/margins": 0.398870050907135, "rewards/rejected": -0.4212842881679535, "step": 510 }, { "epoch": 0.104, "grad_norm": 1.030848741531372, "learning_rate": 9.98932883383447e-07, "log_odds_chosen": 5.739052772521973, "log_odds_ratio": -0.0045068832114338875, "logits/chosen": 7.108189582824707, "logits/rejected": 5.303038120269775, "logps/chosen": -0.23939397931098938, "logps/rejected": -4.202080726623535, "loss": 0.4088, "nll_loss": 0.40833625197410583, "rewards/accuracies": 1.0, "rewards/chosen": -0.02393939718604088, "rewards/margins": 0.3962686359882355, "rewards/rejected": -0.42020803689956665, "step": 520 }, { "epoch": 0.106, "grad_norm": 1.0825598239898682, "learning_rate": 9.98891461111331e-07, "log_odds_chosen": 5.774600028991699, "log_odds_ratio": -0.006199331022799015, "logits/chosen": 6.995899200439453, "logits/rejected": 5.291529655456543, "logps/chosen": -0.3272141218185425, "logps/rejected": -4.27773904800415, "loss": 0.5016, "nll_loss": 0.5009850263595581, "rewards/accuracies": 1.0, "rewards/chosen": -0.03272141516208649, "rewards/margins": 0.3950525224208832, "rewards/rejected": -0.42777395248413086, "step": 530 }, { "epoch": 0.108, "grad_norm": 1.3412989377975464, "learning_rate": 9.98849251021501e-07, "log_odds_chosen": 5.7968854904174805, "log_odds_ratio": -0.0045556845143437386, "logits/chosen": 7.20325231552124, "logits/rejected": 5.244124889373779, "logps/chosen": -0.24636170268058777, "logps/rejected": -4.205013275146484, "loss": 0.4178, "nll_loss": 0.41730350255966187, "rewards/accuracies": 1.0, "rewards/chosen": -0.024636171758174896, "rewards/margins": 0.39586514234542847, "rewards/rejected": -0.4205012917518616, "step": 540 }, { "epoch": 0.11, "grad_norm": 1.575515866279602, "learning_rate": 9.988062531806124e-07, "log_odds_chosen": 5.310318946838379, "log_odds_ratio": -0.005928396712988615, "logits/chosen": 7.1364030838012695, "logits/rejected": 5.408736228942871, "logps/chosen": -0.29959380626678467, "logps/rejected": -4.096985340118408, "loss": 0.4559, "nll_loss": 0.4553408622741699, "rewards/accuracies": 1.0, "rewards/chosen": -0.029959384351968765, "rewards/margins": 0.3797391355037689, "rewards/rejected": -0.409698486328125, "step": 550 }, { "epoch": 0.112, "grad_norm": 1.533535122871399, "learning_rate": 9.98762467656565e-07, "log_odds_chosen": 5.585514068603516, "log_odds_ratio": -0.005855232011526823, "logits/chosen": 7.099005222320557, "logits/rejected": 5.344486236572266, "logps/chosen": -0.26959171891212463, "logps/rejected": -4.101973533630371, "loss": 0.4464, "nll_loss": 0.44586220383644104, "rewards/accuracies": 1.0, "rewards/chosen": -0.026959169656038284, "rewards/margins": 0.38323819637298584, "rewards/rejected": -0.4101974070072174, "step": 560 }, { "epoch": 0.114, "grad_norm": 1.2855852842330933, "learning_rate": 9.987178945185019e-07, "log_odds_chosen": 5.4633026123046875, "log_odds_ratio": -0.04020964354276657, "logits/chosen": 7.327073574066162, "logits/rejected": 5.485535144805908, "logps/chosen": -0.527285635471344, "logps/rejected": -4.342796802520752, "loss": 0.5844, "nll_loss": 0.580333411693573, "rewards/accuracies": 1.0, "rewards/chosen": -0.0527285635471344, "rewards/margins": 0.3815511465072632, "rewards/rejected": -0.43427973985671997, "step": 570 }, { "epoch": 0.116, "grad_norm": 1.5283328294754028, "learning_rate": 9.9867253383681e-07, "log_odds_chosen": 5.730422019958496, "log_odds_ratio": -0.004346213769167662, "logits/chosen": 7.3099493980407715, "logits/rejected": 5.391806602478027, "logps/chosen": -0.21712183952331543, "logps/rejected": -4.153719425201416, "loss": 0.3985, "nll_loss": 0.3980781137943268, "rewards/accuracies": 1.0, "rewards/chosen": -0.021712180227041245, "rewards/margins": 0.39365971088409424, "rewards/rejected": -0.4153718948364258, "step": 580 }, { "epoch": 0.118, "grad_norm": 1.4839589595794678, "learning_rate": 9.986263856831204e-07, "log_odds_chosen": 5.6398820877075195, "log_odds_ratio": -0.0066932193003594875, "logits/chosen": 7.262752532958984, "logits/rejected": 5.236682891845703, "logps/chosen": -0.3139415979385376, "logps/rejected": -4.306033611297607, "loss": 0.4748, "nll_loss": 0.47409239411354065, "rewards/accuracies": 1.0, "rewards/chosen": -0.03139416128396988, "rewards/margins": 0.399209201335907, "rewards/rejected": -0.43060335516929626, "step": 590 }, { "epoch": 0.12, "grad_norm": 0.9886634945869446, "learning_rate": 9.98579450130307e-07, "log_odds_chosen": 5.754696846008301, "log_odds_ratio": -0.00456590112298727, "logits/chosen": 7.449967384338379, "logits/rejected": 5.422483444213867, "logps/chosen": -0.27530747652053833, "logps/rejected": -4.352094650268555, "loss": 0.4306, "nll_loss": 0.4301741123199463, "rewards/accuracies": 1.0, "rewards/chosen": -0.027530744671821594, "rewards/margins": 0.40767866373062134, "rewards/rejected": -0.4352094531059265, "step": 600 }, { "epoch": 0.122, "grad_norm": 1.4325470924377441, "learning_rate": 9.985317272524874e-07, "log_odds_chosen": 5.595803260803223, "log_odds_ratio": -0.03834452107548714, "logits/chosen": 7.211038112640381, "logits/rejected": 5.284869194030762, "logps/chosen": -0.47327929735183716, "logps/rejected": -4.319891929626465, "loss": 0.5631, "nll_loss": 0.5592620372772217, "rewards/accuracies": 1.0, "rewards/chosen": -0.047327931970357895, "rewards/margins": 0.38466131687164307, "rewards/rejected": -0.4319891929626465, "step": 610 }, { "epoch": 0.124, "grad_norm": 2.981921434402466, "learning_rate": 9.98483217125023e-07, "log_odds_chosen": 5.390117645263672, "log_odds_ratio": -0.04195297136902809, "logits/chosen": 7.140059471130371, "logits/rejected": 5.075173377990723, "logps/chosen": -0.5093042850494385, "logps/rejected": -4.201330661773682, "loss": 0.6128, "nll_loss": 0.6085899472236633, "rewards/accuracies": 1.0, "rewards/chosen": -0.05093042925000191, "rewards/margins": 0.3692026138305664, "rewards/rejected": -0.4201330244541168, "step": 620 }, { "epoch": 0.126, "grad_norm": 1.075921654701233, "learning_rate": 9.984339198245173e-07, "log_odds_chosen": 5.808260917663574, "log_odds_ratio": -0.004262637346982956, "logits/chosen": 7.3148956298828125, "logits/rejected": 5.2592692375183105, "logps/chosen": -0.2157396525144577, "logps/rejected": -4.103490352630615, "loss": 0.3892, "nll_loss": 0.38874632120132446, "rewards/accuracies": 1.0, "rewards/chosen": -0.02157396636903286, "rewards/margins": 0.3887750506401062, "rewards/rejected": -0.4103490710258484, "step": 630 }, { "epoch": 0.128, "grad_norm": 1.1971662044525146, "learning_rate": 9.98383835428818e-07, "log_odds_chosen": 5.86260461807251, "log_odds_ratio": -0.004329982213675976, "logits/chosen": 7.283989906311035, "logits/rejected": 5.19638729095459, "logps/chosen": -0.20845699310302734, "logps/rejected": -4.228349208831787, "loss": 0.3953, "nll_loss": 0.3948729336261749, "rewards/accuracies": 1.0, "rewards/chosen": -0.020845701918005943, "rewards/margins": 0.401989221572876, "rewards/rejected": -0.4228348731994629, "step": 640 }, { "epoch": 0.13, "grad_norm": 1.2581068277359009, "learning_rate": 9.983329640170147e-07, "log_odds_chosen": 5.425110816955566, "log_odds_ratio": -0.006511847022920847, "logits/chosen": 7.272087097167969, "logits/rejected": 5.347121238708496, "logps/chosen": -0.3270640969276428, "logps/rejected": -4.076705455780029, "loss": 0.5046, "nll_loss": 0.5039029717445374, "rewards/accuracies": 1.0, "rewards/chosen": -0.03270640969276428, "rewards/margins": 0.37496417760849, "rewards/rejected": -0.4076705574989319, "step": 650 }, { "epoch": 0.132, "grad_norm": 1.01791250705719, "learning_rate": 9.98281305669441e-07, "log_odds_chosen": 6.078815460205078, "log_odds_ratio": -0.03692106530070305, "logits/chosen": 7.250723361968994, "logits/rejected": 5.302999973297119, "logps/chosen": -0.3468395471572876, "logps/rejected": -4.299695014953613, "loss": 0.4662, "nll_loss": 0.4625566005706787, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0346839539706707, "rewards/margins": 0.3952855169773102, "rewards/rejected": -0.42996945977211, "step": 660 }, { "epoch": 0.134, "grad_norm": 1.122180461883545, "learning_rate": 9.982288604676718e-07, "log_odds_chosen": 5.818023681640625, "log_odds_ratio": -0.004336019977927208, "logits/chosen": 7.457089424133301, "logits/rejected": 5.3300580978393555, "logps/chosen": -0.2588104009628296, "logps/rejected": -4.38973331451416, "loss": 0.4135, "nll_loss": 0.4130234718322754, "rewards/accuracies": 1.0, "rewards/chosen": -0.02588104084134102, "rewards/margins": 0.41309231519699097, "rewards/rejected": -0.4389733672142029, "step": 670 }, { "epoch": 0.136, "grad_norm": 1.0593383312225342, "learning_rate": 9.981756284945255e-07, "log_odds_chosen": 5.7178754806518555, "log_odds_ratio": -0.005921982694417238, "logits/chosen": 7.282762050628662, "logits/rejected": 5.327040195465088, "logps/chosen": -0.301537424325943, "logps/rejected": -4.317378997802734, "loss": 0.4691, "nll_loss": 0.468517541885376, "rewards/accuracies": 1.0, "rewards/chosen": -0.03015374206006527, "rewards/margins": 0.4015841484069824, "rewards/rejected": -0.43173784017562866, "step": 680 }, { "epoch": 0.138, "grad_norm": 0.9468008279800415, "learning_rate": 9.981216098340627e-07, "log_odds_chosen": 5.965208053588867, "log_odds_ratio": -0.0031548873521387577, "logits/chosen": 7.590447425842285, "logits/rejected": 5.359862327575684, "logps/chosen": -0.1855352222919464, "logps/rejected": -4.264601707458496, "loss": 0.3458, "nll_loss": 0.34546294808387756, "rewards/accuracies": 1.0, "rewards/chosen": -0.01855352520942688, "rewards/margins": 0.4079066216945648, "rewards/rejected": -0.4264601767063141, "step": 690 }, { "epoch": 0.14, "grad_norm": 1.06123685836792, "learning_rate": 9.980668045715862e-07, "log_odds_chosen": 5.978029251098633, "log_odds_ratio": -0.003460073145106435, "logits/chosen": 7.531003475189209, "logits/rejected": 5.088316917419434, "logps/chosen": -0.21890413761138916, "logps/rejected": -4.297341346740723, "loss": 0.3989, "nll_loss": 0.3985983729362488, "rewards/accuracies": 1.0, "rewards/chosen": -0.021890413016080856, "rewards/margins": 0.40784376859664917, "rewards/rejected": -0.42973417043685913, "step": 700 }, { "epoch": 0.142, "grad_norm": 1.053776741027832, "learning_rate": 9.980112127936409e-07, "log_odds_chosen": 6.1902618408203125, "log_odds_ratio": -0.003118072869256139, "logits/chosen": 7.4942307472229, "logits/rejected": 5.289072513580322, "logps/chosen": -0.17368003726005554, "logps/rejected": -4.381903648376465, "loss": 0.3448, "nll_loss": 0.3445047438144684, "rewards/accuracies": 1.0, "rewards/chosen": -0.017368001863360405, "rewards/margins": 0.4208224415779114, "rewards/rejected": -0.43819040060043335, "step": 710 }, { "epoch": 0.144, "grad_norm": 1.3939045667648315, "learning_rate": 9.97954834588014e-07, "log_odds_chosen": 5.788779258728027, "log_odds_ratio": -0.0040774294175207615, "logits/chosen": 7.601674556732178, "logits/rejected": 5.371070861816406, "logps/chosen": -0.19214893877506256, "logps/rejected": -4.052358150482178, "loss": 0.354, "nll_loss": 0.35358208417892456, "rewards/accuracies": 1.0, "rewards/chosen": -0.019214894622564316, "rewards/margins": 0.3860209584236145, "rewards/rejected": -0.40523582696914673, "step": 720 }, { "epoch": 0.146, "grad_norm": 1.0089713335037231, "learning_rate": 9.97897670043734e-07, "log_odds_chosen": 5.96481990814209, "log_odds_ratio": -0.0040161386132240295, "logits/chosen": 7.472073554992676, "logits/rejected": 5.23934268951416, "logps/chosen": -0.1985219419002533, "logps/rejected": -4.087368488311768, "loss": 0.3694, "nll_loss": 0.36903810501098633, "rewards/accuracies": 1.0, "rewards/chosen": -0.01985219679772854, "rewards/margins": 0.38888460397720337, "rewards/rejected": -0.40873679518699646, "step": 730 }, { "epoch": 0.148, "grad_norm": 1.3487112522125244, "learning_rate": 9.97839719251072e-07, "log_odds_chosen": 5.731852054595947, "log_odds_ratio": -0.004747814033180475, "logits/chosen": 7.481254577636719, "logits/rejected": 5.359426498413086, "logps/chosen": -0.2614215314388275, "logps/rejected": -4.317784309387207, "loss": 0.4178, "nll_loss": 0.41731005907058716, "rewards/accuracies": 1.0, "rewards/chosen": -0.02614215388894081, "rewards/margins": 0.4056363105773926, "rewards/rejected": -0.4317784905433655, "step": 740 }, { "epoch": 0.15, "grad_norm": 1.03911292552948, "learning_rate": 9.9778098230154e-07, "log_odds_chosen": 5.725098609924316, "log_odds_ratio": -0.007257412187755108, "logits/chosen": 7.547998905181885, "logits/rejected": 5.353067874908447, "logps/chosen": -0.3198131024837494, "logps/rejected": -4.283266544342041, "loss": 0.4522, "nll_loss": 0.45146507024765015, "rewards/accuracies": 1.0, "rewards/chosen": -0.03198130801320076, "rewards/margins": 0.3963452875614166, "rewards/rejected": -0.4283266067504883, "step": 750 }, { "epoch": 0.152, "grad_norm": 0.9007735848426819, "learning_rate": 9.977214592878915e-07, "log_odds_chosen": 5.889931678771973, "log_odds_ratio": -0.00549688283354044, "logits/chosen": 7.534062385559082, "logits/rejected": 5.352395534515381, "logps/chosen": -0.27770304679870605, "logps/rejected": -4.336967468261719, "loss": 0.4419, "nll_loss": 0.441303014755249, "rewards/accuracies": 1.0, "rewards/chosen": -0.027770301327109337, "rewards/margins": 0.4059264659881592, "rewards/rejected": -0.4336967468261719, "step": 760 }, { "epoch": 0.154, "grad_norm": 1.6320549249649048, "learning_rate": 9.976611503041218e-07, "log_odds_chosen": 5.800196170806885, "log_odds_ratio": -0.0049342745915055275, "logits/chosen": 7.562623500823975, "logits/rejected": 5.433527946472168, "logps/chosen": -0.31904828548431396, "logps/rejected": -4.434618949890137, "loss": 0.4648, "nll_loss": 0.4642762541770935, "rewards/accuracies": 1.0, "rewards/chosen": -0.03190482780337334, "rewards/margins": 0.41155704855918884, "rewards/rejected": -0.44346189498901367, "step": 770 }, { "epoch": 0.156, "grad_norm": 0.9901538491249084, "learning_rate": 9.976000554454666e-07, "log_odds_chosen": 5.945862770080566, "log_odds_ratio": -0.0030944941099733114, "logits/chosen": 7.408653259277344, "logits/rejected": 5.243103504180908, "logps/chosen": -0.19992288947105408, "logps/rejected": -4.22150182723999, "loss": 0.3788, "nll_loss": 0.37845197319984436, "rewards/accuracies": 1.0, "rewards/chosen": -0.019992288202047348, "rewards/margins": 0.4021579325199127, "rewards/rejected": -0.422150194644928, "step": 780 }, { "epoch": 0.158, "grad_norm": 1.1534910202026367, "learning_rate": 9.975381748084033e-07, "log_odds_chosen": 6.43671178817749, "log_odds_ratio": -0.0020275753922760487, "logits/chosen": 7.339398384094238, "logits/rejected": 5.278860569000244, "logps/chosen": -0.14634238183498383, "logps/rejected": -4.393942832946777, "loss": 0.3274, "nll_loss": 0.32719743251800537, "rewards/accuracies": 1.0, "rewards/chosen": -0.014634238556027412, "rewards/margins": 0.42476004362106323, "rewards/rejected": -0.4393942952156067, "step": 790 }, { "epoch": 0.16, "grad_norm": 1.9627504348754883, "learning_rate": 9.9747550849065e-07, "log_odds_chosen": 5.828178405761719, "log_odds_ratio": -0.004523201379925013, "logits/chosen": 7.4914231300354, "logits/rejected": 5.270649433135986, "logps/chosen": -0.28040188550949097, "logps/rejected": -4.286971569061279, "loss": 0.4534, "nll_loss": 0.45290812849998474, "rewards/accuracies": 1.0, "rewards/chosen": -0.028040189296007156, "rewards/margins": 0.4006569981575012, "rewards/rejected": -0.4286971986293793, "step": 800 }, { "epoch": 0.162, "grad_norm": 0.9587048292160034, "learning_rate": 9.974120565911651e-07, "log_odds_chosen": 6.062479496002197, "log_odds_ratio": -0.0031441678293049335, "logits/chosen": 7.561063289642334, "logits/rejected": 5.218833923339844, "logps/chosen": -0.21453571319580078, "logps/rejected": -4.421036720275879, "loss": 0.3754, "nll_loss": 0.3750832676887512, "rewards/accuracies": 1.0, "rewards/chosen": -0.02145357057452202, "rewards/margins": 0.42065009474754333, "rewards/rejected": -0.44210368394851685, "step": 810 }, { "epoch": 0.164, "grad_norm": 1.4657803773880005, "learning_rate": 9.97347819210148e-07, "log_odds_chosen": 5.592413902282715, "log_odds_ratio": -0.0052118608728051186, "logits/chosen": 7.572528839111328, "logits/rejected": 5.36314582824707, "logps/chosen": -0.30143600702285767, "logps/rejected": -4.261000633239746, "loss": 0.4603, "nll_loss": 0.45981383323669434, "rewards/accuracies": 1.0, "rewards/chosen": -0.030143599957227707, "rewards/margins": 0.395956426858902, "rewards/rejected": -0.42610007524490356, "step": 820 }, { "epoch": 0.166, "grad_norm": 1.4666260480880737, "learning_rate": 9.97282796449038e-07, "log_odds_chosen": 5.584683418273926, "log_odds_ratio": -0.007546792272478342, "logits/chosen": 7.5849480628967285, "logits/rejected": 5.3258280754089355, "logps/chosen": -0.3349876403808594, "logps/rejected": -4.169242858886719, "loss": 0.4877, "nll_loss": 0.4869515299797058, "rewards/accuracies": 1.0, "rewards/chosen": -0.03349876403808594, "rewards/margins": 0.3834255039691925, "rewards/rejected": -0.41692429780960083, "step": 830 }, { "epoch": 0.168, "grad_norm": 1.4497456550598145, "learning_rate": 9.972169884105153e-07, "log_odds_chosen": 6.032774448394775, "log_odds_ratio": -0.0031315877567976713, "logits/chosen": 7.573624610900879, "logits/rejected": 5.294590950012207, "logps/chosen": -0.1933312714099884, "logps/rejected": -4.253373146057129, "loss": 0.3604, "nll_loss": 0.36010730266571045, "rewards/accuracies": 1.0, "rewards/chosen": -0.0193331241607666, "rewards/margins": 0.4060041308403015, "rewards/rejected": -0.4253372550010681, "step": 840 }, { "epoch": 0.17, "grad_norm": 1.259766936302185, "learning_rate": 9.971503951984993e-07, "log_odds_chosen": 5.0729169845581055, "log_odds_ratio": -0.039060186594724655, "logits/chosen": 7.487587928771973, "logits/rejected": 5.450077533721924, "logps/chosen": -0.5655471086502075, "logps/rejected": -4.2695488929748535, "loss": 0.6346, "nll_loss": 0.6306672096252441, "rewards/accuracies": 1.0, "rewards/chosen": -0.056554704904556274, "rewards/margins": 0.37040016055107117, "rewards/rejected": -0.42695489525794983, "step": 850 }, { "epoch": 0.172, "grad_norm": 2.791877508163452, "learning_rate": 9.970830169181504e-07, "log_odds_chosen": 5.903360366821289, "log_odds_ratio": -0.01223450805991888, "logits/chosen": 7.624690055847168, "logits/rejected": 5.448235511779785, "logps/chosen": -0.37687212228775024, "logps/rejected": -4.317425727844238, "loss": 0.4756, "nll_loss": 0.47436314821243286, "rewards/accuracies": 1.0, "rewards/chosen": -0.03768720477819443, "rewards/margins": 0.39405539631843567, "rewards/rejected": -0.4317425787448883, "step": 860 }, { "epoch": 0.174, "grad_norm": 0.9917593598365784, "learning_rate": 9.970148536758676e-07, "log_odds_chosen": 5.894608497619629, "log_odds_ratio": -0.005637046415358782, "logits/chosen": 7.659219264984131, "logits/rejected": 5.262508392333984, "logps/chosen": -0.22204343974590302, "logps/rejected": -4.173386573791504, "loss": 0.3837, "nll_loss": 0.3831316828727722, "rewards/accuracies": 1.0, "rewards/chosen": -0.022204341366887093, "rewards/margins": 0.395134299993515, "rewards/rejected": -0.41733866930007935, "step": 870 }, { "epoch": 0.176, "grad_norm": 1.0825738906860352, "learning_rate": 9.969459055792902e-07, "log_odds_chosen": 5.653293609619141, "log_odds_ratio": -0.008326241746544838, "logits/chosen": 7.564648628234863, "logits/rejected": 5.331233501434326, "logps/chosen": -0.3375949263572693, "logps/rejected": -4.308921813964844, "loss": 0.5005, "nll_loss": 0.4996330142021179, "rewards/accuracies": 1.0, "rewards/chosen": -0.03375949338078499, "rewards/margins": 0.3971327543258667, "rewards/rejected": -0.4308921694755554, "step": 880 }, { "epoch": 0.178, "grad_norm": 1.0792062282562256, "learning_rate": 9.968761727372963e-07, "log_odds_chosen": 5.699173927307129, "log_odds_ratio": -0.012045633979141712, "logits/chosen": 7.633315086364746, "logits/rejected": 5.386175632476807, "logps/chosen": -0.3688681721687317, "logps/rejected": -4.171936511993408, "loss": 0.4637, "nll_loss": 0.4624978005886078, "rewards/accuracies": 1.0, "rewards/chosen": -0.03688682243227959, "rewards/margins": 0.38030683994293213, "rewards/rejected": -0.4171936511993408, "step": 890 }, { "epoch": 0.18, "grad_norm": 0.8818485736846924, "learning_rate": 9.968056552600042e-07, "log_odds_chosen": 5.852365493774414, "log_odds_ratio": -0.00795703660696745, "logits/chosen": 7.371837615966797, "logits/rejected": 5.316601276397705, "logps/chosen": -0.31957727670669556, "logps/rejected": -4.242649555206299, "loss": 0.47, "nll_loss": 0.46920377016067505, "rewards/accuracies": 1.0, "rewards/chosen": -0.031957730650901794, "rewards/margins": 0.39230725169181824, "rewards/rejected": -0.42426496744155884, "step": 900 }, { "epoch": 0.182, "grad_norm": 1.5758718252182007, "learning_rate": 9.967343532587702e-07, "log_odds_chosen": 5.586694240570068, "log_odds_ratio": -0.005340826231986284, "logits/chosen": 7.567517280578613, "logits/rejected": 5.130992889404297, "logps/chosen": -0.2748351991176605, "logps/rejected": -4.137576103210449, "loss": 0.4301, "nll_loss": 0.4296058714389801, "rewards/accuracies": 1.0, "rewards/chosen": -0.02748352289199829, "rewards/margins": 0.3862740695476532, "rewards/rejected": -0.4137576222419739, "step": 910 }, { "epoch": 0.184, "grad_norm": 1.834955096244812, "learning_rate": 9.966622668461898e-07, "log_odds_chosen": 5.858423233032227, "log_odds_ratio": -0.005584989674389362, "logits/chosen": 7.461129665374756, "logits/rejected": 5.18427038192749, "logps/chosen": -0.2928406298160553, "logps/rejected": -4.333837032318115, "loss": 0.4537, "nll_loss": 0.45309847593307495, "rewards/accuracies": 1.0, "rewards/chosen": -0.02928406372666359, "rewards/margins": 0.40409964323043823, "rewards/rejected": -0.4333837032318115, "step": 920 }, { "epoch": 0.186, "grad_norm": 1.0350888967514038, "learning_rate": 9.965893961360976e-07, "log_odds_chosen": 5.678126335144043, "log_odds_ratio": -0.004537554923444986, "logits/chosen": 7.53134822845459, "logits/rejected": 5.303735733032227, "logps/chosen": -0.27928534150123596, "logps/rejected": -4.282819747924805, "loss": 0.4312, "nll_loss": 0.43078380823135376, "rewards/accuracies": 1.0, "rewards/chosen": -0.027928534895181656, "rewards/margins": 0.40035343170166016, "rewards/rejected": -0.4282819628715515, "step": 930 }, { "epoch": 0.188, "grad_norm": 1.056127905845642, "learning_rate": 9.965157412435662e-07, "log_odds_chosen": 5.9277143478393555, "log_odds_ratio": -0.003572776447981596, "logits/chosen": 7.6487321853637695, "logits/rejected": 5.354024410247803, "logps/chosen": -0.2134413719177246, "logps/rejected": -4.29018497467041, "loss": 0.3762, "nll_loss": 0.37579578161239624, "rewards/accuracies": 1.0, "rewards/chosen": -0.0213441364467144, "rewards/margins": 0.4076743721961975, "rewards/rejected": -0.429018497467041, "step": 940 }, { "epoch": 0.19, "grad_norm": 1.5949771404266357, "learning_rate": 9.964413022849067e-07, "log_odds_chosen": 5.631707668304443, "log_odds_ratio": -0.005154115613549948, "logits/chosen": 7.622084140777588, "logits/rejected": 5.245339870452881, "logps/chosen": -0.30032452940940857, "logps/rejected": -4.3146185874938965, "loss": 0.4599, "nll_loss": 0.45942267775535583, "rewards/accuracies": 1.0, "rewards/chosen": -0.030032450333237648, "rewards/margins": 0.4014294147491455, "rewards/rejected": -0.4314618706703186, "step": 950 }, { "epoch": 0.192, "grad_norm": 1.31623113155365, "learning_rate": 9.963660793776687e-07, "log_odds_chosen": 5.3844428062438965, "log_odds_ratio": -0.03943685442209244, "logits/chosen": 7.258811950683594, "logits/rejected": 5.0058488845825195, "logps/chosen": -0.462016761302948, "logps/rejected": -4.211369514465332, "loss": 0.608, "nll_loss": 0.6040086150169373, "rewards/accuracies": 1.0, "rewards/chosen": -0.0462016761302948, "rewards/margins": 0.37493523955345154, "rewards/rejected": -0.42113691568374634, "step": 960 }, { "epoch": 0.194, "grad_norm": 1.0345897674560547, "learning_rate": 9.962900726406388e-07, "log_odds_chosen": 5.740065574645996, "log_odds_ratio": -0.005163499154150486, "logits/chosen": 7.4238433837890625, "logits/rejected": 5.216811656951904, "logps/chosen": -0.2751191556453705, "logps/rejected": -4.177524089813232, "loss": 0.4579, "nll_loss": 0.4573970437049866, "rewards/accuracies": 1.0, "rewards/chosen": -0.027511918917298317, "rewards/margins": 0.39024052023887634, "rewards/rejected": -0.4177524149417877, "step": 970 }, { "epoch": 0.196, "grad_norm": 0.9940302968025208, "learning_rate": 9.962132821938428e-07, "log_odds_chosen": 5.744112968444824, "log_odds_ratio": -0.03689805790781975, "logits/chosen": 7.384932518005371, "logits/rejected": 5.067351818084717, "logps/chosen": -0.33022090792655945, "logps/rejected": -4.089345932006836, "loss": 0.4822, "nll_loss": 0.4785475730895996, "rewards/accuracies": 1.0, "rewards/chosen": -0.03302209451794624, "rewards/margins": 0.37591248750686646, "rewards/rejected": -0.40893465280532837, "step": 980 }, { "epoch": 0.198, "grad_norm": 1.1531234979629517, "learning_rate": 9.961357081585428e-07, "log_odds_chosen": 5.376118183135986, "log_odds_ratio": -0.022148486226797104, "logits/chosen": 7.235922813415527, "logits/rejected": 5.215376853942871, "logps/chosen": -0.5111210346221924, "logps/rejected": -4.528676509857178, "loss": 0.5969, "nll_loss": 0.5946488380432129, "rewards/accuracies": 1.0, "rewards/chosen": -0.051112107932567596, "rewards/margins": 0.40175557136535645, "rewards/rejected": -0.45286765694618225, "step": 990 }, { "epoch": 0.2, "grad_norm": 1.1310498714447021, "learning_rate": 9.960573506572389e-07, "log_odds_chosen": 6.096063613891602, "log_odds_ratio": -0.005234484560787678, "logits/chosen": 7.514624118804932, "logits/rejected": 5.238080024719238, "logps/chosen": -0.25344419479370117, "logps/rejected": -4.35067081451416, "loss": 0.3929, "nll_loss": 0.3923715353012085, "rewards/accuracies": 1.0, "rewards/chosen": -0.025344420224428177, "rewards/margins": 0.40972262620925903, "rewards/rejected": -0.4350670278072357, "step": 1000 }, { "epoch": 0.202, "grad_norm": 0.9447557926177979, "learning_rate": 9.959782098136682e-07, "log_odds_chosen": 5.962520599365234, "log_odds_ratio": -0.0035225413739681244, "logits/chosen": 7.512228488922119, "logits/rejected": 5.214698791503906, "logps/chosen": -0.21637240052223206, "logps/rejected": -4.3315653800964355, "loss": 0.3937, "nll_loss": 0.3933403789997101, "rewards/accuracies": 1.0, "rewards/chosen": -0.021637242287397385, "rewards/margins": 0.4115193486213684, "rewards/rejected": -0.4331565797328949, "step": 1010 }, { "epoch": 0.204, "grad_norm": 0.9828423857688904, "learning_rate": 9.95898285752805e-07, "log_odds_chosen": 6.232080936431885, "log_odds_ratio": -0.0033578909933567047, "logits/chosen": 7.448879241943359, "logits/rejected": 5.227917671203613, "logps/chosen": -0.22506611049175262, "logps/rejected": -4.497435569763184, "loss": 0.4103, "nll_loss": 0.4099271297454834, "rewards/accuracies": 1.0, "rewards/chosen": -0.02250661328434944, "rewards/margins": 0.4272368848323822, "rewards/rejected": -0.44974350929260254, "step": 1020 }, { "epoch": 0.206, "grad_norm": 1.0765581130981445, "learning_rate": 9.958175786008603e-07, "log_odds_chosen": 4.95219612121582, "log_odds_ratio": -0.04934389144182205, "logits/chosen": 7.108341217041016, "logits/rejected": 5.1365132331848145, "logps/chosen": -0.5623514652252197, "logps/rejected": -4.098600387573242, "loss": 0.705, "nll_loss": 0.7000438570976257, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.05623514577746391, "rewards/margins": 0.3536248803138733, "rewards/rejected": -0.4098600447177887, "step": 1030 }, { "epoch": 0.208, "grad_norm": 1.3046690225601196, "learning_rate": 9.957360884852816e-07, "log_odds_chosen": 5.6285200119018555, "log_odds_ratio": -0.004786486737430096, "logits/chosen": 7.563647270202637, "logits/rejected": 5.190787315368652, "logps/chosen": -0.27233460545539856, "logps/rejected": -4.206488609313965, "loss": 0.4522, "nll_loss": 0.4517253339290619, "rewards/accuracies": 1.0, "rewards/chosen": -0.027233460918068886, "rewards/margins": 0.3934153914451599, "rewards/rejected": -0.42064887285232544, "step": 1040 }, { "epoch": 0.21, "grad_norm": 1.1193593740463257, "learning_rate": 9.956538155347532e-07, "log_odds_chosen": 5.727882385253906, "log_odds_ratio": -0.004526951350271702, "logits/chosen": 7.662752628326416, "logits/rejected": 5.200858116149902, "logps/chosen": -0.2606583535671234, "logps/rejected": -4.275498390197754, "loss": 0.4184, "nll_loss": 0.4179052710533142, "rewards/accuracies": 1.0, "rewards/chosen": -0.02606583572924137, "rewards/margins": 0.40148407220840454, "rewards/rejected": -0.42754989862442017, "step": 1050 }, { "epoch": 0.212, "grad_norm": 1.3582788705825806, "learning_rate": 9.955707598791952e-07, "log_odds_chosen": 5.943669319152832, "log_odds_ratio": -0.0036736621987074614, "logits/chosen": 7.498809814453125, "logits/rejected": 5.163520336151123, "logps/chosen": -0.23491314053535461, "logps/rejected": -4.325177192687988, "loss": 0.3904, "nll_loss": 0.39005306363105774, "rewards/accuracies": 1.0, "rewards/chosen": -0.023491311818361282, "rewards/margins": 0.4090263843536377, "rewards/rejected": -0.4325177073478699, "step": 1060 }, { "epoch": 0.214, "grad_norm": 1.2922824621200562, "learning_rate": 9.954869216497633e-07, "log_odds_chosen": 5.3597187995910645, "log_odds_ratio": -0.03851377218961716, "logits/chosen": 7.482877254486084, "logits/rejected": 5.3320417404174805, "logps/chosen": -0.5398969054222107, "logps/rejected": -4.422436714172363, "loss": 0.5953, "nll_loss": 0.5914831161499023, "rewards/accuracies": 1.0, "rewards/chosen": -0.053989700973033905, "rewards/margins": 0.38825398683547974, "rewards/rejected": -0.44224366545677185, "step": 1070 }, { "epoch": 0.216, "grad_norm": 1.1303489208221436, "learning_rate": 9.954023009788504e-07, "log_odds_chosen": 5.813111305236816, "log_odds_ratio": -0.004791552666574717, "logits/chosen": 7.573173522949219, "logits/rejected": 5.263712406158447, "logps/chosen": -0.3129696249961853, "logps/rejected": -4.4625020027160645, "loss": 0.4768, "nll_loss": 0.4763554632663727, "rewards/accuracies": 1.0, "rewards/chosen": -0.03129696473479271, "rewards/margins": 0.41495323181152344, "rewards/rejected": -0.4462502598762512, "step": 1080 }, { "epoch": 0.218, "grad_norm": 1.6667126417160034, "learning_rate": 9.953168980000833e-07, "log_odds_chosen": 5.70503044128418, "log_odds_ratio": -0.038300465792417526, "logits/chosen": 7.530763149261475, "logits/rejected": 5.172323703765869, "logps/chosen": -0.4293263554573059, "logps/rejected": -4.235454082489014, "loss": 0.5536, "nll_loss": 0.5498100519180298, "rewards/accuracies": 1.0, "rewards/chosen": -0.04293262958526611, "rewards/margins": 0.3806127607822418, "rewards/rejected": -0.4235454201698303, "step": 1090 }, { "epoch": 0.22, "grad_norm": 1.2214115858078003, "learning_rate": 9.952307128483256e-07, "log_odds_chosen": 5.37918758392334, "log_odds_ratio": -0.03910639137029648, "logits/chosen": 7.223762512207031, "logits/rejected": 5.174170970916748, "logps/chosen": -0.5494061708450317, "logps/rejected": -4.363043785095215, "loss": 0.6681, "nll_loss": 0.6641685962677002, "rewards/accuracies": 1.0, "rewards/chosen": -0.05494061857461929, "rewards/margins": 0.38136374950408936, "rewards/rejected": -0.43630433082580566, "step": 1100 }, { "epoch": 0.222, "grad_norm": 1.4985145330429077, "learning_rate": 9.95143745659675e-07, "log_odds_chosen": 5.382311820983887, "log_odds_ratio": -0.007471742574125528, "logits/chosen": 7.555838584899902, "logits/rejected": 5.195348262786865, "logps/chosen": -0.3972010016441345, "logps/rejected": -4.21198844909668, "loss": 0.526, "nll_loss": 0.5252867937088013, "rewards/accuracies": 1.0, "rewards/chosen": -0.03972009941935539, "rewards/margins": 0.3814787268638611, "rewards/rejected": -0.42119884490966797, "step": 1110 }, { "epoch": 0.224, "grad_norm": 2.023250102996826, "learning_rate": 9.950559965714646e-07, "log_odds_chosen": 5.624068260192871, "log_odds_ratio": -0.00614691898226738, "logits/chosen": 7.53164529800415, "logits/rejected": 5.229598045349121, "logps/chosen": -0.3347712457180023, "logps/rejected": -4.271878719329834, "loss": 0.4784, "nll_loss": 0.47782689332962036, "rewards/accuracies": 1.0, "rewards/chosen": -0.03347712755203247, "rewards/margins": 0.39371076226234436, "rewards/rejected": -0.42718783020973206, "step": 1120 }, { "epoch": 0.226, "grad_norm": 0.9277468323707581, "learning_rate": 9.949674657222623e-07, "log_odds_chosen": 5.7697014808654785, "log_odds_ratio": -0.004470593761652708, "logits/chosen": 7.389612674713135, "logits/rejected": 5.303890705108643, "logps/chosen": -0.23467814922332764, "logps/rejected": -4.165981292724609, "loss": 0.4034, "nll_loss": 0.40292781591415405, "rewards/accuracies": 1.0, "rewards/chosen": -0.023467818275094032, "rewards/margins": 0.3931303024291992, "rewards/rejected": -0.4165981411933899, "step": 1130 }, { "epoch": 0.228, "grad_norm": 0.8006287813186646, "learning_rate": 9.948781532518704e-07, "log_odds_chosen": 5.555264949798584, "log_odds_ratio": -0.005465766880661249, "logits/chosen": 7.404051303863525, "logits/rejected": 5.164650917053223, "logps/chosen": -0.28252720832824707, "logps/rejected": -4.127666473388672, "loss": 0.4765, "nll_loss": 0.47591105103492737, "rewards/accuracies": 1.0, "rewards/chosen": -0.028252724558115005, "rewards/margins": 0.3845139443874359, "rewards/rejected": -0.412766695022583, "step": 1140 }, { "epoch": 0.23, "grad_norm": 1.1401197910308838, "learning_rate": 9.947880593013254e-07, "log_odds_chosen": 6.212497234344482, "log_odds_ratio": -0.0025186841376125813, "logits/chosen": 7.419493675231934, "logits/rejected": 5.204658508300781, "logps/chosen": -0.1534021645784378, "logps/rejected": -4.309787750244141, "loss": 0.3419, "nll_loss": 0.34168750047683716, "rewards/accuracies": 1.0, "rewards/chosen": -0.015340214595198631, "rewards/margins": 0.41563859581947327, "rewards/rejected": -0.43097877502441406, "step": 1150 }, { "epoch": 0.232, "grad_norm": 1.1171846389770508, "learning_rate": 9.946971840128981e-07, "log_odds_chosen": 6.079745292663574, "log_odds_ratio": -0.0034279648680239916, "logits/chosen": 7.424662113189697, "logits/rejected": 5.211994647979736, "logps/chosen": -0.2395186722278595, "logps/rejected": -4.388947010040283, "loss": 0.4072, "nll_loss": 0.40683382749557495, "rewards/accuracies": 1.0, "rewards/chosen": -0.023951871320605278, "rewards/margins": 0.41494283080101013, "rewards/rejected": -0.438894659280777, "step": 1160 }, { "epoch": 0.234, "grad_norm": 1.0636237859725952, "learning_rate": 9.946055275300927e-07, "log_odds_chosen": 5.697154998779297, "log_odds_ratio": -0.005017982330173254, "logits/chosen": 7.5275163650512695, "logits/rejected": 5.254751682281494, "logps/chosen": -0.24566921591758728, "logps/rejected": -4.080471038818359, "loss": 0.3959, "nll_loss": 0.39537373185157776, "rewards/accuracies": 1.0, "rewards/chosen": -0.024566922336816788, "rewards/margins": 0.3834801912307739, "rewards/rejected": -0.4080471098423004, "step": 1170 }, { "epoch": 0.236, "grad_norm": 1.1244925260543823, "learning_rate": 9.945130899976476e-07, "log_odds_chosen": 5.8849310874938965, "log_odds_ratio": -0.003177474718540907, "logits/chosen": 7.508843421936035, "logits/rejected": 5.118249416351318, "logps/chosen": -0.20523090660572052, "logps/rejected": -4.2543044090271, "loss": 0.3949, "nll_loss": 0.39462482929229736, "rewards/accuracies": 1.0, "rewards/chosen": -0.020523089915513992, "rewards/margins": 0.4049074053764343, "rewards/rejected": -0.4254304766654968, "step": 1180 }, { "epoch": 0.238, "grad_norm": 0.9625526666641235, "learning_rate": 9.94419871561534e-07, "log_odds_chosen": 5.916973114013672, "log_odds_ratio": -0.0037661988753825426, "logits/chosen": 7.547001838684082, "logits/rejected": 5.197099685668945, "logps/chosen": -0.17666833102703094, "logps/rejected": -4.154223442077637, "loss": 0.3807, "nll_loss": 0.38032615184783936, "rewards/accuracies": 1.0, "rewards/chosen": -0.017666833475232124, "rewards/margins": 0.39775556325912476, "rewards/rejected": -0.41542237997055054, "step": 1190 }, { "epoch": 0.24, "grad_norm": 1.937224268913269, "learning_rate": 9.94325872368957e-07, "log_odds_chosen": 5.47687292098999, "log_odds_ratio": -0.0070913685485720634, "logits/chosen": 7.287858009338379, "logits/rejected": 5.183866024017334, "logps/chosen": -0.3745271861553192, "logps/rejected": -4.211296081542969, "loss": 0.5434, "nll_loss": 0.5427122712135315, "rewards/accuracies": 1.0, "rewards/chosen": -0.03745272383093834, "rewards/margins": 0.3836769461631775, "rewards/rejected": -0.42112961411476135, "step": 1200 }, { "epoch": 0.242, "grad_norm": 1.088942289352417, "learning_rate": 9.942310925683537e-07, "log_odds_chosen": 5.619528293609619, "log_odds_ratio": -0.01529604010283947, "logits/chosen": 7.542412757873535, "logits/rejected": 5.266120433807373, "logps/chosen": -0.4378971457481384, "logps/rejected": -4.471851348876953, "loss": 0.5574, "nll_loss": 0.5559155941009521, "rewards/accuracies": 1.0, "rewards/chosen": -0.04378972202539444, "rewards/margins": 0.403395414352417, "rewards/rejected": -0.44718512892723083, "step": 1210 }, { "epoch": 0.244, "grad_norm": 1.346421480178833, "learning_rate": 9.941355323093943e-07, "log_odds_chosen": 5.42678689956665, "log_odds_ratio": -0.006506240461021662, "logits/chosen": 7.398824214935303, "logits/rejected": 5.247359752655029, "logps/chosen": -0.40193161368370056, "logps/rejected": -4.435702323913574, "loss": 0.5593, "nll_loss": 0.5586473941802979, "rewards/accuracies": 1.0, "rewards/chosen": -0.040193162858486176, "rewards/margins": 0.40337711572647095, "rewards/rejected": -0.44357022643089294, "step": 1220 }, { "epoch": 0.246, "grad_norm": 1.1042898893356323, "learning_rate": 9.940391917429818e-07, "log_odds_chosen": 6.264995098114014, "log_odds_ratio": -0.00249803951010108, "logits/chosen": 7.583203315734863, "logits/rejected": 5.147339820861816, "logps/chosen": -0.14802715182304382, "logps/rejected": -4.205270290374756, "loss": 0.2942, "nll_loss": 0.2939774692058563, "rewards/accuracies": 1.0, "rewards/chosen": -0.014802716672420502, "rewards/margins": 0.4057242274284363, "rewards/rejected": -0.42052698135375977, "step": 1230 }, { "epoch": 0.248, "grad_norm": 1.8860212564468384, "learning_rate": 9.93942071021251e-07, "log_odds_chosen": 5.467048168182373, "log_odds_ratio": -0.016816187649965286, "logits/chosen": 7.5400495529174805, "logits/rejected": 5.2583231925964355, "logps/chosen": -0.461718887090683, "logps/rejected": -4.441669940948486, "loss": 0.5204, "nll_loss": 0.5187442898750305, "rewards/accuracies": 1.0, "rewards/chosen": -0.0461718887090683, "rewards/margins": 0.39799514412879944, "rewards/rejected": -0.44416695833206177, "step": 1240 }, { "epoch": 0.25, "grad_norm": 1.9489353895187378, "learning_rate": 9.938441702975689e-07, "log_odds_chosen": 5.234245777130127, "log_odds_ratio": -0.04068918898701668, "logits/chosen": 7.308568000793457, "logits/rejected": 5.1906890869140625, "logps/chosen": -0.6226158142089844, "logps/rejected": -4.223973274230957, "loss": 0.6789, "nll_loss": 0.67488032579422, "rewards/accuracies": 1.0, "rewards/chosen": -0.06226158142089844, "rewards/margins": 0.3601357340812683, "rewards/rejected": -0.42239728569984436, "step": 1250 }, { "epoch": 0.252, "grad_norm": 1.1673338413238525, "learning_rate": 9.937454897265337e-07, "log_odds_chosen": 5.7851762771606445, "log_odds_ratio": -0.0061762891709804535, "logits/chosen": 7.399752616882324, "logits/rejected": 5.199285507202148, "logps/chosen": -0.3009653687477112, "logps/rejected": -4.304694175720215, "loss": 0.4508, "nll_loss": 0.45019203424453735, "rewards/accuracies": 1.0, "rewards/chosen": -0.030096540227532387, "rewards/margins": 0.4003728926181793, "rewards/rejected": -0.43046945333480835, "step": 1260 }, { "epoch": 0.254, "grad_norm": 1.401612401008606, "learning_rate": 9.93646029463976e-07, "log_odds_chosen": 5.745970249176025, "log_odds_ratio": -0.017143476754426956, "logits/chosen": 7.483550071716309, "logits/rejected": 5.234131813049316, "logps/chosen": -0.37959927320480347, "logps/rejected": -4.246535301208496, "loss": 0.4987, "nll_loss": 0.4969923496246338, "rewards/accuracies": 1.0, "rewards/chosen": -0.037959933280944824, "rewards/margins": 0.38669365644454956, "rewards/rejected": -0.4246535301208496, "step": 1270 }, { "epoch": 0.256, "grad_norm": 1.6490076780319214, "learning_rate": 9.935457896669568e-07, "log_odds_chosen": 5.970763206481934, "log_odds_ratio": -0.0037278614472597837, "logits/chosen": 7.562868595123291, "logits/rejected": 5.296934604644775, "logps/chosen": -0.20572200417518616, "logps/rejected": -4.193833827972412, "loss": 0.3669, "nll_loss": 0.36654555797576904, "rewards/accuracies": 1.0, "rewards/chosen": -0.020572198554873466, "rewards/margins": 0.3988111615180969, "rewards/rejected": -0.41938337683677673, "step": 1280 }, { "epoch": 0.258, "grad_norm": 1.0330965518951416, "learning_rate": 9.934447704937682e-07, "log_odds_chosen": 5.65505313873291, "log_odds_ratio": -0.004878881387412548, "logits/chosen": 7.491122245788574, "logits/rejected": 5.089749336242676, "logps/chosen": -0.25617295503616333, "logps/rejected": -4.200645446777344, "loss": 0.4119, "nll_loss": 0.41143307089805603, "rewards/accuracies": 1.0, "rewards/chosen": -0.025617297738790512, "rewards/margins": 0.3944472670555115, "rewards/rejected": -0.4200645089149475, "step": 1290 }, { "epoch": 0.26, "grad_norm": 1.111147165298462, "learning_rate": 9.93342972103934e-07, "log_odds_chosen": 5.590101718902588, "log_odds_ratio": -0.03816352039575577, "logits/chosen": 7.310394287109375, "logits/rejected": 5.292299270629883, "logps/chosen": -0.48741888999938965, "logps/rejected": -4.348744869232178, "loss": 0.5616, "nll_loss": 0.5577803254127502, "rewards/accuracies": 1.0, "rewards/chosen": -0.04874188452959061, "rewards/margins": 0.3861326575279236, "rewards/rejected": -0.4348745346069336, "step": 1300 }, { "epoch": 0.262, "grad_norm": 2.152653455734253, "learning_rate": 9.932403946582071e-07, "log_odds_chosen": 5.494030952453613, "log_odds_ratio": -0.011813756078481674, "logits/chosen": 7.391989707946777, "logits/rejected": 5.2646002769470215, "logps/chosen": -0.3399931788444519, "logps/rejected": -4.045591354370117, "loss": 0.4532, "nll_loss": 0.45196953415870667, "rewards/accuracies": 1.0, "rewards/chosen": -0.03399931639432907, "rewards/margins": 0.3705598711967468, "rewards/rejected": -0.4045591354370117, "step": 1310 }, { "epoch": 0.264, "grad_norm": 1.1690629720687866, "learning_rate": 9.931370383185716e-07, "log_odds_chosen": 5.579436779022217, "log_odds_ratio": -0.007383177988231182, "logits/chosen": 7.272534370422363, "logits/rejected": 5.239912986755371, "logps/chosen": -0.3325873017311096, "logps/rejected": -4.264285087585449, "loss": 0.4888, "nll_loss": 0.4880245625972748, "rewards/accuracies": 1.0, "rewards/chosen": -0.03325873240828514, "rewards/margins": 0.3931698203086853, "rewards/rejected": -0.42642855644226074, "step": 1320 }, { "epoch": 0.266, "grad_norm": 2.0592825412750244, "learning_rate": 9.93032903248241e-07, "log_odds_chosen": 6.040884971618652, "log_odds_ratio": -0.00385277159512043, "logits/chosen": 7.327881813049316, "logits/rejected": 5.207053184509277, "logps/chosen": -0.2394016981124878, "logps/rejected": -4.385708808898926, "loss": 0.4156, "nll_loss": 0.4151867926120758, "rewards/accuracies": 1.0, "rewards/chosen": -0.02394016832113266, "rewards/margins": 0.4146307110786438, "rewards/rejected": -0.4385708272457123, "step": 1330 }, { "epoch": 0.268, "grad_norm": 0.8587827682495117, "learning_rate": 9.929279896116593e-07, "log_odds_chosen": 5.872379302978516, "log_odds_ratio": -0.003530902788043022, "logits/chosen": 7.441779136657715, "logits/rejected": 5.1309733390808105, "logps/chosen": -0.21260687708854675, "logps/rejected": -4.347057342529297, "loss": 0.3759, "nll_loss": 0.37558406591415405, "rewards/accuracies": 1.0, "rewards/chosen": -0.021260689944028854, "rewards/margins": 0.41344505548477173, "rewards/rejected": -0.4347057342529297, "step": 1340 }, { "epoch": 0.27, "grad_norm": 1.10453462600708, "learning_rate": 9.92822297574499e-07, "log_odds_chosen": 5.400199890136719, "log_odds_ratio": -0.03924388438463211, "logits/chosen": 7.217312812805176, "logits/rejected": 5.103687286376953, "logps/chosen": -0.4668886661529541, "logps/rejected": -4.228199005126953, "loss": 0.5554, "nll_loss": 0.551488995552063, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.04668886587023735, "rewards/margins": 0.37613099813461304, "rewards/rejected": -0.42281991243362427, "step": 1350 }, { "epoch": 0.272, "grad_norm": 1.0144788026809692, "learning_rate": 9.927158273036623e-07, "log_odds_chosen": 6.017350196838379, "log_odds_ratio": -0.0034888796508312225, "logits/chosen": 7.483733177185059, "logits/rejected": 5.2075395584106445, "logps/chosen": -0.1623542159795761, "logps/rejected": -4.09697961807251, "loss": 0.3274, "nll_loss": 0.32701432704925537, "rewards/accuracies": 1.0, "rewards/chosen": -0.01623542234301567, "rewards/margins": 0.39346247911453247, "rewards/rejected": -0.40969792008399963, "step": 1360 }, { "epoch": 0.274, "grad_norm": 0.9025424122810364, "learning_rate": 9.926085789672805e-07, "log_odds_chosen": 5.6722283363342285, "log_odds_ratio": -0.004435035865753889, "logits/chosen": 7.526766777038574, "logits/rejected": 5.0602264404296875, "logps/chosen": -0.22532348334789276, "logps/rejected": -4.134978294372559, "loss": 0.3904, "nll_loss": 0.38996267318725586, "rewards/accuracies": 1.0, "rewards/chosen": -0.022532349452376366, "rewards/margins": 0.3909655213356018, "rewards/rejected": -0.41349783539772034, "step": 1370 }, { "epoch": 0.276, "grad_norm": 1.2166154384613037, "learning_rate": 9.92500552734713e-07, "log_odds_chosen": 5.630692481994629, "log_odds_ratio": -0.006432310678064823, "logits/chosen": 7.520768642425537, "logits/rejected": 5.288580894470215, "logps/chosen": -0.34127092361450195, "logps/rejected": -4.296700477600098, "loss": 0.5067, "nll_loss": 0.5060566663742065, "rewards/accuracies": 1.0, "rewards/chosen": -0.034127090126276016, "rewards/margins": 0.3955429792404175, "rewards/rejected": -0.4296700358390808, "step": 1380 }, { "epoch": 0.278, "grad_norm": 2.531686305999756, "learning_rate": 9.923917487765483e-07, "log_odds_chosen": 5.620418548583984, "log_odds_ratio": -0.013204716145992279, "logits/chosen": 7.379885673522949, "logits/rejected": 5.104581356048584, "logps/chosen": -0.3313469886779785, "logps/rejected": -4.172677516937256, "loss": 0.4686, "nll_loss": 0.4673161506652832, "rewards/accuracies": 1.0, "rewards/chosen": -0.03313469514250755, "rewards/margins": 0.38413310050964355, "rewards/rejected": -0.4172678589820862, "step": 1390 }, { "epoch": 0.28, "grad_norm": 1.1893247365951538, "learning_rate": 9.922821672646027e-07, "log_odds_chosen": 5.815966606140137, "log_odds_ratio": -0.004415798466652632, "logits/chosen": 7.352552890777588, "logits/rejected": 5.181215763092041, "logps/chosen": -0.25012391805648804, "logps/rejected": -4.304505825042725, "loss": 0.4173, "nll_loss": 0.4168306887149811, "rewards/accuracies": 1.0, "rewards/chosen": -0.025012388825416565, "rewards/margins": 0.40543824434280396, "rewards/rejected": -0.43045058846473694, "step": 1400 }, { "epoch": 0.282, "grad_norm": 1.2020412683486938, "learning_rate": 9.921718083719201e-07, "log_odds_chosen": 5.0974202156066895, "log_odds_ratio": -0.033812783658504486, "logits/chosen": 7.313817024230957, "logits/rejected": 5.204531192779541, "logps/chosen": -0.5506921410560608, "logps/rejected": -4.169431209564209, "loss": 0.6097, "nll_loss": 0.6062796711921692, "rewards/accuracies": 1.0, "rewards/chosen": -0.0550692155957222, "rewards/margins": 0.36187392473220825, "rewards/rejected": -0.41694316267967224, "step": 1410 }, { "epoch": 0.284, "grad_norm": 1.0757732391357422, "learning_rate": 9.920606722727724e-07, "log_odds_chosen": 5.6786980628967285, "log_odds_ratio": -0.008853314444422722, "logits/chosen": 7.357745170593262, "logits/rejected": 5.0751633644104, "logps/chosen": -0.32154518365859985, "logps/rejected": -4.405569553375244, "loss": 0.4755, "nll_loss": 0.4745716452598572, "rewards/accuracies": 1.0, "rewards/chosen": -0.032154519110918045, "rewards/margins": 0.4084024429321289, "rewards/rejected": -0.44055694341659546, "step": 1420 }, { "epoch": 0.286, "grad_norm": 1.1059741973876953, "learning_rate": 9.91948759142659e-07, "log_odds_chosen": 5.524752140045166, "log_odds_ratio": -0.038332708179950714, "logits/chosen": 7.397654056549072, "logits/rejected": 5.342809200286865, "logps/chosen": -0.4252486824989319, "logps/rejected": -4.2289252281188965, "loss": 0.5175, "nll_loss": 0.5136864185333252, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.04252486303448677, "rewards/margins": 0.3803676962852478, "rewards/rejected": -0.4228925108909607, "step": 1430 }, { "epoch": 0.288, "grad_norm": 1.2763829231262207, "learning_rate": 9.918360691583054e-07, "log_odds_chosen": 5.631793975830078, "log_odds_ratio": -0.007081352174282074, "logits/chosen": 7.369285583496094, "logits/rejected": 5.2017388343811035, "logps/chosen": -0.35442084074020386, "logps/rejected": -4.311988830566406, "loss": 0.4973, "nll_loss": 0.4965900480747223, "rewards/accuracies": 1.0, "rewards/chosen": -0.035442084074020386, "rewards/margins": 0.3957567811012268, "rewards/rejected": -0.43119892477989197, "step": 1440 }, { "epoch": 0.29, "grad_norm": 1.1761308908462524, "learning_rate": 9.91722602497665e-07, "log_odds_chosen": 6.0382585525512695, "log_odds_ratio": -0.003215277101844549, "logits/chosen": 7.411018371582031, "logits/rejected": 5.23700475692749, "logps/chosen": -0.2106390744447708, "logps/rejected": -4.331351280212402, "loss": 0.3902, "nll_loss": 0.38988322019577026, "rewards/accuracies": 1.0, "rewards/chosen": -0.021063905209302902, "rewards/margins": 0.41207122802734375, "rewards/rejected": -0.43313512206077576, "step": 1450 }, { "epoch": 0.292, "grad_norm": 1.7112200260162354, "learning_rate": 9.916083593399165e-07, "log_odds_chosen": 5.435639381408691, "log_odds_ratio": -0.03752727061510086, "logits/chosen": 7.329455375671387, "logits/rejected": 5.064894199371338, "logps/chosen": -0.5085526704788208, "logps/rejected": -4.282395362854004, "loss": 0.5549, "nll_loss": 0.5511032342910767, "rewards/accuracies": 1.0, "rewards/chosen": -0.050855278968811035, "rewards/margins": 0.3773842453956604, "rewards/rejected": -0.4282395839691162, "step": 1460 }, { "epoch": 0.294, "grad_norm": 0.9914406538009644, "learning_rate": 9.914933398654661e-07, "log_odds_chosen": 5.957636833190918, "log_odds_ratio": -0.003187952097505331, "logits/chosen": 7.561884880065918, "logits/rejected": 5.170864105224609, "logps/chosen": -0.21880654990673065, "logps/rejected": -4.421985626220703, "loss": 0.3822, "nll_loss": 0.3818785548210144, "rewards/accuracies": 1.0, "rewards/chosen": -0.021880654618144035, "rewards/margins": 0.4203178882598877, "rewards/rejected": -0.44219857454299927, "step": 1470 }, { "epoch": 0.296, "grad_norm": 1.5452316999435425, "learning_rate": 9.91377544255945e-07, "log_odds_chosen": 5.7686028480529785, "log_odds_ratio": -0.004100257996469736, "logits/chosen": 7.501455783843994, "logits/rejected": 5.217379570007324, "logps/chosen": -0.21842703223228455, "logps/rejected": -4.202721118927002, "loss": 0.3764, "nll_loss": 0.3760250210762024, "rewards/accuracies": 1.0, "rewards/chosen": -0.021842705085873604, "rewards/margins": 0.39842936396598816, "rewards/rejected": -0.4202720522880554, "step": 1480 }, { "epoch": 0.298, "grad_norm": 1.11637282371521, "learning_rate": 9.912609726942102e-07, "log_odds_chosen": 5.642951965332031, "log_odds_ratio": -0.004948162939399481, "logits/chosen": 7.4747467041015625, "logits/rejected": 5.231175422668457, "logps/chosen": -0.263342946767807, "logps/rejected": -4.174736499786377, "loss": 0.4287, "nll_loss": 0.428208589553833, "rewards/accuracies": 1.0, "rewards/chosen": -0.02633429691195488, "rewards/margins": 0.39113935828208923, "rewards/rejected": -0.4174736440181732, "step": 1490 }, { "epoch": 0.3, "grad_norm": 12.382030487060547, "learning_rate": 9.911436253643443e-07, "log_odds_chosen": 6.313652038574219, "log_odds_ratio": -0.0022971085272729397, "logits/chosen": 7.473972320556641, "logits/rejected": 5.252344608306885, "logps/chosen": -0.17816001176834106, "logps/rejected": -4.438265323638916, "loss": 0.3495, "nll_loss": 0.3492964804172516, "rewards/accuracies": 1.0, "rewards/chosen": -0.017816001549363136, "rewards/margins": 0.4260105490684509, "rewards/rejected": -0.4438265264034271, "step": 1500 }, { "epoch": 0.302, "grad_norm": 1.5420053005218506, "learning_rate": 9.910255024516546e-07, "log_odds_chosen": 5.834127426147461, "log_odds_ratio": -0.03642209246754646, "logits/chosen": 7.2394232749938965, "logits/rejected": 5.088610649108887, "logps/chosen": -0.4239932596683502, "logps/rejected": -4.301197528839111, "loss": 0.4924, "nll_loss": 0.4887908399105072, "rewards/accuracies": 1.0, "rewards/chosen": -0.0423993282020092, "rewards/margins": 0.38772040605545044, "rewards/rejected": -0.43011975288391113, "step": 1510 }, { "epoch": 0.304, "grad_norm": 1.8410111665725708, "learning_rate": 9.909066041426733e-07, "log_odds_chosen": 5.745290279388428, "log_odds_ratio": -0.005913994275033474, "logits/chosen": 7.465371608734131, "logits/rejected": 5.231773376464844, "logps/chosen": -0.32383549213409424, "logps/rejected": -4.333510398864746, "loss": 0.4813, "nll_loss": 0.4806704521179199, "rewards/accuracies": 1.0, "rewards/chosen": -0.032383549958467484, "rewards/margins": 0.400967538356781, "rewards/rejected": -0.4333510398864746, "step": 1520 }, { "epoch": 0.306, "grad_norm": 1.3745018243789673, "learning_rate": 9.90786930625157e-07, "log_odds_chosen": 6.155256271362305, "log_odds_ratio": -0.00331195374019444, "logits/chosen": 7.452122688293457, "logits/rejected": 5.124255657196045, "logps/chosen": -0.2091052234172821, "logps/rejected": -4.26733922958374, "loss": 0.3612, "nll_loss": 0.3608582019805908, "rewards/accuracies": 1.0, "rewards/chosen": -0.02091052569448948, "rewards/margins": 0.40582340955734253, "rewards/rejected": -0.42673391103744507, "step": 1530 }, { "epoch": 0.308, "grad_norm": 0.9846609234809875, "learning_rate": 9.906664820880868e-07, "log_odds_chosen": 5.915782928466797, "log_odds_ratio": -0.003720006672665477, "logits/chosen": 7.431731224060059, "logits/rejected": 5.098736763000488, "logps/chosen": -0.22018344700336456, "logps/rejected": -4.246945381164551, "loss": 0.3883, "nll_loss": 0.38788729906082153, "rewards/accuracies": 1.0, "rewards/chosen": -0.022018346935510635, "rewards/margins": 0.40267616510391235, "rewards/rejected": -0.4246944785118103, "step": 1540 }, { "epoch": 0.31, "grad_norm": 8.05805778503418, "learning_rate": 9.90545258721667e-07, "log_odds_chosen": 6.6551194190979, "log_odds_ratio": -0.0017244673799723387, "logits/chosen": 7.490072727203369, "logits/rejected": 5.03413724899292, "logps/chosen": -0.12565842270851135, "logps/rejected": -4.355261325836182, "loss": 0.2863, "nll_loss": 0.28614071011543274, "rewards/accuracies": 1.0, "rewards/chosen": -0.012565843760967255, "rewards/margins": 0.4229602813720703, "rewards/rejected": -0.43552613258361816, "step": 1550 }, { "epoch": 0.312, "grad_norm": 1.2431893348693848, "learning_rate": 9.904232607173261e-07, "log_odds_chosen": 6.026459217071533, "log_odds_ratio": -0.003022167831659317, "logits/chosen": 7.506147861480713, "logits/rejected": 5.3114423751831055, "logps/chosen": -0.1722608059644699, "logps/rejected": -4.1966166496276855, "loss": 0.3355, "nll_loss": 0.3351745307445526, "rewards/accuracies": 1.0, "rewards/chosen": -0.01722608134150505, "rewards/margins": 0.40243563055992126, "rewards/rejected": -0.4196617603302002, "step": 1560 }, { "epoch": 0.314, "grad_norm": 1.4285873174667358, "learning_rate": 9.903004882677155e-07, "log_odds_chosen": 5.887869358062744, "log_odds_ratio": -0.00350547069683671, "logits/chosen": 7.4884033203125, "logits/rejected": 5.2395243644714355, "logps/chosen": -0.20736388862133026, "logps/rejected": -4.208883285522461, "loss": 0.369, "nll_loss": 0.36860889196395874, "rewards/accuracies": 1.0, "rewards/chosen": -0.020736388862133026, "rewards/margins": 0.40015190839767456, "rewards/rejected": -0.42088833451271057, "step": 1570 }, { "epoch": 0.316, "grad_norm": 1.0590928792953491, "learning_rate": 9.901769415667099e-07, "log_odds_chosen": 5.343173980712891, "log_odds_ratio": -0.007820920087397099, "logits/chosen": 7.397088527679443, "logits/rejected": 5.122618198394775, "logps/chosen": -0.33227771520614624, "logps/rejected": -4.14691686630249, "loss": 0.4774, "nll_loss": 0.4765797555446625, "rewards/accuracies": 1.0, "rewards/chosen": -0.033227771520614624, "rewards/margins": 0.3814639151096344, "rewards/rejected": -0.414691686630249, "step": 1580 }, { "epoch": 0.318, "grad_norm": 2.557055711746216, "learning_rate": 9.90052620809406e-07, "log_odds_chosen": 5.91445255279541, "log_odds_ratio": -0.004795178771018982, "logits/chosen": 7.321536064147949, "logits/rejected": 5.178659915924072, "logps/chosen": -0.2775324583053589, "logps/rejected": -4.477307319641113, "loss": 0.4438, "nll_loss": 0.443291038274765, "rewards/accuracies": 1.0, "rewards/chosen": -0.02775324322283268, "rewards/margins": 0.41997742652893066, "rewards/rejected": -0.44773069024086, "step": 1590 }, { "epoch": 0.32, "grad_norm": 0.7924497127532959, "learning_rate": 9.899275261921233e-07, "log_odds_chosen": 5.494145393371582, "log_odds_ratio": -0.0383731834590435, "logits/chosen": 7.241972923278809, "logits/rejected": 5.170699596405029, "logps/chosen": -0.4423399865627289, "logps/rejected": -4.189929962158203, "loss": 0.5481, "nll_loss": 0.5442711710929871, "rewards/accuracies": 1.0, "rewards/chosen": -0.04423400014638901, "rewards/margins": 0.3747590184211731, "rewards/rejected": -0.4189929962158203, "step": 1600 }, { "epoch": 0.322, "grad_norm": 1.1314396858215332, "learning_rate": 9.898016579124036e-07, "log_odds_chosen": 5.831032752990723, "log_odds_ratio": -0.004236978478729725, "logits/chosen": 7.405346870422363, "logits/rejected": 5.132233619689941, "logps/chosen": -0.23942604660987854, "logps/rejected": -4.223696708679199, "loss": 0.4119, "nll_loss": 0.41144776344299316, "rewards/accuracies": 1.0, "rewards/chosen": -0.023942606523633003, "rewards/margins": 0.3984271287918091, "rewards/rejected": -0.4223697781562805, "step": 1610 }, { "epoch": 0.324, "grad_norm": 0.871146023273468, "learning_rate": 9.8967501616901e-07, "log_odds_chosen": 5.426685810089111, "log_odds_ratio": -0.011712992563843727, "logits/chosen": 7.2729034423828125, "logits/rejected": 5.138895034790039, "logps/chosen": -0.40390366315841675, "logps/rejected": -4.30460786819458, "loss": 0.5338, "nll_loss": 0.5326588153839111, "rewards/accuracies": 1.0, "rewards/chosen": -0.040390368551015854, "rewards/margins": 0.39007043838500977, "rewards/rejected": -0.4304608404636383, "step": 1620 }, { "epoch": 0.326, "grad_norm": 0.95765221118927, "learning_rate": 9.895476011619269e-07, "log_odds_chosen": 5.578465461730957, "log_odds_ratio": -0.006413482129573822, "logits/chosen": 7.305914402008057, "logits/rejected": 5.026200771331787, "logps/chosen": -0.2929762303829193, "logps/rejected": -4.084146022796631, "loss": 0.459, "nll_loss": 0.4584007263183594, "rewards/accuracies": 1.0, "rewards/chosen": -0.02929762378334999, "rewards/margins": 0.379116952419281, "rewards/rejected": -0.4084146022796631, "step": 1630 }, { "epoch": 0.328, "grad_norm": 0.9978180527687073, "learning_rate": 9.8941941309236e-07, "log_odds_chosen": 5.644251823425293, "log_odds_ratio": -0.006328556686639786, "logits/chosen": 7.31610107421875, "logits/rejected": 5.098010063171387, "logps/chosen": -0.31363964080810547, "logps/rejected": -4.210002422332764, "loss": 0.4491, "nll_loss": 0.4484790861606598, "rewards/accuracies": 1.0, "rewards/chosen": -0.03136396408081055, "rewards/margins": 0.38963621854782104, "rewards/rejected": -0.42100024223327637, "step": 1640 }, { "epoch": 0.33, "grad_norm": 0.8929892778396606, "learning_rate": 9.89290452162736e-07, "log_odds_chosen": 5.869933128356934, "log_odds_ratio": -0.007871742360293865, "logits/chosen": 7.234385013580322, "logits/rejected": 5.100123405456543, "logps/chosen": -0.31613823771476746, "logps/rejected": -4.338244438171387, "loss": 0.4774, "nll_loss": 0.47660261392593384, "rewards/accuracies": 1.0, "rewards/chosen": -0.031613823026418686, "rewards/margins": 0.40221065282821655, "rewards/rejected": -0.43382447957992554, "step": 1650 }, { "epoch": 0.332, "grad_norm": 1.6483993530273438, "learning_rate": 9.891607185767018e-07, "log_odds_chosen": 5.76729679107666, "log_odds_ratio": -0.0046206251718103886, "logits/chosen": 7.368878364562988, "logits/rejected": 5.12404727935791, "logps/chosen": -0.26662683486938477, "logps/rejected": -4.29711389541626, "loss": 0.4281, "nll_loss": 0.4276413023471832, "rewards/accuracies": 1.0, "rewards/chosen": -0.026662686839699745, "rewards/margins": 0.40304869413375854, "rewards/rejected": -0.42971134185791016, "step": 1660 }, { "epoch": 0.334, "grad_norm": 0.9966711401939392, "learning_rate": 9.890302125391239e-07, "log_odds_chosen": 5.832968711853027, "log_odds_ratio": -0.004164249636232853, "logits/chosen": 7.341619968414307, "logits/rejected": 5.166337966918945, "logps/chosen": -0.26732027530670166, "logps/rejected": -4.303147792816162, "loss": 0.4274, "nll_loss": 0.42701855301856995, "rewards/accuracies": 1.0, "rewards/chosen": -0.026732027530670166, "rewards/margins": 0.40358275175094604, "rewards/rejected": -0.430314838886261, "step": 1670 }, { "epoch": 0.336, "grad_norm": 0.971267580986023, "learning_rate": 9.888989342560897e-07, "log_odds_chosen": 5.662961483001709, "log_odds_ratio": -0.005916085094213486, "logits/chosen": 7.246541500091553, "logits/rejected": 4.984644889831543, "logps/chosen": -0.2561892569065094, "logps/rejected": -4.065072059631348, "loss": 0.435, "nll_loss": 0.4344196319580078, "rewards/accuracies": 1.0, "rewards/chosen": -0.02561892196536064, "rewards/margins": 0.38088831305503845, "rewards/rejected": -0.4065072536468506, "step": 1680 }, { "epoch": 0.338, "grad_norm": 1.5333366394042969, "learning_rate": 9.887668839349056e-07, "log_odds_chosen": 6.054771900177002, "log_odds_ratio": -0.0034750483464449644, "logits/chosen": 7.422045707702637, "logits/rejected": 5.326841354370117, "logps/chosen": -0.23771199584007263, "logps/rejected": -4.369995594024658, "loss": 0.3933, "nll_loss": 0.392922967672348, "rewards/accuracies": 1.0, "rewards/chosen": -0.023771202191710472, "rewards/margins": 0.4132283329963684, "rewards/rejected": -0.43699946999549866, "step": 1690 }, { "epoch": 0.34, "grad_norm": 1.1619340181350708, "learning_rate": 9.886340617840966e-07, "log_odds_chosen": 5.701807498931885, "log_odds_ratio": -0.004383867606520653, "logits/chosen": 7.376176357269287, "logits/rejected": 5.26918888092041, "logps/chosen": -0.25686973333358765, "logps/rejected": -4.324838161468506, "loss": 0.4329, "nll_loss": 0.4324212074279785, "rewards/accuracies": 1.0, "rewards/chosen": -0.025686975568532944, "rewards/margins": 0.4067968726158142, "rewards/rejected": -0.43248385190963745, "step": 1700 }, { "epoch": 0.342, "grad_norm": 1.1047449111938477, "learning_rate": 9.885004680134074e-07, "log_odds_chosen": 5.914380073547363, "log_odds_ratio": -0.004231063649058342, "logits/chosen": 7.2370195388793945, "logits/rejected": 5.116887092590332, "logps/chosen": -0.284504234790802, "logps/rejected": -4.427483081817627, "loss": 0.4546, "nll_loss": 0.4542156755924225, "rewards/accuracies": 1.0, "rewards/chosen": -0.02845042571425438, "rewards/margins": 0.4142979085445404, "rewards/rejected": -0.44274836778640747, "step": 1710 }, { "epoch": 0.344, "grad_norm": 1.9082728624343872, "learning_rate": 9.883661028338008e-07, "log_odds_chosen": 5.45181941986084, "log_odds_ratio": -0.00904394406825304, "logits/chosen": 7.160737037658691, "logits/rejected": 4.87823486328125, "logps/chosen": -0.3282908797264099, "logps/rejected": -4.0363850593566895, "loss": 0.461, "nll_loss": 0.46009474992752075, "rewards/accuracies": 1.0, "rewards/chosen": -0.03282908722758293, "rewards/margins": 0.3708093762397766, "rewards/rejected": -0.40363845229148865, "step": 1720 }, { "epoch": 0.346, "grad_norm": 1.1167490482330322, "learning_rate": 9.882309664574573e-07, "log_odds_chosen": 5.662477016448975, "log_odds_ratio": -0.004740270785987377, "logits/chosen": 7.293498992919922, "logits/rejected": 5.12443733215332, "logps/chosen": -0.26413100957870483, "logps/rejected": -4.201347827911377, "loss": 0.42, "nll_loss": 0.41952353715896606, "rewards/accuracies": 1.0, "rewards/chosen": -0.026413097977638245, "rewards/margins": 0.39372164011001587, "rewards/rejected": -0.4201347231864929, "step": 1730 }, { "epoch": 0.348, "grad_norm": 1.2674144506454468, "learning_rate": 9.880950590977763e-07, "log_odds_chosen": 5.584587574005127, "log_odds_ratio": -0.035904061049222946, "logits/chosen": 7.137834072113037, "logits/rejected": 5.167104244232178, "logps/chosen": -0.35630282759666443, "logps/rejected": -4.022552967071533, "loss": 0.4605, "nll_loss": 0.45690956711769104, "rewards/accuracies": 1.0, "rewards/chosen": -0.03563028201460838, "rewards/margins": 0.366625040769577, "rewards/rejected": -0.40225523710250854, "step": 1740 }, { "epoch": 0.35, "grad_norm": 1.4967706203460693, "learning_rate": 9.879583809693736e-07, "log_odds_chosen": 5.348392963409424, "log_odds_ratio": -0.03830765560269356, "logits/chosen": 7.277920722961426, "logits/rejected": 5.277440071105957, "logps/chosen": -0.5377594232559204, "logps/rejected": -4.2842206954956055, "loss": 0.5942, "nll_loss": 0.5903370976448059, "rewards/accuracies": 1.0, "rewards/chosen": -0.05377594381570816, "rewards/margins": 0.3746461570262909, "rewards/rejected": -0.42842212319374084, "step": 1750 }, { "epoch": 0.352, "grad_norm": 1.1275849342346191, "learning_rate": 9.87820932288083e-07, "log_odds_chosen": 6.1195969581604, "log_odds_ratio": -0.0035813343711197376, "logits/chosen": 7.397531986236572, "logits/rejected": 5.275883674621582, "logps/chosen": -0.22719499468803406, "logps/rejected": -4.368670463562012, "loss": 0.3855, "nll_loss": 0.3851153254508972, "rewards/accuracies": 1.0, "rewards/chosen": -0.022719500586390495, "rewards/margins": 0.4141475558280945, "rewards/rejected": -0.4368670880794525, "step": 1760 }, { "epoch": 0.354, "grad_norm": 2.2277450561523438, "learning_rate": 9.876827132709544e-07, "log_odds_chosen": 5.883446216583252, "log_odds_ratio": -0.007893886417150497, "logits/chosen": 7.152438163757324, "logits/rejected": 5.152782440185547, "logps/chosen": -0.2617136240005493, "logps/rejected": -4.215182304382324, "loss": 0.4301, "nll_loss": 0.429318904876709, "rewards/accuracies": 1.0, "rewards/chosen": -0.026171362027525902, "rewards/margins": 0.3953469395637512, "rewards/rejected": -0.42151832580566406, "step": 1770 }, { "epoch": 0.356, "grad_norm": 1.3810962438583374, "learning_rate": 9.875437241362544e-07, "log_odds_chosen": 5.984401226043701, "log_odds_ratio": -0.003971877973526716, "logits/chosen": 7.29552698135376, "logits/rejected": 5.111233711242676, "logps/chosen": -0.24459965527057648, "logps/rejected": -4.305853366851807, "loss": 0.414, "nll_loss": 0.4135836660861969, "rewards/accuracies": 1.0, "rewards/chosen": -0.024459967389702797, "rewards/margins": 0.4061253070831299, "rewards/rejected": -0.4305852949619293, "step": 1780 }, { "epoch": 0.358, "grad_norm": 1.7222627401351929, "learning_rate": 9.874039651034665e-07, "log_odds_chosen": 5.5936665534973145, "log_odds_ratio": -0.00648477440699935, "logits/chosen": 7.253741264343262, "logits/rejected": 5.216595649719238, "logps/chosen": -0.37139391899108887, "logps/rejected": -4.407113075256348, "loss": 0.5198, "nll_loss": 0.5191629528999329, "rewards/accuracies": 1.0, "rewards/chosen": -0.037139393389225006, "rewards/margins": 0.4035719037055969, "rewards/rejected": -0.4407113194465637, "step": 1790 }, { "epoch": 0.36, "grad_norm": 1.4279757738113403, "learning_rate": 9.872634363932886e-07, "log_odds_chosen": 5.946005821228027, "log_odds_ratio": -0.003626496996730566, "logits/chosen": 7.3228302001953125, "logits/rejected": 5.1803998947143555, "logps/chosen": -0.20768947899341583, "logps/rejected": -4.241631984710693, "loss": 0.3663, "nll_loss": 0.36598286032676697, "rewards/accuracies": 1.0, "rewards/chosen": -0.020768947899341583, "rewards/margins": 0.40339428186416626, "rewards/rejected": -0.42416325211524963, "step": 1800 }, { "epoch": 0.362, "grad_norm": 1.6530871391296387, "learning_rate": 9.87122138227635e-07, "log_odds_chosen": 5.9450178146362305, "log_odds_ratio": -0.004707613959908485, "logits/chosen": 7.256194114685059, "logits/rejected": 5.155023574829102, "logps/chosen": -0.31250426173210144, "logps/rejected": -4.401583671569824, "loss": 0.4693, "nll_loss": 0.4688665270805359, "rewards/accuracies": 1.0, "rewards/chosen": -0.031250424683094025, "rewards/margins": 0.408907949924469, "rewards/rejected": -0.44015830755233765, "step": 1810 }, { "epoch": 0.364, "grad_norm": 1.309054970741272, "learning_rate": 9.869800708296346e-07, "log_odds_chosen": 5.441808700561523, "log_odds_ratio": -0.006064875982701778, "logits/chosen": 7.192408561706543, "logits/rejected": 5.255804061889648, "logps/chosen": -0.3377319574356079, "logps/rejected": -4.199339866638184, "loss": 0.5006, "nll_loss": 0.49999991059303284, "rewards/accuracies": 1.0, "rewards/chosen": -0.03377319499850273, "rewards/margins": 0.3861607611179352, "rewards/rejected": -0.4199339747428894, "step": 1820 }, { "epoch": 0.366, "grad_norm": 1.4780160188674927, "learning_rate": 9.868372344236312e-07, "log_odds_chosen": 5.811670303344727, "log_odds_ratio": -0.004406947176903486, "logits/chosen": 7.2221879959106445, "logits/rejected": 5.166553020477295, "logps/chosen": -0.27409496903419495, "logps/rejected": -4.351373195648193, "loss": 0.4291, "nll_loss": 0.42864689230918884, "rewards/accuracies": 1.0, "rewards/chosen": -0.027409493923187256, "rewards/margins": 0.4077278673648834, "rewards/rejected": -0.4351373314857483, "step": 1830 }, { "epoch": 0.368, "grad_norm": 1.1757631301879883, "learning_rate": 9.866936292351834e-07, "log_odds_chosen": 5.653604984283447, "log_odds_ratio": -0.00482721533626318, "logits/chosen": 7.314610481262207, "logits/rejected": 5.389569282531738, "logps/chosen": -0.28096479177474976, "logps/rejected": -4.243836402893066, "loss": 0.4306, "nll_loss": 0.4301210045814514, "rewards/accuracies": 1.0, "rewards/chosen": -0.028096476569771767, "rewards/margins": 0.39628714323043823, "rewards/rejected": -0.42438364028930664, "step": 1840 }, { "epoch": 0.37, "grad_norm": 1.7128921747207642, "learning_rate": 9.865492554910632e-07, "log_odds_chosen": 5.852066516876221, "log_odds_ratio": -0.004294468555599451, "logits/chosen": 7.172349452972412, "logits/rejected": 5.245110511779785, "logps/chosen": -0.2433428317308426, "logps/rejected": -4.159404754638672, "loss": 0.4037, "nll_loss": 0.4033051133155823, "rewards/accuracies": 1.0, "rewards/chosen": -0.02433428354561329, "rewards/margins": 0.3916062116622925, "rewards/rejected": -0.415940523147583, "step": 1850 }, { "epoch": 0.372, "grad_norm": 2.052316665649414, "learning_rate": 9.864041134192563e-07, "log_odds_chosen": 5.049309253692627, "log_odds_ratio": -0.040328554809093475, "logits/chosen": 6.906167030334473, "logits/rejected": 4.95104455947876, "logps/chosen": -0.5196540355682373, "logps/rejected": -4.284266471862793, "loss": 0.6713, "nll_loss": 0.6672333478927612, "rewards/accuracies": 1.0, "rewards/chosen": -0.05196540430188179, "rewards/margins": 0.3764612078666687, "rewards/rejected": -0.428426593542099, "step": 1860 }, { "epoch": 0.374, "grad_norm": 1.0285218954086304, "learning_rate": 9.86258203248962e-07, "log_odds_chosen": 6.184111595153809, "log_odds_ratio": -0.002841468434780836, "logits/chosen": 7.298880100250244, "logits/rejected": 5.223191738128662, "logps/chosen": -0.19230951368808746, "logps/rejected": -4.411667823791504, "loss": 0.3619, "nll_loss": 0.361654132604599, "rewards/accuracies": 1.0, "rewards/chosen": -0.019230952486395836, "rewards/margins": 0.4219358563423157, "rewards/rejected": -0.44116678833961487, "step": 1870 }, { "epoch": 0.376, "grad_norm": 1.4448466300964355, "learning_rate": 9.86111525210592e-07, "log_odds_chosen": 5.856717109680176, "log_odds_ratio": -0.00372381997294724, "logits/chosen": 7.229593753814697, "logits/rejected": 5.231394290924072, "logps/chosen": -0.28699642419815063, "logps/rejected": -4.4279303550720215, "loss": 0.4304, "nll_loss": 0.4300025403499603, "rewards/accuracies": 1.0, "rewards/chosen": -0.028699645772576332, "rewards/margins": 0.41409340500831604, "rewards/rejected": -0.44279298186302185, "step": 1880 }, { "epoch": 0.378, "grad_norm": 0.902667760848999, "learning_rate": 9.859640795357714e-07, "log_odds_chosen": 5.714160919189453, "log_odds_ratio": -0.036413319408893585, "logits/chosen": 7.1261305809021, "logits/rejected": 5.065919399261475, "logps/chosen": -0.45602598786354065, "logps/rejected": -4.468377113342285, "loss": 0.5658, "nll_loss": 0.5621947646141052, "rewards/accuracies": 1.0, "rewards/chosen": -0.045602597296237946, "rewards/margins": 0.4012351632118225, "rewards/rejected": -0.44683772325515747, "step": 1890 }, { "epoch": 0.38, "grad_norm": 0.9819309711456299, "learning_rate": 9.85815866457337e-07, "log_odds_chosen": 6.057186126708984, "log_odds_ratio": -0.03660420700907707, "logits/chosen": 7.2056684494018555, "logits/rejected": 5.045108318328857, "logps/chosen": -0.37533506751060486, "logps/rejected": -4.414210319519043, "loss": 0.4899, "nll_loss": 0.4862174093723297, "rewards/accuracies": 1.0, "rewards/chosen": -0.037533506751060486, "rewards/margins": 0.4038875102996826, "rewards/rejected": -0.4414209723472595, "step": 1900 }, { "epoch": 0.382, "grad_norm": 1.0113606452941895, "learning_rate": 9.85666886209337e-07, "log_odds_chosen": 5.6562418937683105, "log_odds_ratio": -0.0074615320190787315, "logits/chosen": 7.275574684143066, "logits/rejected": 5.218985557556152, "logps/chosen": -0.3481915593147278, "logps/rejected": -4.391164302825928, "loss": 0.5, "nll_loss": 0.4992207884788513, "rewards/accuracies": 1.0, "rewards/chosen": -0.03481915965676308, "rewards/margins": 0.4042973518371582, "rewards/rejected": -0.4391164779663086, "step": 1910 }, { "epoch": 0.384, "grad_norm": 0.9529950618743896, "learning_rate": 9.855171390270324e-07, "log_odds_chosen": 6.023596286773682, "log_odds_ratio": -0.003930752165615559, "logits/chosen": 7.212313652038574, "logits/rejected": 5.062221050262451, "logps/chosen": -0.2773911952972412, "logps/rejected": -4.522246360778809, "loss": 0.4559, "nll_loss": 0.4554991126060486, "rewards/accuracies": 1.0, "rewards/chosen": -0.027739116922020912, "rewards/margins": 0.42448553442955017, "rewards/rejected": -0.45222464203834534, "step": 1920 }, { "epoch": 0.386, "grad_norm": 0.8731659054756165, "learning_rate": 9.853666251468936e-07, "log_odds_chosen": 5.887088298797607, "log_odds_ratio": -0.004586219787597656, "logits/chosen": 7.287647247314453, "logits/rejected": 5.216372966766357, "logps/chosen": -0.27427157759666443, "logps/rejected": -4.276706695556641, "loss": 0.4286, "nll_loss": 0.42818665504455566, "rewards/accuracies": 1.0, "rewards/chosen": -0.02742716111242771, "rewards/margins": 0.40024346113204956, "rewards/rejected": -0.4276706576347351, "step": 1930 }, { "epoch": 0.388, "grad_norm": 1.1291331052780151, "learning_rate": 9.85215344806603e-07, "log_odds_chosen": 5.977851867675781, "log_odds_ratio": -0.003077890258282423, "logits/chosen": 7.251131534576416, "logits/rejected": 5.307425498962402, "logps/chosen": -0.19659049808979034, "logps/rejected": -4.294132232666016, "loss": 0.3491, "nll_loss": 0.34884172677993774, "rewards/accuracies": 1.0, "rewards/chosen": -0.019659049808979034, "rewards/margins": 0.4097541272640228, "rewards/rejected": -0.42941316962242126, "step": 1940 }, { "epoch": 0.39, "grad_norm": 1.5082964897155762, "learning_rate": 9.85063298245053e-07, "log_odds_chosen": 5.862225532531738, "log_odds_ratio": -0.017939943820238113, "logits/chosen": 7.111065864562988, "logits/rejected": 5.037448883056641, "logps/chosen": -0.3104456961154938, "logps/rejected": -4.239745140075684, "loss": 0.4627, "nll_loss": 0.460923433303833, "rewards/accuracies": 1.0, "rewards/chosen": -0.03104456700384617, "rewards/margins": 0.39292997121810913, "rewards/rejected": -0.42397457361221313, "step": 1950 }, { "epoch": 0.392, "grad_norm": 1.3137136697769165, "learning_rate": 9.849104857023454e-07, "log_odds_chosen": 5.387862205505371, "log_odds_ratio": -0.03897368162870407, "logits/chosen": 7.117815971374512, "logits/rejected": 5.029662132263184, "logps/chosen": -0.4494946599006653, "logps/rejected": -4.232203960418701, "loss": 0.5598, "nll_loss": 0.5559412240982056, "rewards/accuracies": 1.0, "rewards/chosen": -0.04494946449995041, "rewards/margins": 0.3782709836959839, "rewards/rejected": -0.4232204854488373, "step": 1960 }, { "epoch": 0.394, "grad_norm": 1.053065299987793, "learning_rate": 9.847569074197926e-07, "log_odds_chosen": 5.579679489135742, "log_odds_ratio": -0.00621079234406352, "logits/chosen": 7.267678260803223, "logits/rejected": 4.996432304382324, "logps/chosen": -0.30525460839271545, "logps/rejected": -4.197161674499512, "loss": 0.4667, "nll_loss": 0.46608585119247437, "rewards/accuracies": 1.0, "rewards/chosen": -0.030525457113981247, "rewards/margins": 0.3891907036304474, "rewards/rejected": -0.4197161793708801, "step": 1970 }, { "epoch": 0.396, "grad_norm": 1.4188255071640015, "learning_rate": 9.846025636399151e-07, "log_odds_chosen": 6.0943217277526855, "log_odds_ratio": -0.003346022218465805, "logits/chosen": 7.326678276062012, "logits/rejected": 5.10103702545166, "logps/chosen": -0.24915584921836853, "logps/rejected": -4.478102684020996, "loss": 0.4208, "nll_loss": 0.42045527696609497, "rewards/accuracies": 1.0, "rewards/chosen": -0.024915587157011032, "rewards/margins": 0.4228946566581726, "rewards/rejected": -0.44781026244163513, "step": 1980 }, { "epoch": 0.398, "grad_norm": 1.0826441049575806, "learning_rate": 9.844474546064434e-07, "log_odds_chosen": 5.572127342224121, "log_odds_ratio": -0.038166593760252, "logits/chosen": 7.152172088623047, "logits/rejected": 5.209120750427246, "logps/chosen": -0.42499589920043945, "logps/rejected": -4.240741729736328, "loss": 0.5101, "nll_loss": 0.506237804889679, "rewards/accuracies": 1.0, "rewards/chosen": -0.04249958693981171, "rewards/margins": 0.38157448172569275, "rewards/rejected": -0.42407411336898804, "step": 1990 }, { "epoch": 0.4, "grad_norm": 0.9536112546920776, "learning_rate": 9.842915805643156e-07, "log_odds_chosen": 5.568290710449219, "log_odds_ratio": -0.005493448581546545, "logits/chosen": 7.238665580749512, "logits/rejected": 5.142437934875488, "logps/chosen": -0.3472997844219208, "logps/rejected": -4.307689666748047, "loss": 0.5093, "nll_loss": 0.5087252855300903, "rewards/accuracies": 1.0, "rewards/chosen": -0.0347299799323082, "rewards/margins": 0.3960389494895935, "rewards/rejected": -0.4307689666748047, "step": 2000 }, { "epoch": 0.402, "grad_norm": 1.8863816261291504, "learning_rate": 9.841349417596778e-07, "log_odds_chosen": 5.548676490783691, "log_odds_ratio": -0.005780138541013002, "logits/chosen": 7.156739234924316, "logits/rejected": 5.038767337799072, "logps/chosen": -0.3093355596065521, "logps/rejected": -4.296778202056885, "loss": 0.4839, "nll_loss": 0.48328322172164917, "rewards/accuracies": 1.0, "rewards/chosen": -0.030933553352952003, "rewards/margins": 0.39874422550201416, "rewards/rejected": -0.4296777844429016, "step": 2010 }, { "epoch": 0.404, "grad_norm": 1.2806062698364258, "learning_rate": 9.839775384398846e-07, "log_odds_chosen": 5.667038917541504, "log_odds_ratio": -0.005467468407005072, "logits/chosen": 7.290290832519531, "logits/rejected": 5.151783466339111, "logps/chosen": -0.31576424837112427, "logps/rejected": -4.28903341293335, "loss": 0.459, "nll_loss": 0.4584733545780182, "rewards/accuracies": 1.0, "rewards/chosen": -0.031576428562402725, "rewards/margins": 0.39732691645622253, "rewards/rejected": -0.42890340089797974, "step": 2020 }, { "epoch": 0.406, "grad_norm": 2.1256818771362305, "learning_rate": 9.838193708534968e-07, "log_odds_chosen": 5.395106792449951, "log_odds_ratio": -0.0420830175280571, "logits/chosen": 6.988595008850098, "logits/rejected": 5.152772903442383, "logps/chosen": -0.45317965745925903, "logps/rejected": -4.191874027252197, "loss": 0.5712, "nll_loss": 0.5669547915458679, "rewards/accuracies": 1.0, "rewards/chosen": -0.04531797021627426, "rewards/margins": 0.3738694489002228, "rewards/rejected": -0.41918739676475525, "step": 2030 }, { "epoch": 0.408, "grad_norm": 1.7794371843338013, "learning_rate": 9.83660439250283e-07, "log_odds_chosen": 5.691781520843506, "log_odds_ratio": -0.0368848480284214, "logits/chosen": 7.049103736877441, "logits/rejected": 5.143183708190918, "logps/chosen": -0.43760520219802856, "logps/rejected": -4.372210502624512, "loss": 0.5448, "nll_loss": 0.5411056280136108, "rewards/accuracies": 1.0, "rewards/chosen": -0.043760523200035095, "rewards/margins": 0.39346054196357727, "rewards/rejected": -0.43722105026245117, "step": 2040 }, { "epoch": 0.41, "grad_norm": 1.342478632926941, "learning_rate": 9.835007438812175e-07, "log_odds_chosen": 5.9547529220581055, "log_odds_ratio": -0.0044191316701471806, "logits/chosen": 7.231217384338379, "logits/rejected": 5.127516746520996, "logps/chosen": -0.2685083746910095, "logps/rejected": -4.373982906341553, "loss": 0.422, "nll_loss": 0.4215773940086365, "rewards/accuracies": 1.0, "rewards/chosen": -0.02685084380209446, "rewards/margins": 0.41054749488830566, "rewards/rejected": -0.4373982846736908, "step": 2050 }, { "epoch": 0.412, "grad_norm": 0.8433020114898682, "learning_rate": 9.833402849984814e-07, "log_odds_chosen": 5.742605209350586, "log_odds_ratio": -0.014783238992094994, "logits/chosen": 7.251817226409912, "logits/rejected": 5.20513916015625, "logps/chosen": -0.41100549697875977, "logps/rejected": -4.436692237854004, "loss": 0.4946, "nll_loss": 0.4931601881980896, "rewards/accuracies": 1.0, "rewards/chosen": -0.041100550442934036, "rewards/margins": 0.40256866812705994, "rewards/rejected": -0.44366922974586487, "step": 2060 }, { "epoch": 0.414, "grad_norm": 1.0080342292785645, "learning_rate": 9.831790628554612e-07, "log_odds_chosen": 5.910843849182129, "log_odds_ratio": -0.0035058080684393644, "logits/chosen": 7.285062313079834, "logits/rejected": 5.199868202209473, "logps/chosen": -0.24587003886699677, "logps/rejected": -4.379849433898926, "loss": 0.3939, "nll_loss": 0.3935766816139221, "rewards/accuracies": 1.0, "rewards/chosen": -0.024587005376815796, "rewards/margins": 0.41339796781539917, "rewards/rejected": -0.4379849433898926, "step": 2070 }, { "epoch": 0.416, "grad_norm": 1.3747555017471313, "learning_rate": 9.830170777067484e-07, "log_odds_chosen": 5.781675338745117, "log_odds_ratio": -0.036599524319171906, "logits/chosen": 7.185873985290527, "logits/rejected": 5.1331305503845215, "logps/chosen": -0.4346732497215271, "logps/rejected": -4.545178413391113, "loss": 0.5349, "nll_loss": 0.5312885642051697, "rewards/accuracies": 1.0, "rewards/chosen": -0.04346733167767525, "rewards/margins": 0.4110504984855652, "rewards/rejected": -0.45451778173446655, "step": 2080 }, { "epoch": 0.418, "grad_norm": 7.138680458068848, "learning_rate": 9.8285432980814e-07, "log_odds_chosen": 5.722869873046875, "log_odds_ratio": -0.004585809074342251, "logits/chosen": 7.236684322357178, "logits/rejected": 5.1494245529174805, "logps/chosen": -0.28951603174209595, "logps/rejected": -4.341964244842529, "loss": 0.4418, "nll_loss": 0.44137588143348694, "rewards/accuracies": 1.0, "rewards/chosen": -0.028951605781912804, "rewards/margins": 0.4052448272705078, "rewards/rejected": -0.4341963827610016, "step": 2090 }, { "epoch": 0.42, "grad_norm": 1.0184482336044312, "learning_rate": 9.826908194166368e-07, "log_odds_chosen": 5.8822021484375, "log_odds_ratio": -0.0047309137880802155, "logits/chosen": 7.276869773864746, "logits/rejected": 5.245121955871582, "logps/chosen": -0.2882796823978424, "logps/rejected": -4.435556888580322, "loss": 0.4526, "nll_loss": 0.4521394670009613, "rewards/accuracies": 1.0, "rewards/chosen": -0.0288279689848423, "rewards/margins": 0.4147277772426605, "rewards/rejected": -0.4435557723045349, "step": 2100 }, { "epoch": 0.422, "grad_norm": 1.3198717832565308, "learning_rate": 9.825265467904444e-07, "log_odds_chosen": 5.768711566925049, "log_odds_ratio": -0.007968591526150703, "logits/chosen": 7.195503234863281, "logits/rejected": 5.095211505889893, "logps/chosen": -0.3321160078048706, "logps/rejected": -4.273484706878662, "loss": 0.468, "nll_loss": 0.4671611785888672, "rewards/accuracies": 1.0, "rewards/chosen": -0.0332116037607193, "rewards/margins": 0.394136905670166, "rewards/rejected": -0.4273485243320465, "step": 2110 }, { "epoch": 0.424, "grad_norm": 1.0794814825057983, "learning_rate": 9.823615121889714e-07, "log_odds_chosen": 5.549058437347412, "log_odds_ratio": -0.005356503650546074, "logits/chosen": 7.219763278961182, "logits/rejected": 5.152971267700195, "logps/chosen": -0.33359426259994507, "logps/rejected": -4.348034858703613, "loss": 0.4957, "nll_loss": 0.49513357877731323, "rewards/accuracies": 1.0, "rewards/chosen": -0.033359430730342865, "rewards/margins": 0.40144410729408264, "rewards/rejected": -0.43480348587036133, "step": 2120 }, { "epoch": 0.426, "grad_norm": 1.103000521659851, "learning_rate": 9.8219571587283e-07, "log_odds_chosen": 5.8652167320251465, "log_odds_ratio": -0.003976141102612019, "logits/chosen": 7.239581108093262, "logits/rejected": 5.071024417877197, "logps/chosen": -0.25768905878067017, "logps/rejected": -4.397727966308594, "loss": 0.4351, "nll_loss": 0.4347432553768158, "rewards/accuracies": 1.0, "rewards/chosen": -0.025768905878067017, "rewards/margins": 0.4140039384365082, "rewards/rejected": -0.4397728443145752, "step": 2130 }, { "epoch": 0.428, "grad_norm": 1.7716073989868164, "learning_rate": 9.820291581038354e-07, "log_odds_chosen": 5.760499000549316, "log_odds_ratio": -0.0037711653858423233, "logits/chosen": 7.17581033706665, "logits/rejected": 4.975937843322754, "logps/chosen": -0.26474589109420776, "logps/rejected": -4.327369213104248, "loss": 0.4255, "nll_loss": 0.4251111149787903, "rewards/accuracies": 1.0, "rewards/chosen": -0.026474589481949806, "rewards/margins": 0.4062623381614685, "rewards/rejected": -0.432736873626709, "step": 2140 }, { "epoch": 0.43, "grad_norm": 2.1355087757110596, "learning_rate": 9.818618391450049e-07, "log_odds_chosen": 5.627618312835693, "log_odds_ratio": -0.005895455367863178, "logits/chosen": 7.1693434715271, "logits/rejected": 4.876114845275879, "logps/chosen": -0.3278542459011078, "logps/rejected": -4.321989059448242, "loss": 0.4838, "nll_loss": 0.4832424223423004, "rewards/accuracies": 1.0, "rewards/chosen": -0.03278542309999466, "rewards/margins": 0.399413526058197, "rewards/rejected": -0.4321989417076111, "step": 2150 }, { "epoch": 0.432, "grad_norm": 1.2235689163208008, "learning_rate": 9.81693759260558e-07, "log_odds_chosen": 5.355124473571777, "log_odds_ratio": -0.019273359328508377, "logits/chosen": 6.969818115234375, "logits/rejected": 4.924574375152588, "logps/chosen": -0.42836809158325195, "logps/rejected": -4.347854137420654, "loss": 0.5541, "nll_loss": 0.5521974563598633, "rewards/accuracies": 1.0, "rewards/chosen": -0.04283680394291878, "rewards/margins": 0.3919486403465271, "rewards/rejected": -0.4347854554653168, "step": 2160 }, { "epoch": 0.434, "grad_norm": 1.0290077924728394, "learning_rate": 9.815249187159157e-07, "log_odds_chosen": 6.032009601593018, "log_odds_ratio": -0.03535761684179306, "logits/chosen": 7.181973934173584, "logits/rejected": 4.920680046081543, "logps/chosen": -0.3861771821975708, "logps/rejected": -4.371733665466309, "loss": 0.4452, "nll_loss": 0.4416409134864807, "rewards/accuracies": 1.0, "rewards/chosen": -0.03861771896481514, "rewards/margins": 0.3985556960105896, "rewards/rejected": -0.43717342615127563, "step": 2170 }, { "epoch": 0.436, "grad_norm": 2.096916675567627, "learning_rate": 9.813553177777004e-07, "log_odds_chosen": 5.981003284454346, "log_odds_ratio": -0.0042080748826265335, "logits/chosen": 7.201444149017334, "logits/rejected": 5.039297580718994, "logps/chosen": -0.28893086314201355, "logps/rejected": -4.481270790100098, "loss": 0.4308, "nll_loss": 0.43041953444480896, "rewards/accuracies": 1.0, "rewards/chosen": -0.028893087059259415, "rewards/margins": 0.419234037399292, "rewards/rejected": -0.4481271803379059, "step": 2180 }, { "epoch": 0.438, "grad_norm": 1.2102628946304321, "learning_rate": 9.811849567137348e-07, "log_odds_chosen": 5.974119663238525, "log_odds_ratio": -0.0040050228126347065, "logits/chosen": 7.207158088684082, "logits/rejected": 5.016327857971191, "logps/chosen": -0.21995648741722107, "logps/rejected": -4.1179351806640625, "loss": 0.393, "nll_loss": 0.3926301598548889, "rewards/accuracies": 1.0, "rewards/chosen": -0.021995652467012405, "rewards/margins": 0.3897978663444519, "rewards/rejected": -0.4117935597896576, "step": 2190 }, { "epoch": 0.44, "grad_norm": 1.0707316398620605, "learning_rate": 9.81013835793043e-07, "log_odds_chosen": 5.866847991943359, "log_odds_ratio": -0.01316093374043703, "logits/chosen": 7.169647216796875, "logits/rejected": 4.883145332336426, "logps/chosen": -0.2827267050743103, "logps/rejected": -4.255131721496582, "loss": 0.4459, "nll_loss": 0.44456878304481506, "rewards/accuracies": 1.0, "rewards/chosen": -0.028272677212953568, "rewards/margins": 0.3972404897212982, "rewards/rejected": -0.4255131781101227, "step": 2200 }, { "epoch": 0.442, "grad_norm": 1.0766632556915283, "learning_rate": 9.808419552858476e-07, "log_odds_chosen": 5.820223808288574, "log_odds_ratio": -0.022114738821983337, "logits/chosen": 7.110598087310791, "logits/rejected": 4.893355846405029, "logps/chosen": -0.32914042472839355, "logps/rejected": -4.368524551391602, "loss": 0.4772, "nll_loss": 0.4749859869480133, "rewards/accuracies": 1.0, "rewards/chosen": -0.03291403874754906, "rewards/margins": 0.4039383828639984, "rewards/rejected": -0.43685245513916016, "step": 2210 }, { "epoch": 0.444, "grad_norm": 1.8624870777130127, "learning_rate": 9.806693154635718e-07, "log_odds_chosen": 6.161287307739258, "log_odds_ratio": -0.002652075607329607, "logits/chosen": 7.2920684814453125, "logits/rejected": 5.16158390045166, "logps/chosen": -0.16930846869945526, "logps/rejected": -4.228781700134277, "loss": 0.3356, "nll_loss": 0.3353388011455536, "rewards/accuracies": 1.0, "rewards/chosen": -0.016930848360061646, "rewards/margins": 0.40594735741615295, "rewards/rejected": -0.4228782057762146, "step": 2220 }, { "epoch": 0.446, "grad_norm": 1.5164976119995117, "learning_rate": 9.804959165988368e-07, "log_odds_chosen": 5.697483062744141, "log_odds_ratio": -0.008660875260829926, "logits/chosen": 7.238577365875244, "logits/rejected": 5.19179630279541, "logps/chosen": -0.34223562479019165, "logps/rejected": -4.285733222961426, "loss": 0.4948, "nll_loss": 0.4938952922821045, "rewards/accuracies": 1.0, "rewards/chosen": -0.034223563969135284, "rewards/margins": 0.39434975385665894, "rewards/rejected": -0.4285733103752136, "step": 2230 }, { "epoch": 0.448, "grad_norm": 0.8817088007926941, "learning_rate": 9.80321758965464e-07, "log_odds_chosen": 5.9715046882629395, "log_odds_ratio": -0.0031883579213172197, "logits/chosen": 7.2090654373168945, "logits/rejected": 5.02892541885376, "logps/chosen": -0.19825336337089539, "logps/rejected": -4.301044940948486, "loss": 0.3663, "nll_loss": 0.36601394414901733, "rewards/accuracies": 1.0, "rewards/chosen": -0.019825337454676628, "rewards/margins": 0.41027921438217163, "rewards/rejected": -0.4301045536994934, "step": 2240 }, { "epoch": 0.45, "grad_norm": 1.0536890029907227, "learning_rate": 9.801468428384716e-07, "log_odds_chosen": 5.799782752990723, "log_odds_ratio": -0.008008052594959736, "logits/chosen": 7.145357608795166, "logits/rejected": 5.1901140213012695, "logps/chosen": -0.3059942126274109, "logps/rejected": -4.344581604003906, "loss": 0.4268, "nll_loss": 0.4260258674621582, "rewards/accuracies": 1.0, "rewards/chosen": -0.03059942089021206, "rewards/margins": 0.4038587510585785, "rewards/rejected": -0.4344581663608551, "step": 2250 }, { "epoch": 0.452, "grad_norm": 1.0702831745147705, "learning_rate": 9.79971168494076e-07, "log_odds_chosen": 6.019318103790283, "log_odds_ratio": -0.003540799720212817, "logits/chosen": 7.1799821853637695, "logits/rejected": 4.883571624755859, "logps/chosen": -0.1720452904701233, "logps/rejected": -4.116283893585205, "loss": 0.3312, "nll_loss": 0.3308761715888977, "rewards/accuracies": 1.0, "rewards/chosen": -0.0172045286744833, "rewards/margins": 0.39442384243011475, "rewards/rejected": -0.4116283357143402, "step": 2260 }, { "epoch": 0.454, "grad_norm": 0.7779252529144287, "learning_rate": 9.797947362096907e-07, "log_odds_chosen": 5.882676124572754, "log_odds_ratio": -0.00475617591291666, "logits/chosen": 7.194193363189697, "logits/rejected": 5.1162567138671875, "logps/chosen": -0.2865826189517975, "logps/rejected": -4.505490779876709, "loss": 0.4537, "nll_loss": 0.4531765878200531, "rewards/accuracies": 1.0, "rewards/chosen": -0.02865826152265072, "rewards/margins": 0.42189082503318787, "rewards/rejected": -0.45054906606674194, "step": 2270 }, { "epoch": 0.456, "grad_norm": 1.1394964456558228, "learning_rate": 9.79617546263927e-07, "log_odds_chosen": 5.669812202453613, "log_odds_ratio": -0.005119840148836374, "logits/chosen": 7.133307456970215, "logits/rejected": 5.045985221862793, "logps/chosen": -0.3632362484931946, "logps/rejected": -4.553731441497803, "loss": 0.5276, "nll_loss": 0.5270571708679199, "rewards/accuracies": 1.0, "rewards/chosen": -0.03632361814379692, "rewards/margins": 0.41904956102371216, "rewards/rejected": -0.45537319779396057, "step": 2280 }, { "epoch": 0.458, "grad_norm": 0.9868494272232056, "learning_rate": 9.794395989365918e-07, "log_odds_chosen": 5.559133529663086, "log_odds_ratio": -0.01318379770964384, "logits/chosen": 7.148597717285156, "logits/rejected": 5.020571708679199, "logps/chosen": -0.30136558413505554, "logps/rejected": -4.013764381408691, "loss": 0.4244, "nll_loss": 0.42309969663619995, "rewards/accuracies": 1.0, "rewards/chosen": -0.030136561021208763, "rewards/margins": 0.37123990058898926, "rewards/rejected": -0.4013764262199402, "step": 2290 }, { "epoch": 0.46, "grad_norm": 1.6430943012237549, "learning_rate": 9.792608945086879e-07, "log_odds_chosen": 5.856534481048584, "log_odds_ratio": -0.007333035115152597, "logits/chosen": 7.20650577545166, "logits/rejected": 4.918787956237793, "logps/chosen": -0.2613426744937897, "logps/rejected": -4.1380510330200195, "loss": 0.4039, "nll_loss": 0.4031354486942291, "rewards/accuracies": 1.0, "rewards/chosen": -0.026134267449378967, "rewards/margins": 0.3876708149909973, "rewards/rejected": -0.4138050675392151, "step": 2300 }, { "epoch": 0.462, "grad_norm": 1.075416922569275, "learning_rate": 9.790814332624142e-07, "log_odds_chosen": 5.223599433898926, "log_odds_ratio": -0.051153361797332764, "logits/chosen": 7.005494117736816, "logits/rejected": 5.013777256011963, "logps/chosen": -0.5031911134719849, "logps/rejected": -4.117071151733398, "loss": 0.5542, "nll_loss": 0.5490965843200684, "rewards/accuracies": 1.0, "rewards/chosen": -0.050319116562604904, "rewards/margins": 0.36138802766799927, "rewards/rejected": -0.4117071032524109, "step": 2310 }, { "epoch": 0.464, "grad_norm": 1.0322649478912354, "learning_rate": 9.789012154811646e-07, "log_odds_chosen": 5.8819899559021, "log_odds_ratio": -0.00485277408733964, "logits/chosen": 7.224032402038574, "logits/rejected": 4.983740329742432, "logps/chosen": -0.2609236240386963, "logps/rejected": -4.2970805168151855, "loss": 0.4206, "nll_loss": 0.4200953543186188, "rewards/accuracies": 1.0, "rewards/chosen": -0.02609236165881157, "rewards/margins": 0.40361571311950684, "rewards/rejected": -0.4297080934047699, "step": 2320 }, { "epoch": 0.466, "grad_norm": 1.114264965057373, "learning_rate": 9.787202414495275e-07, "log_odds_chosen": 6.184950828552246, "log_odds_ratio": -0.0035944569390267134, "logits/chosen": 7.175538063049316, "logits/rejected": 5.152920722961426, "logps/chosen": -0.24146418273448944, "logps/rejected": -4.407698154449463, "loss": 0.4144, "nll_loss": 0.41407841444015503, "rewards/accuracies": 1.0, "rewards/chosen": -0.024146419018507004, "rewards/margins": 0.41662341356277466, "rewards/rejected": -0.44076982140541077, "step": 2330 }, { "epoch": 0.468, "grad_norm": 1.3615304231643677, "learning_rate": 9.785385114532855e-07, "log_odds_chosen": 5.581146717071533, "log_odds_ratio": -0.02095155045390129, "logits/chosen": 7.093208312988281, "logits/rejected": 4.933882713317871, "logps/chosen": -0.3445821702480316, "logps/rejected": -4.25756311416626, "loss": 0.4854, "nll_loss": 0.4833485186100006, "rewards/accuracies": 1.0, "rewards/chosen": -0.0344582162797451, "rewards/margins": 0.3912980556488037, "rewards/rejected": -0.4257562756538391, "step": 2340 }, { "epoch": 0.47, "grad_norm": 1.2122712135314941, "learning_rate": 9.783560257794152e-07, "log_odds_chosen": 6.0543975830078125, "log_odds_ratio": -0.0027961155865341425, "logits/chosen": 7.264103889465332, "logits/rejected": 5.131812572479248, "logps/chosen": -0.2022647112607956, "logps/rejected": -4.4119062423706055, "loss": 0.3672, "nll_loss": 0.36695507168769836, "rewards/accuracies": 1.0, "rewards/chosen": -0.02022647112607956, "rewards/margins": 0.42096418142318726, "rewards/rejected": -0.441190630197525, "step": 2350 }, { "epoch": 0.472, "grad_norm": 1.0783827304840088, "learning_rate": 9.781727847160863e-07, "log_odds_chosen": 6.060303688049316, "log_odds_ratio": -0.003842658596113324, "logits/chosen": 7.135100364685059, "logits/rejected": 5.001148700714111, "logps/chosen": -0.2425350695848465, "logps/rejected": -4.482863426208496, "loss": 0.4004, "nll_loss": 0.3999825119972229, "rewards/accuracies": 1.0, "rewards/chosen": -0.02425350621342659, "rewards/margins": 0.42403292655944824, "rewards/rejected": -0.44828638434410095, "step": 2360 }, { "epoch": 0.474, "grad_norm": 1.3036895990371704, "learning_rate": 9.779887885526615e-07, "log_odds_chosen": 5.7917022705078125, "log_odds_ratio": -0.004657205659896135, "logits/chosen": 7.094339847564697, "logits/rejected": 5.123249053955078, "logps/chosen": -0.2863914668560028, "logps/rejected": -4.39786434173584, "loss": 0.4365, "nll_loss": 0.4360283315181732, "rewards/accuracies": 1.0, "rewards/chosen": -0.02863914705812931, "rewards/margins": 0.41114726662635803, "rewards/rejected": -0.43978649377822876, "step": 2370 }, { "epoch": 0.476, "grad_norm": 1.0770423412322998, "learning_rate": 9.77804037579696e-07, "log_odds_chosen": 5.80265474319458, "log_odds_ratio": -0.03651793301105499, "logits/chosen": 7.064836025238037, "logits/rejected": 5.093796253204346, "logps/chosen": -0.39035969972610474, "logps/rejected": -4.325036525726318, "loss": 0.5049, "nll_loss": 0.5012386441230774, "rewards/accuracies": 1.0, "rewards/chosen": -0.039035968482494354, "rewards/margins": 0.3934676945209503, "rewards/rejected": -0.4325036406517029, "step": 2380 }, { "epoch": 0.478, "grad_norm": 2.1634256839752197, "learning_rate": 9.776185320889362e-07, "log_odds_chosen": 5.945674896240234, "log_odds_ratio": -0.003969812300056219, "logits/chosen": 7.174337863922119, "logits/rejected": 5.0761189460754395, "logps/chosen": -0.2454901933670044, "logps/rejected": -4.493991851806641, "loss": 0.4218, "nll_loss": 0.4214422106742859, "rewards/accuracies": 1.0, "rewards/chosen": -0.02454901859164238, "rewards/margins": 0.42485013604164124, "rewards/rejected": -0.4493991732597351, "step": 2390 }, { "epoch": 0.48, "grad_norm": 3.9406349658966064, "learning_rate": 9.774322723733214e-07, "log_odds_chosen": 5.75554895401001, "log_odds_ratio": -0.03719993680715561, "logits/chosen": 6.947228908538818, "logits/rejected": 4.94131326675415, "logps/chosen": -0.3684941232204437, "logps/rejected": -4.318734169006348, "loss": 0.5209, "nll_loss": 0.5171318650245667, "rewards/accuracies": 1.0, "rewards/chosen": -0.03684941306710243, "rewards/margins": 0.39502397179603577, "rewards/rejected": -0.4318733811378479, "step": 2400 }, { "epoch": 0.482, "grad_norm": 1.6578948497772217, "learning_rate": 9.772452587269806e-07, "log_odds_chosen": 5.827768802642822, "log_odds_ratio": -0.005279929842799902, "logits/chosen": 7.110537052154541, "logits/rejected": 5.004127025604248, "logps/chosen": -0.33146944642066956, "logps/rejected": -4.429327011108398, "loss": 0.4773, "nll_loss": 0.47676047682762146, "rewards/accuracies": 1.0, "rewards/chosen": -0.03314695134758949, "rewards/margins": 0.40978574752807617, "rewards/rejected": -0.44293269515037537, "step": 2410 }, { "epoch": 0.484, "grad_norm": 0.9745287895202637, "learning_rate": 9.770574914452342e-07, "log_odds_chosen": 5.986898899078369, "log_odds_ratio": -0.0036109983921051025, "logits/chosen": 7.141024589538574, "logits/rejected": 5.069266319274902, "logps/chosen": -0.2511236071586609, "logps/rejected": -4.446177005767822, "loss": 0.4153, "nll_loss": 0.4149658679962158, "rewards/accuracies": 1.0, "rewards/chosen": -0.02511236071586609, "rewards/margins": 0.41950535774230957, "rewards/rejected": -0.44461768865585327, "step": 2420 }, { "epoch": 0.486, "grad_norm": 1.1810561418533325, "learning_rate": 9.76868970824592e-07, "log_odds_chosen": 5.783780097961426, "log_odds_ratio": -0.005966469645500183, "logits/chosen": 7.072328090667725, "logits/rejected": 5.048162460327148, "logps/chosen": -0.2974976897239685, "logps/rejected": -4.377941608428955, "loss": 0.4489, "nll_loss": 0.4483070969581604, "rewards/accuracies": 1.0, "rewards/chosen": -0.02974977158010006, "rewards/margins": 0.40804439783096313, "rewards/rejected": -0.43779414892196655, "step": 2430 }, { "epoch": 0.488, "grad_norm": 1.122028112411499, "learning_rate": 9.766796971627543e-07, "log_odds_chosen": 5.993317604064941, "log_odds_ratio": -0.003269766690209508, "logits/chosen": 7.1867170333862305, "logits/rejected": 5.128939628601074, "logps/chosen": -0.23361611366271973, "logps/rejected": -4.478310585021973, "loss": 0.3747, "nll_loss": 0.3743254542350769, "rewards/accuracies": 1.0, "rewards/chosen": -0.023361612111330032, "rewards/margins": 0.4244694709777832, "rewards/rejected": -0.44783106446266174, "step": 2440 }, { "epoch": 0.49, "grad_norm": 1.2895504236221313, "learning_rate": 9.764896707586094e-07, "log_odds_chosen": 6.337594032287598, "log_odds_ratio": -0.0030408487655222416, "logits/chosen": 7.18514347076416, "logits/rejected": 5.002875804901123, "logps/chosen": -0.1850179135799408, "logps/rejected": -4.446319103240967, "loss": 0.3316, "nll_loss": 0.33134251832962036, "rewards/accuracies": 1.0, "rewards/chosen": -0.01850179210305214, "rewards/margins": 0.42613014578819275, "rewards/rejected": -0.4446318745613098, "step": 2450 }, { "epoch": 0.492, "grad_norm": 1.030693531036377, "learning_rate": 9.762988919122354e-07, "log_odds_chosen": 5.939651012420654, "log_odds_ratio": -0.003971798811107874, "logits/chosen": 7.09478759765625, "logits/rejected": 5.19265079498291, "logps/chosen": -0.27173835039138794, "logps/rejected": -4.462666034698486, "loss": 0.4186, "nll_loss": 0.41819295287132263, "rewards/accuracies": 1.0, "rewards/chosen": -0.027173832058906555, "rewards/margins": 0.4190928041934967, "rewards/rejected": -0.44626665115356445, "step": 2460 }, { "epoch": 0.494, "grad_norm": 2.7018139362335205, "learning_rate": 9.76107360924898e-07, "log_odds_chosen": 5.855226039886475, "log_odds_ratio": -0.004332238342612982, "logits/chosen": 7.094228267669678, "logits/rejected": 5.046046733856201, "logps/chosen": -0.2355802059173584, "logps/rejected": -4.259759902954102, "loss": 0.376, "nll_loss": 0.37560686469078064, "rewards/accuracies": 1.0, "rewards/chosen": -0.02355801872909069, "rewards/margins": 0.40241798758506775, "rewards/rejected": -0.425976037979126, "step": 2470 }, { "epoch": 0.496, "grad_norm": 0.8022181987762451, "learning_rate": 9.759150780990507e-07, "log_odds_chosen": 6.144402027130127, "log_odds_ratio": -0.0030495659448206425, "logits/chosen": 7.098886966705322, "logits/rejected": 4.938762187957764, "logps/chosen": -0.19751155376434326, "logps/rejected": -4.327453136444092, "loss": 0.3686, "nll_loss": 0.36834391951560974, "rewards/accuracies": 1.0, "rewards/chosen": -0.019751155748963356, "rewards/margins": 0.4129941463470459, "rewards/rejected": -0.4327453076839447, "step": 2480 }, { "epoch": 0.498, "grad_norm": 0.9581158757209778, "learning_rate": 9.757220437383345e-07, "log_odds_chosen": 5.811981678009033, "log_odds_ratio": -0.004570376127958298, "logits/chosen": 7.119135856628418, "logits/rejected": 5.035684108734131, "logps/chosen": -0.24913497269153595, "logps/rejected": -4.2907514572143555, "loss": 0.4099, "nll_loss": 0.409432977437973, "rewards/accuracies": 1.0, "rewards/chosen": -0.024913497269153595, "rewards/margins": 0.40416163206100464, "rewards/rejected": -0.4290751516819, "step": 2490 }, { "epoch": 0.5, "grad_norm": 0.9699593782424927, "learning_rate": 9.755282581475767e-07, "log_odds_chosen": 5.817929267883301, "log_odds_ratio": -0.0039031878113746643, "logits/chosen": 7.146562099456787, "logits/rejected": 5.024096488952637, "logps/chosen": -0.21198499202728271, "logps/rejected": -4.2261834144592285, "loss": 0.3539, "nll_loss": 0.35354170203208923, "rewards/accuracies": 1.0, "rewards/chosen": -0.02119849994778633, "rewards/margins": 0.40141981840133667, "rewards/rejected": -0.4226182997226715, "step": 2500 }, { "epoch": 0.502, "grad_norm": 1.3769968748092651, "learning_rate": 9.753337216327917e-07, "log_odds_chosen": 5.867251396179199, "log_odds_ratio": -0.003420781809836626, "logits/chosen": 7.089252471923828, "logits/rejected": 5.168013572692871, "logps/chosen": -0.22413067519664764, "logps/rejected": -4.2582197189331055, "loss": 0.3844, "nll_loss": 0.384024441242218, "rewards/accuracies": 1.0, "rewards/chosen": -0.022413069382309914, "rewards/margins": 0.40340882539749146, "rewards/rejected": -0.4258219301700592, "step": 2510 }, { "epoch": 0.504, "grad_norm": 1.0614523887634277, "learning_rate": 9.751384345011787e-07, "log_odds_chosen": 5.936653137207031, "log_odds_ratio": -0.00493913097307086, "logits/chosen": 7.122114658355713, "logits/rejected": 4.9395365715026855, "logps/chosen": -0.24961456656455994, "logps/rejected": -4.201776504516602, "loss": 0.3969, "nll_loss": 0.3963638246059418, "rewards/accuracies": 1.0, "rewards/chosen": -0.024961456656455994, "rewards/margins": 0.3952162265777588, "rewards/rejected": -0.420177698135376, "step": 2520 }, { "epoch": 0.506, "grad_norm": 0.9063993692398071, "learning_rate": 9.74942397061123e-07, "log_odds_chosen": 6.008925437927246, "log_odds_ratio": -0.004553680773824453, "logits/chosen": 7.0527215003967285, "logits/rejected": 5.005249977111816, "logps/chosen": -0.2877780795097351, "logps/rejected": -4.45419979095459, "loss": 0.4499, "nll_loss": 0.4494311809539795, "rewards/accuracies": 1.0, "rewards/chosen": -0.02877780795097351, "rewards/margins": 0.4166422486305237, "rewards/rejected": -0.4454200267791748, "step": 2530 }, { "epoch": 0.508, "grad_norm": 1.1945149898529053, "learning_rate": 9.747456096221945e-07, "log_odds_chosen": 5.93853759765625, "log_odds_ratio": -0.004089950583875179, "logits/chosen": 7.141083717346191, "logits/rejected": 5.051807403564453, "logps/chosen": -0.29563215374946594, "logps/rejected": -4.521162509918213, "loss": 0.4631, "nll_loss": 0.46264225244522095, "rewards/accuracies": 1.0, "rewards/chosen": -0.029563214629888535, "rewards/margins": 0.4225529730319977, "rewards/rejected": -0.4521161615848541, "step": 2540 }, { "epoch": 0.51, "grad_norm": 1.3230482339859009, "learning_rate": 9.745480724951473e-07, "log_odds_chosen": 5.500484466552734, "log_odds_ratio": -0.0382462777197361, "logits/chosen": 6.935909271240234, "logits/rejected": 4.918919563293457, "logps/chosen": -0.45821017026901245, "logps/rejected": -4.234060287475586, "loss": 0.5563, "nll_loss": 0.5525153875350952, "rewards/accuracies": 1.0, "rewards/chosen": -0.045821018517017365, "rewards/margins": 0.3775850236415863, "rewards/rejected": -0.42340603470802307, "step": 2550 }, { "epoch": 0.512, "grad_norm": 1.4995919466018677, "learning_rate": 9.743497859919196e-07, "log_odds_chosen": 5.4575653076171875, "log_odds_ratio": -0.04677724093198776, "logits/chosen": 6.957936763763428, "logits/rejected": 5.054501056671143, "logps/chosen": -0.6180285215377808, "logps/rejected": -4.37651252746582, "loss": 0.6594, "nll_loss": 0.6547147631645203, "rewards/accuracies": 1.0, "rewards/chosen": -0.061802852898836136, "rewards/margins": 0.3758484423160553, "rewards/rejected": -0.43765124678611755, "step": 2560 }, { "epoch": 0.514, "grad_norm": 1.2397574186325073, "learning_rate": 9.741507504256326e-07, "log_odds_chosen": 5.658846855163574, "log_odds_ratio": -0.004397653508931398, "logits/chosen": 7.068001747131348, "logits/rejected": 4.983913898468018, "logps/chosen": -0.269986629486084, "logps/rejected": -4.269055366516113, "loss": 0.4256, "nll_loss": 0.42514973878860474, "rewards/accuracies": 1.0, "rewards/chosen": -0.02699866332113743, "rewards/margins": 0.3999069035053253, "rewards/rejected": -0.4269055724143982, "step": 2570 }, { "epoch": 0.516, "grad_norm": 0.7643338441848755, "learning_rate": 9.73950966110591e-07, "log_odds_chosen": 6.034090518951416, "log_odds_ratio": -0.004354351200163364, "logits/chosen": 7.053378105163574, "logits/rejected": 5.091214179992676, "logps/chosen": -0.2596290707588196, "logps/rejected": -4.441673755645752, "loss": 0.4264, "nll_loss": 0.42598414421081543, "rewards/accuracies": 1.0, "rewards/chosen": -0.025962907820940018, "rewards/margins": 0.4182044565677643, "rewards/rejected": -0.4441673755645752, "step": 2580 }, { "epoch": 0.518, "grad_norm": 1.3650175333023071, "learning_rate": 9.737504333622813e-07, "log_odds_chosen": 5.888009071350098, "log_odds_ratio": -0.003633237909525633, "logits/chosen": 7.0453691482543945, "logits/rejected": 4.994603633880615, "logps/chosen": -0.2530125677585602, "logps/rejected": -4.465452194213867, "loss": 0.4065, "nll_loss": 0.4061064124107361, "rewards/accuracies": 1.0, "rewards/chosen": -0.025301257148385048, "rewards/margins": 0.42124396562576294, "rewards/rejected": -0.446545273065567, "step": 2590 }, { "epoch": 0.52, "grad_norm": 0.9597733616828918, "learning_rate": 9.735491524973721e-07, "log_odds_chosen": 6.1127238273620605, "log_odds_ratio": -0.003729215357452631, "logits/chosen": 7.008172512054443, "logits/rejected": 4.956660747528076, "logps/chosen": -0.2335708886384964, "logps/rejected": -4.378061294555664, "loss": 0.3985, "nll_loss": 0.39817285537719727, "rewards/accuracies": 1.0, "rewards/chosen": -0.02335709147155285, "rewards/margins": 0.4144490659236908, "rewards/rejected": -0.4378061294555664, "step": 2600 }, { "epoch": 0.522, "grad_norm": 0.859712541103363, "learning_rate": 9.733471238337134e-07, "log_odds_chosen": 5.934764862060547, "log_odds_ratio": -0.004030735231935978, "logits/chosen": 7.064033508300781, "logits/rejected": 5.0436506271362305, "logps/chosen": -0.2444593459367752, "logps/rejected": -4.283261775970459, "loss": 0.3923, "nll_loss": 0.39193302392959595, "rewards/accuracies": 1.0, "rewards/chosen": -0.02444593235850334, "rewards/margins": 0.4038802683353424, "rewards/rejected": -0.42832618951797485, "step": 2610 }, { "epoch": 0.524, "grad_norm": 0.7898460626602173, "learning_rate": 9.73144347690336e-07, "log_odds_chosen": 6.187491416931152, "log_odds_ratio": -0.002828532364219427, "logits/chosen": 7.1494855880737305, "logits/rejected": 5.0616350173950195, "logps/chosen": -0.19332340359687805, "logps/rejected": -4.311853885650635, "loss": 0.3459, "nll_loss": 0.34559038281440735, "rewards/accuracies": 1.0, "rewards/chosen": -0.019332339987158775, "rewards/margins": 0.41185301542282104, "rewards/rejected": -0.4311853349208832, "step": 2620 }, { "epoch": 0.526, "grad_norm": 1.2834848165512085, "learning_rate": 9.72940824387451e-07, "log_odds_chosen": 6.130953788757324, "log_odds_ratio": -0.0032100111711770296, "logits/chosen": 7.092158317565918, "logits/rejected": 4.98604679107666, "logps/chosen": -0.2428741455078125, "logps/rejected": -4.516552925109863, "loss": 0.4006, "nll_loss": 0.4002839922904968, "rewards/accuracies": 1.0, "rewards/chosen": -0.02428741380572319, "rewards/margins": 0.42736783623695374, "rewards/rejected": -0.4516552984714508, "step": 2630 }, { "epoch": 0.528, "grad_norm": 1.593633770942688, "learning_rate": 9.727365542464496e-07, "log_odds_chosen": 5.646476745605469, "log_odds_ratio": -0.007242923136800528, "logits/chosen": 7.013239860534668, "logits/rejected": 5.1167707443237305, "logps/chosen": -0.4144660532474518, "logps/rejected": -4.523609161376953, "loss": 0.5589, "nll_loss": 0.5581674575805664, "rewards/accuracies": 1.0, "rewards/chosen": -0.041446611285209656, "rewards/margins": 0.4109143316745758, "rewards/rejected": -0.45236092805862427, "step": 2640 }, { "epoch": 0.53, "grad_norm": 0.9291999340057373, "learning_rate": 9.725315375899023e-07, "log_odds_chosen": 5.862090110778809, "log_odds_ratio": -0.007308448664844036, "logits/chosen": 6.98413610458374, "logits/rejected": 4.971373558044434, "logps/chosen": -0.37488099932670593, "logps/rejected": -4.4934282302856445, "loss": 0.5192, "nll_loss": 0.5184504389762878, "rewards/accuracies": 1.0, "rewards/chosen": -0.03748810291290283, "rewards/margins": 0.41185468435287476, "rewards/rejected": -0.4493428170681, "step": 2650 }, { "epoch": 0.532, "grad_norm": 0.7933661937713623, "learning_rate": 9.723257747415584e-07, "log_odds_chosen": 5.688076496124268, "log_odds_ratio": -0.0051224431954324245, "logits/chosen": 7.008152961730957, "logits/rejected": 4.997590065002441, "logps/chosen": -0.27813521027565, "logps/rejected": -4.29982852935791, "loss": 0.4367, "nll_loss": 0.43617814779281616, "rewards/accuracies": 1.0, "rewards/chosen": -0.02781352400779724, "rewards/margins": 0.40216928720474243, "rewards/rejected": -0.4299828112125397, "step": 2660 }, { "epoch": 0.534, "grad_norm": 1.3520539999008179, "learning_rate": 9.721192660263452e-07, "log_odds_chosen": 5.906854152679443, "log_odds_ratio": -0.003194476244971156, "logits/chosen": 6.947086334228516, "logits/rejected": 4.854525089263916, "logps/chosen": -0.24058739840984344, "logps/rejected": -4.415884971618652, "loss": 0.4079, "nll_loss": 0.4075564444065094, "rewards/accuracies": 1.0, "rewards/chosen": -0.024058738723397255, "rewards/margins": 0.41752976179122925, "rewards/rejected": -0.44158855080604553, "step": 2670 }, { "epoch": 0.536, "grad_norm": 1.2368338108062744, "learning_rate": 9.719120117703686e-07, "log_odds_chosen": 5.984002113342285, "log_odds_ratio": -0.0042824093252420425, "logits/chosen": 6.987722873687744, "logits/rejected": 5.032439231872559, "logps/chosen": -0.27262645959854126, "logps/rejected": -4.443886756896973, "loss": 0.442, "nll_loss": 0.4415791928768158, "rewards/accuracies": 1.0, "rewards/chosen": -0.027262646704912186, "rewards/margins": 0.41712602972984314, "rewards/rejected": -0.4443887174129486, "step": 2680 }, { "epoch": 0.538, "grad_norm": 1.3972927331924438, "learning_rate": 9.71704012300911e-07, "log_odds_chosen": 5.935458183288574, "log_odds_ratio": -0.01674758270382881, "logits/chosen": 6.987430572509766, "logits/rejected": 5.086708068847656, "logps/chosen": -0.38126805424690247, "logps/rejected": -4.555361747741699, "loss": 0.5073, "nll_loss": 0.5055931806564331, "rewards/accuracies": 1.0, "rewards/chosen": -0.03812680393457413, "rewards/margins": 0.41740933060646057, "rewards/rejected": -0.4555361270904541, "step": 2690 }, { "epoch": 0.54, "grad_norm": 1.1218030452728271, "learning_rate": 9.714952679464323e-07, "log_odds_chosen": 5.917917728424072, "log_odds_ratio": -0.003864885540679097, "logits/chosen": 7.002470970153809, "logits/rejected": 5.021410942077637, "logps/chosen": -0.2848735451698303, "logps/rejected": -4.533073902130127, "loss": 0.436, "nll_loss": 0.435588538646698, "rewards/accuracies": 1.0, "rewards/chosen": -0.028487354516983032, "rewards/margins": 0.42482003569602966, "rewards/rejected": -0.4533073902130127, "step": 2700 }, { "epoch": 0.542, "grad_norm": 0.8948553800582886, "learning_rate": 9.712857790365678e-07, "log_odds_chosen": 5.743096351623535, "log_odds_ratio": -0.009176122024655342, "logits/chosen": 6.966290473937988, "logits/rejected": 4.902226448059082, "logps/chosen": -0.38678106665611267, "logps/rejected": -4.454582691192627, "loss": 0.4966, "nll_loss": 0.4956628382205963, "rewards/accuracies": 1.0, "rewards/chosen": -0.038678113371133804, "rewards/margins": 0.4067801535129547, "rewards/rejected": -0.44545823335647583, "step": 2710 }, { "epoch": 0.544, "grad_norm": 1.1452341079711914, "learning_rate": 9.710755459021295e-07, "log_odds_chosen": 5.907994270324707, "log_odds_ratio": -0.0045977248810231686, "logits/chosen": 6.982369899749756, "logits/rejected": 4.94795560836792, "logps/chosen": -0.2651437520980835, "logps/rejected": -4.379322052001953, "loss": 0.4354, "nll_loss": 0.43492013216018677, "rewards/accuracies": 1.0, "rewards/chosen": -0.02651437558233738, "rewards/margins": 0.4114178717136383, "rewards/rejected": -0.43793225288391113, "step": 2720 }, { "epoch": 0.546, "grad_norm": 1.7501866817474365, "learning_rate": 9.708645688751044e-07, "log_odds_chosen": 6.293181419372559, "log_odds_ratio": -0.0026437011547386646, "logits/chosen": 6.9741106033325195, "logits/rejected": 4.964402675628662, "logps/chosen": -0.1810915321111679, "logps/rejected": -4.4909772872924805, "loss": 0.3412, "nll_loss": 0.3409053385257721, "rewards/accuracies": 1.0, "rewards/chosen": -0.01810915395617485, "rewards/margins": 0.430988609790802, "rewards/rejected": -0.44909772276878357, "step": 2730 }, { "epoch": 0.548, "grad_norm": 0.890571653842926, "learning_rate": 9.706528482886533e-07, "log_odds_chosen": 5.9992899894714355, "log_odds_ratio": -0.013877838850021362, "logits/chosen": 6.936367988586426, "logits/rejected": 5.040072441101074, "logps/chosen": -0.30415141582489014, "logps/rejected": -4.377964496612549, "loss": 0.4368, "nll_loss": 0.4354294240474701, "rewards/accuracies": 1.0, "rewards/chosen": -0.030415143817663193, "rewards/margins": 0.4073813557624817, "rewards/rejected": -0.4377964437007904, "step": 2740 }, { "epoch": 0.55, "grad_norm": 1.2041089534759521, "learning_rate": 9.704403844771127e-07, "log_odds_chosen": 5.543812274932861, "log_odds_ratio": -0.042007140815258026, "logits/chosen": 6.64944314956665, "logits/rejected": 4.813943386077881, "logps/chosen": -0.44115763902664185, "logps/rejected": -4.3769211769104, "loss": 0.5785, "nll_loss": 0.574299693107605, "rewards/accuracies": 1.0, "rewards/chosen": -0.044115763157606125, "rewards/margins": 0.39357632398605347, "rewards/rejected": -0.4376921057701111, "step": 2750 }, { "epoch": 0.552, "grad_norm": 3.1762099266052246, "learning_rate": 9.702271777759915e-07, "log_odds_chosen": 5.559386730194092, "log_odds_ratio": -0.03797551244497299, "logits/chosen": 6.837467193603516, "logits/rejected": 4.945069313049316, "logps/chosen": -0.5124744772911072, "logps/rejected": -4.493350982666016, "loss": 0.6104, "nll_loss": 0.6066404581069946, "rewards/accuracies": 1.0, "rewards/chosen": -0.051247455179691315, "rewards/margins": 0.39808768033981323, "rewards/rejected": -0.44933515787124634, "step": 2760 }, { "epoch": 0.554, "grad_norm": 1.2859935760498047, "learning_rate": 9.700132285219721e-07, "log_odds_chosen": 6.166256904602051, "log_odds_ratio": -0.003280543489381671, "logits/chosen": 6.940118312835693, "logits/rejected": 4.944952011108398, "logps/chosen": -0.21502713859081268, "logps/rejected": -4.455073356628418, "loss": 0.3737, "nll_loss": 0.3733840882778168, "rewards/accuracies": 1.0, "rewards/chosen": -0.021502716466784477, "rewards/margins": 0.42400461435317993, "rewards/rejected": -0.445507287979126, "step": 2770 }, { "epoch": 0.556, "grad_norm": 1.1370830535888672, "learning_rate": 9.6979853705291e-07, "log_odds_chosen": 5.9801130294799805, "log_odds_ratio": -0.0034650485031306744, "logits/chosen": 6.931124687194824, "logits/rejected": 4.877668380737305, "logps/chosen": -0.19816707074642181, "logps/rejected": -4.199582099914551, "loss": 0.3429, "nll_loss": 0.34260231256484985, "rewards/accuracies": 1.0, "rewards/chosen": -0.01981670781970024, "rewards/margins": 0.40014147758483887, "rewards/rejected": -0.4199582040309906, "step": 2780 }, { "epoch": 0.558, "grad_norm": 1.377846598625183, "learning_rate": 9.695831037078322e-07, "log_odds_chosen": 6.050197601318359, "log_odds_ratio": -0.004460202995687723, "logits/chosen": 6.95965051651001, "logits/rejected": 4.743739128112793, "logps/chosen": -0.2835105061531067, "logps/rejected": -4.556159019470215, "loss": 0.4516, "nll_loss": 0.4511871337890625, "rewards/accuracies": 1.0, "rewards/chosen": -0.02835104800760746, "rewards/margins": 0.42726483941078186, "rewards/rejected": -0.45561590790748596, "step": 2790 }, { "epoch": 0.56, "grad_norm": 1.2141679525375366, "learning_rate": 9.69366928826937e-07, "log_odds_chosen": 5.7788190841674805, "log_odds_ratio": -0.03736796975135803, "logits/chosen": 6.6829047203063965, "logits/rejected": 4.774948596954346, "logps/chosen": -0.3852924406528473, "logps/rejected": -4.408897876739502, "loss": 0.5235, "nll_loss": 0.5197579264640808, "rewards/accuracies": 1.0, "rewards/chosen": -0.03852924332022667, "rewards/margins": 0.4023605287075043, "rewards/rejected": -0.440889835357666, "step": 2800 }, { "epoch": 0.562, "grad_norm": 0.9323951601982117, "learning_rate": 9.691500127515943e-07, "log_odds_chosen": 6.353205680847168, "log_odds_ratio": -0.0022744606249034405, "logits/chosen": 6.997060298919678, "logits/rejected": 4.881977081298828, "logps/chosen": -0.19023975729942322, "logps/rejected": -4.562615394592285, "loss": 0.3774, "nll_loss": 0.3771989345550537, "rewards/accuracies": 1.0, "rewards/chosen": -0.019023975357413292, "rewards/margins": 0.43723759055137634, "rewards/rejected": -0.456261545419693, "step": 2810 }, { "epoch": 0.564, "grad_norm": 1.8450298309326172, "learning_rate": 9.689323558243444e-07, "log_odds_chosen": 5.752005577087402, "log_odds_ratio": -0.004660186357796192, "logits/chosen": 6.925914764404297, "logits/rejected": 4.861429691314697, "logps/chosen": -0.3213292956352234, "logps/rejected": -4.4868364334106445, "loss": 0.4785, "nll_loss": 0.4780771732330322, "rewards/accuracies": 1.0, "rewards/chosen": -0.03213292732834816, "rewards/margins": 0.41655072569847107, "rewards/rejected": -0.44868364930152893, "step": 2820 }, { "epoch": 0.566, "grad_norm": 0.984520435333252, "learning_rate": 9.68713958388897e-07, "log_odds_chosen": 6.615459442138672, "log_odds_ratio": -0.0016456510638818145, "logits/chosen": 7.054934024810791, "logits/rejected": 4.867163181304932, "logps/chosen": -0.1487729847431183, "logps/rejected": -4.580409526824951, "loss": 0.3114, "nll_loss": 0.3111964464187622, "rewards/accuracies": 1.0, "rewards/chosen": -0.014877298846840858, "rewards/margins": 0.4431636929512024, "rewards/rejected": -0.45804089307785034, "step": 2830 }, { "epoch": 0.568, "grad_norm": 0.9965059757232666, "learning_rate": 9.684948207901314e-07, "log_odds_chosen": 5.465020179748535, "log_odds_ratio": -0.039202023297548294, "logits/chosen": 6.791934013366699, "logits/rejected": 4.7264604568481445, "logps/chosen": -0.41625165939331055, "logps/rejected": -4.301068305969238, "loss": 0.5621, "nll_loss": 0.5581761598587036, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.041625164449214935, "rewards/margins": 0.3884817063808441, "rewards/rejected": -0.43010687828063965, "step": 2840 }, { "epoch": 0.57, "grad_norm": 1.0718120336532593, "learning_rate": 9.68274943374096e-07, "log_odds_chosen": 5.935288906097412, "log_odds_ratio": -0.0036810890305787325, "logits/chosen": 7.034595489501953, "logits/rejected": 4.9372239112854, "logps/chosen": -0.23421737551689148, "logps/rejected": -4.362795829772949, "loss": 0.4002, "nll_loss": 0.39978644251823425, "rewards/accuracies": 1.0, "rewards/chosen": -0.023421740159392357, "rewards/margins": 0.41285791993141174, "rewards/rejected": -0.43627968430519104, "step": 2850 }, { "epoch": 0.572, "grad_norm": 1.0468945503234863, "learning_rate": 9.680543264880075e-07, "log_odds_chosen": 6.091261386871338, "log_odds_ratio": -0.002703175414353609, "logits/chosen": 6.975077152252197, "logits/rejected": 4.935511589050293, "logps/chosen": -0.19542375206947327, "logps/rejected": -4.369900703430176, "loss": 0.349, "nll_loss": 0.348766028881073, "rewards/accuracies": 1.0, "rewards/chosen": -0.019542377442121506, "rewards/margins": 0.4174477458000183, "rewards/rejected": -0.4369901716709137, "step": 2860 }, { "epoch": 0.574, "grad_norm": 1.062721610069275, "learning_rate": 9.678329704802493e-07, "log_odds_chosen": 6.052565574645996, "log_odds_ratio": -0.003278195159509778, "logits/chosen": 6.943784236907959, "logits/rejected": 4.807796478271484, "logps/chosen": -0.284737765789032, "logps/rejected": -4.662905693054199, "loss": 0.4425, "nll_loss": 0.44220542907714844, "rewards/accuracies": 1.0, "rewards/chosen": -0.02847377397119999, "rewards/margins": 0.437816858291626, "rewards/rejected": -0.4662906229496002, "step": 2870 }, { "epoch": 0.576, "grad_norm": 1.231082558631897, "learning_rate": 9.676108757003734e-07, "log_odds_chosen": 6.038345813751221, "log_odds_ratio": -0.003779389662668109, "logits/chosen": 6.933203220367432, "logits/rejected": 4.857470512390137, "logps/chosen": -0.24802866578102112, "logps/rejected": -4.435871124267578, "loss": 0.3903, "nll_loss": 0.3899526596069336, "rewards/accuracies": 1.0, "rewards/chosen": -0.02480286732316017, "rewards/margins": 0.4187842309474945, "rewards/rejected": -0.44358712434768677, "step": 2880 }, { "epoch": 0.578, "grad_norm": 0.8696639537811279, "learning_rate": 9.673880424990977e-07, "log_odds_chosen": 6.100459098815918, "log_odds_ratio": -0.00314835156314075, "logits/chosen": 6.9687323570251465, "logits/rejected": 4.730871200561523, "logps/chosen": -0.21518416702747345, "logps/rejected": -4.393637180328369, "loss": 0.4032, "nll_loss": 0.4028453230857849, "rewards/accuracies": 1.0, "rewards/chosen": -0.021518414840102196, "rewards/margins": 0.41784530878067017, "rewards/rejected": -0.4393637180328369, "step": 2890 }, { "epoch": 0.58, "grad_norm": 0.9046807885169983, "learning_rate": 9.67164471228306e-07, "log_odds_chosen": 6.177165985107422, "log_odds_ratio": -0.003945144824683666, "logits/chosen": 6.994019985198975, "logits/rejected": 4.824427604675293, "logps/chosen": -0.2337941825389862, "logps/rejected": -4.418322563171387, "loss": 0.3946, "nll_loss": 0.3941754102706909, "rewards/accuracies": 1.0, "rewards/chosen": -0.02337941899895668, "rewards/margins": 0.41845282912254333, "rewards/rejected": -0.4418322443962097, "step": 2900 }, { "epoch": 0.582, "grad_norm": 1.4647530317306519, "learning_rate": 9.66940162241048e-07, "log_odds_chosen": 5.897489070892334, "log_odds_ratio": -0.005235307849943638, "logits/chosen": 6.951867580413818, "logits/rejected": 4.782492637634277, "logps/chosen": -0.30330759286880493, "logps/rejected": -4.493199348449707, "loss": 0.4587, "nll_loss": 0.4581443667411804, "rewards/accuracies": 1.0, "rewards/chosen": -0.030330762267112732, "rewards/margins": 0.4189891815185547, "rewards/rejected": -0.4493199288845062, "step": 2910 }, { "epoch": 0.584, "grad_norm": 0.8605162501335144, "learning_rate": 9.66715115891538e-07, "log_odds_chosen": 6.208121299743652, "log_odds_ratio": -0.003376380307599902, "logits/chosen": 6.929304599761963, "logits/rejected": 4.868403434753418, "logps/chosen": -0.21409156918525696, "logps/rejected": -4.433931827545166, "loss": 0.3768, "nll_loss": 0.37646859884262085, "rewards/accuracies": 1.0, "rewards/chosen": -0.021409157663583755, "rewards/margins": 0.4219840466976166, "rewards/rejected": -0.4433932304382324, "step": 2920 }, { "epoch": 0.586, "grad_norm": 1.1506779193878174, "learning_rate": 9.664893325351554e-07, "log_odds_chosen": 6.147149562835693, "log_odds_ratio": -0.0036640171892941, "logits/chosen": 6.976885795593262, "logits/rejected": 4.875080108642578, "logps/chosen": -0.2407902479171753, "logps/rejected": -4.3878068923950195, "loss": 0.3875, "nll_loss": 0.3871701657772064, "rewards/accuracies": 1.0, "rewards/chosen": -0.02407902665436268, "rewards/margins": 0.4147017002105713, "rewards/rejected": -0.4387807250022888, "step": 2930 }, { "epoch": 0.588, "grad_norm": 3.4090864658355713, "learning_rate": 9.662628125284424e-07, "log_odds_chosen": 6.031951427459717, "log_odds_ratio": -0.005007453262805939, "logits/chosen": 6.8610711097717285, "logits/rejected": 4.806985378265381, "logps/chosen": -0.321739137172699, "logps/rejected": -4.622390270233154, "loss": 0.4761, "nll_loss": 0.47557616233825684, "rewards/accuracies": 1.0, "rewards/chosen": -0.03217391297221184, "rewards/margins": 0.4300650954246521, "rewards/rejected": -0.46223902702331543, "step": 2940 }, { "epoch": 0.59, "grad_norm": 1.2161290645599365, "learning_rate": 9.660355562291054e-07, "log_odds_chosen": 6.204380989074707, "log_odds_ratio": -0.003698834450915456, "logits/chosen": 6.955269813537598, "logits/rejected": 4.829334735870361, "logps/chosen": -0.23907765746116638, "logps/rejected": -4.538744926452637, "loss": 0.3974, "nll_loss": 0.39698758721351624, "rewards/accuracies": 1.0, "rewards/chosen": -0.023907765746116638, "rewards/margins": 0.4299667477607727, "rewards/rejected": -0.4538744390010834, "step": 2950 }, { "epoch": 0.592, "grad_norm": 0.8615589737892151, "learning_rate": 9.658075639960129e-07, "log_odds_chosen": 5.613450050354004, "log_odds_ratio": -0.03709098696708679, "logits/chosen": 6.780953407287598, "logits/rejected": 4.785006999969482, "logps/chosen": -0.4838925898075104, "logps/rejected": -4.432929039001465, "loss": 0.5381, "nll_loss": 0.5344233512878418, "rewards/accuracies": 1.0, "rewards/chosen": -0.0483892597258091, "rewards/margins": 0.39490365982055664, "rewards/rejected": -0.4432929456233978, "step": 2960 }, { "epoch": 0.594, "grad_norm": 1.6917359828948975, "learning_rate": 9.655788361891958e-07, "log_odds_chosen": 5.766692638397217, "log_odds_ratio": -0.005349588580429554, "logits/chosen": 6.86934757232666, "logits/rejected": 4.87322473526001, "logps/chosen": -0.3062397837638855, "logps/rejected": -4.429779052734375, "loss": 0.4494, "nll_loss": 0.4488566815853119, "rewards/accuracies": 1.0, "rewards/chosen": -0.03062397800385952, "rewards/margins": 0.4123539328575134, "rewards/rejected": -0.4429779052734375, "step": 2970 }, { "epoch": 0.596, "grad_norm": 1.0229865312576294, "learning_rate": 9.653493731698466e-07, "log_odds_chosen": 6.062826156616211, "log_odds_ratio": -0.0030325683765113354, "logits/chosen": 6.874600410461426, "logits/rejected": 4.827445030212402, "logps/chosen": -0.20613741874694824, "logps/rejected": -4.382088661193848, "loss": 0.3686, "nll_loss": 0.3683159351348877, "rewards/accuracies": 1.0, "rewards/chosen": -0.020613742992281914, "rewards/margins": 0.4175950884819031, "rewards/rejected": -0.43820881843566895, "step": 2980 }, { "epoch": 0.598, "grad_norm": 1.1835602521896362, "learning_rate": 9.651191753003186e-07, "log_odds_chosen": 6.1847639083862305, "log_odds_ratio": -0.004566240590065718, "logits/chosen": 6.8703508377075195, "logits/rejected": 4.771268844604492, "logps/chosen": -0.25019749999046326, "logps/rejected": -4.420385837554932, "loss": 0.4128, "nll_loss": 0.4123150408267975, "rewards/accuracies": 1.0, "rewards/chosen": -0.025019753724336624, "rewards/margins": 0.4170188307762146, "rewards/rejected": -0.4420385956764221, "step": 2990 }, { "epoch": 0.6, "grad_norm": 1.560935616493225, "learning_rate": 9.648882429441256e-07, "log_odds_chosen": 5.991194725036621, "log_odds_ratio": -0.004355542827397585, "logits/chosen": 6.85166072845459, "logits/rejected": 4.764639377593994, "logps/chosen": -0.29722434282302856, "logps/rejected": -4.565263748168945, "loss": 0.4535, "nll_loss": 0.45308002829551697, "rewards/accuracies": 1.0, "rewards/chosen": -0.029722433537244797, "rewards/margins": 0.42680391669273376, "rewards/rejected": -0.45652633905410767, "step": 3000 }, { "epoch": 0.602, "grad_norm": 1.255961298942566, "learning_rate": 9.646565764659415e-07, "log_odds_chosen": 5.964068412780762, "log_odds_ratio": -0.003962772898375988, "logits/chosen": 6.906510829925537, "logits/rejected": 4.754809379577637, "logps/chosen": -0.2704375684261322, "logps/rejected": -4.47653865814209, "loss": 0.4062, "nll_loss": 0.405830442905426, "rewards/accuracies": 1.0, "rewards/chosen": -0.02704375982284546, "rewards/margins": 0.42061010003089905, "rewards/rejected": -0.4476538598537445, "step": 3010 }, { "epoch": 0.604, "grad_norm": 1.266332745552063, "learning_rate": 9.644241762315994e-07, "log_odds_chosen": 5.967362880706787, "log_odds_ratio": -0.007197369821369648, "logits/chosen": 6.894207000732422, "logits/rejected": 4.963929176330566, "logps/chosen": -0.369792640209198, "logps/rejected": -4.700117111206055, "loss": 0.496, "nll_loss": 0.4952927529811859, "rewards/accuracies": 1.0, "rewards/chosen": -0.03697926551103592, "rewards/margins": 0.43303245306015015, "rewards/rejected": -0.47001171112060547, "step": 3020 }, { "epoch": 0.606, "grad_norm": 1.0327953100204468, "learning_rate": 9.641910426080908e-07, "log_odds_chosen": 5.925717830657959, "log_odds_ratio": -0.03774061053991318, "logits/chosen": 6.711656093597412, "logits/rejected": 4.680695533752441, "logps/chosen": -0.346699982881546, "logps/rejected": -4.474590301513672, "loss": 0.5156, "nll_loss": 0.5118131041526794, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.03466999903321266, "rewards/margins": 0.4127890169620514, "rewards/rejected": -0.44745898246765137, "step": 3030 }, { "epoch": 0.608, "grad_norm": 1.0930484533309937, "learning_rate": 9.639571759635654e-07, "log_odds_chosen": 6.081190586090088, "log_odds_ratio": -0.003914904780685902, "logits/chosen": 6.847410678863525, "logits/rejected": 4.728359699249268, "logps/chosen": -0.2632494270801544, "logps/rejected": -4.393155097961426, "loss": 0.4283, "nll_loss": 0.42790985107421875, "rewards/accuracies": 1.0, "rewards/chosen": -0.026324938982725143, "rewards/margins": 0.41299062967300415, "rewards/rejected": -0.4393155574798584, "step": 3040 }, { "epoch": 0.61, "grad_norm": 1.3059108257293701, "learning_rate": 9.637225766673306e-07, "log_odds_chosen": 5.926067352294922, "log_odds_ratio": -0.0033551405649632215, "logits/chosen": 6.926202297210693, "logits/rejected": 4.914649486541748, "logps/chosen": -0.23530378937721252, "logps/rejected": -4.465136528015137, "loss": 0.3829, "nll_loss": 0.3825792372226715, "rewards/accuracies": 1.0, "rewards/chosen": -0.0235303808003664, "rewards/margins": 0.422983318567276, "rewards/rejected": -0.44651371240615845, "step": 3050 }, { "epoch": 0.612, "grad_norm": 0.9290785789489746, "learning_rate": 9.634872450898509e-07, "log_odds_chosen": 6.101790428161621, "log_odds_ratio": -0.0029785074293613434, "logits/chosen": 6.938509464263916, "logits/rejected": 4.815983295440674, "logps/chosen": -0.22676710784435272, "logps/rejected": -4.518226146697998, "loss": 0.3744, "nll_loss": 0.37412697076797485, "rewards/accuracies": 1.0, "rewards/chosen": -0.02267671562731266, "rewards/margins": 0.4291459023952484, "rewards/rejected": -0.4518226087093353, "step": 3060 }, { "epoch": 0.614, "grad_norm": 1.3665109872817993, "learning_rate": 9.63251181602747e-07, "log_odds_chosen": 6.068543434143066, "log_odds_ratio": -0.0031138374470174313, "logits/chosen": 6.955361366271973, "logits/rejected": 4.9303483963012695, "logps/chosen": -0.24933156371116638, "logps/rejected": -4.597203731536865, "loss": 0.4122, "nll_loss": 0.41189780831336975, "rewards/accuracies": 1.0, "rewards/chosen": -0.024933159351348877, "rewards/margins": 0.4347872734069824, "rewards/rejected": -0.4597204625606537, "step": 3070 }, { "epoch": 0.616, "grad_norm": 1.0233341455459595, "learning_rate": 9.63014386578795e-07, "log_odds_chosen": 5.973790168762207, "log_odds_ratio": -0.003919602371752262, "logits/chosen": 6.822132110595703, "logits/rejected": 4.8025126457214355, "logps/chosen": -0.26449865102767944, "logps/rejected": -4.4126386642456055, "loss": 0.422, "nll_loss": 0.42162036895751953, "rewards/accuracies": 1.0, "rewards/chosen": -0.026449864730238914, "rewards/margins": 0.4148139953613281, "rewards/rejected": -0.4412638545036316, "step": 3080 }, { "epoch": 0.618, "grad_norm": 1.2924399375915527, "learning_rate": 9.62776860391927e-07, "log_odds_chosen": 6.137755870819092, "log_odds_ratio": -0.0038653328083455563, "logits/chosen": 6.869757652282715, "logits/rejected": 4.709858417510986, "logps/chosen": -0.23113378882408142, "logps/rejected": -4.435709476470947, "loss": 0.4067, "nll_loss": 0.4063486158847809, "rewards/accuracies": 1.0, "rewards/chosen": -0.023113379254937172, "rewards/margins": 0.42045751214027405, "rewards/rejected": -0.44357091188430786, "step": 3090 }, { "epoch": 0.62, "grad_norm": 1.1115061044692993, "learning_rate": 9.62538603417229e-07, "log_odds_chosen": 6.357248783111572, "log_odds_ratio": -0.0025746747851371765, "logits/chosen": 7.000245571136475, "logits/rejected": 4.847705364227295, "logps/chosen": -0.1936735212802887, "logps/rejected": -4.644345283508301, "loss": 0.3526, "nll_loss": 0.3523901402950287, "rewards/accuracies": 1.0, "rewards/chosen": -0.01936735399067402, "rewards/margins": 0.44506722688674927, "rewards/rejected": -0.46443456411361694, "step": 3100 }, { "epoch": 0.622, "grad_norm": 1.1565918922424316, "learning_rate": 9.622996160309414e-07, "log_odds_chosen": 6.012430191040039, "log_odds_ratio": -0.03661229833960533, "logits/chosen": 6.6815080642700195, "logits/rejected": 4.630860328674316, "logps/chosen": -0.3178529143333435, "logps/rejected": -4.346273899078369, "loss": 0.4637, "nll_loss": 0.46002197265625, "rewards/accuracies": 1.0, "rewards/chosen": -0.03178529068827629, "rewards/margins": 0.40284204483032227, "rewards/rejected": -0.43462735414505005, "step": 3110 }, { "epoch": 0.624, "grad_norm": 1.0130504369735718, "learning_rate": 9.620598986104578e-07, "log_odds_chosen": 5.7093939781188965, "log_odds_ratio": -0.005965926684439182, "logits/chosen": 6.867581844329834, "logits/rejected": 4.840780258178711, "logps/chosen": -0.3259144127368927, "logps/rejected": -4.490423679351807, "loss": 0.4919, "nll_loss": 0.49134644865989685, "rewards/accuracies": 1.0, "rewards/chosen": -0.03259144350886345, "rewards/margins": 0.4164509177207947, "rewards/rejected": -0.449042409658432, "step": 3120 }, { "epoch": 0.626, "grad_norm": 1.0149836540222168, "learning_rate": 9.618194515343249e-07, "log_odds_chosen": 6.618735313415527, "log_odds_ratio": -0.002011980628594756, "logits/chosen": 6.936055660247803, "logits/rejected": 4.904490947723389, "logps/chosen": -0.15638861060142517, "logps/rejected": -4.57242488861084, "loss": 0.3077, "nll_loss": 0.30745142698287964, "rewards/accuracies": 1.0, "rewards/chosen": -0.015638861805200577, "rewards/margins": 0.4416036605834961, "rewards/rejected": -0.45724257826805115, "step": 3130 }, { "epoch": 0.628, "grad_norm": 1.1742640733718872, "learning_rate": 9.615782751822412e-07, "log_odds_chosen": 6.215134620666504, "log_odds_ratio": -0.0028915114235132933, "logits/chosen": 6.997368812561035, "logits/rejected": 4.892775058746338, "logps/chosen": -0.2166542112827301, "logps/rejected": -4.583033561706543, "loss": 0.3794, "nll_loss": 0.3791211247444153, "rewards/accuracies": 1.0, "rewards/chosen": -0.0216654185205698, "rewards/margins": 0.4366379678249359, "rewards/rejected": -0.45830339193344116, "step": 3140 }, { "epoch": 0.63, "grad_norm": 2.352386236190796, "learning_rate": 9.613363699350574e-07, "log_odds_chosen": 6.191835880279541, "log_odds_ratio": -0.004708480555564165, "logits/chosen": 6.957228660583496, "logits/rejected": 5.04957914352417, "logps/chosen": -0.24517324566841125, "logps/rejected": -4.415521144866943, "loss": 0.399, "nll_loss": 0.3985520005226135, "rewards/accuracies": 1.0, "rewards/chosen": -0.024517327547073364, "rewards/margins": 0.4170348048210144, "rewards/rejected": -0.4415521025657654, "step": 3150 }, { "epoch": 0.632, "grad_norm": 1.8692067861557007, "learning_rate": 9.610937361747746e-07, "log_odds_chosen": 6.139657974243164, "log_odds_ratio": -0.0038348392117768526, "logits/chosen": 6.93899393081665, "logits/rejected": 4.875581741333008, "logps/chosen": -0.2159363329410553, "logps/rejected": -4.370583534240723, "loss": 0.3816, "nll_loss": 0.3812391459941864, "rewards/accuracies": 1.0, "rewards/chosen": -0.02159363403916359, "rewards/margins": 0.41546472907066345, "rewards/rejected": -0.43705835938453674, "step": 3160 }, { "epoch": 0.634, "grad_norm": 1.9700640439987183, "learning_rate": 9.608503742845448e-07, "log_odds_chosen": 5.974238395690918, "log_odds_ratio": -0.003971747122704983, "logits/chosen": 6.846986293792725, "logits/rejected": 4.904706954956055, "logps/chosen": -0.2532915472984314, "logps/rejected": -4.376032829284668, "loss": 0.4154, "nll_loss": 0.4150022864341736, "rewards/accuracies": 1.0, "rewards/chosen": -0.02532915212213993, "rewards/margins": 0.4122741222381592, "rewards/rejected": -0.43760329484939575, "step": 3170 }, { "epoch": 0.636, "grad_norm": 1.3329371213912964, "learning_rate": 9.606062846486696e-07, "log_odds_chosen": 5.9187164306640625, "log_odds_ratio": -0.003419716376811266, "logits/chosen": 6.879953861236572, "logits/rejected": 4.787402153015137, "logps/chosen": -0.2593214511871338, "logps/rejected": -4.4850263595581055, "loss": 0.4266, "nll_loss": 0.42624562978744507, "rewards/accuracies": 1.0, "rewards/chosen": -0.025932148098945618, "rewards/margins": 0.42257052659988403, "rewards/rejected": -0.44850268959999084, "step": 3180 }, { "epoch": 0.638, "grad_norm": 0.8411152958869934, "learning_rate": 9.603614676526e-07, "log_odds_chosen": 5.684605598449707, "log_odds_ratio": -0.006016885861754417, "logits/chosen": 6.759622097015381, "logits/rejected": 4.893376350402832, "logps/chosen": -0.3958422541618347, "logps/rejected": -4.4466552734375, "loss": 0.5434, "nll_loss": 0.5427879691123962, "rewards/accuracies": 1.0, "rewards/chosen": -0.03958422690629959, "rewards/margins": 0.40508127212524414, "rewards/rejected": -0.4446655213832855, "step": 3190 }, { "epoch": 0.64, "grad_norm": 1.4364819526672363, "learning_rate": 9.601159236829351e-07, "log_odds_chosen": 5.842034339904785, "log_odds_ratio": -0.007458795793354511, "logits/chosen": 6.762391090393066, "logits/rejected": 4.810338020324707, "logps/chosen": -0.4187234044075012, "logps/rejected": -4.5368146896362305, "loss": 0.5675, "nll_loss": 0.566769003868103, "rewards/accuracies": 1.0, "rewards/chosen": -0.04187234491109848, "rewards/margins": 0.4118090569972992, "rewards/rejected": -0.45368140935897827, "step": 3200 }, { "epoch": 0.642, "grad_norm": 1.1847556829452515, "learning_rate": 9.598696531274226e-07, "log_odds_chosen": 6.216060161590576, "log_odds_ratio": -0.002862032502889633, "logits/chosen": 6.906699180603027, "logits/rejected": 4.826231956481934, "logps/chosen": -0.21701736748218536, "logps/rejected": -4.56675386428833, "loss": 0.3699, "nll_loss": 0.3696367144584656, "rewards/accuracies": 1.0, "rewards/chosen": -0.021701736375689507, "rewards/margins": 0.43497365713119507, "rewards/rejected": -0.45667538046836853, "step": 3210 }, { "epoch": 0.644, "grad_norm": 0.9123293161392212, "learning_rate": 9.596226563749575e-07, "log_odds_chosen": 6.494159698486328, "log_odds_ratio": -0.0023675814736634493, "logits/chosen": 6.937389373779297, "logits/rejected": 4.754617691040039, "logps/chosen": -0.17693521082401276, "logps/rejected": -4.568483352661133, "loss": 0.3294, "nll_loss": 0.3291811943054199, "rewards/accuracies": 1.0, "rewards/chosen": -0.017693523317575455, "rewards/margins": 0.43915480375289917, "rewards/rejected": -0.4568483233451843, "step": 3220 }, { "epoch": 0.646, "grad_norm": 0.9420410394668579, "learning_rate": 9.593749338155808e-07, "log_odds_chosen": 6.553798675537109, "log_odds_ratio": -0.0022558816708624363, "logits/chosen": 6.8527326583862305, "logits/rejected": 4.7832207679748535, "logps/chosen": -0.16571514308452606, "logps/rejected": -4.502490043640137, "loss": 0.3158, "nll_loss": 0.31552720069885254, "rewards/accuracies": 1.0, "rewards/chosen": -0.016571514308452606, "rewards/margins": 0.4336775243282318, "rewards/rejected": -0.4502490162849426, "step": 3230 }, { "epoch": 0.648, "grad_norm": 1.2275550365447998, "learning_rate": 9.591264858404808e-07, "log_odds_chosen": 6.109059810638428, "log_odds_ratio": -0.0026511745527386665, "logits/chosen": 6.849384307861328, "logits/rejected": 4.701358795166016, "logps/chosen": -0.18854305148124695, "logps/rejected": -4.372559070587158, "loss": 0.3505, "nll_loss": 0.35019928216934204, "rewards/accuracies": 1.0, "rewards/chosen": -0.018854303285479546, "rewards/margins": 0.4184016287326813, "rewards/rejected": -0.43725594878196716, "step": 3240 }, { "epoch": 0.65, "grad_norm": 1.1321555376052856, "learning_rate": 9.588773128419905e-07, "log_odds_chosen": 6.5141143798828125, "log_odds_ratio": -0.0019397391006350517, "logits/chosen": 6.842618465423584, "logits/rejected": 4.741979598999023, "logps/chosen": -0.1608838587999344, "logps/rejected": -4.586119651794434, "loss": 0.323, "nll_loss": 0.322787344455719, "rewards/accuracies": 1.0, "rewards/chosen": -0.016088386997580528, "rewards/margins": 0.4425235688686371, "rewards/rejected": -0.45861196517944336, "step": 3250 }, { "epoch": 0.652, "grad_norm": 1.1181703805923462, "learning_rate": 9.586274152135883e-07, "log_odds_chosen": 5.211886405944824, "log_odds_ratio": -0.04175068065524101, "logits/chosen": 6.695764064788818, "logits/rejected": 4.819982528686523, "logps/chosen": -0.6216837763786316, "logps/rejected": -4.346752166748047, "loss": 0.6591, "nll_loss": 0.654919445514679, "rewards/accuracies": 1.0, "rewards/chosen": -0.06216838210821152, "rewards/margins": 0.37250688672065735, "rewards/rejected": -0.43467527627944946, "step": 3260 }, { "epoch": 0.654, "grad_norm": 1.0467872619628906, "learning_rate": 9.583767933498964e-07, "log_odds_chosen": 6.48073673248291, "log_odds_ratio": -0.002234103623777628, "logits/chosen": 6.854666709899902, "logits/rejected": 4.784970283508301, "logps/chosen": -0.15016287565231323, "logps/rejected": -4.499089241027832, "loss": 0.3221, "nll_loss": 0.3218592405319214, "rewards/accuracies": 1.0, "rewards/chosen": -0.015016289427876472, "rewards/margins": 0.43489256501197815, "rewards/rejected": -0.4499088227748871, "step": 3270 }, { "epoch": 0.656, "grad_norm": 0.7921714186668396, "learning_rate": 9.58125447646681e-07, "log_odds_chosen": 6.680395603179932, "log_odds_ratio": -0.0017159419367089868, "logits/chosen": 6.9325151443481445, "logits/rejected": 4.918732166290283, "logps/chosen": -0.12707030773162842, "logps/rejected": -4.465980052947998, "loss": 0.3042, "nll_loss": 0.30399638414382935, "rewards/accuracies": 1.0, "rewards/chosen": -0.012707029469311237, "rewards/margins": 0.4338909983634949, "rewards/rejected": -0.4465980529785156, "step": 3280 }, { "epoch": 0.658, "grad_norm": 1.0841420888900757, "learning_rate": 9.578733785008512e-07, "log_odds_chosen": 5.979531288146973, "log_odds_ratio": -0.006113490555435419, "logits/chosen": 6.820050239562988, "logits/rejected": 4.765440464019775, "logps/chosen": -0.3224744498729706, "logps/rejected": -4.519586086273193, "loss": 0.4658, "nll_loss": 0.46515601873397827, "rewards/accuracies": 1.0, "rewards/chosen": -0.03224744647741318, "rewards/margins": 0.419711172580719, "rewards/rejected": -0.4519585967063904, "step": 3290 }, { "epoch": 0.66, "grad_norm": 1.3061290979385376, "learning_rate": 9.576205863104586e-07, "log_odds_chosen": 5.979447364807129, "log_odds_ratio": -0.003666600678116083, "logits/chosen": 6.789540767669678, "logits/rejected": 4.941833972930908, "logps/chosen": -0.2616361975669861, "logps/rejected": -4.457348823547363, "loss": 0.4101, "nll_loss": 0.40976014733314514, "rewards/accuracies": 1.0, "rewards/chosen": -0.02616361901164055, "rewards/margins": 0.41957125067710876, "rewards/rejected": -0.4457348883152008, "step": 3300 }, { "epoch": 0.662, "grad_norm": 1.0924592018127441, "learning_rate": 9.573670714746972e-07, "log_odds_chosen": 6.217164039611816, "log_odds_ratio": -0.002774735214188695, "logits/chosen": 6.89877986907959, "logits/rejected": 4.93044900894165, "logps/chosen": -0.16620835661888123, "logps/rejected": -4.3400983810424805, "loss": 0.3293, "nll_loss": 0.3290106952190399, "rewards/accuracies": 1.0, "rewards/chosen": -0.016620837152004242, "rewards/margins": 0.41738900542259216, "rewards/rejected": -0.434009850025177, "step": 3310 }, { "epoch": 0.664, "grad_norm": 1.0545133352279663, "learning_rate": 9.571128343939005e-07, "log_odds_chosen": 5.933407306671143, "log_odds_ratio": -0.03723212331533432, "logits/chosen": 6.6626996994018555, "logits/rejected": 4.884574890136719, "logps/chosen": -0.4243057370185852, "logps/rejected": -4.522220134735107, "loss": 0.5045, "nll_loss": 0.5007517337799072, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0424305722117424, "rewards/margins": 0.4097914695739746, "rewards/rejected": -0.4522220194339752, "step": 3320 }, { "epoch": 0.666, "grad_norm": 1.2922015190124512, "learning_rate": 9.568578754695441e-07, "log_odds_chosen": 5.738656997680664, "log_odds_ratio": -0.03825997933745384, "logits/chosen": 6.654119968414307, "logits/rejected": 4.855849266052246, "logps/chosen": -0.5139026641845703, "logps/rejected": -4.658411979675293, "loss": 0.6114, "nll_loss": 0.6076027154922485, "rewards/accuracies": 1.0, "rewards/chosen": -0.051390260457992554, "rewards/margins": 0.4144509434700012, "rewards/rejected": -0.465841144323349, "step": 3330 }, { "epoch": 0.668, "grad_norm": 1.7951455116271973, "learning_rate": 9.566021951042432e-07, "log_odds_chosen": 6.1013875007629395, "log_odds_ratio": -0.002960762707516551, "logits/chosen": 6.815550327301025, "logits/rejected": 4.787686347961426, "logps/chosen": -0.23223021626472473, "logps/rejected": -4.590798377990723, "loss": 0.394, "nll_loss": 0.3937439024448395, "rewards/accuracies": 1.0, "rewards/chosen": -0.023223021999001503, "rewards/margins": 0.43585681915283203, "rewards/rejected": -0.45907989144325256, "step": 3340 }, { "epoch": 0.67, "grad_norm": 1.1026407480239868, "learning_rate": 9.563457937017514e-07, "log_odds_chosen": 6.078001976013184, "log_odds_ratio": -0.00443290127441287, "logits/chosen": 6.752438545227051, "logits/rejected": 4.826934337615967, "logps/chosen": -0.2808952033519745, "logps/rejected": -4.553843975067139, "loss": 0.4312, "nll_loss": 0.4307554364204407, "rewards/accuracies": 1.0, "rewards/chosen": -0.02808951959013939, "rewards/margins": 0.42729490995407104, "rewards/rejected": -0.45538440346717834, "step": 3350 }, { "epoch": 0.672, "grad_norm": 1.2791149616241455, "learning_rate": 9.56088671666962e-07, "log_odds_chosen": 5.934088706970215, "log_odds_ratio": -0.0049879057332873344, "logits/chosen": 6.805753231048584, "logits/rejected": 4.7262444496154785, "logps/chosen": -0.36884549260139465, "logps/rejected": -4.6767897605896, "loss": 0.5149, "nll_loss": 0.514370322227478, "rewards/accuracies": 1.0, "rewards/chosen": -0.036884550005197525, "rewards/margins": 0.4307944178581238, "rewards/rejected": -0.4676790237426758, "step": 3360 }, { "epoch": 0.674, "grad_norm": 0.890902578830719, "learning_rate": 9.558308294059053e-07, "log_odds_chosen": 5.588893890380859, "log_odds_ratio": -0.0720808133482933, "logits/chosen": 6.490678310394287, "logits/rejected": 4.751962184906006, "logps/chosen": -0.7096683979034424, "logps/rejected": -4.609912395477295, "loss": 0.7639, "nll_loss": 0.7566892504692078, "rewards/accuracies": 1.0, "rewards/chosen": -0.07096684724092484, "rewards/margins": 0.3900243639945984, "rewards/rejected": -0.460991233587265, "step": 3370 }, { "epoch": 0.676, "grad_norm": 1.4305604696273804, "learning_rate": 9.555722673257501e-07, "log_odds_chosen": 6.132017612457275, "log_odds_ratio": -0.03635253757238388, "logits/chosen": 6.547123908996582, "logits/rejected": 4.604372978210449, "logps/chosen": -0.2912115752696991, "logps/rejected": -4.4163384437561035, "loss": 0.4549, "nll_loss": 0.45130887627601624, "rewards/accuracies": 1.0, "rewards/chosen": -0.02912116050720215, "rewards/margins": 0.4125126302242279, "rewards/rejected": -0.44163379073143005, "step": 3380 }, { "epoch": 0.678, "grad_norm": 1.1770350933074951, "learning_rate": 9.553129858348005e-07, "log_odds_chosen": 6.150223255157471, "log_odds_ratio": -0.003945477772504091, "logits/chosen": 6.741508483886719, "logits/rejected": 4.798121929168701, "logps/chosen": -0.2534763514995575, "logps/rejected": -4.6352972984313965, "loss": 0.4244, "nll_loss": 0.423966646194458, "rewards/accuracies": 1.0, "rewards/chosen": -0.02534763514995575, "rewards/margins": 0.4381820559501648, "rewards/rejected": -0.4635297358036041, "step": 3390 }, { "epoch": 0.68, "grad_norm": 2.260467290878296, "learning_rate": 9.550529853424978e-07, "log_odds_chosen": 5.98559045791626, "log_odds_ratio": -0.006756530608981848, "logits/chosen": 6.724745750427246, "logits/rejected": 4.837583065032959, "logps/chosen": -0.3363467752933502, "logps/rejected": -4.638557434082031, "loss": 0.4799, "nll_loss": 0.4792361855506897, "rewards/accuracies": 1.0, "rewards/chosen": -0.03363468125462532, "rewards/margins": 0.4302210807800293, "rewards/rejected": -0.4638558030128479, "step": 3400 }, { "epoch": 0.682, "grad_norm": 0.9557954668998718, "learning_rate": 9.547922662594183e-07, "log_odds_chosen": 6.153815269470215, "log_odds_ratio": -0.0033664063084870577, "logits/chosen": 6.676331520080566, "logits/rejected": 4.738353252410889, "logps/chosen": -0.2219506800174713, "logps/rejected": -4.353157997131348, "loss": 0.3762, "nll_loss": 0.37585288286209106, "rewards/accuracies": 1.0, "rewards/chosen": -0.02219506725668907, "rewards/margins": 0.4131208062171936, "rewards/rejected": -0.43531590700149536, "step": 3410 }, { "epoch": 0.684, "grad_norm": 0.9021245837211609, "learning_rate": 9.545308289972726e-07, "log_odds_chosen": 5.657356262207031, "log_odds_ratio": -0.005452731624245644, "logits/chosen": 6.677636623382568, "logits/rejected": 4.718436241149902, "logps/chosen": -0.33909347653388977, "logps/rejected": -4.482512474060059, "loss": 0.4866, "nll_loss": 0.4860571324825287, "rewards/accuracies": 1.0, "rewards/chosen": -0.033909350633621216, "rewards/margins": 0.41434186697006226, "rewards/rejected": -0.44825124740600586, "step": 3420 }, { "epoch": 0.686, "grad_norm": 1.765757441520691, "learning_rate": 9.542686739689063e-07, "log_odds_chosen": 6.363637447357178, "log_odds_ratio": -0.00367589364759624, "logits/chosen": 6.689681053161621, "logits/rejected": 4.738308429718018, "logps/chosen": -0.24853992462158203, "logps/rejected": -4.644752025604248, "loss": 0.4164, "nll_loss": 0.4160144329071045, "rewards/accuracies": 1.0, "rewards/chosen": -0.024853995069861412, "rewards/margins": 0.4396212100982666, "rewards/rejected": -0.46447524428367615, "step": 3430 }, { "epoch": 0.688, "grad_norm": 1.111033320426941, "learning_rate": 9.540058015882977e-07, "log_odds_chosen": 6.138362884521484, "log_odds_ratio": -0.002711248816922307, "logits/chosen": 6.695155143737793, "logits/rejected": 4.778390407562256, "logps/chosen": -0.20664432644844055, "logps/rejected": -4.552805423736572, "loss": 0.3759, "nll_loss": 0.37566831707954407, "rewards/accuracies": 1.0, "rewards/chosen": -0.020664429292082787, "rewards/margins": 0.4346160888671875, "rewards/rejected": -0.45528048276901245, "step": 3440 }, { "epoch": 0.69, "grad_norm": 1.465714693069458, "learning_rate": 9.537422122705583e-07, "log_odds_chosen": 6.063322067260742, "log_odds_ratio": -0.004283384885638952, "logits/chosen": 6.695496559143066, "logits/rejected": 4.826331615447998, "logps/chosen": -0.2565765082836151, "logps/rejected": -4.526566505432129, "loss": 0.4034, "nll_loss": 0.402984082698822, "rewards/accuracies": 1.0, "rewards/chosen": -0.02565765380859375, "rewards/margins": 0.42699894309043884, "rewards/rejected": -0.45265665650367737, "step": 3450 }, { "epoch": 0.692, "grad_norm": 1.200494408607483, "learning_rate": 9.534779064319317e-07, "log_odds_chosen": 6.086178779602051, "log_odds_ratio": -0.03493190556764603, "logits/chosen": 6.664952278137207, "logits/rejected": 4.859691619873047, "logps/chosen": -0.39895597100257874, "logps/rejected": -4.569448947906494, "loss": 0.4726, "nll_loss": 0.4691184163093567, "rewards/accuracies": 1.0, "rewards/chosen": -0.03989559784531593, "rewards/margins": 0.41704925894737244, "rewards/rejected": -0.45694488286972046, "step": 3460 }, { "epoch": 0.694, "grad_norm": 1.0152499675750732, "learning_rate": 9.532128844897926e-07, "log_odds_chosen": 6.038632392883301, "log_odds_ratio": -0.005328078754246235, "logits/chosen": 6.742576599121094, "logits/rejected": 4.848451137542725, "logps/chosen": -0.2910585105419159, "logps/rejected": -4.558623790740967, "loss": 0.4344, "nll_loss": 0.43384799361228943, "rewards/accuracies": 1.0, "rewards/chosen": -0.02910585142672062, "rewards/margins": 0.42675653100013733, "rewards/rejected": -0.4558623731136322, "step": 3470 }, { "epoch": 0.696, "grad_norm": 0.9751112461090088, "learning_rate": 9.529471468626471e-07, "log_odds_chosen": 5.772881984710693, "log_odds_ratio": -0.049028556793928146, "logits/chosen": 6.732418060302734, "logits/rejected": 4.869028568267822, "logps/chosen": -0.5415237545967102, "logps/rejected": -4.627030372619629, "loss": 0.568, "nll_loss": 0.5631148219108582, "rewards/accuracies": 1.0, "rewards/chosen": -0.05415236949920654, "rewards/margins": 0.4085506796836853, "rewards/rejected": -0.46270304918289185, "step": 3480 }, { "epoch": 0.698, "grad_norm": 1.2237615585327148, "learning_rate": 9.526806939701308e-07, "log_odds_chosen": 6.598018646240234, "log_odds_ratio": -0.0022543922532349825, "logits/chosen": 6.728968620300293, "logits/rejected": 4.742864608764648, "logps/chosen": -0.21272091567516327, "logps/rejected": -4.71630859375, "loss": 0.3777, "nll_loss": 0.3775244653224945, "rewards/accuracies": 1.0, "rewards/chosen": -0.021272091194987297, "rewards/margins": 0.45035871863365173, "rewards/rejected": -0.4716308116912842, "step": 3490 }, { "epoch": 0.7, "grad_norm": 0.9681755304336548, "learning_rate": 9.524135262330098e-07, "log_odds_chosen": 6.195248603820801, "log_odds_ratio": -0.0033101532608270645, "logits/chosen": 6.724611759185791, "logits/rejected": 4.739993095397949, "logps/chosen": -0.26688241958618164, "logps/rejected": -4.750565052032471, "loss": 0.4291, "nll_loss": 0.4288058876991272, "rewards/accuracies": 1.0, "rewards/chosen": -0.026688242331147194, "rewards/margins": 0.44836822152137756, "rewards/rejected": -0.4750564694404602, "step": 3500 }, { "epoch": 0.702, "grad_norm": 0.994633674621582, "learning_rate": 9.521456440731779e-07, "log_odds_chosen": 5.964617729187012, "log_odds_ratio": -0.03630734980106354, "logits/chosen": 6.636133670806885, "logits/rejected": 4.7571024894714355, "logps/chosen": -0.43226370215415955, "logps/rejected": -4.571776866912842, "loss": 0.5247, "nll_loss": 0.5210951566696167, "rewards/accuracies": 1.0, "rewards/chosen": -0.043226368725299835, "rewards/margins": 0.4139513373374939, "rewards/rejected": -0.45717769861221313, "step": 3510 }, { "epoch": 0.704, "grad_norm": 1.2725156545639038, "learning_rate": 9.518770479136577e-07, "log_odds_chosen": 6.062389850616455, "log_odds_ratio": -0.03658118098974228, "logits/chosen": 6.620941162109375, "logits/rejected": 4.725488662719727, "logps/chosen": -0.392116904258728, "logps/rejected": -4.547221660614014, "loss": 0.4956, "nll_loss": 0.49191221594810486, "rewards/accuracies": 1.0, "rewards/chosen": -0.0392116941511631, "rewards/margins": 0.41551047563552856, "rewards/rejected": -0.45472216606140137, "step": 3520 }, { "epoch": 0.706, "grad_norm": 1.3568408489227295, "learning_rate": 9.516077381785994e-07, "log_odds_chosen": 5.973536014556885, "log_odds_ratio": -0.005608093924820423, "logits/chosen": 6.756857872009277, "logits/rejected": 4.763613224029541, "logps/chosen": -0.3217596113681793, "logps/rejected": -4.686300754547119, "loss": 0.4721, "nll_loss": 0.47153106331825256, "rewards/accuracies": 1.0, "rewards/chosen": -0.03217596188187599, "rewards/margins": 0.4364541172981262, "rewards/rejected": -0.4686300754547119, "step": 3530 }, { "epoch": 0.708, "grad_norm": 1.109150767326355, "learning_rate": 9.513377152932796e-07, "log_odds_chosen": 5.679347991943359, "log_odds_ratio": -0.00663009425625205, "logits/chosen": 6.570439338684082, "logits/rejected": 4.660685062408447, "logps/chosen": -0.36339640617370605, "logps/rejected": -4.523767471313477, "loss": 0.5054, "nll_loss": 0.5046892166137695, "rewards/accuracies": 1.0, "rewards/chosen": -0.036339644342660904, "rewards/margins": 0.4160371422767639, "rewards/rejected": -0.4523767828941345, "step": 3540 }, { "epoch": 0.71, "grad_norm": 1.2025765180587769, "learning_rate": 9.510669796841013e-07, "log_odds_chosen": 6.222804069519043, "log_odds_ratio": -0.004240462090820074, "logits/chosen": 6.732369422912598, "logits/rejected": 4.714385032653809, "logps/chosen": -0.2582331597805023, "logps/rejected": -4.569441795349121, "loss": 0.4224, "nll_loss": 0.42197632789611816, "rewards/accuracies": 1.0, "rewards/chosen": -0.0258233193308115, "rewards/margins": 0.4311208724975586, "rewards/rejected": -0.45694416761398315, "step": 3550 }, { "epoch": 0.712, "grad_norm": 1.2483786344528198, "learning_rate": 9.507955317785933e-07, "log_odds_chosen": 6.005194664001465, "log_odds_ratio": -0.00481772143393755, "logits/chosen": 6.715165138244629, "logits/rejected": 4.67357873916626, "logps/chosen": -0.3188095688819885, "logps/rejected": -4.599113941192627, "loss": 0.4647, "nll_loss": 0.46421104669570923, "rewards/accuracies": 1.0, "rewards/chosen": -0.031880952417850494, "rewards/margins": 0.42803043127059937, "rewards/rejected": -0.45991140604019165, "step": 3560 }, { "epoch": 0.714, "grad_norm": 1.069321870803833, "learning_rate": 9.505233720054085e-07, "log_odds_chosen": 6.071554660797119, "log_odds_ratio": -0.0030045583844184875, "logits/chosen": 6.755363464355469, "logits/rejected": 4.676332950592041, "logps/chosen": -0.21831181645393372, "logps/rejected": -4.458126068115234, "loss": 0.3905, "nll_loss": 0.39017704129219055, "rewards/accuracies": 1.0, "rewards/chosen": -0.021831179037690163, "rewards/margins": 0.4239814281463623, "rewards/rejected": -0.4458126425743103, "step": 3570 }, { "epoch": 0.716, "grad_norm": 0.9190080761909485, "learning_rate": 9.502505007943247e-07, "log_odds_chosen": 6.385018825531006, "log_odds_ratio": -0.0022671015467494726, "logits/chosen": 6.801658630371094, "logits/rejected": 4.840987205505371, "logps/chosen": -0.17996814846992493, "logps/rejected": -4.546315670013428, "loss": 0.3332, "nll_loss": 0.3329278230667114, "rewards/accuracies": 1.0, "rewards/chosen": -0.01799681782722473, "rewards/margins": 0.4366348385810852, "rewards/rejected": -0.45463162660598755, "step": 3580 }, { "epoch": 0.718, "grad_norm": 0.9835497736930847, "learning_rate": 9.499769185762423e-07, "log_odds_chosen": 6.361479759216309, "log_odds_ratio": -0.0023029407020658255, "logits/chosen": 6.736503601074219, "logits/rejected": 4.6881422996521, "logps/chosen": -0.20891280472278595, "logps/rejected": -4.717375755310059, "loss": 0.3828, "nll_loss": 0.38260170817375183, "rewards/accuracies": 1.0, "rewards/chosen": -0.020891282707452774, "rewards/margins": 0.45084628462791443, "rewards/rejected": -0.4717375338077545, "step": 3590 }, { "epoch": 0.72, "grad_norm": 1.056037187576294, "learning_rate": 9.497026257831855e-07, "log_odds_chosen": 6.154527187347412, "log_odds_ratio": -0.003435468766838312, "logits/chosen": 6.711350440979004, "logits/rejected": 4.793358325958252, "logps/chosen": -0.25790029764175415, "logps/rejected": -4.626272678375244, "loss": 0.4155, "nll_loss": 0.41511449217796326, "rewards/accuracies": 1.0, "rewards/chosen": -0.025790026411414146, "rewards/margins": 0.43683725595474243, "rewards/rejected": -0.4626273214817047, "step": 3600 }, { "epoch": 0.722, "grad_norm": 0.8346394300460815, "learning_rate": 9.494276228482997e-07, "log_odds_chosen": 6.069630146026611, "log_odds_ratio": -0.017301451414823532, "logits/chosen": 6.664294242858887, "logits/rejected": 4.757964134216309, "logps/chosen": -0.34903740882873535, "logps/rejected": -4.558464527130127, "loss": 0.4671, "nll_loss": 0.46541184186935425, "rewards/accuracies": 1.0, "rewards/chosen": -0.034903742372989655, "rewards/margins": 0.4209426939487457, "rewards/rejected": -0.4558464586734772, "step": 3610 }, { "epoch": 0.724, "grad_norm": 0.9139754176139832, "learning_rate": 9.491519102058522e-07, "log_odds_chosen": 6.382798194885254, "log_odds_ratio": -0.03517334163188934, "logits/chosen": 6.642828464508057, "logits/rejected": 4.976870536804199, "logps/chosen": -0.32022109627723694, "logps/rejected": -4.57511043548584, "loss": 0.4267, "nll_loss": 0.42318692803382874, "rewards/accuracies": 1.0, "rewards/chosen": -0.03202211111783981, "rewards/margins": 0.4254889488220215, "rewards/rejected": -0.4575110375881195, "step": 3620 }, { "epoch": 0.726, "grad_norm": 1.153228998184204, "learning_rate": 9.488754882912308e-07, "log_odds_chosen": 6.120534420013428, "log_odds_ratio": -0.004076553508639336, "logits/chosen": 6.666945457458496, "logits/rejected": 4.8834357261657715, "logps/chosen": -0.2514968812465668, "logps/rejected": -4.538965702056885, "loss": 0.4142, "nll_loss": 0.4137750267982483, "rewards/accuracies": 1.0, "rewards/chosen": -0.025149691849946976, "rewards/margins": 0.42874687910079956, "rewards/rejected": -0.4538966119289398, "step": 3630 }, { "epoch": 0.728, "grad_norm": 0.9141550660133362, "learning_rate": 9.485983575409437e-07, "log_odds_chosen": 6.462430000305176, "log_odds_ratio": -0.0020550640765577555, "logits/chosen": 6.767651557922363, "logits/rejected": 4.761670112609863, "logps/chosen": -0.1569090038537979, "logps/rejected": -4.520168304443359, "loss": 0.3323, "nll_loss": 0.33208590745925903, "rewards/accuracies": 1.0, "rewards/chosen": -0.01569090038537979, "rewards/margins": 0.43632596731185913, "rewards/rejected": -0.4520168900489807, "step": 3640 }, { "epoch": 0.73, "grad_norm": 1.4779176712036133, "learning_rate": 9.48320518392618e-07, "log_odds_chosen": 6.307051658630371, "log_odds_ratio": -0.0036566369235515594, "logits/chosen": 6.755928993225098, "logits/rejected": 4.746167182922363, "logps/chosen": -0.2782224118709564, "logps/rejected": -4.801351070404053, "loss": 0.436, "nll_loss": 0.4355931282043457, "rewards/accuracies": 1.0, "rewards/chosen": -0.027822241187095642, "rewards/margins": 0.4523128569126129, "rewards/rejected": -0.48013514280319214, "step": 3650 }, { "epoch": 0.732, "grad_norm": 0.8299224972724915, "learning_rate": 9.480419712849994e-07, "log_odds_chosen": 6.348866939544678, "log_odds_ratio": -0.003741420339792967, "logits/chosen": 6.659091949462891, "logits/rejected": 4.583787441253662, "logps/chosen": -0.24210770428180695, "logps/rejected": -4.653124809265137, "loss": 0.3859, "nll_loss": 0.38548436760902405, "rewards/accuracies": 1.0, "rewards/chosen": -0.024210769683122635, "rewards/margins": 0.44110172986984253, "rewards/rejected": -0.46531254053115845, "step": 3660 }, { "epoch": 0.734, "grad_norm": 1.4500404596328735, "learning_rate": 9.477627166579522e-07, "log_odds_chosen": 6.141175270080566, "log_odds_ratio": -0.003461550921201706, "logits/chosen": 6.6613311767578125, "logits/rejected": 4.6547441482543945, "logps/chosen": -0.2552589476108551, "logps/rejected": -4.599064826965332, "loss": 0.4097, "nll_loss": 0.40931734442710876, "rewards/accuracies": 1.0, "rewards/chosen": -0.02552589401602745, "rewards/margins": 0.43438059091567993, "rewards/rejected": -0.4599064886569977, "step": 3670 }, { "epoch": 0.736, "grad_norm": 0.7817884683609009, "learning_rate": 9.474827549524574e-07, "log_odds_chosen": 5.8537468910217285, "log_odds_ratio": -0.00457662483677268, "logits/chosen": 6.6441192626953125, "logits/rejected": 4.649380683898926, "logps/chosen": -0.2706553637981415, "logps/rejected": -4.479011535644531, "loss": 0.437, "nll_loss": 0.43658891320228577, "rewards/accuracies": 1.0, "rewards/chosen": -0.027065539732575417, "rewards/margins": 0.4208356440067291, "rewards/rejected": -0.4479011595249176, "step": 3680 }, { "epoch": 0.738, "grad_norm": 1.0547524690628052, "learning_rate": 9.472020866106128e-07, "log_odds_chosen": 6.509143829345703, "log_odds_ratio": -0.0022461186163127422, "logits/chosen": 6.694189548492432, "logits/rejected": 4.663470268249512, "logps/chosen": -0.1741502583026886, "logps/rejected": -4.472692012786865, "loss": 0.3316, "nll_loss": 0.3313416540622711, "rewards/accuracies": 1.0, "rewards/chosen": -0.01741502620279789, "rewards/margins": 0.429854154586792, "rewards/rejected": -0.4472692608833313, "step": 3690 }, { "epoch": 0.74, "grad_norm": 1.0655072927474976, "learning_rate": 9.469207120756318e-07, "log_odds_chosen": 6.969532012939453, "log_odds_ratio": -0.0012454413808882236, "logits/chosen": 6.6550493240356445, "logits/rejected": 4.618973255157471, "logps/chosen": -0.11813586950302124, "logps/rejected": -4.7031378746032715, "loss": 0.2773, "nll_loss": 0.27713990211486816, "rewards/accuracies": 1.0, "rewards/chosen": -0.011813587509095669, "rewards/margins": 0.4585002064704895, "rewards/rejected": -0.4703138470649719, "step": 3700 }, { "epoch": 0.742, "grad_norm": 1.2572715282440186, "learning_rate": 9.466386317918436e-07, "log_odds_chosen": 6.088499546051025, "log_odds_ratio": -0.004946433939039707, "logits/chosen": 6.590832710266113, "logits/rejected": 4.688363075256348, "logps/chosen": -0.354200154542923, "logps/rejected": -4.755717754364014, "loss": 0.5, "nll_loss": 0.49952882528305054, "rewards/accuracies": 1.0, "rewards/chosen": -0.0354200154542923, "rewards/margins": 0.44015178084373474, "rewards/rejected": -0.47557181119918823, "step": 3710 }, { "epoch": 0.744, "grad_norm": 0.7931895852088928, "learning_rate": 9.463558462046911e-07, "log_odds_chosen": 6.189723014831543, "log_odds_ratio": -0.010515530593693256, "logits/chosen": 6.567660331726074, "logits/rejected": 4.562296390533447, "logps/chosen": -0.3056243062019348, "logps/rejected": -4.6537885665893555, "loss": 0.4633, "nll_loss": 0.46220558881759644, "rewards/accuracies": 1.0, "rewards/chosen": -0.03056243620812893, "rewards/margins": 0.43481650948524475, "rewards/rejected": -0.46537891030311584, "step": 3720 }, { "epoch": 0.746, "grad_norm": 1.0329134464263916, "learning_rate": 9.460723557607315e-07, "log_odds_chosen": 6.458944797515869, "log_odds_ratio": -0.0026562418788671494, "logits/chosen": 6.675509452819824, "logits/rejected": 4.661811828613281, "logps/chosen": -0.2459965944290161, "logps/rejected": -4.816906929016113, "loss": 0.409, "nll_loss": 0.4087492823600769, "rewards/accuracies": 1.0, "rewards/chosen": -0.02459966018795967, "rewards/margins": 0.4570910334587097, "rewards/rejected": -0.4816907048225403, "step": 3730 }, { "epoch": 0.748, "grad_norm": 1.787111759185791, "learning_rate": 9.45788160907635e-07, "log_odds_chosen": 5.711038112640381, "log_odds_ratio": -0.0071804760955274105, "logits/chosen": 6.486269950866699, "logits/rejected": 4.578497409820557, "logps/chosen": -0.36175066232681274, "logps/rejected": -4.490954399108887, "loss": 0.5272, "nll_loss": 0.5264855623245239, "rewards/accuracies": 1.0, "rewards/chosen": -0.036175064742565155, "rewards/margins": 0.41292041540145874, "rewards/rejected": -0.4490954875946045, "step": 3740 }, { "epoch": 0.75, "grad_norm": 1.172288417816162, "learning_rate": 9.455032620941839e-07, "log_odds_chosen": 6.400683403015137, "log_odds_ratio": -0.0020650653168559074, "logits/chosen": 6.707922458648682, "logits/rejected": 4.567200183868408, "logps/chosen": -0.20412924885749817, "logps/rejected": -4.72115421295166, "loss": 0.373, "nll_loss": 0.37278467416763306, "rewards/accuracies": 1.0, "rewards/chosen": -0.020412925630807877, "rewards/margins": 0.4517025053501129, "rewards/rejected": -0.4721154272556305, "step": 3750 }, { "epoch": 0.752, "grad_norm": 2.844287633895874, "learning_rate": 9.452176597702724e-07, "log_odds_chosen": 5.495478630065918, "log_odds_ratio": -0.0404941700398922, "logits/chosen": 6.448145866394043, "logits/rejected": 4.668438911437988, "logps/chosen": -0.6220987439155579, "logps/rejected": -4.708490371704102, "loss": 0.6773, "nll_loss": 0.6732999086380005, "rewards/accuracies": 1.0, "rewards/chosen": -0.06220986694097519, "rewards/margins": 0.40863919258117676, "rewards/rejected": -0.47084903717041016, "step": 3760 }, { "epoch": 0.754, "grad_norm": 1.2006936073303223, "learning_rate": 9.449313543869056e-07, "log_odds_chosen": 6.2620954513549805, "log_odds_ratio": -0.0034134904853999615, "logits/chosen": 6.606619358062744, "logits/rejected": 4.53889274597168, "logps/chosen": -0.27388444542884827, "logps/rejected": -4.845607757568359, "loss": 0.4232, "nll_loss": 0.422839879989624, "rewards/accuracies": 1.0, "rewards/chosen": -0.027388444170355797, "rewards/margins": 0.45717233419418335, "rewards/rejected": -0.4845608174800873, "step": 3770 }, { "epoch": 0.756, "grad_norm": 0.9860701560974121, "learning_rate": 9.446443463961985e-07, "log_odds_chosen": 6.293400764465332, "log_odds_ratio": -0.0024785210844129324, "logits/chosen": 6.679415702819824, "logits/rejected": 4.616342067718506, "logps/chosen": -0.21211834251880646, "logps/rejected": -4.688812732696533, "loss": 0.3603, "nll_loss": 0.3600924015045166, "rewards/accuracies": 1.0, "rewards/chosen": -0.021211836487054825, "rewards/margins": 0.44766950607299805, "rewards/rejected": -0.46888136863708496, "step": 3780 }, { "epoch": 0.758, "grad_norm": 0.7558427453041077, "learning_rate": 9.443566362513761e-07, "log_odds_chosen": 6.341399669647217, "log_odds_ratio": -0.0029024602845311165, "logits/chosen": 6.620394706726074, "logits/rejected": 4.684639930725098, "logps/chosen": -0.25413745641708374, "logps/rejected": -4.750526428222656, "loss": 0.411, "nll_loss": 0.4106958508491516, "rewards/accuracies": 1.0, "rewards/chosen": -0.025413746014237404, "rewards/margins": 0.4496389329433441, "rewards/rejected": -0.4750526547431946, "step": 3790 }, { "epoch": 0.76, "grad_norm": 0.9181793332099915, "learning_rate": 9.440682244067722e-07, "log_odds_chosen": 6.16724157333374, "log_odds_ratio": -0.005154365207999945, "logits/chosen": 6.566745758056641, "logits/rejected": 4.600834846496582, "logps/chosen": -0.29638510942459106, "logps/rejected": -4.67746114730835, "loss": 0.4522, "nll_loss": 0.4517127573490143, "rewards/accuracies": 1.0, "rewards/chosen": -0.029638510197401047, "rewards/margins": 0.43810757994651794, "rewards/rejected": -0.4677460789680481, "step": 3800 }, { "epoch": 0.762, "grad_norm": 1.0311533212661743, "learning_rate": 9.437791113178282e-07, "log_odds_chosen": 6.339176177978516, "log_odds_ratio": -0.002290437463670969, "logits/chosen": 6.6611008644104, "logits/rejected": 4.658873558044434, "logps/chosen": -0.2037779986858368, "logps/rejected": -4.658268928527832, "loss": 0.3645, "nll_loss": 0.3642996847629547, "rewards/accuracies": 1.0, "rewards/chosen": -0.02037780173122883, "rewards/margins": 0.4454491138458252, "rewards/rejected": -0.4658268988132477, "step": 3810 }, { "epoch": 0.764, "grad_norm": 0.9074738621711731, "learning_rate": 9.434892974410932e-07, "log_odds_chosen": 6.270812034606934, "log_odds_ratio": -0.03288381174206734, "logits/chosen": 6.600484371185303, "logits/rejected": 4.626296043395996, "logps/chosen": -0.3898524045944214, "logps/rejected": -4.735042572021484, "loss": 0.4535, "nll_loss": 0.4501887261867523, "rewards/accuracies": 1.0, "rewards/chosen": -0.0389852412045002, "rewards/margins": 0.434518963098526, "rewards/rejected": -0.4735042452812195, "step": 3820 }, { "epoch": 0.766, "grad_norm": 1.8169771432876587, "learning_rate": 9.431987832342228e-07, "log_odds_chosen": 6.242013454437256, "log_odds_ratio": -0.003058331785723567, "logits/chosen": 6.636113166809082, "logits/rejected": 4.699237823486328, "logps/chosen": -0.2846682071685791, "logps/rejected": -4.769736289978027, "loss": 0.4411, "nll_loss": 0.440819650888443, "rewards/accuracies": 1.0, "rewards/chosen": -0.028466826304793358, "rewards/margins": 0.44850677251815796, "rewards/rejected": -0.47697362303733826, "step": 3830 }, { "epoch": 0.768, "grad_norm": 0.9792589545249939, "learning_rate": 9.429075691559787e-07, "log_odds_chosen": 5.942510604858398, "log_odds_ratio": -0.003470363561064005, "logits/chosen": 6.580155372619629, "logits/rejected": 4.687350749969482, "logps/chosen": -0.31594616174697876, "logps/rejected": -4.697531700134277, "loss": 0.4702, "nll_loss": 0.4698142409324646, "rewards/accuracies": 1.0, "rewards/chosen": -0.031594615429639816, "rewards/margins": 0.4381586015224457, "rewards/rejected": -0.4697532057762146, "step": 3840 }, { "epoch": 0.77, "grad_norm": 1.5158520936965942, "learning_rate": 9.426156556662275e-07, "log_odds_chosen": 6.195994853973389, "log_odds_ratio": -0.004257439635694027, "logits/chosen": 6.616616725921631, "logits/rejected": 4.594165802001953, "logps/chosen": -0.2909700274467468, "logps/rejected": -4.706193447113037, "loss": 0.427, "nll_loss": 0.42653974890708923, "rewards/accuracies": 1.0, "rewards/chosen": -0.029097001999616623, "rewards/margins": 0.4415223002433777, "rewards/rejected": -0.4706193506717682, "step": 3850 }, { "epoch": 0.772, "grad_norm": 1.2915552854537964, "learning_rate": 9.423230432259408e-07, "log_odds_chosen": 6.281500816345215, "log_odds_ratio": -0.003065995406359434, "logits/chosen": 6.634377956390381, "logits/rejected": 4.647149085998535, "logps/chosen": -0.28222498297691345, "logps/rejected": -4.850414752960205, "loss": 0.4389, "nll_loss": 0.4385627806186676, "rewards/accuracies": 1.0, "rewards/chosen": -0.028222501277923584, "rewards/margins": 0.45681896805763245, "rewards/rejected": -0.48504143953323364, "step": 3860 }, { "epoch": 0.774, "grad_norm": 1.3209842443466187, "learning_rate": 9.420297322971932e-07, "log_odds_chosen": 6.727807521820068, "log_odds_ratio": -0.0019263799767941236, "logits/chosen": 6.683464050292969, "logits/rejected": 4.620616436004639, "logps/chosen": -0.21941617131233215, "logps/rejected": -4.992384910583496, "loss": 0.3867, "nll_loss": 0.3864966630935669, "rewards/accuracies": 1.0, "rewards/chosen": -0.021941618993878365, "rewards/margins": 0.4772968888282776, "rewards/rejected": -0.4992384910583496, "step": 3870 }, { "epoch": 0.776, "grad_norm": 0.9914597868919373, "learning_rate": 9.417357233431629e-07, "log_odds_chosen": 6.162102222442627, "log_odds_ratio": -0.034497179090976715, "logits/chosen": 6.622941493988037, "logits/rejected": 4.700266361236572, "logps/chosen": -0.3562375009059906, "logps/rejected": -4.6284918785095215, "loss": 0.4381, "nll_loss": 0.43467020988464355, "rewards/accuracies": 1.0, "rewards/chosen": -0.03562375530600548, "rewards/margins": 0.4272254407405853, "rewards/rejected": -0.4628492295742035, "step": 3880 }, { "epoch": 0.778, "grad_norm": 3.1936731338500977, "learning_rate": 9.414410168281302e-07, "log_odds_chosen": 6.474684238433838, "log_odds_ratio": -0.00251408526673913, "logits/chosen": 6.658877372741699, "logits/rejected": 4.621586322784424, "logps/chosen": -0.20323987305164337, "logps/rejected": -4.669999599456787, "loss": 0.3661, "nll_loss": 0.3658328354358673, "rewards/accuracies": 1.0, "rewards/chosen": -0.020323988050222397, "rewards/margins": 0.44667601585388184, "rewards/rejected": -0.46700000762939453, "step": 3890 }, { "epoch": 0.78, "grad_norm": 1.114815592765808, "learning_rate": 9.411456132174766e-07, "log_odds_chosen": 6.526251792907715, "log_odds_ratio": -0.0022695516236126423, "logits/chosen": 6.5841169357299805, "logits/rejected": 4.509058952331543, "logps/chosen": -0.20976388454437256, "logps/rejected": -4.812465667724609, "loss": 0.3907, "nll_loss": 0.3905121386051178, "rewards/accuracies": 1.0, "rewards/chosen": -0.020976392552256584, "rewards/margins": 0.4602702558040619, "rewards/rejected": -0.48124662041664124, "step": 3900 }, { "epoch": 0.782, "grad_norm": 1.078898549079895, "learning_rate": 9.40849512977685e-07, "log_odds_chosen": 6.612547397613525, "log_odds_ratio": -0.0021586106158792973, "logits/chosen": 6.601372718811035, "logits/rejected": 4.615688323974609, "logps/chosen": -0.19511762261390686, "logps/rejected": -4.806479454040527, "loss": 0.3508, "nll_loss": 0.35056668519973755, "rewards/accuracies": 1.0, "rewards/chosen": -0.019511764869093895, "rewards/margins": 0.46113619208335876, "rewards/rejected": -0.4806479513645172, "step": 3910 }, { "epoch": 0.784, "grad_norm": 1.060619592666626, "learning_rate": 9.405527165763383e-07, "log_odds_chosen": 6.68032693862915, "log_odds_ratio": -0.0022846513893455267, "logits/chosen": 6.537921905517578, "logits/rejected": 4.637419700622559, "logps/chosen": -0.25082045793533325, "logps/rejected": -4.952718257904053, "loss": 0.4075, "nll_loss": 0.407231867313385, "rewards/accuracies": 1.0, "rewards/chosen": -0.025082046166062355, "rewards/margins": 0.47018980979919434, "rewards/rejected": -0.49527183175086975, "step": 3920 }, { "epoch": 0.786, "grad_norm": 0.9896897673606873, "learning_rate": 9.40255224482118e-07, "log_odds_chosen": 6.36956262588501, "log_odds_ratio": -0.004407832864671946, "logits/chosen": 6.563521385192871, "logits/rejected": 4.49956750869751, "logps/chosen": -0.2755076289176941, "logps/rejected": -4.901342391967773, "loss": 0.4531, "nll_loss": 0.4527057111263275, "rewards/accuracies": 1.0, "rewards/chosen": -0.02755076251924038, "rewards/margins": 0.4625834822654724, "rewards/rejected": -0.49013423919677734, "step": 3930 }, { "epoch": 0.788, "grad_norm": 0.8739743232727051, "learning_rate": 9.399570371648051e-07, "log_odds_chosen": 6.255387783050537, "log_odds_ratio": -0.0034497049637138844, "logits/chosen": 6.556182861328125, "logits/rejected": 4.7083740234375, "logps/chosen": -0.2576831877231598, "logps/rejected": -4.6777191162109375, "loss": 0.4288, "nll_loss": 0.42842262983322144, "rewards/accuracies": 1.0, "rewards/chosen": -0.02576831914484501, "rewards/margins": 0.44200363755226135, "rewards/rejected": -0.4677719473838806, "step": 3940 }, { "epoch": 0.79, "grad_norm": 1.2166273593902588, "learning_rate": 9.39658155095278e-07, "log_odds_chosen": 6.182577610015869, "log_odds_ratio": -0.002925105392932892, "logits/chosen": 6.564701080322266, "logits/rejected": 4.725858688354492, "logps/chosen": -0.26298266649246216, "logps/rejected": -4.767263412475586, "loss": 0.4242, "nll_loss": 0.4239206314086914, "rewards/accuracies": 1.0, "rewards/chosen": -0.026298265904188156, "rewards/margins": 0.4504280686378479, "rewards/rejected": -0.47672635316848755, "step": 3950 }, { "epoch": 0.792, "grad_norm": 1.0176589488983154, "learning_rate": 9.393585787455123e-07, "log_odds_chosen": 6.10490083694458, "log_odds_ratio": -0.06972827017307281, "logits/chosen": 6.364744663238525, "logits/rejected": 4.6489057540893555, "logps/chosen": -0.6333247423171997, "logps/rejected": -4.745326995849609, "loss": 0.6712, "nll_loss": 0.6642202138900757, "rewards/accuracies": 1.0, "rewards/chosen": -0.06333247572183609, "rewards/margins": 0.4112001955509186, "rewards/rejected": -0.4745326638221741, "step": 3960 }, { "epoch": 0.794, "grad_norm": 1.0189268589019775, "learning_rate": 9.390583085885799e-07, "log_odds_chosen": 6.526806831359863, "log_odds_ratio": -0.0025084775406867266, "logits/chosen": 6.5134453773498535, "logits/rejected": 4.618073463439941, "logps/chosen": -0.23019881546497345, "logps/rejected": -4.953539848327637, "loss": 0.3904, "nll_loss": 0.3901728093624115, "rewards/accuracies": 1.0, "rewards/chosen": -0.023019883781671524, "rewards/margins": 0.4723341464996338, "rewards/rejected": -0.4953540861606598, "step": 3970 }, { "epoch": 0.796, "grad_norm": 1.1985210180282593, "learning_rate": 9.387573450986484e-07, "log_odds_chosen": 6.811542510986328, "log_odds_ratio": -0.00216773827560246, "logits/chosen": 6.598989009857178, "logits/rejected": 4.511253833770752, "logps/chosen": -0.1937006264925003, "logps/rejected": -5.046412944793701, "loss": 0.3391, "nll_loss": 0.3388502299785614, "rewards/accuracies": 1.0, "rewards/chosen": -0.0193700660020113, "rewards/margins": 0.485271155834198, "rewards/rejected": -0.5046412348747253, "step": 3980 }, { "epoch": 0.798, "grad_norm": 4.838850021362305, "learning_rate": 9.384556887509801e-07, "log_odds_chosen": 6.427767276763916, "log_odds_ratio": -0.009560179896652699, "logits/chosen": 6.510036468505859, "logits/rejected": 4.55327033996582, "logps/chosen": -0.29675811529159546, "logps/rejected": -4.667016983032227, "loss": 0.4345, "nll_loss": 0.4335672855377197, "rewards/accuracies": 1.0, "rewards/chosen": -0.029675811529159546, "rewards/margins": 0.43702587485313416, "rewards/rejected": -0.4667016863822937, "step": 3990 }, { "epoch": 0.8, "grad_norm": 0.8917582035064697, "learning_rate": 9.381533400219317e-07, "log_odds_chosen": 6.294993877410889, "log_odds_ratio": -0.004421303514391184, "logits/chosen": 6.481747627258301, "logits/rejected": 4.75466775894165, "logps/chosen": -0.3517534136772156, "logps/rejected": -5.000483512878418, "loss": 0.5081, "nll_loss": 0.5077053308486938, "rewards/accuracies": 1.0, "rewards/chosen": -0.03517534211277962, "rewards/margins": 0.4648730158805847, "rewards/rejected": -0.5000484585762024, "step": 4000 }, { "epoch": 0.802, "grad_norm": 0.8133851885795593, "learning_rate": 9.378502993889533e-07, "log_odds_chosen": 6.54354190826416, "log_odds_ratio": -0.0020702355541288853, "logits/chosen": 6.564396858215332, "logits/rejected": 4.621206760406494, "logps/chosen": -0.24004271626472473, "logps/rejected": -4.992854595184326, "loss": 0.3993, "nll_loss": 0.39911654591560364, "rewards/accuracies": 1.0, "rewards/chosen": -0.024004273116588593, "rewards/margins": 0.4752812385559082, "rewards/rejected": -0.4992854595184326, "step": 4010 }, { "epoch": 0.804, "grad_norm": 0.8337978720664978, "learning_rate": 9.375465673305868e-07, "log_odds_chosen": 6.498175621032715, "log_odds_ratio": -0.002247690688818693, "logits/chosen": 6.544095516204834, "logits/rejected": 4.506018161773682, "logps/chosen": -0.2607877850532532, "logps/rejected": -5.060286998748779, "loss": 0.4111, "nll_loss": 0.41089901328086853, "rewards/accuracies": 1.0, "rewards/chosen": -0.026078781113028526, "rewards/margins": 0.479949951171875, "rewards/rejected": -0.5060287714004517, "step": 4020 }, { "epoch": 0.806, "grad_norm": 0.9056381583213806, "learning_rate": 9.372421443264671e-07, "log_odds_chosen": 6.760701656341553, "log_odds_ratio": -0.0038182311691343784, "logits/chosen": 6.546082973480225, "logits/rejected": 4.615324020385742, "logps/chosen": -0.28112632036209106, "logps/rejected": -5.0882415771484375, "loss": 0.4239, "nll_loss": 0.42355817556381226, "rewards/accuracies": 1.0, "rewards/chosen": -0.028112631291151047, "rewards/margins": 0.48071154952049255, "rewards/rejected": -0.5088241696357727, "step": 4030 }, { "epoch": 0.808, "grad_norm": 0.8287837505340576, "learning_rate": 9.369370308573197e-07, "log_odds_chosen": 6.745752811431885, "log_odds_ratio": -0.003010040381923318, "logits/chosen": 6.5443878173828125, "logits/rejected": 4.53652811050415, "logps/chosen": -0.2576248347759247, "logps/rejected": -5.017306327819824, "loss": 0.4183, "nll_loss": 0.4180288314819336, "rewards/accuracies": 1.0, "rewards/chosen": -0.025762487202882767, "rewards/margins": 0.4759681820869446, "rewards/rejected": -0.5017306804656982, "step": 4040 }, { "epoch": 0.81, "grad_norm": 0.956188976764679, "learning_rate": 9.3663122740496e-07, "log_odds_chosen": 6.843710422515869, "log_odds_ratio": -0.002082309452816844, "logits/chosen": 6.602359771728516, "logits/rejected": 4.5758562088012695, "logps/chosen": -0.22262653708457947, "logps/rejected": -5.055722713470459, "loss": 0.3769, "nll_loss": 0.3767373859882355, "rewards/accuracies": 1.0, "rewards/chosen": -0.022262655198574066, "rewards/margins": 0.4833095967769623, "rewards/rejected": -0.5055721998214722, "step": 4050 }, { "epoch": 0.812, "grad_norm": 1.0118839740753174, "learning_rate": 9.363247344522938e-07, "log_odds_chosen": 6.165936470031738, "log_odds_ratio": -0.004640187602490187, "logits/chosen": 6.493437767028809, "logits/rejected": 4.635892391204834, "logps/chosen": -0.3001118004322052, "logps/rejected": -4.712682723999023, "loss": 0.4749, "nll_loss": 0.47445034980773926, "rewards/accuracies": 1.0, "rewards/chosen": -0.03001118078827858, "rewards/margins": 0.4412570893764496, "rewards/rejected": -0.47126826643943787, "step": 4060 }, { "epoch": 0.814, "grad_norm": 1.1881448030471802, "learning_rate": 9.360175524833151e-07, "log_odds_chosen": 6.16970157623291, "log_odds_ratio": -0.003071318846195936, "logits/chosen": 6.568917274475098, "logits/rejected": 4.707606315612793, "logps/chosen": -0.27156561613082886, "logps/rejected": -4.738466739654541, "loss": 0.4255, "nll_loss": 0.42523932456970215, "rewards/accuracies": 1.0, "rewards/chosen": -0.027156557887792587, "rewards/margins": 0.44669008255004883, "rewards/rejected": -0.4738466739654541, "step": 4070 }, { "epoch": 0.816, "grad_norm": 0.7629494667053223, "learning_rate": 9.357096819831063e-07, "log_odds_chosen": 6.730434417724609, "log_odds_ratio": -0.0018555577844381332, "logits/chosen": 6.549307346343994, "logits/rejected": 4.530759334564209, "logps/chosen": -0.1756828874349594, "logps/rejected": -4.871087074279785, "loss": 0.3266, "nll_loss": 0.32636696100234985, "rewards/accuracies": 1.0, "rewards/chosen": -0.017568286508321762, "rewards/margins": 0.4695405066013336, "rewards/rejected": -0.4871087968349457, "step": 4080 }, { "epoch": 0.818, "grad_norm": 0.9551812410354614, "learning_rate": 9.354011234378369e-07, "log_odds_chosen": 6.382393836975098, "log_odds_ratio": -0.004550147335976362, "logits/chosen": 6.56207799911499, "logits/rejected": 4.67268705368042, "logps/chosen": -0.35162705183029175, "logps/rejected": -4.998082160949707, "loss": 0.5038, "nll_loss": 0.5033711194992065, "rewards/accuracies": 1.0, "rewards/chosen": -0.03516270965337753, "rewards/margins": 0.46464547514915466, "rewards/rejected": -0.4998082220554352, "step": 4090 }, { "epoch": 0.82, "grad_norm": 1.21990168094635, "learning_rate": 9.350918773347628e-07, "log_odds_chosen": 6.482780456542969, "log_odds_ratio": -0.03609771281480789, "logits/chosen": 6.464389801025391, "logits/rejected": 4.595126152038574, "logps/chosen": -0.3993699848651886, "logps/rejected": -4.871867656707764, "loss": 0.5112, "nll_loss": 0.5075944662094116, "rewards/accuracies": 1.0, "rewards/chosen": -0.03993699699640274, "rewards/margins": 0.44724971055984497, "rewards/rejected": -0.4871867299079895, "step": 4100 }, { "epoch": 0.822, "grad_norm": 2.617293357849121, "learning_rate": 9.34781944162226e-07, "log_odds_chosen": 6.2491374015808105, "log_odds_ratio": -0.015428786166012287, "logits/chosen": 6.428941249847412, "logits/rejected": 4.434457302093506, "logps/chosen": -0.3324061930179596, "logps/rejected": -4.9250030517578125, "loss": 0.4853, "nll_loss": 0.4837496280670166, "rewards/accuracies": 1.0, "rewards/chosen": -0.03324061259627342, "rewards/margins": 0.45925965905189514, "rewards/rejected": -0.49250030517578125, "step": 4110 }, { "epoch": 0.824, "grad_norm": 1.2230145931243896, "learning_rate": 9.344713244096532e-07, "log_odds_chosen": 6.618887901306152, "log_odds_ratio": -0.036496974527835846, "logits/chosen": 6.445762634277344, "logits/rejected": 4.543337821960449, "logps/chosen": -0.4191294312477112, "logps/rejected": -5.088369846343994, "loss": 0.5288, "nll_loss": 0.525194525718689, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.04191293567419052, "rewards/margins": 0.4669240117073059, "rewards/rejected": -0.5088369250297546, "step": 4120 }, { "epoch": 0.826, "grad_norm": 1.241087794303894, "learning_rate": 9.341600185675553e-07, "log_odds_chosen": 6.264078140258789, "log_odds_ratio": -0.0036445967853069305, "logits/chosen": 6.4479780197143555, "logits/rejected": 4.558334827423096, "logps/chosen": -0.36844152212142944, "logps/rejected": -5.119875907897949, "loss": 0.5156, "nll_loss": 0.515204131603241, "rewards/accuracies": 1.0, "rewards/chosen": -0.036844149231910706, "rewards/margins": 0.4751434922218323, "rewards/rejected": -0.511987566947937, "step": 4130 }, { "epoch": 0.828, "grad_norm": 1.050620198249817, "learning_rate": 9.338480271275268e-07, "log_odds_chosen": 6.536016941070557, "log_odds_ratio": -0.002675173804163933, "logits/chosen": 6.52242374420166, "logits/rejected": 4.549101829528809, "logps/chosen": -0.2580649256706238, "logps/rejected": -4.967737674713135, "loss": 0.4061, "nll_loss": 0.4058622419834137, "rewards/accuracies": 1.0, "rewards/chosen": -0.025806492194533348, "rewards/margins": 0.47096723318099976, "rewards/rejected": -0.49677371978759766, "step": 4140 }, { "epoch": 0.83, "grad_norm": 0.824720025062561, "learning_rate": 9.33535350582245e-07, "log_odds_chosen": 6.814411163330078, "log_odds_ratio": -0.035224489867687225, "logits/chosen": 6.463698387145996, "logits/rejected": 4.406224250793457, "logps/chosen": -0.4043169915676117, "logps/rejected": -5.2811102867126465, "loss": 0.4659, "nll_loss": 0.46238821744918823, "rewards/accuracies": 1.0, "rewards/chosen": -0.04043170064687729, "rewards/margins": 0.4876793920993805, "rewards/rejected": -0.5281109809875488, "step": 4150 }, { "epoch": 0.832, "grad_norm": 1.547453761100769, "learning_rate": 9.332219894254685e-07, "log_odds_chosen": 6.869448184967041, "log_odds_ratio": -0.0014314033323898911, "logits/chosen": 6.4657301902771, "logits/rejected": 4.548287391662598, "logps/chosen": -0.20487630367279053, "logps/rejected": -5.181761741638184, "loss": 0.357, "nll_loss": 0.35690638422966003, "rewards/accuracies": 1.0, "rewards/chosen": -0.020487630739808083, "rewards/margins": 0.49768853187561035, "rewards/rejected": -0.5181761384010315, "step": 4160 }, { "epoch": 0.834, "grad_norm": 0.8279621601104736, "learning_rate": 9.329079441520376e-07, "log_odds_chosen": 6.334306716918945, "log_odds_ratio": -0.03521367907524109, "logits/chosen": 6.387889862060547, "logits/rejected": 4.500416278839111, "logps/chosen": -0.33811670541763306, "logps/rejected": -4.920292377471924, "loss": 0.4613, "nll_loss": 0.45776504278182983, "rewards/accuracies": 1.0, "rewards/chosen": -0.033811669796705246, "rewards/margins": 0.4582175612449646, "rewards/rejected": -0.4920292794704437, "step": 4170 }, { "epoch": 0.836, "grad_norm": 1.0773085355758667, "learning_rate": 9.325932152578723e-07, "log_odds_chosen": 6.557684421539307, "log_odds_ratio": -0.003799585159868002, "logits/chosen": 6.443119049072266, "logits/rejected": 4.436644554138184, "logps/chosen": -0.2566658854484558, "logps/rejected": -4.958864688873291, "loss": 0.4226, "nll_loss": 0.4222394824028015, "rewards/accuracies": 1.0, "rewards/chosen": -0.02566658891737461, "rewards/margins": 0.47021985054016113, "rewards/rejected": -0.4958864748477936, "step": 4180 }, { "epoch": 0.838, "grad_norm": 0.839879035949707, "learning_rate": 9.322778032399727e-07, "log_odds_chosen": 6.601690769195557, "log_odds_ratio": -0.0029939706437289715, "logits/chosen": 6.4521002769470215, "logits/rejected": 4.459874629974365, "logps/chosen": -0.2737942337989807, "logps/rejected": -5.028016090393066, "loss": 0.4394, "nll_loss": 0.43911972641944885, "rewards/accuracies": 1.0, "rewards/chosen": -0.027379419654607773, "rewards/margins": 0.475422203540802, "rewards/rejected": -0.5028015971183777, "step": 4190 }, { "epoch": 0.84, "grad_norm": 0.9402925968170166, "learning_rate": 9.319617085964175e-07, "log_odds_chosen": 6.524517059326172, "log_odds_ratio": -0.0021382959093898535, "logits/chosen": 6.400186061859131, "logits/rejected": 4.432770729064941, "logps/chosen": -0.21598687767982483, "logps/rejected": -4.903524398803711, "loss": 0.3799, "nll_loss": 0.37965327501296997, "rewards/accuracies": 1.0, "rewards/chosen": -0.021598689258098602, "rewards/margins": 0.4687538146972656, "rewards/rejected": -0.49035245180130005, "step": 4200 }, { "epoch": 0.842, "grad_norm": 1.2806203365325928, "learning_rate": 9.316449318263634e-07, "log_odds_chosen": 7.001919746398926, "log_odds_ratio": -0.001402832567691803, "logits/chosen": 6.507615566253662, "logits/rejected": 4.522975444793701, "logps/chosen": -0.1735239326953888, "logps/rejected": -5.091763496398926, "loss": 0.3209, "nll_loss": 0.3207835555076599, "rewards/accuracies": 1.0, "rewards/chosen": -0.01735239289700985, "rewards/margins": 0.4918239712715149, "rewards/rejected": -0.5091763734817505, "step": 4210 }, { "epoch": 0.844, "grad_norm": 1.0104825496673584, "learning_rate": 9.313274734300438e-07, "log_odds_chosen": 6.892808437347412, "log_odds_ratio": -0.0016783317551016808, "logits/chosen": 6.459802150726318, "logits/rejected": 4.494961738586426, "logps/chosen": -0.21578891575336456, "logps/rejected": -5.202939033508301, "loss": 0.388, "nll_loss": 0.38785281777381897, "rewards/accuracies": 1.0, "rewards/chosen": -0.021578893065452576, "rewards/margins": 0.49871501326560974, "rewards/rejected": -0.5202938318252563, "step": 4220 }, { "epoch": 0.846, "grad_norm": 0.9656134843826294, "learning_rate": 9.31009333908769e-07, "log_odds_chosen": 6.296927452087402, "log_odds_ratio": -0.0356130450963974, "logits/chosen": 6.359738349914551, "logits/rejected": 4.5456132888793945, "logps/chosen": -0.40794438123703003, "logps/rejected": -4.974993705749512, "loss": 0.492, "nll_loss": 0.488426148891449, "rewards/accuracies": 1.0, "rewards/chosen": -0.04079443961381912, "rewards/margins": 0.4567050039768219, "rewards/rejected": -0.4974994659423828, "step": 4230 }, { "epoch": 0.848, "grad_norm": 1.0786991119384766, "learning_rate": 9.306905137649249e-07, "log_odds_chosen": 6.655545234680176, "log_odds_ratio": -0.002482083160430193, "logits/chosen": 6.466832637786865, "logits/rejected": 4.559632301330566, "logps/chosen": -0.23335318267345428, "logps/rejected": -5.011594772338867, "loss": 0.3883, "nll_loss": 0.38801801204681396, "rewards/accuracies": 1.0, "rewards/chosen": -0.023335319012403488, "rewards/margins": 0.4778241515159607, "rewards/rejected": -0.5011595487594604, "step": 4240 }, { "epoch": 0.85, "grad_norm": 1.0295921564102173, "learning_rate": 9.303710135019717e-07, "log_odds_chosen": 6.7304205894470215, "log_odds_ratio": -0.003096895758062601, "logits/chosen": 6.457919120788574, "logits/rejected": 4.636351108551025, "logps/chosen": -0.2531762421131134, "logps/rejected": -4.993119716644287, "loss": 0.4251, "nll_loss": 0.4247414171695709, "rewards/accuracies": 1.0, "rewards/chosen": -0.02531762421131134, "rewards/margins": 0.47399434447288513, "rewards/rejected": -0.4993119239807129, "step": 4250 }, { "epoch": 0.852, "grad_norm": 2.3429675102233887, "learning_rate": 9.300508336244443e-07, "log_odds_chosen": 6.768043518066406, "log_odds_ratio": -0.0015363225247710943, "logits/chosen": 6.391881942749023, "logits/rejected": 4.482906818389893, "logps/chosen": -0.19046056270599365, "logps/rejected": -5.021550178527832, "loss": 0.3667, "nll_loss": 0.36658212542533875, "rewards/accuracies": 1.0, "rewards/chosen": -0.019046057015657425, "rewards/margins": 0.48310890793800354, "rewards/rejected": -0.5021549463272095, "step": 4260 }, { "epoch": 0.854, "grad_norm": 1.216168999671936, "learning_rate": 9.297299746379501e-07, "log_odds_chosen": 6.1993327140808105, "log_odds_ratio": -0.00381826376542449, "logits/chosen": 6.375151634216309, "logits/rejected": 4.592380046844482, "logps/chosen": -0.32840076088905334, "logps/rejected": -4.954320907592773, "loss": 0.4802, "nll_loss": 0.4798545837402344, "rewards/accuracies": 1.0, "rewards/chosen": -0.032840076833963394, "rewards/margins": 0.46259206533432007, "rewards/rejected": -0.4954320788383484, "step": 4270 }, { "epoch": 0.856, "grad_norm": 0.7768840193748474, "learning_rate": 9.294084370491694e-07, "log_odds_chosen": 6.579707145690918, "log_odds_ratio": -0.0037500481121242046, "logits/chosen": 6.379559516906738, "logits/rejected": 4.599681377410889, "logps/chosen": -0.27037355303764343, "logps/rejected": -4.886434078216553, "loss": 0.4068, "nll_loss": 0.40640488266944885, "rewards/accuracies": 1.0, "rewards/chosen": -0.027037352323532104, "rewards/margins": 0.46160611510276794, "rewards/rejected": -0.48864346742630005, "step": 4280 }, { "epoch": 0.858, "grad_norm": 1.0187979936599731, "learning_rate": 9.290862213658539e-07, "log_odds_chosen": 6.560310363769531, "log_odds_ratio": -0.0029190839268267155, "logits/chosen": 6.4410719871521, "logits/rejected": 4.436441898345947, "logps/chosen": -0.282156378030777, "logps/rejected": -5.037269115447998, "loss": 0.4077, "nll_loss": 0.4074084162712097, "rewards/accuracies": 1.0, "rewards/chosen": -0.028215637430548668, "rewards/margins": 0.475511372089386, "rewards/rejected": -0.5037269592285156, "step": 4290 }, { "epoch": 0.86, "grad_norm": 0.6735871434211731, "learning_rate": 9.287633280968261e-07, "log_odds_chosen": 6.839577674865723, "log_odds_ratio": -0.0015812463825568557, "logits/chosen": 6.382290363311768, "logits/rejected": 4.545443534851074, "logps/chosen": -0.21783670783042908, "logps/rejected": -5.152751922607422, "loss": 0.3686, "nll_loss": 0.36845502257347107, "rewards/accuracies": 1.0, "rewards/chosen": -0.02178366854786873, "rewards/margins": 0.4934915602207184, "rewards/rejected": -0.5152751803398132, "step": 4300 }, { "epoch": 0.862, "grad_norm": 1.1651512384414673, "learning_rate": 9.284397577519787e-07, "log_odds_chosen": 6.999510288238525, "log_odds_ratio": -0.00132844690233469, "logits/chosen": 6.364565849304199, "logits/rejected": 4.522577285766602, "logps/chosen": -0.13851478695869446, "logps/rejected": -4.85791540145874, "loss": 0.3146, "nll_loss": 0.3144325613975525, "rewards/accuracies": 1.0, "rewards/chosen": -0.013851478695869446, "rewards/margins": 0.47194012999534607, "rewards/rejected": -0.4857915937900543, "step": 4310 }, { "epoch": 0.864, "grad_norm": 1.1976990699768066, "learning_rate": 9.281155108422731e-07, "log_odds_chosen": 6.23905086517334, "log_odds_ratio": -0.005951134953647852, "logits/chosen": 6.253172397613525, "logits/rejected": 4.53241491317749, "logps/chosen": -0.3783080577850342, "logps/rejected": -4.904393672943115, "loss": 0.5288, "nll_loss": 0.5281778573989868, "rewards/accuracies": 1.0, "rewards/chosen": -0.03783080726861954, "rewards/margins": 0.452608585357666, "rewards/rejected": -0.49043935537338257, "step": 4320 }, { "epoch": 0.866, "grad_norm": 0.7834147810935974, "learning_rate": 9.277905878797399e-07, "log_odds_chosen": 7.04880428314209, "log_odds_ratio": -0.001592749380506575, "logits/chosen": 6.38688850402832, "logits/rejected": 4.487563610076904, "logps/chosen": -0.17274010181427002, "logps/rejected": -5.10521125793457, "loss": 0.3429, "nll_loss": 0.34276360273361206, "rewards/accuracies": 1.0, "rewards/chosen": -0.01727401092648506, "rewards/margins": 0.4932471215724945, "rewards/rejected": -0.5105210542678833, "step": 4330 }, { "epoch": 0.868, "grad_norm": 0.9116793870925903, "learning_rate": 9.274649893774766e-07, "log_odds_chosen": 6.7354936599731445, "log_odds_ratio": -0.0018941003363579512, "logits/chosen": 6.365485668182373, "logits/rejected": 4.415067195892334, "logps/chosen": -0.22248582541942596, "logps/rejected": -5.037978172302246, "loss": 0.3933, "nll_loss": 0.3931126594543457, "rewards/accuracies": 1.0, "rewards/chosen": -0.022248584777116776, "rewards/margins": 0.4815492630004883, "rewards/rejected": -0.5037978291511536, "step": 4340 }, { "epoch": 0.87, "grad_norm": 1.3433868885040283, "learning_rate": 9.271387158496476e-07, "log_odds_chosen": 6.2217817306518555, "log_odds_ratio": -0.03661145269870758, "logits/chosen": 6.178121566772461, "logits/rejected": 4.445399284362793, "logps/chosen": -0.35498887300491333, "logps/rejected": -4.909811496734619, "loss": 0.4892, "nll_loss": 0.4855527877807617, "rewards/accuracies": 1.0, "rewards/chosen": -0.03549888730049133, "rewards/margins": 0.4554823040962219, "rewards/rejected": -0.49098116159439087, "step": 4350 }, { "epoch": 0.872, "grad_norm": 1.274056315422058, "learning_rate": 9.268117678114833e-07, "log_odds_chosen": 6.91080379486084, "log_odds_ratio": -0.0016500046476721764, "logits/chosen": 6.389491558074951, "logits/rejected": 4.502622127532959, "logps/chosen": -0.15090808272361755, "logps/rejected": -4.832688808441162, "loss": 0.3065, "nll_loss": 0.3062984049320221, "rewards/accuracies": 1.0, "rewards/chosen": -0.015090808272361755, "rewards/margins": 0.4681780934333801, "rewards/rejected": -0.4832689166069031, "step": 4360 }, { "epoch": 0.874, "grad_norm": 1.1579333543777466, "learning_rate": 9.264841457792793e-07, "log_odds_chosen": 6.907468318939209, "log_odds_ratio": -0.0014221479650586843, "logits/chosen": 6.372506141662598, "logits/rejected": 4.398002624511719, "logps/chosen": -0.18786069750785828, "logps/rejected": -5.121979236602783, "loss": 0.3616, "nll_loss": 0.3614131808280945, "rewards/accuracies": 1.0, "rewards/chosen": -0.018786069005727768, "rewards/margins": 0.4934118390083313, "rewards/rejected": -0.5121979713439941, "step": 4370 }, { "epoch": 0.876, "grad_norm": 1.0184903144836426, "learning_rate": 9.261558502703958e-07, "log_odds_chosen": 6.529870510101318, "log_odds_ratio": -0.004374033771455288, "logits/chosen": 6.25473690032959, "logits/rejected": 4.420238018035889, "logps/chosen": -0.26449376344680786, "logps/rejected": -4.915526866912842, "loss": 0.4252, "nll_loss": 0.42473381757736206, "rewards/accuracies": 1.0, "rewards/chosen": -0.026449376717209816, "rewards/margins": 0.4651033878326416, "rewards/rejected": -0.4915527403354645, "step": 4380 }, { "epoch": 0.878, "grad_norm": 1.1648223400115967, "learning_rate": 9.258268818032559e-07, "log_odds_chosen": 6.471505641937256, "log_odds_ratio": -0.003747046459466219, "logits/chosen": 6.285727500915527, "logits/rejected": 4.531101226806641, "logps/chosen": -0.3210960328578949, "logps/rejected": -5.062140464782715, "loss": 0.4841, "nll_loss": 0.4837323725223541, "rewards/accuracies": 1.0, "rewards/chosen": -0.03210960701107979, "rewards/margins": 0.47410446405410767, "rewards/rejected": -0.5062140822410583, "step": 4390 }, { "epoch": 0.88, "grad_norm": 1.0466597080230713, "learning_rate": 9.25497240897346e-07, "log_odds_chosen": 6.515618324279785, "log_odds_ratio": -0.0022223982959985733, "logits/chosen": 6.282362461090088, "logits/rejected": 4.509023189544678, "logps/chosen": -0.2730008661746979, "logps/rejected": -5.079770088195801, "loss": 0.4455, "nll_loss": 0.44523343443870544, "rewards/accuracies": 1.0, "rewards/chosen": -0.027300089597702026, "rewards/margins": 0.4806768298149109, "rewards/rejected": -0.5079769492149353, "step": 4400 }, { "epoch": 0.882, "grad_norm": 1.1246615648269653, "learning_rate": 9.251669280732137e-07, "log_odds_chosen": 6.225779056549072, "log_odds_ratio": -0.0373506173491478, "logits/chosen": 6.164698600769043, "logits/rejected": 4.534468173980713, "logps/chosen": -0.6258977651596069, "logps/rejected": -5.307897090911865, "loss": 0.6772, "nll_loss": 0.6734220385551453, "rewards/accuracies": 1.0, "rewards/chosen": -0.06258977204561234, "rewards/margins": 0.4681999087333679, "rewards/rejected": -0.5307897329330444, "step": 4410 }, { "epoch": 0.884, "grad_norm": 1.029317021369934, "learning_rate": 9.248359438524682e-07, "log_odds_chosen": 6.179784774780273, "log_odds_ratio": -0.03790164738893509, "logits/chosen": 6.221670627593994, "logits/rejected": 4.391867160797119, "logps/chosen": -0.4766991138458252, "logps/rejected": -4.980790138244629, "loss": 0.5831, "nll_loss": 0.579335629940033, "rewards/accuracies": 1.0, "rewards/chosen": -0.0476699098944664, "rewards/margins": 0.4504091143608093, "rewards/rejected": -0.4980790615081787, "step": 4420 }, { "epoch": 0.886, "grad_norm": 2.0555038452148438, "learning_rate": 9.245042887577787e-07, "log_odds_chosen": 6.452902793884277, "log_odds_ratio": -0.0034144162200391293, "logits/chosen": 6.279538631439209, "logits/rejected": 4.459462642669678, "logps/chosen": -0.31600221991539, "logps/rejected": -5.078652381896973, "loss": 0.4597, "nll_loss": 0.4593172073364258, "rewards/accuracies": 1.0, "rewards/chosen": -0.0316002257168293, "rewards/margins": 0.4762650430202484, "rewards/rejected": -0.5078652501106262, "step": 4430 }, { "epoch": 0.888, "grad_norm": 1.0288081169128418, "learning_rate": 9.241719633128742e-07, "log_odds_chosen": 6.649461269378662, "log_odds_ratio": -0.002629634691402316, "logits/chosen": 6.324483394622803, "logits/rejected": 4.486919403076172, "logps/chosen": -0.2929510176181793, "logps/rejected": -5.237978935241699, "loss": 0.454, "nll_loss": 0.45371896028518677, "rewards/accuracies": 1.0, "rewards/chosen": -0.02929510548710823, "rewards/margins": 0.4945027828216553, "rewards/rejected": -0.523797869682312, "step": 4440 }, { "epoch": 0.89, "grad_norm": 1.222400426864624, "learning_rate": 9.238389680425415e-07, "log_odds_chosen": 6.616555213928223, "log_odds_ratio": -0.0029933559708297253, "logits/chosen": 6.3475751876831055, "logits/rejected": 4.47217321395874, "logps/chosen": -0.25007396936416626, "logps/rejected": -5.015128135681152, "loss": 0.3987, "nll_loss": 0.3983631134033203, "rewards/accuracies": 1.0, "rewards/chosen": -0.025007396936416626, "rewards/margins": 0.47650545835494995, "rewards/rejected": -0.5015128254890442, "step": 4450 }, { "epoch": 0.892, "grad_norm": 1.3300082683563232, "learning_rate": 9.235053034726259e-07, "log_odds_chosen": 6.487146854400635, "log_odds_ratio": -0.0023240013979375362, "logits/chosen": 6.310849666595459, "logits/rejected": 4.465019226074219, "logps/chosen": -0.29591602087020874, "logps/rejected": -5.203901290893555, "loss": 0.4558, "nll_loss": 0.45557737350463867, "rewards/accuracies": 1.0, "rewards/chosen": -0.029591601341962814, "rewards/margins": 0.49079856276512146, "rewards/rejected": -0.5203901529312134, "step": 4460 }, { "epoch": 0.894, "grad_norm": 1.1958425045013428, "learning_rate": 9.231709701300292e-07, "log_odds_chosen": 6.776163578033447, "log_odds_ratio": -0.0019628177396953106, "logits/chosen": 6.321583271026611, "logits/rejected": 4.520340919494629, "logps/chosen": -0.27221041917800903, "logps/rejected": -5.279122829437256, "loss": 0.4491, "nll_loss": 0.448930025100708, "rewards/accuracies": 1.0, "rewards/chosen": -0.027221042662858963, "rewards/margins": 0.500691294670105, "rewards/rejected": -0.5279122591018677, "step": 4470 }, { "epoch": 0.896, "grad_norm": 0.9793204069137573, "learning_rate": 9.228359685427094e-07, "log_odds_chosen": 6.429377555847168, "log_odds_ratio": -0.03612486273050308, "logits/chosen": 6.170660972595215, "logits/rejected": 4.4929609298706055, "logps/chosen": -0.4841790199279785, "logps/rejected": -5.249126434326172, "loss": 0.5643, "nll_loss": 0.5607103705406189, "rewards/accuracies": 1.0, "rewards/chosen": -0.04841790348291397, "rewards/margins": 0.4764947295188904, "rewards/rejected": -0.5249125957489014, "step": 4480 }, { "epoch": 0.898, "grad_norm": 0.7740151882171631, "learning_rate": 9.225002992396795e-07, "log_odds_chosen": 7.437492370605469, "log_odds_ratio": -0.0007891276036389172, "logits/chosen": 6.394959926605225, "logits/rejected": 4.400628566741943, "logps/chosen": -0.17440195381641388, "logps/rejected": -5.5952558517456055, "loss": 0.3271, "nll_loss": 0.32704734802246094, "rewards/accuracies": 1.0, "rewards/chosen": -0.017440196126699448, "rewards/margins": 0.5420854091644287, "rewards/rejected": -0.5595256090164185, "step": 4490 }, { "epoch": 0.9, "grad_norm": 1.271400809288025, "learning_rate": 9.221639627510075e-07, "log_odds_chosen": 6.624998569488525, "log_odds_ratio": -0.03766821324825287, "logits/chosen": 6.222220420837402, "logits/rejected": 4.368878364562988, "logps/chosen": -0.43808430433273315, "logps/rejected": -5.170182704925537, "loss": 0.6052, "nll_loss": 0.6014216542243958, "rewards/accuracies": 1.0, "rewards/chosen": -0.043808430433273315, "rewards/margins": 0.47320985794067383, "rewards/rejected": -0.5170182585716248, "step": 4500 }, { "epoch": 0.902, "grad_norm": 1.1419399976730347, "learning_rate": 9.218269596078144e-07, "log_odds_chosen": 6.925511360168457, "log_odds_ratio": -0.001665793708525598, "logits/chosen": 6.399992942810059, "logits/rejected": 4.435540199279785, "logps/chosen": -0.26783376932144165, "logps/rejected": -5.496996879577637, "loss": 0.4176, "nll_loss": 0.4174232482910156, "rewards/accuracies": 1.0, "rewards/chosen": -0.026783380657434464, "rewards/margins": 0.522916316986084, "rewards/rejected": -0.5496997237205505, "step": 4510 }, { "epoch": 0.904, "grad_norm": 1.2404911518096924, "learning_rate": 9.214892903422743e-07, "log_odds_chosen": 7.099173545837402, "log_odds_ratio": -0.003216427518054843, "logits/chosen": 6.318722724914551, "logits/rejected": 4.369141578674316, "logps/chosen": -0.2648473083972931, "logps/rejected": -5.436789035797119, "loss": 0.4229, "nll_loss": 0.4225897192955017, "rewards/accuracies": 1.0, "rewards/chosen": -0.026484733447432518, "rewards/margins": 0.5171941518783569, "rewards/rejected": -0.543678879737854, "step": 4520 }, { "epoch": 0.906, "grad_norm": 1.939365267753601, "learning_rate": 9.211509554876129e-07, "log_odds_chosen": 6.83371114730835, "log_odds_ratio": -0.0026100091636180878, "logits/chosen": 6.38198184967041, "logits/rejected": 4.415197372436523, "logps/chosen": -0.30761319398880005, "logps/rejected": -5.3661789894104, "loss": 0.4607, "nll_loss": 0.4604048728942871, "rewards/accuracies": 1.0, "rewards/chosen": -0.030761322006583214, "rewards/margins": 0.5058565139770508, "rewards/rejected": -0.5366178750991821, "step": 4530 }, { "epoch": 0.908, "grad_norm": 1.1772817373275757, "learning_rate": 9.208119555781073e-07, "log_odds_chosen": 7.200939178466797, "log_odds_ratio": -0.000868998933583498, "logits/chosen": 6.419210910797119, "logits/rejected": 4.515276908874512, "logps/chosen": -0.16513343155384064, "logps/rejected": -5.3803629875183105, "loss": 0.3123, "nll_loss": 0.3121756911277771, "rewards/accuracies": 1.0, "rewards/chosen": -0.016513343900442123, "rewards/margins": 0.5215229392051697, "rewards/rejected": -0.5380362868309021, "step": 4540 }, { "epoch": 0.91, "grad_norm": 1.2889471054077148, "learning_rate": 9.204722911490846e-07, "log_odds_chosen": 6.821619510650635, "log_odds_ratio": -0.001895050285384059, "logits/chosen": 6.36611270904541, "logits/rejected": 4.444981098175049, "logps/chosen": -0.32738733291625977, "logps/rejected": -5.518893241882324, "loss": 0.4984, "nll_loss": 0.49816733598709106, "rewards/accuracies": 1.0, "rewards/chosen": -0.032738737761974335, "rewards/margins": 0.5191505551338196, "rewards/rejected": -0.5518893003463745, "step": 4550 }, { "epoch": 0.912, "grad_norm": 0.9692014455795288, "learning_rate": 9.201319627369211e-07, "log_odds_chosen": 6.888115882873535, "log_odds_ratio": -0.001313843298703432, "logits/chosen": 6.403358459472656, "logits/rejected": 4.592280387878418, "logps/chosen": -0.22391323745250702, "logps/rejected": -5.335136413574219, "loss": 0.3759, "nll_loss": 0.37577202916145325, "rewards/accuracies": 1.0, "rewards/chosen": -0.02239132486283779, "rewards/margins": 0.5111223459243774, "rewards/rejected": -0.5335136651992798, "step": 4560 }, { "epoch": 0.914, "grad_norm": 1.131729006767273, "learning_rate": 9.197909708790419e-07, "log_odds_chosen": 6.891315460205078, "log_odds_ratio": -0.002267856616526842, "logits/chosen": 6.41015100479126, "logits/rejected": 4.573599338531494, "logps/chosen": -0.2291596382856369, "logps/rejected": -5.181371688842773, "loss": 0.383, "nll_loss": 0.38274234533309937, "rewards/accuracies": 1.0, "rewards/chosen": -0.02291596494615078, "rewards/margins": 0.49522119760513306, "rewards/rejected": -0.5181371569633484, "step": 4570 }, { "epoch": 0.916, "grad_norm": 1.0019422769546509, "learning_rate": 9.194493161139199e-07, "log_odds_chosen": 7.1604509353637695, "log_odds_ratio": -0.0013298902194947004, "logits/chosen": 6.3449177742004395, "logits/rejected": 4.571479320526123, "logps/chosen": -0.2350112497806549, "logps/rejected": -5.591994285583496, "loss": 0.3855, "nll_loss": 0.38535866141319275, "rewards/accuracies": 1.0, "rewards/chosen": -0.02350112795829773, "rewards/margins": 0.5356983542442322, "rewards/rejected": -0.5591994524002075, "step": 4580 }, { "epoch": 0.918, "grad_norm": 1.1240133047103882, "learning_rate": 9.191069989810742e-07, "log_odds_chosen": 6.923707485198975, "log_odds_ratio": -0.03665893152356148, "logits/chosen": 6.079102516174316, "logits/rejected": 4.3458333015441895, "logps/chosen": -0.4458109736442566, "logps/rejected": -5.545201301574707, "loss": 0.5955, "nll_loss": 0.5918465852737427, "rewards/accuracies": 1.0, "rewards/chosen": -0.0445811003446579, "rewards/margins": 0.5099390149116516, "rewards/rejected": -0.5545201301574707, "step": 4590 }, { "epoch": 0.92, "grad_norm": 1.1702466011047363, "learning_rate": 9.187640200210709e-07, "log_odds_chosen": 7.4429497718811035, "log_odds_ratio": -0.0009586008382029831, "logits/chosen": 6.433224678039551, "logits/rejected": 4.420071601867676, "logps/chosen": -0.21098503470420837, "logps/rejected": -5.610914707183838, "loss": 0.3826, "nll_loss": 0.38249123096466064, "rewards/accuracies": 1.0, "rewards/chosen": -0.021098503842949867, "rewards/margins": 0.5399929881095886, "rewards/rejected": -0.5610914826393127, "step": 4600 }, { "epoch": 0.922, "grad_norm": 1.9730074405670166, "learning_rate": 9.184203797755198e-07, "log_odds_chosen": 7.442303657531738, "log_odds_ratio": -0.002302887151017785, "logits/chosen": 6.392502307891846, "logits/rejected": 4.483992576599121, "logps/chosen": -0.22779667377471924, "logps/rejected": -5.646862506866455, "loss": 0.376, "nll_loss": 0.37575098872184753, "rewards/accuracies": 1.0, "rewards/chosen": -0.022779664024710655, "rewards/margins": 0.5419066548347473, "rewards/rejected": -0.5646862983703613, "step": 4610 }, { "epoch": 0.924, "grad_norm": 0.8910603523254395, "learning_rate": 9.180760787870764e-07, "log_odds_chosen": 6.71671199798584, "log_odds_ratio": -0.0025668800808489323, "logits/chosen": 6.31324577331543, "logits/rejected": 4.485504627227783, "logps/chosen": -0.29949694871902466, "logps/rejected": -5.311914443969727, "loss": 0.4596, "nll_loss": 0.4593851566314697, "rewards/accuracies": 1.0, "rewards/chosen": -0.029949698597192764, "rewards/margins": 0.5012418031692505, "rewards/rejected": -0.5311914682388306, "step": 4620 }, { "epoch": 0.926, "grad_norm": 0.8291816711425781, "learning_rate": 9.177311175994389e-07, "log_odds_chosen": 6.831979274749756, "log_odds_ratio": -0.0020939656533300877, "logits/chosen": 6.330061912536621, "logits/rejected": 4.435722351074219, "logps/chosen": -0.24179883301258087, "logps/rejected": -5.261069297790527, "loss": 0.3958, "nll_loss": 0.3956221342086792, "rewards/accuracies": 1.0, "rewards/chosen": -0.024179881438612938, "rewards/margins": 0.5019270181655884, "rewards/rejected": -0.5261069536209106, "step": 4630 }, { "epoch": 0.928, "grad_norm": 0.9781948924064636, "learning_rate": 9.173854967573477e-07, "log_odds_chosen": 6.572016716003418, "log_odds_ratio": -0.01275884360074997, "logits/chosen": 6.238918304443359, "logits/rejected": 4.404139995574951, "logps/chosen": -0.474698930978775, "logps/rejected": -5.299221515655518, "loss": 0.5772, "nll_loss": 0.5759351849555969, "rewards/accuracies": 1.0, "rewards/chosen": -0.04746989160776138, "rewards/margins": 0.48245224356651306, "rewards/rejected": -0.5299221277236938, "step": 4640 }, { "epoch": 0.93, "grad_norm": 1.205812692642212, "learning_rate": 9.170392168065856e-07, "log_odds_chosen": 7.065306663513184, "log_odds_ratio": -0.0021757392678409815, "logits/chosen": 6.336427211761475, "logits/rejected": 4.423583984375, "logps/chosen": -0.25533774495124817, "logps/rejected": -5.496275424957275, "loss": 0.4277, "nll_loss": 0.4275270998477936, "rewards/accuracies": 1.0, "rewards/chosen": -0.025533774867653847, "rewards/margins": 0.5240936875343323, "rewards/rejected": -0.5496274828910828, "step": 4650 }, { "epoch": 0.932, "grad_norm": 0.9668650031089783, "learning_rate": 9.166922782939757e-07, "log_odds_chosen": 6.564268589019775, "log_odds_ratio": -0.0021955573465675116, "logits/chosen": 6.306970119476318, "logits/rejected": 4.393918514251709, "logps/chosen": -0.3447110950946808, "logps/rejected": -5.419023513793945, "loss": 0.4788, "nll_loss": 0.4786209166049957, "rewards/accuracies": 1.0, "rewards/chosen": -0.03447111323475838, "rewards/margins": 0.5074312090873718, "rewards/rejected": -0.5419023633003235, "step": 4660 }, { "epoch": 0.934, "grad_norm": 1.36598539352417, "learning_rate": 9.163446817673816e-07, "log_odds_chosen": 7.149025917053223, "log_odds_ratio": -0.001721569336950779, "logits/chosen": 6.290081024169922, "logits/rejected": 4.442195415496826, "logps/chosen": -0.2776959538459778, "logps/rejected": -5.635960578918457, "loss": 0.4509, "nll_loss": 0.4507099688053131, "rewards/accuracies": 1.0, "rewards/chosen": -0.027769599109888077, "rewards/margins": 0.535826563835144, "rewards/rejected": -0.5635961294174194, "step": 4670 }, { "epoch": 0.936, "grad_norm": 1.1780980825424194, "learning_rate": 9.159964277757053e-07, "log_odds_chosen": 7.020745754241943, "log_odds_ratio": -0.0018629736732691526, "logits/chosen": 6.326540946960449, "logits/rejected": 4.469094276428223, "logps/chosen": -0.23601219058036804, "logps/rejected": -5.345212936401367, "loss": 0.398, "nll_loss": 0.39784976840019226, "rewards/accuracies": 1.0, "rewards/chosen": -0.023601222783327103, "rewards/margins": 0.5109200477600098, "rewards/rejected": -0.5345213413238525, "step": 4680 }, { "epoch": 0.938, "grad_norm": 1.116162657737732, "learning_rate": 9.156475168688875e-07, "log_odds_chosen": 7.233823299407959, "log_odds_ratio": -0.0013824701309204102, "logits/chosen": 6.354869365692139, "logits/rejected": 4.403590679168701, "logps/chosen": -0.25004369020462036, "logps/rejected": -5.753513813018799, "loss": 0.3985, "nll_loss": 0.3983686864376068, "rewards/accuracies": 1.0, "rewards/chosen": -0.025004372000694275, "rewards/margins": 0.5503470301628113, "rewards/rejected": -0.575351357460022, "step": 4690 }, { "epoch": 0.94, "grad_norm": 1.2694344520568848, "learning_rate": 9.152979495979063e-07, "log_odds_chosen": 6.689292907714844, "log_odds_ratio": -0.03526494652032852, "logits/chosen": 6.186707019805908, "logits/rejected": 4.3853278160095215, "logps/chosen": -0.4859474301338196, "logps/rejected": -5.473963737487793, "loss": 0.6057, "nll_loss": 0.6021597981452942, "rewards/accuracies": 1.0, "rewards/chosen": -0.048594746738672256, "rewards/margins": 0.4988015592098236, "rewards/rejected": -0.5473963618278503, "step": 4700 }, { "epoch": 0.942, "grad_norm": 0.895919680595398, "learning_rate": 9.14947726514776e-07, "log_odds_chosen": 6.907168388366699, "log_odds_ratio": -0.004091665148735046, "logits/chosen": 6.2840094566345215, "logits/rejected": 4.357535362243652, "logps/chosen": -0.3312530815601349, "logps/rejected": -5.398890495300293, "loss": 0.4819, "nll_loss": 0.4814796447753906, "rewards/accuracies": 1.0, "rewards/chosen": -0.03312530741095543, "rewards/margins": 0.506763756275177, "rewards/rejected": -0.5398890376091003, "step": 4710 }, { "epoch": 0.944, "grad_norm": 0.8075011372566223, "learning_rate": 9.145968481725465e-07, "log_odds_chosen": 7.035026550292969, "log_odds_ratio": -0.0024856107775121927, "logits/chosen": 6.272315502166748, "logits/rejected": 4.481423377990723, "logps/chosen": -0.28923144936561584, "logps/rejected": -5.512960433959961, "loss": 0.4609, "nll_loss": 0.460632860660553, "rewards/accuracies": 1.0, "rewards/chosen": -0.028923142701387405, "rewards/margins": 0.5223729014396667, "rewards/rejected": -0.5512961149215698, "step": 4720 }, { "epoch": 0.946, "grad_norm": 1.6165740489959717, "learning_rate": 9.142453151253031e-07, "log_odds_chosen": 7.046057224273682, "log_odds_ratio": -0.0022915536537766457, "logits/chosen": 6.327601432800293, "logits/rejected": 4.450203895568848, "logps/chosen": -0.2711673378944397, "logps/rejected": -5.512051582336426, "loss": 0.407, "nll_loss": 0.4067264497280121, "rewards/accuracies": 1.0, "rewards/chosen": -0.02711673639714718, "rewards/margins": 0.524088442325592, "rewards/rejected": -0.5512052178382874, "step": 4730 }, { "epoch": 0.948, "grad_norm": 0.8625959753990173, "learning_rate": 9.138931279281639e-07, "log_odds_chosen": 7.275559425354004, "log_odds_ratio": -0.0011486748699098825, "logits/chosen": 6.428385257720947, "logits/rejected": 4.524054527282715, "logps/chosen": -0.18965522944927216, "logps/rejected": -5.5295844078063965, "loss": 0.3318, "nll_loss": 0.3317176103591919, "rewards/accuracies": 1.0, "rewards/chosen": -0.018965523689985275, "rewards/margins": 0.5339929461479187, "rewards/rejected": -0.5529584288597107, "step": 4740 }, { "epoch": 0.95, "grad_norm": 1.0655407905578613, "learning_rate": 9.135402871372808e-07, "log_odds_chosen": 7.279078483581543, "log_odds_ratio": -0.0012197154574096203, "logits/chosen": 6.3309645652771, "logits/rejected": 4.479368209838867, "logps/chosen": -0.22414462268352509, "logps/rejected": -5.616034984588623, "loss": 0.3684, "nll_loss": 0.3682928681373596, "rewards/accuracies": 1.0, "rewards/chosen": -0.022414464503526688, "rewards/margins": 0.5391889810562134, "rewards/rejected": -0.5616034865379333, "step": 4750 }, { "epoch": 0.952, "grad_norm": 0.9327263832092285, "learning_rate": 9.131867933098377e-07, "log_odds_chosen": 7.707557678222656, "log_odds_ratio": -0.0006594017613679171, "logits/chosen": 6.390438079833984, "logits/rejected": 4.261819362640381, "logps/chosen": -0.16084811091423035, "logps/rejected": -5.769453525543213, "loss": 0.323, "nll_loss": 0.32292598485946655, "rewards/accuracies": 1.0, "rewards/chosen": -0.016084810718894005, "rewards/margins": 0.5608605146408081, "rewards/rejected": -0.5769453048706055, "step": 4760 }, { "epoch": 0.954, "grad_norm": 1.3660173416137695, "learning_rate": 9.128326470040494e-07, "log_odds_chosen": 7.502626895904541, "log_odds_ratio": -0.0009994189022108912, "logits/chosen": 6.304349422454834, "logits/rejected": 4.502041339874268, "logps/chosen": -0.18566982448101044, "logps/rejected": -5.445758819580078, "loss": 0.3348, "nll_loss": 0.3346882462501526, "rewards/accuracies": 1.0, "rewards/chosen": -0.018566984683275223, "rewards/margins": 0.5260088443756104, "rewards/rejected": -0.5445759296417236, "step": 4770 }, { "epoch": 0.956, "grad_norm": 1.1717113256454468, "learning_rate": 9.124778487791613e-07, "log_odds_chosen": 7.633363246917725, "log_odds_ratio": -0.0009209602139890194, "logits/chosen": 6.365530490875244, "logits/rejected": 4.424466133117676, "logps/chosen": -0.21872179210186005, "logps/rejected": -5.9136810302734375, "loss": 0.3701, "nll_loss": 0.3700493276119232, "rewards/accuracies": 1.0, "rewards/chosen": -0.021872179582715034, "rewards/margins": 0.5694959759712219, "rewards/rejected": -0.5913681983947754, "step": 4780 }, { "epoch": 0.958, "grad_norm": 1.258946418762207, "learning_rate": 9.121223991954483e-07, "log_odds_chosen": 7.424051761627197, "log_odds_ratio": -0.0032275444827973843, "logits/chosen": 6.235428333282471, "logits/rejected": 4.368508338928223, "logps/chosen": -0.2966634929180145, "logps/rejected": -5.885865688323975, "loss": 0.4566, "nll_loss": 0.4562526345252991, "rewards/accuracies": 1.0, "rewards/chosen": -0.02966635301709175, "rewards/margins": 0.5589202046394348, "rewards/rejected": -0.5885865688323975, "step": 4790 }, { "epoch": 0.96, "grad_norm": 1.260561227798462, "learning_rate": 9.117662988142136e-07, "log_odds_chosen": 6.62033224105835, "log_odds_ratio": -0.0091238496825099, "logits/chosen": 6.23178243637085, "logits/rejected": 4.447699546813965, "logps/chosen": -0.3787044882774353, "logps/rejected": -5.29676628112793, "loss": 0.5233, "nll_loss": 0.5223841071128845, "rewards/accuracies": 1.0, "rewards/chosen": -0.03787045180797577, "rewards/margins": 0.4918060898780823, "rewards/rejected": -0.5296765565872192, "step": 4800 }, { "epoch": 0.962, "grad_norm": 1.3139419555664062, "learning_rate": 9.114095481977887e-07, "log_odds_chosen": 6.757578372955322, "log_odds_ratio": -0.0023020957596600056, "logits/chosen": 6.288142204284668, "logits/rejected": 4.418452739715576, "logps/chosen": -0.26926904916763306, "logps/rejected": -5.3225579261779785, "loss": 0.4254, "nll_loss": 0.42519229650497437, "rewards/accuracies": 1.0, "rewards/chosen": -0.026926901191473007, "rewards/margins": 0.5053288340568542, "rewards/rejected": -0.5322557687759399, "step": 4810 }, { "epoch": 0.964, "grad_norm": 1.3004530668258667, "learning_rate": 9.110521479095312e-07, "log_odds_chosen": 7.312197685241699, "log_odds_ratio": -0.0009200919303111732, "logits/chosen": 6.344655513763428, "logits/rejected": 4.472952842712402, "logps/chosen": -0.17277827858924866, "logps/rejected": -5.442536354064941, "loss": 0.3298, "nll_loss": 0.3296608626842499, "rewards/accuracies": 1.0, "rewards/chosen": -0.017277831211686134, "rewards/margins": 0.5269757509231567, "rewards/rejected": -0.5442535877227783, "step": 4820 }, { "epoch": 0.966, "grad_norm": 1.1247901916503906, "learning_rate": 9.106940985138249e-07, "log_odds_chosen": 6.819394111633301, "log_odds_ratio": -0.004149539861828089, "logits/chosen": 6.254158973693848, "logits/rejected": 4.375284671783447, "logps/chosen": -0.384360134601593, "logps/rejected": -5.608311176300049, "loss": 0.5151, "nll_loss": 0.5147296190261841, "rewards/accuracies": 1.0, "rewards/chosen": -0.03843601420521736, "rewards/margins": 0.522395133972168, "rewards/rejected": -0.5608311295509338, "step": 4830 }, { "epoch": 0.968, "grad_norm": 1.2373359203338623, "learning_rate": 9.103354005760789e-07, "log_odds_chosen": 7.096539497375488, "log_odds_ratio": -0.0016875818837434053, "logits/chosen": 6.265519142150879, "logits/rejected": 4.454163074493408, "logps/chosen": -0.27902132272720337, "logps/rejected": -5.661034107208252, "loss": 0.4429, "nll_loss": 0.44276872277259827, "rewards/accuracies": 1.0, "rewards/chosen": -0.027902130037546158, "rewards/margins": 0.5382012128829956, "rewards/rejected": -0.5661033987998962, "step": 4840 }, { "epoch": 0.97, "grad_norm": 0.8554509282112122, "learning_rate": 9.099760546627261e-07, "log_odds_chosen": 7.18752908706665, "log_odds_ratio": -0.0018622407224029303, "logits/chosen": 6.273581504821777, "logits/rejected": 4.356325626373291, "logps/chosen": -0.2193288505077362, "logps/rejected": -5.387886047363281, "loss": 0.3754, "nll_loss": 0.3752513527870178, "rewards/accuracies": 1.0, "rewards/chosen": -0.02193288318812847, "rewards/margins": 0.5168557167053223, "rewards/rejected": -0.5387886166572571, "step": 4850 }, { "epoch": 0.972, "grad_norm": 1.1719117164611816, "learning_rate": 9.096160613412227e-07, "log_odds_chosen": 7.566287040710449, "log_odds_ratio": -0.000739475479349494, "logits/chosen": 6.350264549255371, "logits/rejected": 4.398411750793457, "logps/chosen": -0.17172771692276, "logps/rejected": -5.678108215332031, "loss": 0.341, "nll_loss": 0.3409445881843567, "rewards/accuracies": 1.0, "rewards/chosen": -0.01717277243733406, "rewards/margins": 0.5506380200386047, "rewards/rejected": -0.5678107738494873, "step": 4860 }, { "epoch": 0.974, "grad_norm": 1.2669713497161865, "learning_rate": 9.092554211800474e-07, "log_odds_chosen": 6.508450984954834, "log_odds_ratio": -0.03520217910408974, "logits/chosen": 6.165085792541504, "logits/rejected": 4.426114082336426, "logps/chosen": -0.4336794316768646, "logps/rejected": -5.3094048500061035, "loss": 0.527, "nll_loss": 0.5234943628311157, "rewards/accuracies": 1.0, "rewards/chosen": -0.04336794465780258, "rewards/margins": 0.48757249116897583, "rewards/rejected": -0.5309404134750366, "step": 4870 }, { "epoch": 0.976, "grad_norm": 1.5227739810943604, "learning_rate": 9.088941347487003e-07, "log_odds_chosen": 7.123232841491699, "log_odds_ratio": -0.0016758956480771303, "logits/chosen": 6.273725986480713, "logits/rejected": 4.456374645233154, "logps/chosen": -0.274553120136261, "logps/rejected": -5.597880840301514, "loss": 0.4305, "nll_loss": 0.4303327202796936, "rewards/accuracies": 1.0, "rewards/chosen": -0.027455314993858337, "rewards/margins": 0.5323327779769897, "rewards/rejected": -0.5597881078720093, "step": 4880 }, { "epoch": 0.978, "grad_norm": 1.0316818952560425, "learning_rate": 9.085322026177016e-07, "log_odds_chosen": 7.688002586364746, "log_odds_ratio": -0.00151945895049721, "logits/chosen": 6.355983257293701, "logits/rejected": 4.349728584289551, "logps/chosen": -0.17413464188575745, "logps/rejected": -5.5823540687561035, "loss": 0.333, "nll_loss": 0.33289575576782227, "rewards/accuracies": 1.0, "rewards/chosen": -0.017413467168807983, "rewards/margins": 0.5408219695091248, "rewards/rejected": -0.5582354664802551, "step": 4890 }, { "epoch": 0.98, "grad_norm": 1.4018882513046265, "learning_rate": 9.08169625358592e-07, "log_odds_chosen": 6.596604347229004, "log_odds_ratio": -0.005936605390161276, "logits/chosen": 6.168510913848877, "logits/rejected": 4.3158063888549805, "logps/chosen": -0.3653812110424042, "logps/rejected": -5.294976234436035, "loss": 0.5163, "nll_loss": 0.5157453417778015, "rewards/accuracies": 1.0, "rewards/chosen": -0.036538124084472656, "rewards/margins": 0.4929594397544861, "rewards/rejected": -0.5294975638389587, "step": 4900 }, { "epoch": 0.982, "grad_norm": 1.396485447883606, "learning_rate": 9.078064035439301e-07, "log_odds_chosen": 6.847977638244629, "log_odds_ratio": -0.001582261174917221, "logits/chosen": 6.243250846862793, "logits/rejected": 4.310359001159668, "logps/chosen": -0.2764071822166443, "logps/rejected": -5.48032283782959, "loss": 0.449, "nll_loss": 0.4487924575805664, "rewards/accuracies": 1.0, "rewards/chosen": -0.02764071896672249, "rewards/margins": 0.5203915238380432, "rewards/rejected": -0.5480322241783142, "step": 4910 }, { "epoch": 0.984, "grad_norm": 1.0323219299316406, "learning_rate": 9.07442537747293e-07, "log_odds_chosen": 6.795835018157959, "log_odds_ratio": -0.03520290553569794, "logits/chosen": 6.246700763702393, "logits/rejected": 4.421573162078857, "logps/chosen": -0.3893483281135559, "logps/rejected": -5.309283256530762, "loss": 0.4434, "nll_loss": 0.4398702085018158, "rewards/accuracies": 1.0, "rewards/chosen": -0.03893483430147171, "rewards/margins": 0.4919935166835785, "rewards/rejected": -0.5309282541275024, "step": 4920 }, { "epoch": 0.986, "grad_norm": 1.0710110664367676, "learning_rate": 9.070780285432744e-07, "log_odds_chosen": 6.913191318511963, "log_odds_ratio": -0.0017760207410901785, "logits/chosen": 6.231974124908447, "logits/rejected": 4.469661712646484, "logps/chosen": -0.22329595685005188, "logps/rejected": -5.240417003631592, "loss": 0.3923, "nll_loss": 0.3921278119087219, "rewards/accuracies": 1.0, "rewards/chosen": -0.022329596802592278, "rewards/margins": 0.5017122030258179, "rewards/rejected": -0.5240417122840881, "step": 4930 }, { "epoch": 0.988, "grad_norm": 1.2723889350891113, "learning_rate": 9.067128765074841e-07, "log_odds_chosen": 7.405287742614746, "log_odds_ratio": -0.0009322086116299033, "logits/chosen": 6.321938514709473, "logits/rejected": 4.360632419586182, "logps/chosen": -0.17409247159957886, "logps/rejected": -5.4286394119262695, "loss": 0.34, "nll_loss": 0.33991149067878723, "rewards/accuracies": 1.0, "rewards/chosen": -0.017409246414899826, "rewards/margins": 0.5254546999931335, "rewards/rejected": -0.5428639650344849, "step": 4940 }, { "epoch": 0.99, "grad_norm": 0.8792760372161865, "learning_rate": 9.063470822165468e-07, "log_odds_chosen": 7.020652770996094, "log_odds_ratio": -0.0017573708901181817, "logits/chosen": 6.225141525268555, "logits/rejected": 4.389218807220459, "logps/chosen": -0.24532218277454376, "logps/rejected": -5.2369160652160645, "loss": 0.4205, "nll_loss": 0.4202881455421448, "rewards/accuracies": 1.0, "rewards/chosen": -0.024532217532396317, "rewards/margins": 0.49915939569473267, "rewards/rejected": -0.5236915349960327, "step": 4950 }, { "epoch": 0.992, "grad_norm": 1.4908047914505005, "learning_rate": 9.059806462481021e-07, "log_odds_chosen": 6.889493465423584, "log_odds_ratio": -0.0016459077596664429, "logits/chosen": 6.323537826538086, "logits/rejected": 4.394965648651123, "logps/chosen": -0.25714653730392456, "logps/rejected": -5.307542324066162, "loss": 0.4035, "nll_loss": 0.40333685278892517, "rewards/accuracies": 1.0, "rewards/chosen": -0.025714654475450516, "rewards/margins": 0.5050395727157593, "rewards/rejected": -0.5307542085647583, "step": 4960 }, { "epoch": 0.994, "grad_norm": 1.4701157808303833, "learning_rate": 9.056135691808018e-07, "log_odds_chosen": 6.602829933166504, "log_odds_ratio": -0.035199254751205444, "logits/chosen": 6.267186164855957, "logits/rejected": 4.286623954772949, "logps/chosen": -0.4190858006477356, "logps/rejected": -5.348400592803955, "loss": 0.5134, "nll_loss": 0.5098801255226135, "rewards/accuracies": 1.0, "rewards/chosen": -0.04190857708454132, "rewards/margins": 0.4929315149784088, "rewards/rejected": -0.5348400473594666, "step": 4970 }, { "epoch": 0.996, "grad_norm": 1.0036792755126953, "learning_rate": 9.05245851594311e-07, "log_odds_chosen": 6.779881477355957, "log_odds_ratio": -0.03454712778329849, "logits/chosen": 6.251736640930176, "logits/rejected": 4.550353050231934, "logps/chosen": -0.43145427107810974, "logps/rejected": -5.50458288192749, "loss": 0.5177, "nll_loss": 0.5142897367477417, "rewards/accuracies": 1.0, "rewards/chosen": -0.04314542934298515, "rewards/margins": 0.5073128342628479, "rewards/rejected": -0.5504583120346069, "step": 4980 }, { "epoch": 0.998, "grad_norm": 1.4822487831115723, "learning_rate": 9.048774940693061e-07, "log_odds_chosen": 6.438785552978516, "log_odds_ratio": -0.03711153566837311, "logits/chosen": 6.103884696960449, "logits/rejected": 4.504773139953613, "logps/chosen": -0.5261000394821167, "logps/rejected": -5.420708656311035, "loss": 0.6617, "nll_loss": 0.6579656600952148, "rewards/accuracies": 1.0, "rewards/chosen": -0.05261000245809555, "rewards/margins": 0.48946088552474976, "rewards/rejected": -0.5420709252357483, "step": 4990 }, { "epoch": 1.0, "grad_norm": 1.1346509456634521, "learning_rate": 9.045084971874737e-07, "log_odds_chosen": 6.84014368057251, "log_odds_ratio": -0.0017958584940060973, "logits/chosen": 6.280858039855957, "logits/rejected": 4.399279594421387, "logps/chosen": -0.30037903785705566, "logps/rejected": -5.429879665374756, "loss": 0.447, "nll_loss": 0.4468502104282379, "rewards/accuracies": 1.0, "rewards/chosen": -0.030037909746170044, "rewards/margins": 0.5129501223564148, "rewards/rejected": -0.5429880023002625, "step": 5000 } ], "logging_steps": 10, "max_steps": 25000, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }