diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,4769 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9987368421052631, + "eval_steps": 10000, + "global_step": 593, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.003368421052631579, + "grad_norm": 1021.0826362249041, + "learning_rate": 8e-09, + "logits/chosen": -6.098826885223389, + "logits/rejected": -8.285457611083984, + "logps/chosen": -1105.89208984375, + "logps/rejected": -1840.68798828125, + "loss": 0.6675, + "nll_loss": 4.724418640136719, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.03404197469353676, + "rewards/margins": 0.10088615119457245, + "rewards/rejected": -0.06684418022632599, + "step": 2 + }, + { + "epoch": 0.006736842105263158, + "grad_norm": 775.2893008419502, + "learning_rate": 1.6e-08, + "logits/chosen": -6.404820919036865, + "logits/rejected": -8.010771751403809, + "logps/chosen": -1317.140625, + "logps/rejected": -1826.8807373046875, + "loss": 0.7069, + "nll_loss": 4.841824054718018, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.024968720972537994, + "rewards/margins": 0.0651191771030426, + "rewards/rejected": -0.04015045240521431, + "step": 4 + }, + { + "epoch": 0.010105263157894737, + "grad_norm": 1152.6434834761353, + "learning_rate": 2.3999999999999997e-08, + "logits/chosen": -7.275630950927734, + "logits/rejected": -7.599463939666748, + "logps/chosen": -1705.133544921875, + "logps/rejected": -1831.8782958984375, + "loss": 0.7266, + "nll_loss": 4.776337623596191, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.00952148623764515, + "rewards/margins": -0.00301208533346653, + "rewards/rejected": 0.01253356970846653, + "step": 6 + }, + { + "epoch": 0.013473684210526317, + "grad_norm": 1340.686158278663, + "learning_rate": 3.2e-08, + "logits/chosen": -7.0847063064575195, + "logits/rejected": -7.663762092590332, + "logps/chosen": -1612.9189453125, + "logps/rejected": -1863.4921875, + "loss": 0.6938, + "nll_loss": 4.129978656768799, + "rewards/accuracies": 0.375, + "rewards/chosen": 0.05372562259435654, + "rewards/margins": -0.047937583178281784, + "rewards/rejected": 0.10166320949792862, + "step": 8 + }, + { + "epoch": 0.016842105263157894, + "grad_norm": 830.1962870309648, + "learning_rate": 4e-08, + "logits/chosen": -7.328038692474365, + "logits/rejected": -7.968114852905273, + "logps/chosen": -1624.5255126953125, + "logps/rejected": -1856.871826171875, + "loss": 0.7296, + "nll_loss": 4.658226490020752, + "rewards/accuracies": 0.375, + "rewards/chosen": 0.03902282938361168, + "rewards/margins": -0.01064453274011612, + "rewards/rejected": 0.0496673583984375, + "step": 10 + }, + { + "epoch": 0.020210526315789474, + "grad_norm": 663.540527209906, + "learning_rate": 4.799999999999999e-08, + "logits/chosen": -7.396481513977051, + "logits/rejected": -7.322075843811035, + "logps/chosen": -1599.8494873046875, + "logps/rejected": -1831.381103515625, + "loss": 0.6782, + "nll_loss": 4.17366361618042, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10295410454273224, + "rewards/margins": 0.06537018716335297, + "rewards/rejected": -0.168324276804924, + "step": 12 + }, + { + "epoch": 0.023578947368421053, + "grad_norm": 1099.037021669425, + "learning_rate": 5.6000000000000005e-08, + "logits/chosen": -7.1363630294799805, + "logits/rejected": -7.741977691650391, + "logps/chosen": -1470.75537109375, + "logps/rejected": -1858.2095947265625, + "loss": 0.6512, + "nll_loss": 4.299507141113281, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2573639154434204, + "rewards/margins": 0.10828553140163422, + "rewards/rejected": -0.36564940214157104, + "step": 14 + }, + { + "epoch": 0.026947368421052633, + "grad_norm": 566.7332650815549, + "learning_rate": 6.4e-08, + "logits/chosen": -6.818307876586914, + "logits/rejected": -7.9370036125183105, + "logps/chosen": -1442.171875, + "logps/rejected": -1850.56298828125, + "loss": 0.6185, + "nll_loss": 4.646788597106934, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.3849685788154602, + "rewards/margins": 0.29212191700935364, + "rewards/rejected": -0.6770904660224915, + "step": 16 + }, + { + "epoch": 0.03031578947368421, + "grad_norm": 743.3397608092816, + "learning_rate": 7.2e-08, + "logits/chosen": -7.3267436027526855, + "logits/rejected": -7.727769374847412, + "logps/chosen": -1639.974853515625, + "logps/rejected": -1839.6697998046875, + "loss": 0.5848, + "nll_loss": 4.681390285491943, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.806835949420929, + "rewards/margins": 0.1156768724322319, + "rewards/rejected": -0.9225128889083862, + "step": 18 + }, + { + "epoch": 0.03368421052631579, + "grad_norm": 555.7381352759589, + "learning_rate": 8e-08, + "logits/chosen": -6.984310150146484, + "logits/rejected": -7.973010540008545, + "logps/chosen": -1658.0970458984375, + "logps/rejected": -1865.5997314453125, + "loss": 0.5718, + "nll_loss": 4.629044532775879, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5665146112442017, + "rewards/margins": 0.23055119812488556, + "rewards/rejected": -1.7970658540725708, + "step": 20 + }, + { + "epoch": 0.03705263157894737, + "grad_norm": 537.2764069824767, + "learning_rate": 8.8e-08, + "logits/chosen": -7.248239040374756, + "logits/rejected": -7.833005428314209, + "logps/chosen": -1698.72998046875, + "logps/rejected": -1851.3116455078125, + "loss": 0.5141, + "nll_loss": 4.778887748718262, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.040907382965088, + "rewards/margins": 0.4460693299770355, + "rewards/rejected": -2.4869766235351562, + "step": 22 + }, + { + "epoch": 0.04042105263157895, + "grad_norm": 427.6716943228766, + "learning_rate": 9.599999999999999e-08, + "logits/chosen": -6.897974967956543, + "logits/rejected": -8.095926284790039, + "logps/chosen": -1618.291748046875, + "logps/rejected": -1899.2071533203125, + "loss": 0.5628, + "nll_loss": 4.640469074249268, + "rewards/accuracies": 0.625, + "rewards/chosen": -2.707034111022949, + "rewards/margins": 0.4983123540878296, + "rewards/rejected": -3.2053468227386475, + "step": 24 + }, + { + "epoch": 0.043789473684210524, + "grad_norm": 502.48124965017354, + "learning_rate": 1.04e-07, + "logits/chosen": -6.930876731872559, + "logits/rejected": -7.992618083953857, + "logps/chosen": -1484.425048828125, + "logps/rejected": -1923.680908203125, + "loss": 0.5462, + "nll_loss": 4.688654899597168, + "rewards/accuracies": 0.625, + "rewards/chosen": -4.368743419647217, + "rewards/margins": 1.1556202173233032, + "rewards/rejected": -5.524363994598389, + "step": 26 + }, + { + "epoch": 0.04715789473684211, + "grad_norm": 383.7692991889789, + "learning_rate": 1.1200000000000001e-07, + "logits/chosen": -7.199338912963867, + "logits/rejected": -7.51033353805542, + "logps/chosen": -1769.854248046875, + "logps/rejected": -1886.64501953125, + "loss": 0.4697, + "nll_loss": 4.6844096183776855, + "rewards/accuracies": 0.75, + "rewards/chosen": -6.4538960456848145, + "rewards/margins": 0.6539719104766846, + "rewards/rejected": -7.107868194580078, + "step": 28 + }, + { + "epoch": 0.05052631578947368, + "grad_norm": 481.75715160752935, + "learning_rate": 1.2e-07, + "logits/chosen": -6.897581577301025, + "logits/rejected": -7.895030498504639, + "logps/chosen": -1615.928466796875, + "logps/rejected": -1956.94287109375, + "loss": 0.4974, + "nll_loss": 4.612817764282227, + "rewards/accuracies": 0.625, + "rewards/chosen": -6.480710029602051, + "rewards/margins": 1.3521087169647217, + "rewards/rejected": -7.832818984985352, + "step": 30 + }, + { + "epoch": 0.053894736842105266, + "grad_norm": 433.1144839993232, + "learning_rate": 1.28e-07, + "logits/chosen": -7.140986442565918, + "logits/rejected": -7.891180515289307, + "logps/chosen": -1722.1290283203125, + "logps/rejected": -1961.19140625, + "loss": 0.3723, + "nll_loss": 4.612584590911865, + "rewards/accuracies": 0.75, + "rewards/chosen": -7.3035502433776855, + "rewards/margins": 1.1011128425598145, + "rewards/rejected": -8.4046630859375, + "step": 32 + }, + { + "epoch": 0.05726315789473684, + "grad_norm": 460.5421567294356, + "learning_rate": 1.36e-07, + "logits/chosen": -5.8924641609191895, + "logits/rejected": -7.968005657196045, + "logps/chosen": -1063.8079833984375, + "logps/rejected": -1961.5302734375, + "loss": 0.3717, + "nll_loss": 4.887433052062988, + "rewards/accuracies": 0.875, + "rewards/chosen": -4.523468017578125, + "rewards/margins": 4.804460525512695, + "rewards/rejected": -9.327927589416504, + "step": 34 + }, + { + "epoch": 0.06063157894736842, + "grad_norm": 382.3217926367787, + "learning_rate": 1.44e-07, + "logits/chosen": -7.277335166931152, + "logits/rejected": -8.038162231445312, + "logps/chosen": -1612.2705078125, + "logps/rejected": -1970.5556640625, + "loss": 0.4893, + "nll_loss": 4.758667469024658, + "rewards/accuracies": 0.75, + "rewards/chosen": -8.101587295532227, + "rewards/margins": 1.8269003629684448, + "rewards/rejected": -9.928487777709961, + "step": 36 + }, + { + "epoch": 0.064, + "grad_norm": 327.81609268765317, + "learning_rate": 1.5199999999999998e-07, + "logits/chosen": -6.232708930969238, + "logits/rejected": -7.9314703941345215, + "logps/chosen": -1311.58984375, + "logps/rejected": -1898.37353515625, + "loss": 0.4384, + "nll_loss": 4.995038986206055, + "rewards/accuracies": 0.875, + "rewards/chosen": -7.171569347381592, + "rewards/margins": 4.5418925285339355, + "rewards/rejected": -11.713461875915527, + "step": 38 + }, + { + "epoch": 0.06736842105263158, + "grad_norm": 411.60155946011673, + "learning_rate": 1.6e-07, + "logits/chosen": -7.439314365386963, + "logits/rejected": -7.884708404541016, + "logps/chosen": -1801.27978515625, + "logps/rejected": -2006.94921875, + "loss": 0.3972, + "nll_loss": 4.832527160644531, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.396756172180176, + "rewards/margins": 1.0853164196014404, + "rewards/rejected": -13.482072830200195, + "step": 40 + }, + { + "epoch": 0.07073684210526315, + "grad_norm": 353.27396194384323, + "learning_rate": 1.68e-07, + "logits/chosen": -6.52924108505249, + "logits/rejected": -7.981770038604736, + "logps/chosen": -1366.396484375, + "logps/rejected": -2025.9642333984375, + "loss": 0.5679, + "nll_loss": 4.827330112457275, + "rewards/accuracies": 0.75, + "rewards/chosen": -9.584479331970215, + "rewards/margins": 5.323737621307373, + "rewards/rejected": -14.90821647644043, + "step": 42 + }, + { + "epoch": 0.07410526315789474, + "grad_norm": 259.81954374623166, + "learning_rate": 1.76e-07, + "logits/chosen": -7.801054000854492, + "logits/rejected": -7.639019966125488, + "logps/chosen": -1827.47314453125, + "logps/rejected": -1996.3192138671875, + "loss": 0.3678, + "nll_loss": 4.5667243003845215, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.310271263122559, + "rewards/margins": 2.2248902320861816, + "rewards/rejected": -16.5351619720459, + "step": 44 + }, + { + "epoch": 0.07747368421052632, + "grad_norm": 406.5448161027269, + "learning_rate": 1.84e-07, + "logits/chosen": -8.297408103942871, + "logits/rejected": -7.499330043792725, + "logps/chosen": -1998.0411376953125, + "logps/rejected": -1991.69921875, + "loss": 0.3749, + "nll_loss": 4.313129425048828, + "rewards/accuracies": 0.75, + "rewards/chosen": -16.31363296508789, + "rewards/margins": 0.6383074522018433, + "rewards/rejected": -16.951940536499023, + "step": 46 + }, + { + "epoch": 0.0808421052631579, + "grad_norm": 462.82095282625227, + "learning_rate": 1.9199999999999997e-07, + "logits/chosen": -7.885348320007324, + "logits/rejected": -8.057937622070312, + "logps/chosen": -1869.8287353515625, + "logps/rejected": -1985.5797119140625, + "loss": 0.4191, + "nll_loss": 4.887444019317627, + "rewards/accuracies": 0.625, + "rewards/chosen": -15.957789421081543, + "rewards/margins": 1.4249342679977417, + "rewards/rejected": -17.382722854614258, + "step": 48 + }, + { + "epoch": 0.08421052631578947, + "grad_norm": 612.0204500259774, + "learning_rate": 2e-07, + "logits/chosen": -5.61109733581543, + "logits/rejected": -7.90653657913208, + "logps/chosen": -1070.6937255859375, + "logps/rejected": -2036.2265625, + "loss": 0.3329, + "nll_loss": 5.1021575927734375, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.67391586303711, + "rewards/margins": 8.88609504699707, + "rewards/rejected": -17.560009002685547, + "step": 50 + }, + { + "epoch": 0.08757894736842105, + "grad_norm": 450.4075931864382, + "learning_rate": 1.9999330539070613e-07, + "logits/chosen": -7.206405162811279, + "logits/rejected": -8.117046356201172, + "logps/chosen": -1794.2418212890625, + "logps/rejected": -2010.4686279296875, + "loss": 0.3961, + "nll_loss": 5.458052158355713, + "rewards/accuracies": 0.75, + "rewards/chosen": -15.731528282165527, + "rewards/margins": 1.6168347597122192, + "rewards/rejected": -17.348363876342773, + "step": 52 + }, + { + "epoch": 0.09094736842105264, + "grad_norm": 521.1752044658302, + "learning_rate": 1.9997322245918037e-07, + "logits/chosen": -7.272748947143555, + "logits/rejected": -8.118557929992676, + "logps/chosen": -1745.1031494140625, + "logps/rejected": -2038.525390625, + "loss": 0.4714, + "nll_loss": 5.170174598693848, + "rewards/accuracies": 0.875, + "rewards/chosen": -15.204635620117188, + "rewards/margins": 2.791910409927368, + "rewards/rejected": -17.996545791625977, + "step": 54 + }, + { + "epoch": 0.09431578947368421, + "grad_norm": 217.38647360461505, + "learning_rate": 1.9993975389437037e-07, + "logits/chosen": -6.186915397644043, + "logits/rejected": -8.047518730163574, + "logps/chosen": -1388.68603515625, + "logps/rejected": -2048.745361328125, + "loss": 0.2578, + "nll_loss": 5.35891056060791, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.742466926574707, + "rewards/margins": 7.281023025512695, + "rewards/rejected": -19.023488998413086, + "step": 56 + }, + { + "epoch": 0.09768421052631579, + "grad_norm": 405.6486896009896, + "learning_rate": 1.9989290417745539e-07, + "logits/chosen": -6.964948654174805, + "logits/rejected": -8.299407005310059, + "logps/chosen": -1703.4879150390625, + "logps/rejected": -2057.886962890625, + "loss": 0.4002, + "nll_loss": 5.260710716247559, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.706157684326172, + "rewards/margins": 3.9658584594726562, + "rewards/rejected": -18.672016143798828, + "step": 58 + }, + { + "epoch": 0.10105263157894737, + "grad_norm": 475.2633809268288, + "learning_rate": 1.9983267958124644e-07, + "logits/chosen": -6.648141384124756, + "logits/rejected": -7.772933006286621, + "logps/chosen": -1497.2598876953125, + "logps/rejected": -2023.4912109375, + "loss": 0.474, + "nll_loss": 5.289524555206299, + "rewards/accuracies": 0.625, + "rewards/chosen": -13.703365325927734, + "rewards/margins": 5.52022647857666, + "rewards/rejected": -19.22359275817871, + "step": 60 + }, + { + "epoch": 0.10442105263157894, + "grad_norm": 295.526199134696, + "learning_rate": 1.9975908816934638e-07, + "logits/chosen": -7.280607223510742, + "logits/rejected": -7.836946964263916, + "logps/chosen": -1767.7076416015625, + "logps/rejected": -2027.5374755859375, + "loss": 0.3311, + "nll_loss": 4.935500144958496, + "rewards/accuracies": 0.625, + "rewards/chosen": -16.52497100830078, + "rewards/margins": 2.8025190830230713, + "rewards/rejected": -19.327489852905273, + "step": 62 + }, + { + "epoch": 0.10778947368421053, + "grad_norm": 451.01621367751295, + "learning_rate": 1.9967213979507017e-07, + "logits/chosen": -7.747291564941406, + "logits/rejected": -7.578754901885986, + "logps/chosen": -1903.654052734375, + "logps/rejected": -1995.362060546875, + "loss": 0.2961, + "nll_loss": 4.874569416046143, + "rewards/accuracies": 0.625, + "rewards/chosen": -18.415132522583008, + "rewards/margins": 1.7832107543945312, + "rewards/rejected": -20.19834327697754, + "step": 64 + }, + { + "epoch": 0.11115789473684211, + "grad_norm": 391.98836079659304, + "learning_rate": 1.995718461001257e-07, + "logits/chosen": -6.887873649597168, + "logits/rejected": -7.892035484313965, + "logps/chosen": -1613.168701171875, + "logps/rejected": -2109.185546875, + "loss": 0.2514, + "nll_loss": 4.979562282562256, + "rewards/accuracies": 0.75, + "rewards/chosen": -15.48417854309082, + "rewards/margins": 5.469597816467285, + "rewards/rejected": -20.953777313232422, + "step": 66 + }, + { + "epoch": 0.11452631578947368, + "grad_norm": 357.9128298203349, + "learning_rate": 1.9945822051305505e-07, + "logits/chosen": -6.562501430511475, + "logits/rejected": -8.093667984008789, + "logps/chosen": -1624.415771484375, + "logps/rejected": -2102.240966796875, + "loss": 0.2549, + "nll_loss": 5.153851509094238, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.476547241210938, + "rewards/margins": 6.269686222076416, + "rewards/rejected": -21.746234893798828, + "step": 68 + }, + { + "epoch": 0.11789473684210526, + "grad_norm": 324.2814050905479, + "learning_rate": 1.9933127824743643e-07, + "logits/chosen": -7.621331214904785, + "logits/rejected": -7.522034645080566, + "logps/chosen": -1808.6651611328125, + "logps/rejected": -2016.3822021484375, + "loss": 0.1968, + "nll_loss": 5.224420070648193, + "rewards/accuracies": 0.875, + "rewards/chosen": -17.95054817199707, + "rewards/margins": 3.378469944000244, + "rewards/rejected": -21.329015731811523, + "step": 70 + }, + { + "epoch": 0.12126315789473684, + "grad_norm": 507.77270997957345, + "learning_rate": 1.9919103629984725e-07, + "logits/chosen": -7.971831321716309, + "logits/rejected": -7.918793201446533, + "logps/chosen": -1930.361083984375, + "logps/rejected": -2080.251953125, + "loss": 0.3995, + "nll_loss": 4.725856781005859, + "rewards/accuracies": 0.75, + "rewards/chosen": -19.6121826171875, + "rewards/margins": 1.6514708995819092, + "rewards/rejected": -21.263654708862305, + "step": 72 + }, + { + "epoch": 0.12463157894736843, + "grad_norm": 405.2199441352807, + "learning_rate": 1.9903751344758845e-07, + "logits/chosen": -7.292391300201416, + "logits/rejected": -8.180887222290039, + "logps/chosen": -1775.805908203125, + "logps/rejected": -2013.7442626953125, + "loss": 0.4428, + "nll_loss": 5.191011905670166, + "rewards/accuracies": 0.625, + "rewards/chosen": -17.799779891967773, + "rewards/margins": 3.329552173614502, + "rewards/rejected": -21.129331588745117, + "step": 74 + }, + { + "epoch": 0.128, + "grad_norm": 240.9685453409843, + "learning_rate": 1.9887073024617028e-07, + "logits/chosen": -6.470153331756592, + "logits/rejected": -7.746395587921143, + "logps/chosen": -1489.3653564453125, + "logps/rejected": -2056.020751953125, + "loss": 0.2493, + "nll_loss": 5.174279689788818, + "rewards/accuracies": 0.875, + "rewards/chosen": -15.556097030639648, + "rewards/margins": 6.754159927368164, + "rewards/rejected": -22.310256958007812, + "step": 76 + }, + { + "epoch": 0.13136842105263158, + "grad_norm": 493.99746962914696, + "learning_rate": 1.9869070902656017e-07, + "logits/chosen": -5.889953136444092, + "logits/rejected": -8.0224027633667, + "logps/chosen": -1382.2691650390625, + "logps/rejected": -2080.0107421875, + "loss": 0.485, + "nll_loss": 5.714942932128906, + "rewards/accuracies": 0.875, + "rewards/chosen": -14.33112907409668, + "rewards/margins": 7.287192344665527, + "rewards/rejected": -21.61832046508789, + "step": 78 + }, + { + "epoch": 0.13473684210526315, + "grad_norm": 137.24435752290512, + "learning_rate": 1.984974738921927e-07, + "logits/chosen": -6.03630256652832, + "logits/rejected": -8.135429382324219, + "logps/chosen": -1310.54833984375, + "logps/rejected": -2067.27490234375, + "loss": 0.2926, + "nll_loss": 5.408031463623047, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.000334739685059, + "rewards/margins": 9.575756072998047, + "rewards/rejected": -22.57608985900879, + "step": 80 + }, + { + "epoch": 0.13810526315789473, + "grad_norm": 332.6447170969351, + "learning_rate": 1.982910507157424e-07, + "logits/chosen": -7.524165630340576, + "logits/rejected": -7.871054649353027, + "logps/chosen": -1923.2384033203125, + "logps/rejected": -2104.744384765625, + "loss": 0.4062, + "nll_loss": 4.740773677825928, + "rewards/accuracies": 0.875, + "rewards/chosen": -20.172786712646484, + "rewards/margins": 2.7083492279052734, + "rewards/rejected": -22.881135940551758, + "step": 82 + }, + { + "epoch": 0.1414736842105263, + "grad_norm": 434.7785715165825, + "learning_rate": 1.9807146713565955e-07, + "logits/chosen": -7.366245746612549, + "logits/rejected": -7.99586296081543, + "logps/chosen": -1834.388916015625, + "logps/rejected": -2039.752685546875, + "loss": 0.4183, + "nll_loss": 5.197725296020508, + "rewards/accuracies": 0.625, + "rewards/chosen": -19.303865432739258, + "rewards/margins": 3.0796072483062744, + "rewards/rejected": -22.38347053527832, + "step": 84 + }, + { + "epoch": 0.14484210526315788, + "grad_norm": 394.456469398082, + "learning_rate": 1.9783875255246973e-07, + "logits/chosen": -7.369488716125488, + "logits/rejected": -7.899564743041992, + "logps/chosen": -2014.7353515625, + "logps/rejected": -2079.622802734375, + "loss": 0.435, + "nll_loss": 5.506885051727295, + "rewards/accuracies": 0.5, + "rewards/chosen": -22.415340423583984, + "rewards/margins": 1.3391568660736084, + "rewards/rejected": -23.75449562072754, + "step": 86 + }, + { + "epoch": 0.1482105263157895, + "grad_norm": 474.52038286042045, + "learning_rate": 1.9759293812483712e-07, + "logits/chosen": -6.315229892730713, + "logits/rejected": -7.918384075164795, + "logps/chosen": -1505.7802734375, + "logps/rejected": -2153.10986328125, + "loss": 0.518, + "nll_loss": 5.55679988861084, + "rewards/accuracies": 1.0, + "rewards/chosen": -16.11681365966797, + "rewards/margins": 8.099884033203125, + "rewards/rejected": -24.216697692871094, + "step": 88 + }, + { + "epoch": 0.15157894736842106, + "grad_norm": 463.6020653534667, + "learning_rate": 1.973340567653928e-07, + "logits/chosen": -7.8903656005859375, + "logits/rejected": -7.871662616729736, + "logps/chosen": -2007.9490966796875, + "logps/rejected": -2079.7353515625, + "loss": 0.3285, + "nll_loss": 4.673924922943115, + "rewards/accuracies": 0.875, + "rewards/chosen": -21.700756072998047, + "rewards/margins": 2.0794034004211426, + "rewards/rejected": -23.78015899658203, + "step": 90 + }, + { + "epoch": 0.15494736842105264, + "grad_norm": 469.1854810466752, + "learning_rate": 1.9706214313632782e-07, + "logits/chosen": -7.816376686096191, + "logits/rejected": -7.4226765632629395, + "logps/chosen": -1935.4766845703125, + "logps/rejected": -2112.067138671875, + "loss": 0.4765, + "nll_loss": 4.788717746734619, + "rewards/accuracies": 0.875, + "rewards/chosen": -21.18560791015625, + "rewards/margins": 3.7070724964141846, + "rewards/rejected": -24.892681121826172, + "step": 92 + }, + { + "epoch": 0.15831578947368422, + "grad_norm": 203.68587948112653, + "learning_rate": 1.9677723364475236e-07, + "logits/chosen": -7.06157112121582, + "logits/rejected": -8.203742027282715, + "logps/chosen": -1748.9493408203125, + "logps/rejected": -2103.68212890625, + "loss": 0.2824, + "nll_loss": 5.13286828994751, + "rewards/accuracies": 0.875, + "rewards/chosen": -18.6251220703125, + "rewards/margins": 5.7565789222717285, + "rewards/rejected": -24.381698608398438, + "step": 94 + }, + { + "epoch": 0.1616842105263158, + "grad_norm": 454.0753654405841, + "learning_rate": 1.9647936643782106e-07, + "logits/chosen": -7.063906669616699, + "logits/rejected": -7.930819511413574, + "logps/chosen": -1851.276611328125, + "logps/rejected": -2139.397216796875, + "loss": 0.4842, + "nll_loss": 4.915232181549072, + "rewards/accuracies": 0.75, + "rewards/chosen": -20.60812759399414, + "rewards/margins": 3.6515331268310547, + "rewards/rejected": -24.259662628173828, + "step": 96 + }, + { + "epoch": 0.16505263157894737, + "grad_norm": 290.76223910181324, + "learning_rate": 1.961685813976253e-07, + "logits/chosen": -7.524170875549316, + "logits/rejected": -8.006389617919922, + "logps/chosen": -1945.13623046875, + "logps/rejected": -2088.755126953125, + "loss": 0.3862, + "nll_loss": 4.894111633300781, + "rewards/accuracies": 0.75, + "rewards/chosen": -22.57034683227539, + "rewards/margins": 2.1726748943328857, + "rewards/rejected": -24.74302101135254, + "step": 98 + }, + { + "epoch": 0.16842105263157894, + "grad_norm": 245.46799212168173, + "learning_rate": 1.9584492013585354e-07, + "logits/chosen": -7.0596513748168945, + "logits/rejected": -7.93139123916626, + "logps/chosen": -1557.81787109375, + "logps/rejected": -2036.077880859375, + "loss": 0.192, + "nll_loss": 5.297558784484863, + "rewards/accuracies": 0.875, + "rewards/chosen": -16.414541244506836, + "rewards/margins": 8.017242431640625, + "rewards/rejected": -24.431781768798828, + "step": 100 + }, + { + "epoch": 0.17178947368421052, + "grad_norm": 404.1873598654466, + "learning_rate": 1.955084259882195e-07, + "logits/chosen": -5.742030143737793, + "logits/rejected": -7.751094818115234, + "logps/chosen": -1235.095947265625, + "logps/rejected": -2130.07080078125, + "loss": 0.3401, + "nll_loss": 5.310590744018555, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.479273796081543, + "rewards/margins": 12.036299705505371, + "rewards/rejected": -25.515575408935547, + "step": 102 + }, + { + "epoch": 0.1751578947368421, + "grad_norm": 116.3442830030767, + "learning_rate": 1.9515914400866017e-07, + "logits/chosen": -7.13689661026001, + "logits/rejected": -7.9674553871154785, + "logps/chosen": -1809.794677734375, + "logps/rejected": -2110.8828125, + "loss": 0.2132, + "nll_loss": 5.315598487854004, + "rewards/accuracies": 0.875, + "rewards/chosen": -21.165102005004883, + "rewards/margins": 4.648441791534424, + "rewards/rejected": -25.81354522705078, + "step": 104 + }, + { + "epoch": 0.17852631578947367, + "grad_norm": 290.204628775111, + "learning_rate": 1.9479712096330334e-07, + "logits/chosen": -7.378734111785889, + "logits/rejected": -7.894670486450195, + "logps/chosen": -1803.5947265625, + "logps/rejected": -2066.1181640625, + "loss": 0.3482, + "nll_loss": 5.543490409851074, + "rewards/accuracies": 0.75, + "rewards/chosen": -20.527360916137695, + "rewards/margins": 5.067646026611328, + "rewards/rejected": -25.595006942749023, + "step": 106 + }, + { + "epoch": 0.18189473684210528, + "grad_norm": 361.78273867060045, + "learning_rate": 1.944224053242058e-07, + "logits/chosen": -7.142321586608887, + "logits/rejected": -7.959826946258545, + "logps/chosen": -1579.7891845703125, + "logps/rejected": -2068.02197265625, + "loss": 0.2804, + "nll_loss": 4.833759307861328, + "rewards/accuracies": 0.875, + "rewards/chosen": -18.271129608154297, + "rewards/margins": 7.227394104003906, + "rewards/rejected": -25.498523712158203, + "step": 108 + }, + { + "epoch": 0.18526315789473685, + "grad_norm": 235.0435714261294, + "learning_rate": 1.9403504726286367e-07, + "logits/chosen": -5.593753337860107, + "logits/rejected": -8.2781400680542, + "logps/chosen": -1415.476806640625, + "logps/rejected": -2148.1337890625, + "loss": 0.259, + "nll_loss": 4.7400031089782715, + "rewards/accuracies": 0.75, + "rewards/chosen": -16.4371337890625, + "rewards/margins": 9.744308471679688, + "rewards/rejected": -26.18144416809082, + "step": 110 + }, + { + "epoch": 0.18863157894736843, + "grad_norm": 347.2912929912007, + "learning_rate": 1.9363509864349436e-07, + "logits/chosen": -6.396492004394531, + "logits/rejected": -8.129210472106934, + "logps/chosen": -1428.15185546875, + "logps/rejected": -2139.29931640625, + "loss": 0.1951, + "nll_loss": 5.208731174468994, + "rewards/accuracies": 1.0, + "rewards/chosen": -17.481124877929688, + "rewards/margins": 9.886039733886719, + "rewards/rejected": -27.367164611816406, + "step": 112 + }, + { + "epoch": 0.192, + "grad_norm": 443.2420860827636, + "learning_rate": 1.9322261301609284e-07, + "logits/chosen": -7.289253234863281, + "logits/rejected": -8.456584930419922, + "logps/chosen": -1952.02685546875, + "logps/rejected": -2108.48876953125, + "loss": 0.4061, + "nll_loss": 5.445844650268555, + "rewards/accuracies": 1.0, + "rewards/chosen": -24.95429229736328, + "rewards/margins": 2.7193362712860107, + "rewards/rejected": -27.673629760742188, + "step": 114 + }, + { + "epoch": 0.19536842105263158, + "grad_norm": 322.7074429727334, + "learning_rate": 1.927976456092614e-07, + "logits/chosen": -7.254305362701416, + "logits/rejected": -8.170782089233398, + "logps/chosen": -1910.8201904296875, + "logps/rejected": -2137.612548828125, + "loss": 0.3789, + "nll_loss": 5.32393217086792, + "rewards/accuracies": 0.875, + "rewards/chosen": -23.50674819946289, + "rewards/margins": 3.4448273181915283, + "rewards/rejected": -26.951576232910156, + "step": 116 + }, + { + "epoch": 0.19873684210526316, + "grad_norm": 230.28383471544416, + "learning_rate": 1.9236025332281506e-07, + "logits/chosen": -6.197360515594482, + "logits/rejected": -8.328180313110352, + "logps/chosen": -1564.211669921875, + "logps/rejected": -2164.58056640625, + "loss": 0.1884, + "nll_loss": 5.403533458709717, + "rewards/accuracies": 1.0, + "rewards/chosen": -18.896282196044922, + "rewards/margins": 8.365097999572754, + "rewards/rejected": -27.261381149291992, + "step": 118 + }, + { + "epoch": 0.20210526315789473, + "grad_norm": 375.89177298791736, + "learning_rate": 1.9191049472016313e-07, + "logits/chosen": -7.113523006439209, + "logits/rejected": -7.964775562286377, + "logps/chosen": -1672.96923828125, + "logps/rejected": -2136.83642578125, + "loss": 0.2439, + "nll_loss": 4.739628791809082, + "rewards/accuracies": 0.625, + "rewards/chosen": -20.55191993713379, + "rewards/margins": 5.649931907653809, + "rewards/rejected": -26.20184898376465, + "step": 120 + }, + { + "epoch": 0.2054736842105263, + "grad_norm": 323.0978953815701, + "learning_rate": 1.9144843002046805e-07, + "logits/chosen": -6.809453964233398, + "logits/rejected": -8.00958251953125, + "logps/chosen": -1658.2227783203125, + "logps/rejected": -2084.864990234375, + "loss": 0.5958, + "nll_loss": 5.457843780517578, + "rewards/accuracies": 0.75, + "rewards/chosen": -20.677547454833984, + "rewards/margins": 6.124557018280029, + "rewards/rejected": -26.802101135253906, + "step": 122 + }, + { + "epoch": 0.20884210526315788, + "grad_norm": 258.6436979707799, + "learning_rate": 1.9097412109058243e-07, + "logits/chosen": -7.692294597625732, + "logits/rejected": -8.052206993103027, + "logps/chosen": -1873.5733642578125, + "logps/rejected": -2030.232177734375, + "loss": 0.4711, + "nll_loss": 5.26876163482666, + "rewards/accuracies": 1.0, + "rewards/chosen": -22.60447120666504, + "rewards/margins": 3.8998751640319824, + "rewards/rejected": -26.50434684753418, + "step": 124 + }, + { + "epoch": 0.21221052631578946, + "grad_norm": 372.3923082583742, + "learning_rate": 1.9048763143676575e-07, + "logits/chosen": -7.407994747161865, + "logits/rejected": -7.782034397125244, + "logps/chosen": -1862.9501953125, + "logps/rejected": -2138.9091796875, + "loss": 0.1584, + "nll_loss": 5.26196813583374, + "rewards/accuracies": 1.0, + "rewards/chosen": -21.54726791381836, + "rewards/margins": 5.700539588928223, + "rewards/rejected": -27.247806549072266, + "step": 126 + }, + { + "epoch": 0.21557894736842106, + "grad_norm": 176.97835981766247, + "learning_rate": 1.8998902619618113e-07, + "logits/chosen": -6.254648208618164, + "logits/rejected": -8.410988807678223, + "logps/chosen": -1320.6219482421875, + "logps/rejected": -2096.9287109375, + "loss": 0.2115, + "nll_loss": 5.321068286895752, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.538348197937012, + "rewards/margins": 11.237432479858398, + "rewards/rejected": -26.775779724121094, + "step": 128 + }, + { + "epoch": 0.21894736842105264, + "grad_norm": 547.3377613503384, + "learning_rate": 1.8947837212817413e-07, + "logits/chosen": -6.936631202697754, + "logits/rejected": -8.201074600219727, + "logps/chosen": -1785.872314453125, + "logps/rejected": -2129.34375, + "loss": 0.3615, + "nll_loss": 5.191522598266602, + "rewards/accuracies": 0.875, + "rewards/chosen": -21.109575271606445, + "rewards/margins": 5.265467643737793, + "rewards/rejected": -26.37504005432129, + "step": 130 + }, + { + "epoch": 0.22231578947368422, + "grad_norm": 353.0941136935024, + "learning_rate": 1.8895573760533412e-07, + "logits/chosen": -7.306629180908203, + "logits/rejected": -7.669928073883057, + "logps/chosen": -1730.935546875, + "logps/rejected": -2167.73095703125, + "loss": 0.293, + "nll_loss": 4.846038818359375, + "rewards/accuracies": 0.75, + "rewards/chosen": -20.911489486694336, + "rewards/margins": 6.074740409851074, + "rewards/rejected": -26.986228942871094, + "step": 132 + }, + { + "epoch": 0.2256842105263158, + "grad_norm": 212.13304713669675, + "learning_rate": 1.884211926043398e-07, + "logits/chosen": -6.941915035247803, + "logits/rejected": -8.13010311126709, + "logps/chosen": -1714.9908447265625, + "logps/rejected": -2106.353515625, + "loss": 0.1782, + "nll_loss": 5.203182220458984, + "rewards/accuracies": 0.875, + "rewards/chosen": -20.565366744995117, + "rewards/margins": 5.9251708984375, + "rewards/rejected": -26.490537643432617, + "step": 134 + }, + { + "epoch": 0.22905263157894737, + "grad_norm": 279.1889760986275, + "learning_rate": 1.8787480869658978e-07, + "logits/chosen": -7.706783294677734, + "logits/rejected": -7.740918159484863, + "logps/chosen": -2039.910888671875, + "logps/rejected": -2072.723876953125, + "loss": 0.2864, + "nll_loss": 4.911942958831787, + "rewards/accuracies": 0.875, + "rewards/chosen": -23.88863754272461, + "rewards/margins": 1.29465651512146, + "rewards/rejected": -25.183292388916016, + "step": 136 + }, + { + "epoch": 0.23242105263157894, + "grad_norm": 550.1062681428017, + "learning_rate": 1.8731665903861985e-07, + "logits/chosen": -6.753871917724609, + "logits/rejected": -8.267376899719238, + "logps/chosen": -1722.739990234375, + "logps/rejected": -2134.793212890625, + "loss": 0.2628, + "nll_loss": 5.156876087188721, + "rewards/accuracies": 1.0, + "rewards/chosen": -19.4663143157959, + "rewards/margins": 6.938652038574219, + "rewards/rejected": -26.404966354370117, + "step": 138 + }, + { + "epoch": 0.23578947368421052, + "grad_norm": 387.2010540404042, + "learning_rate": 1.8674681836230768e-07, + "logits/chosen": -7.421821117401123, + "logits/rejected": -8.06303882598877, + "logps/chosen": -1804.21875, + "logps/rejected": -2085.41943359375, + "loss": 0.4242, + "nll_loss": 4.941443920135498, + "rewards/accuracies": 0.875, + "rewards/chosen": -21.495800018310547, + "rewards/margins": 4.65245246887207, + "rewards/rejected": -26.148252487182617, + "step": 140 + }, + { + "epoch": 0.2391578947368421, + "grad_norm": 319.34935073129554, + "learning_rate": 1.8616536296486708e-07, + "logits/chosen": -6.959980487823486, + "logits/rejected": -8.008804321289062, + "logps/chosen": -1645.2440185546875, + "logps/rejected": -2129.713623046875, + "loss": 0.2486, + "nll_loss": 5.249794960021973, + "rewards/accuracies": 0.875, + "rewards/chosen": -19.205081939697266, + "rewards/margins": 6.898474216461182, + "rewards/rejected": -26.103557586669922, + "step": 142 + }, + { + "epoch": 0.24252631578947367, + "grad_norm": 179.51700063120762, + "learning_rate": 1.855723706986322e-07, + "logits/chosen": -6.863515377044678, + "logits/rejected": -8.17676067352295, + "logps/chosen": -1595.7620849609375, + "logps/rejected": -2139.49951171875, + "loss": 0.2893, + "nll_loss": 5.239679336547852, + "rewards/accuracies": 0.75, + "rewards/chosen": -19.65725326538086, + "rewards/margins": 7.443434238433838, + "rewards/rejected": -27.10068702697754, + "step": 144 + }, + { + "epoch": 0.24589473684210525, + "grad_norm": 275.3682193899652, + "learning_rate": 1.8496792096063377e-07, + "logits/chosen": -6.194632530212402, + "logits/rejected": -8.473308563232422, + "logps/chosen": -1469.7490234375, + "logps/rejected": -2113.530029296875, + "loss": 0.2238, + "nll_loss": 5.825500965118408, + "rewards/accuracies": 0.875, + "rewards/chosen": -18.392675399780273, + "rewards/margins": 9.299560546875, + "rewards/rejected": -27.692235946655273, + "step": 146 + }, + { + "epoch": 0.24926315789473685, + "grad_norm": 323.66578296523716, + "learning_rate": 1.8435209468196847e-07, + "logits/chosen": -7.083681106567383, + "logits/rejected": -8.149633407592773, + "logps/chosen": -1779.7303466796875, + "logps/rejected": -2133.3251953125, + "loss": 0.5144, + "nll_loss": 4.9437971115112305, + "rewards/accuracies": 0.75, + "rewards/chosen": -22.35186767578125, + "rewards/margins": 6.203129291534424, + "rewards/rejected": -28.55499839782715, + "step": 148 + }, + { + "epoch": 0.25263157894736843, + "grad_norm": 461.451311082762, + "learning_rate": 1.8372497431696285e-07, + "logits/chosen": -7.620912551879883, + "logits/rejected": -8.02045726776123, + "logps/chosen": -1918.67724609375, + "logps/rejected": -2131.48193359375, + "loss": 0.4635, + "nll_loss": 4.790881633758545, + "rewards/accuracies": 0.75, + "rewards/chosen": -24.607433319091797, + "rewards/margins": 3.646448850631714, + "rewards/rejected": -28.25388526916504, + "step": 150 + }, + { + "epoch": 0.256, + "grad_norm": 405.566848682187, + "learning_rate": 1.830866438321334e-07, + "logits/chosen": -7.111250877380371, + "logits/rejected": -8.096766471862793, + "logps/chosen": -1748.1270751953125, + "logps/rejected": -2097.0595703125, + "loss": 0.4518, + "nll_loss": 5.408980846405029, + "rewards/accuracies": 0.75, + "rewards/chosen": -23.680131912231445, + "rewards/margins": 5.77828311920166, + "rewards/rejected": -29.458415985107422, + "step": 152 + }, + { + "epoch": 0.2593684210526316, + "grad_norm": 298.617818561619, + "learning_rate": 1.8243718869494405e-07, + "logits/chosen": -7.182644844055176, + "logits/rejected": -8.200958251953125, + "logps/chosen": -1616.265625, + "logps/rejected": -2086.645751953125, + "loss": 0.2766, + "nll_loss": 5.574889183044434, + "rewards/accuracies": 0.875, + "rewards/chosen": -21.693281173706055, + "rewards/margins": 7.690347671508789, + "rewards/rejected": -29.38362693786621, + "step": 154 + }, + { + "epoch": 0.26273684210526316, + "grad_norm": 297.68208713323685, + "learning_rate": 1.8177669586236274e-07, + "logits/chosen": -7.177743434906006, + "logits/rejected": -8.266185760498047, + "logps/chosen": -1737.14892578125, + "logps/rejected": -2121.933837890625, + "loss": 0.1476, + "nll_loss": 5.227602481842041, + "rewards/accuracies": 1.0, + "rewards/chosen": -23.120718002319336, + "rewards/margins": 7.246493816375732, + "rewards/rejected": -30.367212295532227, + "step": 156 + }, + { + "epoch": 0.26610526315789473, + "grad_norm": 321.45477422959516, + "learning_rate": 1.811052537692186e-07, + "logits/chosen": -7.373076915740967, + "logits/rejected": -8.062271118164062, + "logps/chosen": -1772.658447265625, + "logps/rejected": -2124.108642578125, + "loss": 0.1706, + "nll_loss": 5.48671817779541, + "rewards/accuracies": 0.875, + "rewards/chosen": -22.967531204223633, + "rewards/margins": 7.354568004608154, + "rewards/rejected": -30.322099685668945, + "step": 158 + }, + { + "epoch": 0.2694736842105263, + "grad_norm": 265.3611042051943, + "learning_rate": 1.8042295231636113e-07, + "logits/chosen": -6.719966888427734, + "logits/rejected": -8.064167976379395, + "logps/chosen": -1674.589111328125, + "logps/rejected": -2176.885009765625, + "loss": 0.3049, + "nll_loss": 5.396821022033691, + "rewards/accuracies": 1.0, + "rewards/chosen": -22.00328254699707, + "rewards/margins": 8.764394760131836, + "rewards/rejected": -30.767677307128906, + "step": 160 + }, + { + "epoch": 0.2728421052631579, + "grad_norm": 442.39075275710724, + "learning_rate": 1.7972988285862333e-07, + "logits/chosen": -7.613976001739502, + "logits/rejected": -8.213547706604004, + "logps/chosen": -2117.5400390625, + "logps/rejected": -2208.39599609375, + "loss": 0.2877, + "nll_loss": 5.046686172485352, + "rewards/accuracies": 1.0, + "rewards/chosen": -28.775413513183594, + "rewards/margins": 1.5463881492614746, + "rewards/rejected": -30.32179832458496, + "step": 162 + }, + { + "epoch": 0.27621052631578946, + "grad_norm": 232.11714950375813, + "learning_rate": 1.7902613819258983e-07, + "logits/chosen": -6.078405857086182, + "logits/rejected": -8.073712348937988, + "logps/chosen": -1325.989990234375, + "logps/rejected": -2109.148681640625, + "loss": 0.1362, + "nll_loss": 5.590985298156738, + "rewards/accuracies": 1.0, + "rewards/chosen": -17.27791976928711, + "rewards/margins": 13.148313522338867, + "rewards/rejected": -30.426233291625977, + "step": 164 + }, + { + "epoch": 0.27957894736842104, + "grad_norm": 178.08792377292178, + "learning_rate": 1.7831181254417226e-07, + "logits/chosen": -7.393200397491455, + "logits/rejected": -7.933506965637207, + "logps/chosen": -1770.2550048828125, + "logps/rejected": -2183.81884765625, + "loss": 0.1839, + "nll_loss": 4.8053669929504395, + "rewards/accuracies": 1.0, + "rewards/chosen": -22.983407974243164, + "rewards/margins": 7.7809247970581055, + "rewards/rejected": -30.764333724975586, + "step": 166 + }, + { + "epoch": 0.2829473684210526, + "grad_norm": 422.36016546899873, + "learning_rate": 1.7758700155599316e-07, + "logits/chosen": -6.5516037940979, + "logits/rejected": -7.964224815368652, + "logps/chosen": -1520.2445068359375, + "logps/rejected": -2135.012451171875, + "loss": 0.1957, + "nll_loss": 5.540028095245361, + "rewards/accuracies": 0.875, + "rewards/chosen": -20.019882202148438, + "rewards/margins": 11.298989295959473, + "rewards/rejected": -31.318870544433594, + "step": 168 + }, + { + "epoch": 0.2863157894736842, + "grad_norm": 496.76355668466664, + "learning_rate": 1.7685180227458002e-07, + "logits/chosen": -7.281660556793213, + "logits/rejected": -8.243330955505371, + "logps/chosen": -1699.278564453125, + "logps/rejected": -2149.191650390625, + "loss": 0.4263, + "nll_loss": 5.4164533615112305, + "rewards/accuracies": 0.75, + "rewards/chosen": -24.19361686706543, + "rewards/margins": 6.679977893829346, + "rewards/rejected": -30.873594284057617, + "step": 170 + }, + { + "epoch": 0.28968421052631577, + "grad_norm": 368.2965007979305, + "learning_rate": 1.7610631313737172e-07, + "logits/chosen": -7.080427646636963, + "logits/rejected": -8.119704246520996, + "logps/chosen": -1611.391357421875, + "logps/rejected": -2172.8203125, + "loss": 0.3694, + "nll_loss": 5.258100509643555, + "rewards/accuracies": 0.875, + "rewards/chosen": -22.043684005737305, + "rewards/margins": 9.61347770690918, + "rewards/rejected": -31.657163619995117, + "step": 172 + }, + { + "epoch": 0.29305263157894734, + "grad_norm": 670.6700241176729, + "learning_rate": 1.753506339595384e-07, + "logits/chosen": -7.517750263214111, + "logits/rejected": -7.757349967956543, + "logps/chosen": -1787.9560546875, + "logps/rejected": -2129.67822265625, + "loss": 0.298, + "nll_loss": 5.433893203735352, + "rewards/accuracies": 0.625, + "rewards/chosen": -25.235340118408203, + "rewards/margins": 6.193837642669678, + "rewards/rejected": -31.42917823791504, + "step": 174 + }, + { + "epoch": 0.296421052631579, + "grad_norm": 410.62113772917246, + "learning_rate": 1.7458486592061701e-07, + "logits/chosen": -6.625953674316406, + "logits/rejected": -8.243332862854004, + "logps/chosen": -1637.386474609375, + "logps/rejected": -2249.2197265625, + "loss": 0.3263, + "nll_loss": 5.135756969451904, + "rewards/accuracies": 1.0, + "rewards/chosen": -22.93773651123047, + "rewards/margins": 10.093299865722656, + "rewards/rejected": -33.03103256225586, + "step": 176 + }, + { + "epoch": 0.29978947368421055, + "grad_norm": 518.14733064062, + "learning_rate": 1.7380911155096407e-07, + "logits/chosen": -6.312992572784424, + "logits/rejected": -7.9758687019348145, + "logps/chosen": -1488.8402099609375, + "logps/rejected": -2148.6962890625, + "loss": 0.591, + "nll_loss": 5.555506706237793, + "rewards/accuracies": 0.75, + "rewards/chosen": -20.87957763671875, + "rewards/margins": 10.946573257446289, + "rewards/rejected": -31.82615089416504, + "step": 178 + }, + { + "epoch": 0.3031578947368421, + "grad_norm": 230.12282725753218, + "learning_rate": 1.7302347471802795e-07, + "logits/chosen": -6.5913238525390625, + "logits/rejected": -8.363908767700195, + "logps/chosen": -1680.2467041015625, + "logps/rejected": -2223.9873046875, + "loss": 0.3578, + "nll_loss": 5.693438529968262, + "rewards/accuracies": 1.0, + "rewards/chosen": -23.647768020629883, + "rewards/margins": 8.622623443603516, + "rewards/rejected": -32.27039337158203, + "step": 180 + }, + { + "epoch": 0.3065263157894737, + "grad_norm": 290.90448926580694, + "learning_rate": 1.7222806061244146e-07, + "logits/chosen": -6.316551208496094, + "logits/rejected": -8.457919120788574, + "logps/chosen": -1597.56103515625, + "logps/rejected": -2268.66259765625, + "loss": 0.1266, + "nll_loss": 5.54366397857666, + "rewards/accuracies": 1.0, + "rewards/chosen": -22.09076499938965, + "rewards/margins": 9.565703392028809, + "rewards/rejected": -31.656469345092773, + "step": 182 + }, + { + "epoch": 0.3098947368421053, + "grad_norm": 219.15996405308613, + "learning_rate": 1.7142297573393788e-07, + "logits/chosen": -7.972108840942383, + "logits/rejected": -8.042094230651855, + "logps/chosen": -1998.596923828125, + "logps/rejected": -2172.12255859375, + "loss": 0.3072, + "nll_loss": 5.00222110748291, + "rewards/accuracies": 0.875, + "rewards/chosen": -28.76752281188965, + "rewards/margins": 3.3450047969818115, + "rewards/rejected": -32.112525939941406, + "step": 184 + }, + { + "epoch": 0.31326315789473685, + "grad_norm": 322.85198814627114, + "learning_rate": 1.7060832787709138e-07, + "logits/chosen": -6.03144645690918, + "logits/rejected": -8.47148323059082, + "logps/chosen": -1274.5035400390625, + "logps/rejected": -2136.2109375, + "loss": 0.3631, + "nll_loss": 5.840561866760254, + "rewards/accuracies": 0.875, + "rewards/chosen": -17.418729782104492, + "rewards/margins": 15.012275695800781, + "rewards/rejected": -32.43100357055664, + "step": 186 + }, + { + "epoch": 0.31663157894736843, + "grad_norm": 513.7476885592771, + "learning_rate": 1.697842261168843e-07, + "logits/chosen": -7.334516525268555, + "logits/rejected": -8.225534439086914, + "logps/chosen": -1927.53515625, + "logps/rejected": -2148.278564453125, + "loss": 0.3361, + "nll_loss": 5.305893898010254, + "rewards/accuracies": 0.875, + "rewards/chosen": -26.597455978393555, + "rewards/margins": 5.804078102111816, + "rewards/rejected": -32.40153503417969, + "step": 188 + }, + { + "epoch": 0.32, + "grad_norm": 163.682762913509, + "learning_rate": 1.6895078079410266e-07, + "logits/chosen": -5.538216590881348, + "logits/rejected": -8.494758605957031, + "logps/chosen": -1233.1563720703125, + "logps/rejected": -2203.232177734375, + "loss": 0.179, + "nll_loss": 5.9964189529418945, + "rewards/accuracies": 0.875, + "rewards/chosen": -17.165884017944336, + "rewards/margins": 15.676169395446777, + "rewards/rejected": -32.84205627441406, + "step": 190 + }, + { + "epoch": 0.3233684210526316, + "grad_norm": 308.2898266039604, + "learning_rate": 1.6810810350056258e-07, + "logits/chosen": -6.971872329711914, + "logits/rejected": -8.188929557800293, + "logps/chosen": -1851.5859375, + "logps/rejected": -2194.031982421875, + "loss": 0.4115, + "nll_loss": 5.3788743019104, + "rewards/accuracies": 0.75, + "rewards/chosen": -26.422870635986328, + "rewards/margins": 6.5016984939575195, + "rewards/rejected": -32.92456817626953, + "step": 192 + }, + { + "epoch": 0.32673684210526316, + "grad_norm": 412.5565491541735, + "learning_rate": 1.672563070641688e-07, + "logits/chosen": -7.640994548797607, + "logits/rejected": -8.178326606750488, + "logps/chosen": -2078.18115234375, + "logps/rejected": -2178.9091796875, + "loss": 0.2875, + "nll_loss": 5.410080432891846, + "rewards/accuracies": 0.625, + "rewards/chosen": -30.432947158813477, + "rewards/margins": 2.4267215728759766, + "rewards/rejected": -32.85966873168945, + "step": 194 + }, + { + "epoch": 0.33010526315789473, + "grad_norm": 469.2498668087184, + "learning_rate": 1.6639550553380816e-07, + "logits/chosen": -7.088956356048584, + "logits/rejected": -8.258764266967773, + "logps/chosen": -1782.86376953125, + "logps/rejected": -2211.70703125, + "loss": 0.369, + "nll_loss": 5.0121073722839355, + "rewards/accuracies": 0.875, + "rewards/chosen": -26.950332641601562, + "rewards/margins": 7.496822357177734, + "rewards/rejected": -34.44715118408203, + "step": 196 + }, + { + "epoch": 0.3334736842105263, + "grad_norm": 236.59088336990078, + "learning_rate": 1.6552581416407916e-07, + "logits/chosen": -8.196027755737305, + "logits/rejected": -7.7416181564331055, + "logps/chosen": -2154.635498046875, + "logps/rejected": -2205.575927734375, + "loss": 0.3022, + "nll_loss": 4.687681198120117, + "rewards/accuracies": 0.75, + "rewards/chosen": -32.5702018737793, + "rewards/margins": 2.931349515914917, + "rewards/rejected": -35.501556396484375, + "step": 198 + }, + { + "epoch": 0.3368421052631579, + "grad_norm": 413.82963735092727, + "learning_rate": 1.6464734939986035e-07, + "logits/chosen": -7.169932842254639, + "logits/rejected": -8.24282455444336, + "logps/chosen": -1997.083984375, + "logps/rejected": -2223.7470703125, + "loss": 0.3572, + "nll_loss": 5.281017303466797, + "rewards/accuracies": 1.0, + "rewards/chosen": -30.46786117553711, + "rewards/margins": 5.10554313659668, + "rewards/rejected": -35.573402404785156, + "step": 200 + }, + { + "epoch": 0.34021052631578946, + "grad_norm": 415.7728496426865, + "learning_rate": 1.637602288607192e-07, + "logits/chosen": -6.219871520996094, + "logits/rejected": -8.299065589904785, + "logps/chosen": -1463.1075439453125, + "logps/rejected": -2223.69775390625, + "loss": 0.2563, + "nll_loss": 5.272424697875977, + "rewards/accuracies": 0.875, + "rewards/chosen": -20.643104553222656, + "rewards/margins": 13.95057487487793, + "rewards/rejected": -34.59368133544922, + "step": 202 + }, + { + "epoch": 0.34357894736842104, + "grad_norm": 422.85323621204554, + "learning_rate": 1.6286457132516383e-07, + "logits/chosen": -6.83880615234375, + "logits/rejected": -8.385416030883789, + "logps/chosen": -1577.858154296875, + "logps/rejected": -2194.265869140625, + "loss": 0.3599, + "nll_loss": 5.622811317443848, + "rewards/accuracies": 0.875, + "rewards/chosen": -22.948638916015625, + "rewards/margins": 11.672065734863281, + "rewards/rejected": -34.620704650878906, + "step": 204 + }, + { + "epoch": 0.3469473684210526, + "grad_norm": 490.11145097314346, + "learning_rate": 1.6196049671473953e-07, + "logits/chosen": -7.446739196777344, + "logits/rejected": -8.425362586975098, + "logps/chosen": -1768.897705078125, + "logps/rejected": -2202.29443359375, + "loss": 0.2982, + "nll_loss": 5.6991496086120605, + "rewards/accuracies": 0.75, + "rewards/chosen": -28.04994773864746, + "rewards/margins": 6.368475914001465, + "rewards/rejected": -34.41842269897461, + "step": 206 + }, + { + "epoch": 0.3503157894736842, + "grad_norm": 530.9413220874789, + "learning_rate": 1.61048126077972e-07, + "logits/chosen": -7.343864440917969, + "logits/rejected": -8.190047264099121, + "logps/chosen": -1754.0421142578125, + "logps/rejected": -2148.49072265625, + "loss": 0.2674, + "nll_loss": 5.666429042816162, + "rewards/accuracies": 1.0, + "rewards/chosen": -25.846054077148438, + "rewards/margins": 8.071301460266113, + "rewards/rejected": -33.917354583740234, + "step": 208 + }, + { + "epoch": 0.35368421052631577, + "grad_norm": 166.06683989436303, + "learning_rate": 1.6012758157416018e-07, + "logits/chosen": -7.329439640045166, + "logits/rejected": -7.938960075378418, + "logps/chosen": -1708.5814208984375, + "logps/rejected": -2173.199462890625, + "loss": 0.1976, + "nll_loss": 5.0503830909729, + "rewards/accuracies": 0.875, + "rewards/chosen": -24.269054412841797, + "rewards/margins": 9.374040603637695, + "rewards/rejected": -33.643096923828125, + "step": 210 + }, + { + "epoch": 0.35705263157894734, + "grad_norm": 525.5297922097282, + "learning_rate": 1.5919898645701988e-07, + "logits/chosen": -6.739994049072266, + "logits/rejected": -8.293057441711426, + "logps/chosen": -1717.248046875, + "logps/rejected": -2190.51171875, + "loss": 0.4755, + "nll_loss": 4.879263877868652, + "rewards/accuracies": 0.875, + "rewards/chosen": -24.138900756835938, + "rewards/margins": 8.57326602935791, + "rewards/rejected": -32.71216583251953, + "step": 212 + }, + { + "epoch": 0.3604210526315789, + "grad_norm": 548.5346673183018, + "learning_rate": 1.5826246505818112e-07, + "logits/chosen": -6.99255895614624, + "logits/rejected": -8.398011207580566, + "logps/chosen": -1611.18701171875, + "logps/rejected": -2188.08642578125, + "loss": 0.432, + "nll_loss": 5.333250999450684, + "rewards/accuracies": 0.875, + "rewards/chosen": -23.24384880065918, + "rewards/margins": 10.111581802368164, + "rewards/rejected": -33.355430603027344, + "step": 214 + }, + { + "epoch": 0.36378947368421055, + "grad_norm": 185.93773109229338, + "learning_rate": 1.573181427705411e-07, + "logits/chosen": -7.210558891296387, + "logits/rejected": -8.003637313842773, + "logps/chosen": -1898.2894287109375, + "logps/rejected": -2259.249755859375, + "loss": 0.1156, + "nll_loss": 5.2748212814331055, + "rewards/accuracies": 1.0, + "rewards/chosen": -27.62005043029785, + "rewards/margins": 6.235610485076904, + "rewards/rejected": -33.85565948486328, + "step": 216 + }, + { + "epoch": 0.3671578947368421, + "grad_norm": 298.31090901524203, + "learning_rate": 1.5636614603147512e-07, + "logits/chosen": -7.3358025550842285, + "logits/rejected": -8.385098457336426, + "logps/chosen": -1998.4923095703125, + "logps/rejected": -2187.198486328125, + "loss": 0.2525, + "nll_loss": 5.315785884857178, + "rewards/accuracies": 0.625, + "rewards/chosen": -29.09065818786621, + "rewards/margins": 3.6531078815460205, + "rewards/rejected": -32.74376678466797, + "step": 218 + }, + { + "epoch": 0.3705263157894737, + "grad_norm": 377.3450841073725, + "learning_rate": 1.5540660230590748e-07, + "logits/chosen": -6.510415077209473, + "logits/rejected": -8.296943664550781, + "logps/chosen": -1665.1639404296875, + "logps/rejected": -2214.30078125, + "loss": 0.438, + "nll_loss": 5.418496131896973, + "rewards/accuracies": 1.0, + "rewards/chosen": -23.495925903320312, + "rewards/margins": 10.108709335327148, + "rewards/rejected": -33.604637145996094, + "step": 220 + }, + { + "epoch": 0.3738947368421053, + "grad_norm": 443.30578117705613, + "learning_rate": 1.5443964006924507e-07, + "logits/chosen": -6.681422710418701, + "logits/rejected": -8.464709281921387, + "logps/chosen": -1588.643310546875, + "logps/rejected": -2177.72705078125, + "loss": 0.2768, + "nll_loss": 6.007861137390137, + "rewards/accuracies": 0.75, + "rewards/chosen": -23.003562927246094, + "rewards/margins": 10.166757583618164, + "rewards/rejected": -33.17032241821289, + "step": 222 + }, + { + "epoch": 0.37726315789473686, + "grad_norm": 163.54164941613496, + "learning_rate": 1.534653887901754e-07, + "logits/chosen": -5.698173999786377, + "logits/rejected": -8.45270824432373, + "logps/chosen": -1357.1298828125, + "logps/rejected": -2201.384765625, + "loss": 0.1406, + "nll_loss": 5.912946701049805, + "rewards/accuracies": 1.0, + "rewards/chosen": -17.897022247314453, + "rewards/margins": 14.996172904968262, + "rewards/rejected": -32.8931999206543, + "step": 224 + }, + { + "epoch": 0.38063157894736843, + "grad_norm": 319.17016425411697, + "learning_rate": 1.5248397891333183e-07, + "logits/chosen": -6.813119888305664, + "logits/rejected": -8.12168025970459, + "logps/chosen": -1691.3763427734375, + "logps/rejected": -2194.62646484375, + "loss": 0.2993, + "nll_loss": 5.037168025970459, + "rewards/accuracies": 0.625, + "rewards/chosen": -24.049442291259766, + "rewards/margins": 8.878725051879883, + "rewards/rejected": -32.928165435791016, + "step": 226 + }, + { + "epoch": 0.384, + "grad_norm": 284.69578061502773, + "learning_rate": 1.51495541841828e-07, + "logits/chosen": -6.420520305633545, + "logits/rejected": -8.212552070617676, + "logps/chosen": -1653.12890625, + "logps/rejected": -2208.427734375, + "loss": 0.2744, + "nll_loss": 5.3444905281066895, + "rewards/accuracies": 0.875, + "rewards/chosen": -23.33785629272461, + "rewards/margins": 9.556280136108398, + "rewards/rejected": -32.894134521484375, + "step": 228 + }, + { + "epoch": 0.3873684210526316, + "grad_norm": 284.69191132696415, + "learning_rate": 1.5050020991966403e-07, + "logits/chosen": -6.635988712310791, + "logits/rejected": -7.83236837387085, + "logps/chosen": -1533.177978515625, + "logps/rejected": -2170.71044921875, + "loss": 0.1905, + "nll_loss": 5.389209270477295, + "rewards/accuracies": 1.0, + "rewards/chosen": -21.042217254638672, + "rewards/margins": 11.956886291503906, + "rewards/rejected": -32.99910354614258, + "step": 230 + }, + { + "epoch": 0.39073684210526316, + "grad_norm": 733.6966760862421, + "learning_rate": 1.4949811641400668e-07, + "logits/chosen": -7.570094108581543, + "logits/rejected": -7.502762794494629, + "logps/chosen": -1841.1671142578125, + "logps/rejected": -2136.026611328125, + "loss": 0.5221, + "nll_loss": 4.983242988586426, + "rewards/accuracies": 0.75, + "rewards/chosen": -25.91825294494629, + "rewards/margins": 6.6433634757995605, + "rewards/rejected": -32.561614990234375, + "step": 232 + }, + { + "epoch": 0.39410526315789474, + "grad_norm": 188.5369451526676, + "learning_rate": 1.484893954973458e-07, + "logits/chosen": -6.149730205535889, + "logits/rejected": -8.597711563110352, + "logps/chosen": -1442.3626708984375, + "logps/rejected": -2134.160888671875, + "loss": 0.129, + "nll_loss": 5.606182098388672, + "rewards/accuracies": 0.875, + "rewards/chosen": -18.86325454711914, + "rewards/margins": 13.016372680664062, + "rewards/rejected": -31.879629135131836, + "step": 234 + }, + { + "epoch": 0.3974736842105263, + "grad_norm": 305.89732243842866, + "learning_rate": 1.4747418222952995e-07, + "logits/chosen": -6.757414817810059, + "logits/rejected": -8.115114212036133, + "logps/chosen": -1697.4263916015625, + "logps/rejected": -2184.919921875, + "loss": 0.2413, + "nll_loss": 5.372623920440674, + "rewards/accuracies": 0.875, + "rewards/chosen": -22.677663803100586, + "rewards/margins": 8.713125228881836, + "rewards/rejected": -31.39078712463379, + "step": 236 + }, + { + "epoch": 0.4008421052631579, + "grad_norm": 341.4679249212142, + "learning_rate": 1.4645261253968259e-07, + "logits/chosen": -6.988116264343262, + "logits/rejected": -8.155961036682129, + "logps/chosen": -1654.0662841796875, + "logps/rejected": -2215.248291015625, + "loss": 0.2575, + "nll_loss": 5.236739158630371, + "rewards/accuracies": 1.0, + "rewards/chosen": -21.976306915283203, + "rewards/margins": 10.24085807800293, + "rewards/rejected": -32.2171630859375, + "step": 238 + }, + { + "epoch": 0.40421052631578946, + "grad_norm": 132.8280211518827, + "learning_rate": 1.454248232080026e-07, + "logits/chosen": -7.433531761169434, + "logits/rejected": -8.119049072265625, + "logps/chosen": -1924.980712890625, + "logps/rejected": -2134.167724609375, + "loss": 0.2044, + "nll_loss": 5.498749256134033, + "rewards/accuracies": 0.875, + "rewards/chosen": -26.4937801361084, + "rewards/margins": 5.2426652908325195, + "rewards/rejected": -31.736446380615234, + "step": 240 + }, + { + "epoch": 0.40757894736842104, + "grad_norm": 340.2818569766834, + "learning_rate": 1.4439095184745024e-07, + "logits/chosen": -6.053187370300293, + "logits/rejected": -8.49085807800293, + "logps/chosen": -1365.32568359375, + "logps/rejected": -2177.580078125, + "loss": 0.271, + "nll_loss": 5.570743083953857, + "rewards/accuracies": 0.875, + "rewards/chosen": -18.466915130615234, + "rewards/margins": 12.624629974365234, + "rewards/rejected": -31.09154510498047, + "step": 242 + }, + { + "epoch": 0.4109473684210526, + "grad_norm": 214.41734275706338, + "learning_rate": 1.4335113688532182e-07, + "logits/chosen": -6.56601619720459, + "logits/rejected": -8.04720401763916, + "logps/chosen": -1554.8018798828125, + "logps/rejected": -2144.083251953125, + "loss": 0.138, + "nll_loss": 5.4142374992370605, + "rewards/accuracies": 1.0, + "rewards/chosen": -20.857574462890625, + "rewards/margins": 11.797863006591797, + "rewards/rejected": -32.655433654785156, + "step": 244 + }, + { + "epoch": 0.4143157894736842, + "grad_norm": 337.35948463767846, + "learning_rate": 1.423055175447155e-07, + "logits/chosen": -6.994697570800781, + "logits/rejected": -7.979827880859375, + "logps/chosen": -1652.507568359375, + "logps/rejected": -2181.640625, + "loss": 0.329, + "nll_loss": 5.518115043640137, + "rewards/accuracies": 0.875, + "rewards/chosen": -23.70856475830078, + "rewards/margins": 8.838756561279297, + "rewards/rejected": -32.54732131958008, + "step": 246 + }, + { + "epoch": 0.41768421052631577, + "grad_norm": 274.454131527474, + "learning_rate": 1.4125423382589048e-07, + "logits/chosen": -6.993724346160889, + "logits/rejected": -8.257303237915039, + "logps/chosen": -1779.2265625, + "logps/rejected": -2149.052490234375, + "loss": 0.2965, + "nll_loss": 5.484703063964844, + "rewards/accuracies": 1.0, + "rewards/chosen": -24.876686096191406, + "rewards/margins": 7.389163970947266, + "rewards/rejected": -32.26585006713867, + "step": 248 + }, + { + "epoch": 0.42105263157894735, + "grad_norm": 352.35818290838654, + "learning_rate": 1.401974264875218e-07, + "logits/chosen": -7.193760871887207, + "logits/rejected": -8.0586519241333, + "logps/chosen": -1831.6397705078125, + "logps/rejected": -2227.03857421875, + "loss": 0.2662, + "nll_loss": 5.117783546447754, + "rewards/accuracies": 0.875, + "rewards/chosen": -25.96319007873535, + "rewards/margins": 6.454355239868164, + "rewards/rejected": -32.41754150390625, + "step": 250 + }, + { + "epoch": 0.4244210526315789, + "grad_norm": 255.0171256504159, + "learning_rate": 1.391352370278541e-07, + "logits/chosen": -6.700958728790283, + "logits/rejected": -8.45335865020752, + "logps/chosen": -1634.654541015625, + "logps/rejected": -2221.049072265625, + "loss": 0.2647, + "nll_loss": 6.0273308753967285, + "rewards/accuracies": 0.875, + "rewards/chosen": -23.497007369995117, + "rewards/margins": 10.51921558380127, + "rewards/rejected": -34.0162239074707, + "step": 252 + }, + { + "epoch": 0.42778947368421055, + "grad_norm": 481.3772072138439, + "learning_rate": 1.3806780766575588e-07, + "logits/chosen": -7.795599460601807, + "logits/rejected": -8.443253517150879, + "logps/chosen": -1971.610107421875, + "logps/rejected": -2192.519287109375, + "loss": 0.203, + "nll_loss": 5.141743183135986, + "rewards/accuracies": 0.875, + "rewards/chosen": -28.521339416503906, + "rewards/margins": 5.977480888366699, + "rewards/rejected": -34.49882125854492, + "step": 254 + }, + { + "epoch": 0.43115789473684213, + "grad_norm": 451.03866242263535, + "learning_rate": 1.3699528132167776e-07, + "logits/chosen": -7.792696475982666, + "logits/rejected": -7.79255485534668, + "logps/chosen": -1888.9193115234375, + "logps/rejected": -2135.4482421875, + "loss": 0.3684, + "nll_loss": 4.749884128570557, + "rewards/accuracies": 0.875, + "rewards/chosen": -26.909269332885742, + "rewards/margins": 6.3855719566345215, + "rewards/rejected": -33.29484176635742, + "step": 256 + }, + { + "epoch": 0.4345263157894737, + "grad_norm": 242.47130951413612, + "learning_rate": 1.3591780159851627e-07, + "logits/chosen": -6.771208763122559, + "logits/rejected": -8.258780479431152, + "logps/chosen": -1740.8980712890625, + "logps/rejected": -2198.968994140625, + "loss": 0.1598, + "nll_loss": 5.290096759796143, + "rewards/accuracies": 1.0, + "rewards/chosen": -24.707876205444336, + "rewards/margins": 9.988024711608887, + "rewards/rejected": -34.695899963378906, + "step": 258 + }, + { + "epoch": 0.4378947368421053, + "grad_norm": 499.1497771210561, + "learning_rate": 1.3483551276238686e-07, + "logits/chosen": -6.881796836853027, + "logits/rejected": -8.311073303222656, + "logps/chosen": -1801.54443359375, + "logps/rejected": -2261.04150390625, + "loss": 0.5297, + "nll_loss": 4.906632900238037, + "rewards/accuracies": 1.0, + "rewards/chosen": -25.078868865966797, + "rewards/margins": 9.138972282409668, + "rewards/rejected": -34.217838287353516, + "step": 260 + }, + { + "epoch": 0.44126315789473686, + "grad_norm": 340.10309787989286, + "learning_rate": 1.3374855972330756e-07, + "logits/chosen": -7.035615921020508, + "logits/rejected": -8.248941421508789, + "logps/chosen": -1872.8837890625, + "logps/rejected": -2217.878173828125, + "loss": 0.306, + "nll_loss": 5.385906219482422, + "rewards/accuracies": 1.0, + "rewards/chosen": -26.4267520904541, + "rewards/margins": 7.073578357696533, + "rewards/rejected": -33.500328063964844, + "step": 262 + }, + { + "epoch": 0.44463157894736843, + "grad_norm": 225.67805816583055, + "learning_rate": 1.3265708801579666e-07, + "logits/chosen": -6.937837600708008, + "logits/rejected": -8.376426696777344, + "logps/chosen": -1765.403076171875, + "logps/rejected": -2198.61767578125, + "loss": 0.1877, + "nll_loss": 5.633481502532959, + "rewards/accuracies": 0.875, + "rewards/chosen": -25.389394760131836, + "rewards/margins": 7.419838905334473, + "rewards/rejected": -32.80923080444336, + "step": 264 + }, + { + "epoch": 0.448, + "grad_norm": 91.96492729058454, + "learning_rate": 1.3156124377938697e-07, + "logits/chosen": -7.8583831787109375, + "logits/rejected": -8.153332710266113, + "logps/chosen": -2142.6328125, + "logps/rejected": -2167.61376953125, + "loss": 0.2784, + "nll_loss": 4.982174396514893, + "rewards/accuracies": 0.875, + "rewards/chosen": -30.6492919921875, + "rewards/margins": 2.7910072803497314, + "rewards/rejected": -33.4402961730957, + "step": 266 + }, + { + "epoch": 0.4513684210526316, + "grad_norm": 199.4223395992786, + "learning_rate": 1.3046117373905865e-07, + "logits/chosen": -6.933570861816406, + "logits/rejected": -8.099403381347656, + "logps/chosen": -1728.806396484375, + "logps/rejected": -2209.177734375, + "loss": 0.0685, + "nll_loss": 5.4580488204956055, + "rewards/accuracies": 1.0, + "rewards/chosen": -23.95172882080078, + "rewards/margins": 10.292177200317383, + "rewards/rejected": -34.2439079284668, + "step": 268 + }, + { + "epoch": 0.45473684210526316, + "grad_norm": 216.84232462782722, + "learning_rate": 1.2935702518559397e-07, + "logits/chosen": -7.183603763580322, + "logits/rejected": -8.098438262939453, + "logps/chosen": -1903.0699462890625, + "logps/rejected": -2204.51904296875, + "loss": 0.2503, + "nll_loss": 5.397114276885986, + "rewards/accuracies": 0.625, + "rewards/chosen": -27.064674377441406, + "rewards/margins": 5.608869552612305, + "rewards/rejected": -32.673545837402344, + "step": 270 + }, + { + "epoch": 0.45810526315789474, + "grad_norm": 380.9095200963084, + "learning_rate": 1.2824894595585636e-07, + "logits/chosen": -6.462231636047363, + "logits/rejected": -8.369811058044434, + "logps/chosen": -1741.0838623046875, + "logps/rejected": -2199.78466796875, + "loss": 0.2068, + "nll_loss": 6.009423732757568, + "rewards/accuracies": 0.75, + "rewards/chosen": -26.056018829345703, + "rewards/margins": 7.744605541229248, + "rewards/rejected": -33.80062484741211, + "step": 272 + }, + { + "epoch": 0.4614736842105263, + "grad_norm": 137.60298699973777, + "learning_rate": 1.27137084412996e-07, + "logits/chosen": -6.126290321350098, + "logits/rejected": -8.357515335083008, + "logps/chosen": -1254.0537109375, + "logps/rejected": -2182.109130859375, + "loss": 0.1443, + "nll_loss": 5.386259078979492, + "rewards/accuracies": 1.0, + "rewards/chosen": -17.421083450317383, + "rewards/margins": 17.08145523071289, + "rewards/rejected": -34.50253677368164, + "step": 274 + }, + { + "epoch": 0.4648421052631579, + "grad_norm": 238.72881941389062, + "learning_rate": 1.260215894265852e-07, + "logits/chosen": -6.577013969421387, + "logits/rejected": -8.490443229675293, + "logps/chosen": -1655.7412109375, + "logps/rejected": -2204.900390625, + "loss": 0.2314, + "nll_loss": 5.632746696472168, + "rewards/accuracies": 1.0, + "rewards/chosen": -23.928146362304688, + "rewards/margins": 11.161284446716309, + "rewards/rejected": -35.08943176269531, + "step": 276 + }, + { + "epoch": 0.46821052631578947, + "grad_norm": 205.0849481007809, + "learning_rate": 1.2490261035268613e-07, + "logits/chosen": -7.492171287536621, + "logits/rejected": -8.131207466125488, + "logps/chosen": -1958.902099609375, + "logps/rejected": -2155.704345703125, + "loss": 0.1155, + "nll_loss": 5.272936820983887, + "rewards/accuracies": 1.0, + "rewards/chosen": -30.05508804321289, + "rewards/margins": 5.22835111618042, + "rewards/rejected": -35.2834358215332, + "step": 278 + }, + { + "epoch": 0.47157894736842104, + "grad_norm": 226.262655032818, + "learning_rate": 1.2378029701385286e-07, + "logits/chosen": -6.374780178070068, + "logits/rejected": -8.473326683044434, + "logps/chosen": -1493.491455078125, + "logps/rejected": -2233.69140625, + "loss": 0.1602, + "nll_loss": 5.51650333404541, + "rewards/accuracies": 0.875, + "rewards/chosen": -22.54718017578125, + "rewards/margins": 13.27789306640625, + "rewards/rejected": -35.8250732421875, + "step": 280 + }, + { + "epoch": 0.4749473684210526, + "grad_norm": 304.14594820990135, + "learning_rate": 1.2265479967907158e-07, + "logits/chosen": -7.050374507904053, + "logits/rejected": -8.2347412109375, + "logps/chosen": -1850.6962890625, + "logps/rejected": -2199.18798828125, + "loss": 0.3842, + "nll_loss": 5.64528226852417, + "rewards/accuracies": 0.875, + "rewards/chosen": -28.668458938598633, + "rewards/margins": 7.283104419708252, + "rewards/rejected": -35.95156478881836, + "step": 282 + }, + { + "epoch": 0.4783157894736842, + "grad_norm": 250.05279911200753, + "learning_rate": 1.2152626904364064e-07, + "logits/chosen": -6.778600215911865, + "logits/rejected": -8.34770679473877, + "logps/chosen": -1604.2724609375, + "logps/rejected": -2214.350830078125, + "loss": 0.1214, + "nll_loss": 5.6927690505981445, + "rewards/accuracies": 1.0, + "rewards/chosen": -24.976463317871094, + "rewards/margins": 11.123945236206055, + "rewards/rejected": -36.10041046142578, + "step": 284 + }, + { + "epoch": 0.48168421052631577, + "grad_norm": 49.88924745592731, + "learning_rate": 1.2039485620899367e-07, + "logits/chosen": -7.990779876708984, + "logits/rejected": -7.396949291229248, + "logps/chosen": -2097.546142578125, + "logps/rejected": -2177.14501953125, + "loss": 0.092, + "nll_loss": 5.315004348754883, + "rewards/accuracies": 0.875, + "rewards/chosen": -33.4441032409668, + "rewards/margins": 4.195204734802246, + "rewards/rejected": -37.63930892944336, + "step": 286 + }, + { + "epoch": 0.48505263157894735, + "grad_norm": 298.7045075195552, + "learning_rate": 1.1926071266246824e-07, + "logits/chosen": -7.262898921966553, + "logits/rejected": -7.93293571472168, + "logps/chosen": -1817.0556640625, + "logps/rejected": -2241.826904296875, + "loss": 0.378, + "nll_loss": 5.347080230712891, + "rewards/accuracies": 0.875, + "rewards/chosen": -29.16703224182129, + "rewards/margins": 7.644922733306885, + "rewards/rejected": -36.81195831298828, + "step": 288 + }, + { + "epoch": 0.4884210526315789, + "grad_norm": 305.83167858317097, + "learning_rate": 1.1812399025702289e-07, + "logits/chosen": -7.386429309844971, + "logits/rejected": -7.993374347686768, + "logps/chosen": -1947.408935546875, + "logps/rejected": -2193.298583984375, + "loss": 0.2834, + "nll_loss": 5.327097415924072, + "rewards/accuracies": 0.75, + "rewards/chosen": -30.927370071411133, + "rewards/margins": 5.908793926239014, + "rewards/rejected": -36.83616256713867, + "step": 290 + }, + { + "epoch": 0.4917894736842105, + "grad_norm": 457.2760039079243, + "learning_rate": 1.1698484119090518e-07, + "logits/chosen": -7.0947346687316895, + "logits/rejected": -8.351741790771484, + "logps/chosen": -1785.537353515625, + "logps/rejected": -2246.34912109375, + "loss": 0.2948, + "nll_loss": 5.337271213531494, + "rewards/accuracies": 0.875, + "rewards/chosen": -28.525087356567383, + "rewards/margins": 9.390640258789062, + "rewards/rejected": -37.91572952270508, + "step": 292 + }, + { + "epoch": 0.49515789473684213, + "grad_norm": 296.42125851564697, + "learning_rate": 1.1584341798727364e-07, + "logits/chosen": -6.732482433319092, + "logits/rejected": -8.335723876953125, + "logps/chosen": -1633.8388671875, + "logps/rejected": -2277.510498046875, + "loss": 0.2649, + "nll_loss": 5.798310279846191, + "rewards/accuracies": 0.875, + "rewards/chosen": -25.833433151245117, + "rewards/margins": 11.791748046875, + "rewards/rejected": -37.62518310546875, + "step": 294 + }, + { + "epoch": 0.4985263157894737, + "grad_norm": 439.27146717605046, + "learning_rate": 1.1469987347377601e-07, + "logits/chosen": -7.6690354347229, + "logits/rejected": -7.856517791748047, + "logps/chosen": -2050.155029296875, + "logps/rejected": -2208.57666015625, + "loss": 0.3627, + "nll_loss": 5.227299213409424, + "rewards/accuracies": 0.875, + "rewards/chosen": -33.6103515625, + "rewards/margins": 2.915879249572754, + "rewards/rejected": -36.52622985839844, + "step": 296 + }, + { + "epoch": 0.5018947368421053, + "grad_norm": 318.9522275161282, + "learning_rate": 1.1355436076208687e-07, + "logits/chosen": -7.616082668304443, + "logits/rejected": -8.118494033813477, + "logps/chosen": -1956.5233154296875, + "logps/rejected": -2223.263916015625, + "loss": 0.2973, + "nll_loss": 5.363955497741699, + "rewards/accuracies": 0.75, + "rewards/chosen": -32.195335388183594, + "rewards/margins": 5.070850849151611, + "rewards/rejected": -37.26618576049805, + "step": 298 + }, + { + "epoch": 0.5052631578947369, + "grad_norm": 141.04978897095168, + "learning_rate": 1.124070332274071e-07, + "logits/chosen": -7.996880054473877, + "logits/rejected": -8.085912704467773, + "logps/chosen": -2062.050537109375, + "logps/rejected": -2186.66162109375, + "loss": 0.0746, + "nll_loss": 5.05142879486084, + "rewards/accuracies": 0.875, + "rewards/chosen": -33.29553985595703, + "rewards/margins": 4.88584041595459, + "rewards/rejected": -38.18138122558594, + "step": 300 + }, + { + "epoch": 0.5086315789473684, + "grad_norm": 373.17587592849566, + "learning_rate": 1.112580444879283e-07, + "logits/chosen": -7.303493022918701, + "logits/rejected": -7.817137718200684, + "logps/chosen": -1930.81689453125, + "logps/rejected": -2200.581298828125, + "loss": 0.4021, + "nll_loss": 5.62571907043457, + "rewards/accuracies": 0.75, + "rewards/chosen": -31.353313446044922, + "rewards/margins": 5.793161869049072, + "rewards/rejected": -37.14647674560547, + "step": 302 + }, + { + "epoch": 0.512, + "grad_norm": 282.00796475897624, + "learning_rate": 1.1010754838426426e-07, + "logits/chosen": -7.355342388153076, + "logits/rejected": -8.244458198547363, + "logps/chosen": -1883.871337890625, + "logps/rejected": -2235.58447265625, + "loss": 0.2066, + "nll_loss": 5.333528995513916, + "rewards/accuracies": 1.0, + "rewards/chosen": -29.595571517944336, + "rewards/margins": 7.5849127769470215, + "rewards/rejected": -37.180484771728516, + "step": 304 + }, + { + "epoch": 0.5153684210526316, + "grad_norm": 243.79920977620088, + "learning_rate": 1.089556989588532e-07, + "logits/chosen": -7.213363170623779, + "logits/rejected": -8.331066131591797, + "logps/chosen": -1723.94287109375, + "logps/rejected": -2178.452880859375, + "loss": 0.1619, + "nll_loss": 5.337440490722656, + "rewards/accuracies": 0.625, + "rewards/chosen": -27.8447265625, + "rewards/margins": 9.870349884033203, + "rewards/rejected": -37.71508026123047, + "step": 306 + }, + { + "epoch": 0.5187368421052632, + "grad_norm": 199.39976335241724, + "learning_rate": 1.078026504353325e-07, + "logits/chosen": -7.63919734954834, + "logits/rejected": -8.216974258422852, + "logps/chosen": -2052.788330078125, + "logps/rejected": -2217.202880859375, + "loss": 0.2817, + "nll_loss": 5.197584629058838, + "rewards/accuracies": 1.0, + "rewards/chosen": -31.69765853881836, + "rewards/margins": 5.5009331703186035, + "rewards/rejected": -37.19859313964844, + "step": 308 + }, + { + "epoch": 0.5221052631578947, + "grad_norm": 620.1262472271271, + "learning_rate": 1.0664855719788934e-07, + "logits/chosen": -7.262815952301025, + "logits/rejected": -8.217065811157227, + "logps/chosen": -2147.777587890625, + "logps/rejected": -2262.103271484375, + "loss": 0.2195, + "nll_loss": 5.310882568359375, + "rewards/accuracies": 0.875, + "rewards/chosen": -35.0344123840332, + "rewards/margins": 2.332733631134033, + "rewards/rejected": -37.367149353027344, + "step": 310 + }, + { + "epoch": 0.5254736842105263, + "grad_norm": 395.93023375337225, + "learning_rate": 1.0549357377059005e-07, + "logits/chosen": -7.084537506103516, + "logits/rejected": -8.080052375793457, + "logps/chosen": -1756.028076171875, + "logps/rejected": -2257.95703125, + "loss": 0.479, + "nll_loss": 5.514889717102051, + "rewards/accuracies": 0.875, + "rewards/chosen": -28.614755630493164, + "rewards/margins": 9.187152862548828, + "rewards/rejected": -37.801910400390625, + "step": 312 + }, + { + "epoch": 0.5288421052631579, + "grad_norm": 200.0389317576706, + "learning_rate": 1.0433785479669037e-07, + "logits/chosen": -7.246962547302246, + "logits/rejected": -8.294339179992676, + "logps/chosen": -1767.36376953125, + "logps/rejected": -2233.66064453125, + "loss": 0.2677, + "nll_loss": 5.436453342437744, + "rewards/accuracies": 1.0, + "rewards/chosen": -28.277070999145508, + "rewards/margins": 9.897624969482422, + "rewards/rejected": -38.17469787597656, + "step": 314 + }, + { + "epoch": 0.5322105263157895, + "grad_norm": 232.98730015013695, + "learning_rate": 1.0318155501792987e-07, + "logits/chosen": -7.37540340423584, + "logits/rejected": -8.02101993560791, + "logps/chosen": -1990.3394775390625, + "logps/rejected": -2271.31201171875, + "loss": 0.0943, + "nll_loss": 5.673530578613281, + "rewards/accuracies": 0.875, + "rewards/chosen": -32.0615348815918, + "rewards/margins": 6.438616752624512, + "rewards/rejected": -38.500152587890625, + "step": 316 + }, + { + "epoch": 0.535578947368421, + "grad_norm": 226.92107830972864, + "learning_rate": 1.0202482925381357e-07, + "logits/chosen": -6.763341426849365, + "logits/rejected": -8.087518692016602, + "logps/chosen": -1665.9189453125, + "logps/rejected": -2236.61376953125, + "loss": 0.2185, + "nll_loss": 5.170358657836914, + "rewards/accuracies": 0.875, + "rewards/chosen": -26.559932708740234, + "rewards/margins": 10.590608596801758, + "rewards/rejected": -37.15054702758789, + "step": 318 + }, + { + "epoch": 0.5389473684210526, + "grad_norm": 393.3203409753937, + "learning_rate": 1.0086783238088243e-07, + "logits/chosen": -7.136436939239502, + "logits/rejected": -8.437828063964844, + "logps/chosen": -1846.78369140625, + "logps/rejected": -2221.16650390625, + "loss": 0.2647, + "nll_loss": 6.048235893249512, + "rewards/accuracies": 0.875, + "rewards/chosen": -30.3648681640625, + "rewards/margins": 7.082037925720215, + "rewards/rejected": -37.44690704345703, + "step": 320 + }, + { + "epoch": 0.5423157894736842, + "grad_norm": 395.9473213067617, + "learning_rate": 9.971071931197684e-08, + "logits/chosen": -7.869472503662109, + "logits/rejected": -8.018000602722168, + "logps/chosen": -2073.983642578125, + "logps/rejected": -2261.07470703125, + "loss": 0.1895, + "nll_loss": 5.238686561584473, + "rewards/accuracies": 0.875, + "rewards/chosen": -34.49671936035156, + "rewards/margins": 3.941061019897461, + "rewards/rejected": -38.437782287597656, + "step": 322 + }, + { + "epoch": 0.5456842105263158, + "grad_norm": 407.67917839889736, + "learning_rate": 9.855364497549495e-08, + "logits/chosen": -7.1182451248168945, + "logits/rejected": -8.209175109863281, + "logps/chosen": -2025.939697265625, + "logps/rejected": -2232.8544921875, + "loss": 0.3687, + "nll_loss": 5.539498329162598, + "rewards/accuracies": 0.875, + "rewards/chosen": -32.55401611328125, + "rewards/margins": 4.3816399574279785, + "rewards/rejected": -36.9356575012207, + "step": 324 + }, + { + "epoch": 0.5490526315789473, + "grad_norm": 82.29409674682019, + "learning_rate": 9.73967642946488e-08, + "logits/chosen": -6.609898567199707, + "logits/rejected": -8.180124282836914, + "logps/chosen": -1846.933837890625, + "logps/rejected": -2254.260009765625, + "loss": 0.1058, + "nll_loss": 6.1715264320373535, + "rewards/accuracies": 1.0, + "rewards/chosen": -31.290800094604492, + "rewards/margins": 6.582897186279297, + "rewards/rejected": -37.87369918823242, + "step": 326 + }, + { + "epoch": 0.5524210526315789, + "grad_norm": 263.7142452068147, + "learning_rate": 9.62402321667216e-08, + "logits/chosen": -7.458375453948975, + "logits/rejected": -8.259882926940918, + "logps/chosen": -2037.841064453125, + "logps/rejected": -2244.8076171875, + "loss": 0.1896, + "nll_loss": 5.539731502532959, + "rewards/accuracies": 1.0, + "rewards/chosen": -33.32776641845703, + "rewards/margins": 5.299670219421387, + "rewards/rejected": -38.62743377685547, + "step": 328 + }, + { + "epoch": 0.5557894736842105, + "grad_norm": 326.97700368298075, + "learning_rate": 9.508420344232799e-08, + "logits/chosen": -6.740803241729736, + "logits/rejected": -8.295230865478516, + "logps/chosen": -1766.736083984375, + "logps/rejected": -2225.2880859375, + "loss": 0.3129, + "nll_loss": 5.566542625427246, + "rewards/accuracies": 0.75, + "rewards/chosen": -29.658353805541992, + "rewards/margins": 8.34937858581543, + "rewards/rejected": -38.00773239135742, + "step": 330 + }, + { + "epoch": 0.5591578947368421, + "grad_norm": 208.334466663584, + "learning_rate": 9.392883290468082e-08, + "logits/chosen": -6.8002543449401855, + "logits/rejected": -8.274842262268066, + "logps/chosen": -1685.3468017578125, + "logps/rejected": -2213.642578125, + "loss": 0.265, + "nll_loss": 5.414514541625977, + "rewards/accuracies": 0.875, + "rewards/chosen": -27.97294044494629, + "rewards/margins": 10.61333179473877, + "rewards/rejected": -38.586273193359375, + "step": 332 + }, + { + "epoch": 0.5625263157894737, + "grad_norm": 63.08646332985191, + "learning_rate": 9.277427524886687e-08, + "logits/chosen": -7.172906398773193, + "logits/rejected": -8.222369194030762, + "logps/chosen": -1881.0384521484375, + "logps/rejected": -2289.237548828125, + "loss": 0.1563, + "nll_loss": 5.601876258850098, + "rewards/accuracies": 0.875, + "rewards/chosen": -30.56769561767578, + "rewards/margins": 7.970741271972656, + "rewards/rejected": -38.53843688964844, + "step": 334 + }, + { + "epoch": 0.5658947368421052, + "grad_norm": 500.5421904045652, + "learning_rate": 9.16206850611344e-08, + "logits/chosen": -6.209731578826904, + "logits/rejected": -8.183979034423828, + "logps/chosen": -1466.2998046875, + "logps/rejected": -2199.416015625, + "loss": 0.2086, + "nll_loss": 5.533670425415039, + "rewards/accuracies": 0.875, + "rewards/chosen": -23.00306510925293, + "rewards/margins": 15.483078002929688, + "rewards/rejected": -38.48614501953125, + "step": 336 + }, + { + "epoch": 0.5692631578947368, + "grad_norm": 135.0392014842357, + "learning_rate": 9.046821679819527e-08, + "logits/chosen": -7.247140884399414, + "logits/rejected": -8.511941909790039, + "logps/chosen": -1807.933349609375, + "logps/rejected": -2225.65185546875, + "loss": 0.2196, + "nll_loss": 5.2833781242370605, + "rewards/accuracies": 0.875, + "rewards/chosen": -30.05931282043457, + "rewards/margins": 9.361750602722168, + "rewards/rejected": -39.42106246948242, + "step": 338 + }, + { + "epoch": 0.5726315789473684, + "grad_norm": 359.17470505312593, + "learning_rate": 8.93170247665443e-08, + "logits/chosen": -7.181203365325928, + "logits/rejected": -7.774561882019043, + "logps/chosen": -1932.3189697265625, + "logps/rejected": -2207.670166015625, + "loss": 0.1509, + "nll_loss": 5.3413214683532715, + "rewards/accuracies": 0.875, + "rewards/chosen": -31.930519104003906, + "rewards/margins": 7.081175804138184, + "rewards/rejected": -39.011695861816406, + "step": 340 + }, + { + "epoch": 0.576, + "grad_norm": 227.67264225472942, + "learning_rate": 8.816726310179903e-08, + "logits/chosen": -6.475002765655518, + "logits/rejected": -8.516534805297852, + "logps/chosen": -1540.990478515625, + "logps/rejected": -2179.462158203125, + "loss": 0.2961, + "nll_loss": 5.707268714904785, + "rewards/accuracies": 0.875, + "rewards/chosen": -24.699859619140625, + "rewards/margins": 14.47529411315918, + "rewards/rejected": -39.17515563964844, + "step": 342 + }, + { + "epoch": 0.5793684210526315, + "grad_norm": 97.40013916506999, + "learning_rate": 8.701908574806198e-08, + "logits/chosen": -7.5892767906188965, + "logits/rejected": -8.08768081665039, + "logps/chosen": -2175.58203125, + "logps/rejected": -2273.545166015625, + "loss": 0.2472, + "nll_loss": 5.250769138336182, + "rewards/accuracies": 0.875, + "rewards/chosen": -35.854244232177734, + "rewards/margins": 3.6291019916534424, + "rewards/rejected": -39.48334884643555, + "step": 344 + }, + { + "epoch": 0.5827368421052631, + "grad_norm": 248.47568888877237, + "learning_rate": 8.587264643730875e-08, + "logits/chosen": -7.8160400390625, + "logits/rejected": -7.814428329467773, + "logps/chosen": -2092.376708984375, + "logps/rejected": -2212.744873046875, + "loss": 0.2602, + "nll_loss": 5.270852565765381, + "rewards/accuracies": 1.0, + "rewards/chosen": -35.40900802612305, + "rewards/margins": 4.026213645935059, + "rewards/rejected": -39.43522262573242, + "step": 346 + }, + { + "epoch": 0.5861052631578947, + "grad_norm": 216.54285625194382, + "learning_rate": 8.472809866880475e-08, + "logits/chosen": -6.873068332672119, + "logits/rejected": -8.404152870178223, + "logps/chosen": -1594.337158203125, + "logps/rejected": -2241.162109375, + "loss": 0.167, + "nll_loss": 5.596911907196045, + "rewards/accuracies": 1.0, + "rewards/chosen": -25.399681091308594, + "rewards/margins": 13.986969947814941, + "rewards/rejected": -39.38665771484375, + "step": 348 + }, + { + "epoch": 0.5894736842105263, + "grad_norm": 394.3825078524655, + "learning_rate": 8.358559568855248e-08, + "logits/chosen": -7.783346652984619, + "logits/rejected": -7.84064245223999, + "logps/chosen": -2028.794189453125, + "logps/rejected": -2198.99365234375, + "loss": 0.2492, + "nll_loss": 5.23142671585083, + "rewards/accuracies": 0.875, + "rewards/chosen": -33.149845123291016, + "rewards/margins": 5.621738910675049, + "rewards/rejected": -38.77158737182617, + "step": 350 + }, + { + "epoch": 0.592842105263158, + "grad_norm": 476.30094871834973, + "learning_rate": 8.244529046877334e-08, + "logits/chosen": -6.29586935043335, + "logits/rejected": -8.341531753540039, + "logps/chosen": -1501.43212890625, + "logps/rejected": -2209.18115234375, + "loss": 0.336, + "nll_loss": 5.435293197631836, + "rewards/accuracies": 0.875, + "rewards/chosen": -24.25966453552246, + "rewards/margins": 15.036050796508789, + "rewards/rejected": -39.29571533203125, + "step": 352 + }, + { + "epoch": 0.5962105263157895, + "grad_norm": 478.94423279229494, + "learning_rate": 8.130733568742579e-08, + "logits/chosen": -8.36856460571289, + "logits/rejected": -8.123823165893555, + "logps/chosen": -2097.725830078125, + "logps/rejected": -2244.956298828125, + "loss": 0.2053, + "nll_loss": 5.296268939971924, + "rewards/accuracies": 0.875, + "rewards/chosen": -33.903053283691406, + "rewards/margins": 4.607615947723389, + "rewards/rejected": -38.51067352294922, + "step": 354 + }, + { + "epoch": 0.5995789473684211, + "grad_norm": 456.66250188186024, + "learning_rate": 8.017188370776291e-08, + "logits/chosen": -6.48959493637085, + "logits/rejected": -7.693470478057861, + "logps/chosen": -1521.6650390625, + "logps/rejected": -2250.853271484375, + "loss": 0.2331, + "nll_loss": 5.437237739562988, + "rewards/accuracies": 1.0, + "rewards/chosen": -23.872224807739258, + "rewards/margins": 15.529407501220703, + "rewards/rejected": -39.401634216308594, + "step": 356 + }, + { + "epoch": 0.6029473684210527, + "grad_norm": 220.8432613727437, + "learning_rate": 7.903908655793223e-08, + "logits/chosen": -6.895531177520752, + "logits/rejected": -8.068331718444824, + "logps/chosen": -1690.776611328125, + "logps/rejected": -2218.949462890625, + "loss": 0.1547, + "nll_loss": 5.907761096954346, + "rewards/accuracies": 0.875, + "rewards/chosen": -28.69055938720703, + "rewards/margins": 10.130281448364258, + "rewards/rejected": -38.820838928222656, + "step": 358 + }, + { + "epoch": 0.6063157894736843, + "grad_norm": 360.83010848251007, + "learning_rate": 7.790909591062032e-08, + "logits/chosen": -7.703709602355957, + "logits/rejected": -8.051048278808594, + "logps/chosen": -2004.1722412109375, + "logps/rejected": -2261.4755859375, + "loss": 0.2567, + "nll_loss": 5.125810146331787, + "rewards/accuracies": 0.875, + "rewards/chosen": -32.635921478271484, + "rewards/margins": 6.34180212020874, + "rewards/rejected": -38.977725982666016, + "step": 360 + }, + { + "epoch": 0.6096842105263158, + "grad_norm": 173.19431712494207, + "learning_rate": 7.678206306274494e-08, + "logits/chosen": -7.508633613586426, + "logits/rejected": -8.247699737548828, + "logps/chosen": -1943.2529296875, + "logps/rejected": -2260.33447265625, + "loss": 0.1663, + "nll_loss": 5.228693008422852, + "rewards/accuracies": 0.875, + "rewards/chosen": -30.90469741821289, + "rewards/margins": 7.72345495223999, + "rewards/rejected": -38.628150939941406, + "step": 362 + }, + { + "epoch": 0.6130526315789474, + "grad_norm": 387.0681464548588, + "learning_rate": 7.565813891519764e-08, + "logits/chosen": -7.763317108154297, + "logits/rejected": -8.09384822845459, + "logps/chosen": -1969.88134765625, + "logps/rejected": -2286.67333984375, + "loss": 0.2615, + "nll_loss": 5.108150005340576, + "rewards/accuracies": 0.875, + "rewards/chosen": -31.575706481933594, + "rewards/margins": 7.403677463531494, + "rewards/rejected": -38.97938537597656, + "step": 364 + }, + { + "epoch": 0.616421052631579, + "grad_norm": 267.106705620619, + "learning_rate": 7.45374739526393e-08, + "logits/chosen": -7.11317253112793, + "logits/rejected": -8.084968566894531, + "logps/chosen": -1716.8660888671875, + "logps/rejected": -2228.20751953125, + "loss": 0.311, + "nll_loss": 4.8913702964782715, + "rewards/accuracies": 1.0, + "rewards/chosen": -27.476198196411133, + "rewards/margins": 11.33434772491455, + "rewards/rejected": -38.810546875, + "step": 366 + }, + { + "epoch": 0.6197894736842106, + "grad_norm": 295.702658932922, + "learning_rate": 7.342021822335143e-08, + "logits/chosen": -6.858173847198486, + "logits/rejected": -8.155499458312988, + "logps/chosen": -1734.608642578125, + "logps/rejected": -2249.30029296875, + "loss": 0.2118, + "nll_loss": 5.381309986114502, + "rewards/accuracies": 1.0, + "rewards/chosen": -27.76462745666504, + "rewards/margins": 11.01102066040039, + "rewards/rejected": -38.7756462097168, + "step": 368 + }, + { + "epoch": 0.6231578947368421, + "grad_norm": 253.96226497503523, + "learning_rate": 7.230652131914573e-08, + "logits/chosen": -6.647583484649658, + "logits/rejected": -8.132351875305176, + "logps/chosen": -1633.9908447265625, + "logps/rejected": -2257.0341796875, + "loss": 0.1478, + "nll_loss": 5.517997741699219, + "rewards/accuracies": 0.875, + "rewards/chosen": -26.471336364746094, + "rewards/margins": 11.363603591918945, + "rewards/rejected": -37.83494186401367, + "step": 370 + }, + { + "epoch": 0.6265263157894737, + "grad_norm": 195.7616335942771, + "learning_rate": 7.119653235533519e-08, + "logits/chosen": -6.492335319519043, + "logits/rejected": -8.236223220825195, + "logps/chosen": -1685.419677734375, + "logps/rejected": -2270.370361328125, + "loss": 0.2562, + "nll_loss": 5.618862628936768, + "rewards/accuracies": 1.0, + "rewards/chosen": -26.345735549926758, + "rewards/margins": 10.770453453063965, + "rewards/rejected": -37.116188049316406, + "step": 372 + }, + { + "epoch": 0.6298947368421053, + "grad_norm": 157.13258856973306, + "learning_rate": 7.009039995076844e-08, + "logits/chosen": -7.430450916290283, + "logits/rejected": -8.231632232666016, + "logps/chosen": -2042.21044921875, + "logps/rejected": -2247.43798828125, + "loss": 0.177, + "nll_loss": 5.24857234954834, + "rewards/accuracies": 0.875, + "rewards/chosen": -32.96132278442383, + "rewards/margins": 5.063416004180908, + "rewards/rejected": -38.02473831176758, + "step": 374 + }, + { + "epoch": 0.6332631578947369, + "grad_norm": 306.3698872482412, + "learning_rate": 6.898827220793101e-08, + "logits/chosen": -7.346684455871582, + "logits/rejected": -8.37730884552002, + "logps/chosen": -1900.4794921875, + "logps/rejected": -2195.65380859375, + "loss": 0.2348, + "nll_loss": 5.606869697570801, + "rewards/accuracies": 0.75, + "rewards/chosen": -30.2860107421875, + "rewards/margins": 6.685296535491943, + "rewards/rejected": -36.97130584716797, + "step": 376 + }, + { + "epoch": 0.6366315789473684, + "grad_norm": 404.1225835065395, + "learning_rate": 6.78902966931155e-08, + "logits/chosen": -6.475454807281494, + "logits/rejected": -8.423490524291992, + "logps/chosen": -1562.2786865234375, + "logps/rejected": -2233.23779296875, + "loss": 0.2088, + "nll_loss": 5.756392478942871, + "rewards/accuracies": 1.0, + "rewards/chosen": -24.448637008666992, + "rewards/margins": 13.247220993041992, + "rewards/rejected": -37.69586181640625, + "step": 378 + }, + { + "epoch": 0.64, + "grad_norm": 154.48168757776762, + "learning_rate": 6.67966204166636e-08, + "logits/chosen": -7.25782585144043, + "logits/rejected": -8.18947982788086, + "logps/chosen": -2031.32568359375, + "logps/rejected": -2208.31298828125, + "loss": 0.0898, + "nll_loss": 5.3062825202941895, + "rewards/accuracies": 1.0, + "rewards/chosen": -30.2602596282959, + "rewards/margins": 6.670921325683594, + "rewards/rejected": -36.93117904663086, + "step": 380 + }, + { + "epoch": 0.6433684210526316, + "grad_norm": 118.13213437450032, + "learning_rate": 6.570738981328266e-08, + "logits/chosen": -6.853590488433838, + "logits/rejected": -8.224015235900879, + "logps/chosen": -1710.7021484375, + "logps/rejected": -2247.219482421875, + "loss": 0.092, + "nll_loss": 5.132368564605713, + "rewards/accuracies": 1.0, + "rewards/chosen": -26.538768768310547, + "rewards/margins": 10.044065475463867, + "rewards/rejected": -36.58283615112305, + "step": 382 + }, + { + "epoch": 0.6467368421052632, + "grad_norm": 173.64933572252733, + "learning_rate": 6.462275072243906e-08, + "logits/chosen": -6.30927848815918, + "logits/rejected": -8.526763916015625, + "logps/chosen": -1525.7376708984375, + "logps/rejected": -2231.30859375, + "loss": 0.1534, + "nll_loss": 5.520993709564209, + "rewards/accuracies": 0.875, + "rewards/chosen": -23.9798641204834, + "rewards/margins": 13.774813652038574, + "rewards/rejected": -37.75468063354492, + "step": 384 + }, + { + "epoch": 0.6501052631578947, + "grad_norm": 210.67565229221435, + "learning_rate": 6.354284836883156e-08, + "logits/chosen": -7.883388042449951, + "logits/rejected": -7.760946273803711, + "logps/chosen": -2060.9970703125, + "logps/rejected": -2203.468505859375, + "loss": 0.1437, + "nll_loss": 4.833434104919434, + "rewards/accuracies": 0.875, + "rewards/chosen": -32.70746612548828, + "rewards/margins": 4.838762283325195, + "rewards/rejected": -37.54623031616211, + "step": 386 + }, + { + "epoch": 0.6534736842105263, + "grad_norm": 194.1019283617502, + "learning_rate": 6.246782734294683e-08, + "logits/chosen": -6.361388206481934, + "logits/rejected": -8.105430603027344, + "logps/chosen": -1549.34228515625, + "logps/rejected": -2235.9560546875, + "loss": 0.0999, + "nll_loss": 5.782830238342285, + "rewards/accuracies": 1.0, + "rewards/chosen": -23.41505241394043, + "rewards/margins": 15.236028671264648, + "rewards/rejected": -38.651084899902344, + "step": 388 + }, + { + "epoch": 0.6568421052631579, + "grad_norm": 151.3635743520382, + "learning_rate": 6.139783158169984e-08, + "logits/chosen": -7.068288803100586, + "logits/rejected": -7.906414985656738, + "logps/chosen": -1757.916259765625, + "logps/rejected": -2189.158203125, + "loss": 0.2537, + "nll_loss": 5.233246326446533, + "rewards/accuracies": 1.0, + "rewards/chosen": -26.988143920898438, + "rewards/margins": 10.64902114868164, + "rewards/rejected": -37.63716506958008, + "step": 390 + }, + { + "epoch": 0.6602105263157895, + "grad_norm": 202.61630533473854, + "learning_rate": 6.033300434916202e-08, + "logits/chosen": -6.130324363708496, + "logits/rejected": -8.429763793945312, + "logps/chosen": -1517.0257568359375, + "logps/rejected": -2255.234619140625, + "loss": 0.0935, + "nll_loss": 5.869035243988037, + "rewards/accuracies": 1.0, + "rewards/chosen": -24.74703598022461, + "rewards/margins": 14.127795219421387, + "rewards/rejected": -38.87482833862305, + "step": 392 + }, + { + "epoch": 0.663578947368421, + "grad_norm": 51.935332949676464, + "learning_rate": 5.9273488217379056e-08, + "logits/chosen": -6.12472677230835, + "logits/rejected": -8.31787109375, + "logps/chosen": -1496.01025390625, + "logps/rejected": -2250.830078125, + "loss": 0.0405, + "nll_loss": 5.127452373504639, + "rewards/accuracies": 1.0, + "rewards/chosen": -23.69045066833496, + "rewards/margins": 14.94088363647461, + "rewards/rejected": -38.6313362121582, + "step": 394 + }, + { + "epoch": 0.6669473684210526, + "grad_norm": 157.62065071319176, + "learning_rate": 5.821942504728182e-08, + "logits/chosen": -5.9207844734191895, + "logits/rejected": -8.412075996398926, + "logps/chosen": -1410.460205078125, + "logps/rejected": -2238.791259765625, + "loss": 0.2002, + "nll_loss": 5.9735589027404785, + "rewards/accuracies": 1.0, + "rewards/chosen": -21.796749114990234, + "rewards/margins": 17.19828224182129, + "rewards/rejected": -38.99502944946289, + "step": 396 + }, + { + "epoch": 0.6703157894736842, + "grad_norm": 409.7869110891659, + "learning_rate": 5.7170955969692257e-08, + "logits/chosen": -7.128916263580322, + "logits/rejected": -8.091632843017578, + "logps/chosen": -1968.1507568359375, + "logps/rejected": -2279.8564453125, + "loss": 0.2463, + "nll_loss": 5.298381328582764, + "rewards/accuracies": 0.875, + "rewards/chosen": -31.842985153198242, + "rewards/margins": 7.260722637176514, + "rewards/rejected": -39.10371017456055, + "step": 398 + }, + { + "epoch": 0.6736842105263158, + "grad_norm": 196.72465489549091, + "learning_rate": 5.612822136642696e-08, + "logits/chosen": -7.994609832763672, + "logits/rejected": -8.255779266357422, + "logps/chosen": -2162.84375, + "logps/rejected": -2256.470458984375, + "loss": 0.1349, + "nll_loss": 5.209514617919922, + "rewards/accuracies": 0.875, + "rewards/chosen": -35.79097366333008, + "rewards/margins": 3.0046000480651855, + "rewards/rejected": -38.795570373535156, + "step": 400 + }, + { + "epoch": 0.6770526315789474, + "grad_norm": 276.3858546036682, + "learning_rate": 5.5091360851501214e-08, + "logits/chosen": -7.058955192565918, + "logits/rejected": -8.353060722351074, + "logps/chosen": -1752.0609130859375, + "logps/rejected": -2249.98486328125, + "loss": 0.1673, + "nll_loss": 5.86051607131958, + "rewards/accuracies": 0.75, + "rewards/chosen": -29.15858268737793, + "rewards/margins": 8.361825942993164, + "rewards/rejected": -37.520408630371094, + "step": 402 + }, + { + "epoch": 0.6804210526315789, + "grad_norm": 283.88427439715645, + "learning_rate": 5.406051325243586e-08, + "logits/chosen": -6.989212989807129, + "logits/rejected": -7.9651103019714355, + "logps/chosen": -1695.2578125, + "logps/rejected": -2204.557373046875, + "loss": 0.173, + "nll_loss": 5.183251857757568, + "rewards/accuracies": 0.875, + "rewards/chosen": -27.098966598510742, + "rewards/margins": 11.072956085205078, + "rewards/rejected": -38.17192077636719, + "step": 404 + }, + { + "epoch": 0.6837894736842105, + "grad_norm": 262.0694683666909, + "learning_rate": 5.30358165916692e-08, + "logits/chosen": -7.981943130493164, + "logits/rejected": -8.023259162902832, + "logps/chosen": -2143.8271484375, + "logps/rejected": -2240.49609375, + "loss": 0.1934, + "nll_loss": 5.243900775909424, + "rewards/accuracies": 1.0, + "rewards/chosen": -34.75602722167969, + "rewards/margins": 2.599457263946533, + "rewards/rejected": -37.3554801940918, + "step": 406 + }, + { + "epoch": 0.6871578947368421, + "grad_norm": 375.4614868731545, + "learning_rate": 5.201740806807706e-08, + "logits/chosen": -6.7205047607421875, + "logits/rejected": -8.098003387451172, + "logps/chosen": -1731.788818359375, + "logps/rejected": -2216.237060546875, + "loss": 0.2756, + "nll_loss": 5.406951904296875, + "rewards/accuracies": 0.75, + "rewards/chosen": -27.372705459594727, + "rewards/margins": 11.000872611999512, + "rewards/rejected": -38.37357711791992, + "step": 408 + }, + { + "epoch": 0.6905263157894737, + "grad_norm": 210.62564910879206, + "learning_rate": 5.100542403860272e-08, + "logits/chosen": -7.43436861038208, + "logits/rejected": -7.938074111938477, + "logps/chosen": -2066.572265625, + "logps/rejected": -2238.23876953125, + "loss": 0.2334, + "nll_loss": 5.189877033233643, + "rewards/accuracies": 0.875, + "rewards/chosen": -34.191410064697266, + "rewards/margins": 4.024079322814941, + "rewards/rejected": -38.215492248535156, + "step": 410 + }, + { + "epoch": 0.6938947368421052, + "grad_norm": 136.5153093521016, + "learning_rate": 5.000000000000002e-08, + "logits/chosen": -5.983953475952148, + "logits/rejected": -8.266366004943848, + "logps/chosen": -1458.96875, + "logps/rejected": -2244.586181640625, + "loss": 0.1569, + "nll_loss": 6.0212860107421875, + "rewards/accuracies": 1.0, + "rewards/chosen": -23.258224487304688, + "rewards/margins": 14.572385787963867, + "rewards/rejected": -37.83060836791992, + "step": 412 + }, + { + "epoch": 0.6972631578947368, + "grad_norm": 403.7847701746563, + "learning_rate": 4.900127057069116e-08, + "logits/chosen": -6.388440132141113, + "logits/rejected": -8.440248489379883, + "logps/chosen": -1643.813720703125, + "logps/rejected": -2231.506591796875, + "loss": 0.2028, + "nll_loss": 5.681787490844727, + "rewards/accuracies": 0.875, + "rewards/chosen": -26.718189239501953, + "rewards/margins": 11.000421524047852, + "rewards/rejected": -37.71861267089844, + "step": 414 + }, + { + "epoch": 0.7006315789473684, + "grad_norm": 384.7121082705872, + "learning_rate": 4.8009369472742546e-08, + "logits/chosen": -6.855067729949951, + "logits/rejected": -8.357769966125488, + "logps/chosen": -1806.01318359375, + "logps/rejected": -2285.265869140625, + "loss": 0.2982, + "nll_loss": 5.6631622314453125, + "rewards/accuracies": 1.0, + "rewards/chosen": -29.00935935974121, + "rewards/margins": 9.206945419311523, + "rewards/rejected": -38.216304779052734, + "step": 416 + }, + { + "epoch": 0.704, + "grad_norm": 205.84300753243963, + "learning_rate": 4.7024429513960414e-08, + "logits/chosen": -6.546984672546387, + "logits/rejected": -8.477337837219238, + "logps/chosen": -1774.5728759765625, + "logps/rejected": -2264.49169921875, + "loss": 0.1813, + "nll_loss": 5.92774772644043, + "rewards/accuracies": 1.0, + "rewards/chosen": -28.638093948364258, + "rewards/margins": 9.986715316772461, + "rewards/rejected": -38.62480926513672, + "step": 418 + }, + { + "epoch": 0.7073684210526315, + "grad_norm": 143.96793262737364, + "learning_rate": 4.604658257010874e-08, + "logits/chosen": -6.683577060699463, + "logits/rejected": -8.190496444702148, + "logps/chosen": -1780.205322265625, + "logps/rejected": -2335.033203125, + "loss": 0.0523, + "nll_loss": 5.4027886390686035, + "rewards/accuracies": 1.0, + "rewards/chosen": -26.897085189819336, + "rewards/margins": 11.124114036560059, + "rewards/rejected": -38.02119827270508, + "step": 420 + }, + { + "epoch": 0.7107368421052631, + "grad_norm": 168.66393059683804, + "learning_rate": 4.507595956725233e-08, + "logits/chosen": -6.165244102478027, + "logits/rejected": -8.504766464233398, + "logps/chosen": -1527.099609375, + "logps/rejected": -2197.563720703125, + "loss": 0.2034, + "nll_loss": 6.075287818908691, + "rewards/accuracies": 1.0, + "rewards/chosen": -24.494510650634766, + "rewards/margins": 13.510110855102539, + "rewards/rejected": -38.00461959838867, + "step": 422 + }, + { + "epoch": 0.7141052631578947, + "grad_norm": 412.195910642015, + "learning_rate": 4.4112690464226697e-08, + "logits/chosen": -6.30023717880249, + "logits/rejected": -8.322918891906738, + "logps/chosen": -1680.518310546875, + "logps/rejected": -2284.201416015625, + "loss": 0.274, + "nll_loss": 5.803287982940674, + "rewards/accuracies": 0.75, + "rewards/chosen": -26.578989028930664, + "rewards/margins": 11.078222274780273, + "rewards/rejected": -37.65721130371094, + "step": 424 + }, + { + "epoch": 0.7174736842105263, + "grad_norm": 292.02246007370735, + "learning_rate": 4.315690423523757e-08, + "logits/chosen": -6.849490165710449, + "logits/rejected": -8.255549430847168, + "logps/chosen": -1616.78857421875, + "logps/rejected": -2214.650390625, + "loss": 0.2553, + "nll_loss": 5.239808559417725, + "rewards/accuracies": 0.75, + "rewards/chosen": -24.917198181152344, + "rewards/margins": 12.881205558776855, + "rewards/rejected": -37.79840087890625, + "step": 426 + }, + { + "epoch": 0.7208421052631578, + "grad_norm": 256.5445563152725, + "learning_rate": 4.2208728852592465e-08, + "logits/chosen": -6.686156272888184, + "logits/rejected": -8.05382251739502, + "logps/chosen": -1576.5635986328125, + "logps/rejected": -2256.39697265625, + "loss": 0.3019, + "nll_loss": 5.413787841796875, + "rewards/accuracies": 0.875, + "rewards/chosen": -25.76823616027832, + "rewards/margins": 12.707958221435547, + "rewards/rejected": -38.476192474365234, + "step": 428 + }, + { + "epoch": 0.7242105263157895, + "grad_norm": 244.67144743001788, + "learning_rate": 4.126829126956588e-08, + "logits/chosen": -6.939605236053467, + "logits/rejected": -8.691452026367188, + "logps/chosen": -1630.356689453125, + "logps/rejected": -2211.13671875, + "loss": 0.252, + "nll_loss": 5.6548848152160645, + "rewards/accuracies": 0.875, + "rewards/chosen": -25.576709747314453, + "rewards/margins": 13.35787582397461, + "rewards/rejected": -38.93458938598633, + "step": 430 + }, + { + "epoch": 0.7275789473684211, + "grad_norm": 141.2458931925102, + "learning_rate": 4.033571740340157e-08, + "logits/chosen": -6.9448723793029785, + "logits/rejected": -8.11502456665039, + "logps/chosen": -1771.171875, + "logps/rejected": -2253.62353515625, + "loss": 0.0496, + "nll_loss": 5.7086663246154785, + "rewards/accuracies": 1.0, + "rewards/chosen": -27.938629150390625, + "rewards/margins": 10.429763793945312, + "rewards/rejected": -38.36838912963867, + "step": 432 + }, + { + "epoch": 0.7309473684210527, + "grad_norm": 70.65354096973839, + "learning_rate": 3.9411132118452896e-08, + "logits/chosen": -7.186574935913086, + "logits/rejected": -8.068167686462402, + "logps/chosen": -1631.0499267578125, + "logps/rejected": -2222.7802734375, + "loss": 0.0387, + "nll_loss": 4.864493370056152, + "rewards/accuracies": 1.0, + "rewards/chosen": -25.45480728149414, + "rewards/margins": 13.78203010559082, + "rewards/rejected": -39.236839294433594, + "step": 434 + }, + { + "epoch": 0.7343157894736843, + "grad_norm": 206.48565136287314, + "learning_rate": 3.849465920946475e-08, + "logits/chosen": -6.413349151611328, + "logits/rejected": -8.138294219970703, + "logps/chosen": -1650.3955078125, + "logps/rejected": -2234.84912109375, + "loss": 0.1177, + "nll_loss": 5.74521541595459, + "rewards/accuracies": 0.875, + "rewards/chosen": -26.769577026367188, + "rewards/margins": 10.97209358215332, + "rewards/rejected": -37.74167251586914, + "step": 436 + }, + { + "epoch": 0.7376842105263158, + "grad_norm": 204.9937975433647, + "learning_rate": 3.758642138499819e-08, + "logits/chosen": -6.9706597328186035, + "logits/rejected": -8.29416275024414, + "logps/chosen": -1775.1536865234375, + "logps/rejected": -2233.42138671875, + "loss": 0.1547, + "nll_loss": 5.631414890289307, + "rewards/accuracies": 0.75, + "rewards/chosen": -28.840255737304688, + "rewards/margins": 9.066543579101562, + "rewards/rejected": -37.90679931640625, + "step": 438 + }, + { + "epoch": 0.7410526315789474, + "grad_norm": 277.05848886110675, + "learning_rate": 3.668654025100075e-08, + "logits/chosen": -7.38844108581543, + "logits/rejected": -8.166891098022461, + "logps/chosen": -1834.601318359375, + "logps/rejected": -2221.83984375, + "loss": 0.151, + "nll_loss": 5.479992389678955, + "rewards/accuracies": 1.0, + "rewards/chosen": -28.740129470825195, + "rewards/margins": 10.051774978637695, + "rewards/rejected": -38.791900634765625, + "step": 440 + }, + { + "epoch": 0.744421052631579, + "grad_norm": 67.30116112714921, + "learning_rate": 3.579513629452464e-08, + "logits/chosen": -6.464494228363037, + "logits/rejected": -8.139457702636719, + "logps/chosen": -1527.421630859375, + "logps/rejected": -2210.33740234375, + "loss": 0.0763, + "nll_loss": 5.879497528076172, + "rewards/accuracies": 1.0, + "rewards/chosen": -24.09062385559082, + "rewards/margins": 14.153335571289062, + "rewards/rejected": -38.243961334228516, + "step": 442 + }, + { + "epoch": 0.7477894736842106, + "grad_norm": 198.27913956717686, + "learning_rate": 3.491232886759398e-08, + "logits/chosen": -7.406864166259766, + "logits/rejected": -8.350690841674805, + "logps/chosen": -1810.20166015625, + "logps/rejected": -2223.15478515625, + "loss": 0.1604, + "nll_loss": 5.455080032348633, + "rewards/accuracies": 0.875, + "rewards/chosen": -30.108121871948242, + "rewards/margins": 7.683882236480713, + "rewards/rejected": -37.7920036315918, + "step": 444 + }, + { + "epoch": 0.7511578947368421, + "grad_norm": 57.27184908194972, + "learning_rate": 3.4038236171224943e-08, + "logits/chosen": -6.585896968841553, + "logits/rejected": -8.207939147949219, + "logps/chosen": -1619.34423828125, + "logps/rejected": -2219.935546875, + "loss": 0.1412, + "nll_loss": 5.543335914611816, + "rewards/accuracies": 1.0, + "rewards/chosen": -24.96392250061035, + "rewards/margins": 12.792692184448242, + "rewards/rejected": -37.756614685058594, + "step": 446 + }, + { + "epoch": 0.7545263157894737, + "grad_norm": 162.97400818458752, + "learning_rate": 3.317297523959927e-08, + "logits/chosen": -7.09434175491333, + "logits/rejected": -8.217880249023438, + "logps/chosen": -1831.752685546875, + "logps/rejected": -2260.440185546875, + "loss": 0.1862, + "nll_loss": 5.406676292419434, + "rewards/accuracies": 1.0, + "rewards/chosen": -30.033018112182617, + "rewards/margins": 9.028882026672363, + "rewards/rejected": -39.0619010925293, + "step": 448 + }, + { + "epoch": 0.7578947368421053, + "grad_norm": 154.08278897844173, + "learning_rate": 3.231666192439442e-08, + "logits/chosen": -7.439211368560791, + "logits/rejected": -8.138461112976074, + "logps/chosen": -1908.0830078125, + "logps/rejected": -2208.218505859375, + "loss": 0.1308, + "nll_loss": 6.031886100769043, + "rewards/accuracies": 0.875, + "rewards/chosen": -31.519283294677734, + "rewards/margins": 8.400613784790039, + "rewards/rejected": -39.919898986816406, + "step": 450 + }, + { + "epoch": 0.7612631578947369, + "grad_norm": 73.20681520848144, + "learning_rate": 3.146941087927203e-08, + "logits/chosen": -7.356697082519531, + "logits/rejected": -7.933921813964844, + "logps/chosen": -1768.478271484375, + "logps/rejected": -2268.8671875, + "loss": 0.0731, + "nll_loss": 5.609357833862305, + "rewards/accuracies": 1.0, + "rewards/chosen": -27.863664627075195, + "rewards/margins": 11.745465278625488, + "rewards/rejected": -39.609130859375, + "step": 452 + }, + { + "epoch": 0.7646315789473684, + "grad_norm": 304.7554603694575, + "learning_rate": 3.063133554452645e-08, + "logits/chosen": -7.254971027374268, + "logits/rejected": -8.151304244995117, + "logps/chosen": -1963.06396484375, + "logps/rejected": -2218.695068359375, + "loss": 0.2756, + "nll_loss": 5.318238258361816, + "rewards/accuracies": 0.875, + "rewards/chosen": -32.04559326171875, + "rewards/margins": 6.272039890289307, + "rewards/rejected": -38.31763458251953, + "step": 454 + }, + { + "epoch": 0.768, + "grad_norm": 196.76512807151576, + "learning_rate": 2.980254813189623e-08, + "logits/chosen": -7.754366397857666, + "logits/rejected": -8.369587898254395, + "logps/chosen": -1897.6082763671875, + "logps/rejected": -2181.4443359375, + "loss": 0.0918, + "nll_loss": 5.094344139099121, + "rewards/accuracies": 0.75, + "rewards/chosen": -30.451175689697266, + "rewards/margins": 7.756158351898193, + "rewards/rejected": -38.207332611083984, + "step": 456 + }, + { + "epoch": 0.7713684210526316, + "grad_norm": 232.4816020946604, + "learning_rate": 2.8983159609539633e-08, + "logits/chosen": -8.158332824707031, + "logits/rejected": -7.675507545471191, + "logps/chosen": -2173.97265625, + "logps/rejected": -2187.316650390625, + "loss": 0.1702, + "nll_loss": 4.910658359527588, + "rewards/accuracies": 1.0, + "rewards/chosen": -35.214569091796875, + "rewards/margins": 4.455814838409424, + "rewards/rejected": -39.67039108276367, + "step": 458 + }, + { + "epoch": 0.7747368421052632, + "grad_norm": 185.23537602534242, + "learning_rate": 2.8173279687177054e-08, + "logits/chosen": -6.689702987670898, + "logits/rejected": -8.015731811523438, + "logps/chosen": -1809.507568359375, + "logps/rejected": -2290.198486328125, + "loss": 0.079, + "nll_loss": 5.7478179931640625, + "rewards/accuracies": 0.875, + "rewards/chosen": -29.168781280517578, + "rewards/margins": 10.092117309570312, + "rewards/rejected": -39.26089859008789, + "step": 460 + }, + { + "epoch": 0.7781052631578947, + "grad_norm": 228.5238209000322, + "learning_rate": 2.7373016801401573e-08, + "logits/chosen": -7.5323591232299805, + "logits/rejected": -8.179119110107422, + "logps/chosen": -2089.93896484375, + "logps/rejected": -2171.359130859375, + "loss": 0.2045, + "nll_loss": 5.747359275817871, + "rewards/accuracies": 0.75, + "rewards/chosen": -34.591739654541016, + "rewards/margins": 3.1891961097717285, + "rewards/rejected": -37.78093719482422, + "step": 462 + }, + { + "epoch": 0.7814736842105263, + "grad_norm": 176.4295959972419, + "learning_rate": 2.6582478101160165e-08, + "logits/chosen": -6.586089134216309, + "logits/rejected": -8.126249313354492, + "logps/chosen": -1404.34130859375, + "logps/rejected": -2220.29345703125, + "loss": 0.0916, + "nll_loss": 5.6755828857421875, + "rewards/accuracies": 1.0, + "rewards/chosen": -21.904067993164062, + "rewards/margins": 16.568695068359375, + "rewards/rejected": -38.4727668762207, + "step": 464 + }, + { + "epoch": 0.7848421052631579, + "grad_norm": 109.08978437364105, + "learning_rate": 2.580176943340756e-08, + "logits/chosen": -6.541893005371094, + "logits/rejected": -8.444816589355469, + "logps/chosen": -1707.611328125, + "logps/rejected": -2249.0712890625, + "loss": 0.1225, + "nll_loss": 5.4323201179504395, + "rewards/accuracies": 1.0, + "rewards/chosen": -27.17432403564453, + "rewards/margins": 12.210725784301758, + "rewards/rejected": -39.38505172729492, + "step": 466 + }, + { + "epoch": 0.7882105263157895, + "grad_norm": 266.4174356156143, + "learning_rate": 2.5030995328933723e-08, + "logits/chosen": -6.337734222412109, + "logits/rejected": -8.216695785522461, + "logps/chosen": -1433.7144775390625, + "logps/rejected": -2253.561279296875, + "loss": 0.0827, + "nll_loss": 5.440419673919678, + "rewards/accuracies": 1.0, + "rewards/chosen": -21.820663452148438, + "rewards/margins": 16.79531478881836, + "rewards/rejected": -38.61597442626953, + "step": 468 + }, + { + "epoch": 0.791578947368421, + "grad_norm": 444.435808075812, + "learning_rate": 2.4270258988368376e-08, + "logits/chosen": -6.913492202758789, + "logits/rejected": -8.389839172363281, + "logps/chosen": -1789.693603515625, + "logps/rejected": -2235.32421875, + "loss": 0.4713, + "nll_loss": 5.414417743682861, + "rewards/accuracies": 0.875, + "rewards/chosen": -29.466032028198242, + "rewards/margins": 8.25886344909668, + "rewards/rejected": -37.72489547729492, + "step": 470 + }, + { + "epoch": 0.7949473684210526, + "grad_norm": 227.88051110998938, + "learning_rate": 2.3519662268363006e-08, + "logits/chosen": -7.451695442199707, + "logits/rejected": -8.134040832519531, + "logps/chosen": -1984.7669677734375, + "logps/rejected": -2170.09326171875, + "loss": 0.3332, + "nll_loss": 5.461165904998779, + "rewards/accuracies": 0.75, + "rewards/chosen": -32.05513381958008, + "rewards/margins": 4.828782081604004, + "rewards/rejected": -36.883914947509766, + "step": 472 + }, + { + "epoch": 0.7983157894736842, + "grad_norm": 161.35000532615425, + "learning_rate": 2.2779305667953152e-08, + "logits/chosen": -7.434182167053223, + "logits/rejected": -7.826815605163574, + "logps/chosen": -1815.1357421875, + "logps/rejected": -2225.9404296875, + "loss": 0.1478, + "nll_loss": 4.895688533782959, + "rewards/accuracies": 1.0, + "rewards/chosen": -28.85767936706543, + "rewards/margins": 8.826580047607422, + "rewards/rejected": -37.684261322021484, + "step": 474 + }, + { + "epoch": 0.8016842105263158, + "grad_norm": 252.39419967830085, + "learning_rate": 2.204928831510241e-08, + "logits/chosen": -7.645818710327148, + "logits/rejected": -8.229476928710938, + "logps/chosen": -1907.0130615234375, + "logps/rejected": -2305.749755859375, + "loss": 0.1393, + "nll_loss": 5.33302116394043, + "rewards/accuracies": 1.0, + "rewards/chosen": -30.992538452148438, + "rewards/margins": 9.087993621826172, + "rewards/rejected": -40.080535888671875, + "step": 476 + }, + { + "epoch": 0.8050526315789474, + "grad_norm": 519.421493105551, + "learning_rate": 2.132970795342982e-08, + "logits/chosen": -7.341223239898682, + "logits/rejected": -8.555485725402832, + "logps/chosen": -1928.36865234375, + "logps/rejected": -2207.854736328125, + "loss": 0.3261, + "nll_loss": 5.729315280914307, + "rewards/accuracies": 0.625, + "rewards/chosen": -32.422027587890625, + "rewards/margins": 6.0826640129089355, + "rewards/rejected": -38.504695892333984, + "step": 478 + }, + { + "epoch": 0.8084210526315789, + "grad_norm": 291.74584937064657, + "learning_rate": 2.0620660929123e-08, + "logits/chosen": -6.5453057289123535, + "logits/rejected": -7.966734886169434, + "logps/chosen": -1665.8740234375, + "logps/rejected": -2183.277099609375, + "loss": 0.1242, + "nll_loss": 5.9209136962890625, + "rewards/accuracies": 0.875, + "rewards/chosen": -27.65888786315918, + "rewards/margins": 10.57414436340332, + "rewards/rejected": -38.2330322265625, + "step": 480 + }, + { + "epoch": 0.8117894736842105, + "grad_norm": 324.06037246167574, + "learning_rate": 1.992224217803786e-08, + "logits/chosen": -6.169129371643066, + "logits/rejected": -7.988075256347656, + "logps/chosen": -1408.3675537109375, + "logps/rejected": -2238.338134765625, + "loss": 0.1118, + "nll_loss": 5.187215805053711, + "rewards/accuracies": 1.0, + "rewards/chosen": -22.458417892456055, + "rewards/margins": 16.477596282958984, + "rewards/rejected": -38.93601608276367, + "step": 482 + }, + { + "epoch": 0.8151578947368421, + "grad_norm": 394.80006807243586, + "learning_rate": 1.9234545212987686e-08, + "logits/chosen": -7.038122653961182, + "logits/rejected": -8.259831428527832, + "logps/chosen": -1790.6435546875, + "logps/rejected": -2252.951416015625, + "loss": 0.237, + "nll_loss": 5.162839412689209, + "rewards/accuracies": 0.875, + "rewards/chosen": -29.044601440429688, + "rewards/margins": 9.581314086914062, + "rewards/rejected": -38.62591552734375, + "step": 484 + }, + { + "epoch": 0.8185263157894737, + "grad_norm": 245.72106899897696, + "learning_rate": 1.855766211122234e-08, + "logits/chosen": -6.452915191650391, + "logits/rejected": -8.34599494934082, + "logps/chosen": -1615.44775390625, + "logps/rejected": -2228.80810546875, + "loss": 0.1125, + "nll_loss": 5.462100028991699, + "rewards/accuracies": 1.0, + "rewards/chosen": -25.96318244934082, + "rewards/margins": 12.65347671508789, + "rewards/rejected": -38.61665725708008, + "step": 486 + }, + { + "epoch": 0.8218947368421052, + "grad_norm": 265.58626188114476, + "learning_rate": 1.789168350209983e-08, + "logits/chosen": -7.410139083862305, + "logits/rejected": -8.122781753540039, + "logps/chosen": -2041.22216796875, + "logps/rejected": -2253.333740234375, + "loss": 0.1616, + "nll_loss": 5.257528781890869, + "rewards/accuracies": 1.0, + "rewards/chosen": -32.81037521362305, + "rewards/margins": 5.845396518707275, + "rewards/rejected": -38.65576934814453, + "step": 488 + }, + { + "epoch": 0.8252631578947368, + "grad_norm": 195.8964969058916, + "learning_rate": 1.723669855495199e-08, + "logits/chosen": -7.151376247406006, + "logits/rejected": -7.946229934692383, + "logps/chosen": -1715.78955078125, + "logps/rejected": -2244.12841796875, + "loss": 0.1499, + "nll_loss": 5.343441963195801, + "rewards/accuracies": 0.875, + "rewards/chosen": -27.61037826538086, + "rewards/margins": 11.252402305603027, + "rewards/rejected": -38.8627815246582, + "step": 490 + }, + { + "epoch": 0.8286315789473684, + "grad_norm": 186.530933259071, + "learning_rate": 1.659279496714503e-08, + "logits/chosen": -6.534136772155762, + "logits/rejected": -8.039037704467773, + "logps/chosen": -1630.31298828125, + "logps/rejected": -2252.484130859375, + "loss": 0.097, + "nll_loss": 5.664884567260742, + "rewards/accuracies": 1.0, + "rewards/chosen": -26.512128829956055, + "rewards/margins": 13.004049301147461, + "rewards/rejected": -39.51618194580078, + "step": 492 + }, + { + "epoch": 0.832, + "grad_norm": 335.92762194779397, + "learning_rate": 1.5960058952337884e-08, + "logits/chosen": -6.526432514190674, + "logits/rejected": -8.54305648803711, + "logps/chosen": -1744.421875, + "logps/rejected": -2280.98486328125, + "loss": 0.2653, + "nll_loss": 5.502665996551514, + "rewards/accuracies": 0.875, + "rewards/chosen": -29.049074172973633, + "rewards/margins": 9.935380935668945, + "rewards/rejected": -38.98445129394531, + "step": 494 + }, + { + "epoch": 0.8353684210526315, + "grad_norm": 218.92976542147028, + "learning_rate": 1.5338575228938612e-08, + "logits/chosen": -6.907125949859619, + "logits/rejected": -8.139141082763672, + "logps/chosen": -1731.1072998046875, + "logps/rejected": -2325.4013671875, + "loss": 0.2407, + "nll_loss": 4.939925670623779, + "rewards/accuracies": 0.875, + "rewards/chosen": -27.770357131958008, + "rewards/margins": 11.083059310913086, + "rewards/rejected": -38.85342025756836, + "step": 496 + }, + { + "epoch": 0.8387368421052631, + "grad_norm": 159.02247163333553, + "learning_rate": 1.4728427008761401e-08, + "logits/chosen": -7.341606140136719, + "logits/rejected": -8.316031455993652, + "logps/chosen": -1968.184326171875, + "logps/rejected": -2243.793701171875, + "loss": 0.1247, + "nll_loss": 5.373488426208496, + "rewards/accuracies": 1.0, + "rewards/chosen": -32.148460388183594, + "rewards/margins": 6.647695541381836, + "rewards/rejected": -38.7961540222168, + "step": 498 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 324.2851031499905, + "learning_rate": 1.4129695985885225e-08, + "logits/chosen": -7.782922267913818, + "logits/rejected": -7.933620452880859, + "logps/chosen": -2051.697265625, + "logps/rejected": -2286.102783203125, + "loss": 0.2646, + "nll_loss": 5.484002590179443, + "rewards/accuracies": 0.875, + "rewards/chosen": -32.9913215637207, + "rewards/margins": 6.650774955749512, + "rewards/rejected": -39.642093658447266, + "step": 500 + }, + { + "epoch": 0.8454736842105263, + "grad_norm": 383.21308122896846, + "learning_rate": 1.3542462325715442e-08, + "logits/chosen": -7.545998573303223, + "logits/rejected": -8.588085174560547, + "logps/chosen": -1862.868896484375, + "logps/rejected": -2210.8505859375, + "loss": 0.2632, + "nll_loss": 5.365788459777832, + "rewards/accuracies": 0.75, + "rewards/chosen": -31.22475814819336, + "rewards/margins": 7.746358871459961, + "rewards/rejected": -38.97111511230469, + "step": 502 + }, + { + "epoch": 0.8488421052631578, + "grad_norm": 252.10082928265032, + "learning_rate": 1.2966804654250462e-08, + "logits/chosen": -6.983362674713135, + "logits/rejected": -8.178686141967773, + "logps/chosen": -1801.875244140625, + "logps/rejected": -2263.5986328125, + "loss": 0.1475, + "nll_loss": 6.0845537185668945, + "rewards/accuracies": 1.0, + "rewards/chosen": -30.309919357299805, + "rewards/margins": 8.438333511352539, + "rewards/rejected": -38.748252868652344, + "step": 504 + }, + { + "epoch": 0.8522105263157894, + "grad_norm": 100.48131239501701, + "learning_rate": 1.2402800047554207e-08, + "logits/chosen": -6.94020938873291, + "logits/rejected": -8.300201416015625, + "logps/chosen": -1838.616943359375, + "logps/rejected": -2285.8544921875, + "loss": 0.2172, + "nll_loss": 5.775844573974609, + "rewards/accuracies": 1.0, + "rewards/chosen": -30.875246047973633, + "rewards/margins": 8.517755508422852, + "rewards/rejected": -39.393001556396484, + "step": 506 + }, + { + "epoch": 0.8555789473684211, + "grad_norm": 158.9115319918602, + "learning_rate": 1.1850524021436336e-08, + "logits/chosen": -6.291950225830078, + "logits/rejected": -8.398715019226074, + "logps/chosen": -1423.4140625, + "logps/rejected": -2280.9658203125, + "loss": 0.1451, + "nll_loss": 5.541550636291504, + "rewards/accuracies": 1.0, + "rewards/chosen": -22.83027458190918, + "rewards/margins": 16.543231964111328, + "rewards/rejected": -39.373504638671875, + "step": 508 + }, + { + "epoch": 0.8589473684210527, + "grad_norm": 139.85116251304456, + "learning_rate": 1.1310050521341197e-08, + "logits/chosen": -7.1967291831970215, + "logits/rejected": -8.252025604248047, + "logps/chosen": -1780.57080078125, + "logps/rejected": -2163.275390625, + "loss": 0.2197, + "nll_loss": 5.365113258361816, + "rewards/accuracies": 1.0, + "rewards/chosen": -29.839202880859375, + "rewards/margins": 9.690923690795898, + "rewards/rejected": -39.53012466430664, + "step": 510 + }, + { + "epoch": 0.8623157894736843, + "grad_norm": 320.90667451768593, + "learning_rate": 1.0781451912447059e-08, + "logits/chosen": -6.554062366485596, + "logits/rejected": -8.399422645568848, + "logps/chosen": -1562.239013671875, + "logps/rejected": -2252.832763671875, + "loss": 0.2041, + "nll_loss": 5.663952827453613, + "rewards/accuracies": 1.0, + "rewards/chosen": -25.284164428710938, + "rewards/margins": 13.453445434570312, + "rewards/rejected": -38.73760986328125, + "step": 512 + }, + { + "epoch": 0.8656842105263158, + "grad_norm": 466.8053997652555, + "learning_rate": 1.0264798969977228e-08, + "logits/chosen": -7.735861301422119, + "logits/rejected": -8.004199981689453, + "logps/chosen": -1992.1900634765625, + "logps/rejected": -2198.92041015625, + "loss": 0.3714, + "nll_loss": 5.44398307800293, + "rewards/accuracies": 0.875, + "rewards/chosen": -33.00054168701172, + "rewards/margins": 4.714691162109375, + "rewards/rejected": -37.715232849121094, + "step": 514 + }, + { + "epoch": 0.8690526315789474, + "grad_norm": 265.87276679833155, + "learning_rate": 9.760160869723455e-09, + "logits/chosen": -8.163463592529297, + "logits/rejected": -7.93167781829834, + "logps/chosen": -2085.497314453125, + "logps/rejected": -2193.436279296875, + "loss": 0.2273, + "nll_loss": 5.032775402069092, + "rewards/accuracies": 0.875, + "rewards/chosen": -33.65901565551758, + "rewards/margins": 5.517548561096191, + "rewards/rejected": -39.17656326293945, + "step": 516 + }, + { + "epoch": 0.872421052631579, + "grad_norm": 150.83909835897987, + "learning_rate": 9.267605178784032e-09, + "logits/chosen": -7.286246299743652, + "logits/rejected": -8.414872169494629, + "logps/chosen": -1961.04541015625, + "logps/rejected": -2198.59912109375, + "loss": 0.1727, + "nll_loss": 6.030449867248535, + "rewards/accuracies": 1.0, + "rewards/chosen": -32.829463958740234, + "rewards/margins": 5.171086311340332, + "rewards/rejected": -38.000553131103516, + "step": 518 + }, + { + "epoch": 0.8757894736842106, + "grad_norm": 251.81035926119085, + "learning_rate": 8.787197846517147e-09, + "logits/chosen": -5.83320426940918, + "logits/rejected": -8.448880195617676, + "logps/chosen": -1317.8983154296875, + "logps/rejected": -2223.787109375, + "loss": 0.1032, + "nll_loss": 5.769347190856934, + "rewards/accuracies": 1.0, + "rewards/chosen": -20.78626823425293, + "rewards/margins": 16.798099517822266, + "rewards/rejected": -37.58436965942383, + "step": 520 + }, + { + "epoch": 0.8791578947368421, + "grad_norm": 363.5013970055686, + "learning_rate": 8.319003195710571e-09, + "logits/chosen": -7.96688175201416, + "logits/rejected": -8.128907203674316, + "logps/chosen": -2162.495361328125, + "logps/rejected": -2259.739013671875, + "loss": 0.2421, + "nll_loss": 5.122936725616455, + "rewards/accuracies": 1.0, + "rewards/chosen": -35.563453674316406, + "rewards/margins": 4.058735370635986, + "rewards/rejected": -39.6221923828125, + "step": 522 + }, + { + "epoch": 0.8825263157894737, + "grad_norm": 196.5174829213676, + "learning_rate": 7.86308391396956e-09, + "logits/chosen": -7.527979850769043, + "logits/rejected": -8.271844863891602, + "logps/chosen": -2047.7938232421875, + "logps/rejected": -2268.5478515625, + "loss": 0.2425, + "nll_loss": 5.69685697555542, + "rewards/accuracies": 0.875, + "rewards/chosen": -34.301979064941406, + "rewards/margins": 4.180940628051758, + "rewards/rejected": -38.48291778564453, + "step": 524 + }, + { + "epoch": 0.8858947368421053, + "grad_norm": 268.6123339266088, + "learning_rate": 7.4195010453232956e-09, + "logits/chosen": -7.325321197509766, + "logits/rejected": -8.17194938659668, + "logps/chosen": -1752.6014404296875, + "logps/rejected": -2256.585205078125, + "loss": 0.1815, + "nll_loss": 5.504175186157227, + "rewards/accuracies": 0.75, + "rewards/chosen": -28.291757583618164, + "rewards/margins": 9.903141021728516, + "rewards/rejected": -38.19489669799805, + "step": 526 + }, + { + "epoch": 0.8892631578947369, + "grad_norm": 246.9449900787623, + "learning_rate": 6.988313982051719e-09, + "logits/chosen": -7.29826021194458, + "logits/rejected": -7.989704608917236, + "logps/chosen": -1862.6717529296875, + "logps/rejected": -2166.964599609375, + "loss": 0.1301, + "nll_loss": 5.650459289550781, + "rewards/accuracies": 0.75, + "rewards/chosen": -30.91683578491211, + "rewards/margins": 7.5222368240356445, + "rewards/rejected": -38.4390754699707, + "step": 528 + }, + { + "epoch": 0.8926315789473684, + "grad_norm": 378.94244285907064, + "learning_rate": 6.5695804567332036e-09, + "logits/chosen": -6.940848350524902, + "logits/rejected": -8.17110824584961, + "logps/chosen": -1906.43212890625, + "logps/rejected": -2278.51025390625, + "loss": 0.2445, + "nll_loss": 5.465893745422363, + "rewards/accuracies": 0.75, + "rewards/chosen": -31.02324867248535, + "rewards/margins": 8.205381393432617, + "rewards/rejected": -39.228633880615234, + "step": 530 + }, + { + "epoch": 0.896, + "grad_norm": 490.0475742211158, + "learning_rate": 6.163356534514807e-09, + "logits/chosen": -6.172750949859619, + "logits/rejected": -8.489022254943848, + "logps/chosen": -1533.9468994140625, + "logps/rejected": -2212.72802734375, + "loss": 0.1805, + "nll_loss": 5.867823123931885, + "rewards/accuracies": 1.0, + "rewards/chosen": -24.704307556152344, + "rewards/margins": 14.34754753112793, + "rewards/rejected": -39.05185317993164, + "step": 532 + }, + { + "epoch": 0.8993684210526316, + "grad_norm": 255.92774706692123, + "learning_rate": 5.7696966056053785e-09, + "logits/chosen": -6.2565388679504395, + "logits/rejected": -8.29428482055664, + "logps/chosen": -1412.822998046875, + "logps/rejected": -2203.464599609375, + "loss": 0.2004, + "nll_loss": 5.558704853057861, + "rewards/accuracies": 1.0, + "rewards/chosen": -20.468172073364258, + "rewards/margins": 18.679214477539062, + "rewards/rejected": -39.14738845825195, + "step": 534 + }, + { + "epoch": 0.9027368421052632, + "grad_norm": 114.13752241676485, + "learning_rate": 5.388653377993324e-09, + "logits/chosen": -6.097024917602539, + "logits/rejected": -8.490983963012695, + "logps/chosen": -1212.191162109375, + "logps/rejected": -2282.863037109375, + "loss": 0.1083, + "nll_loss": 5.277416229248047, + "rewards/accuracies": 0.875, + "rewards/chosen": -18.971294403076172, + "rewards/margins": 20.884796142578125, + "rewards/rejected": -39.8560905456543, + "step": 536 + }, + { + "epoch": 0.9061052631578947, + "grad_norm": 280.6301781259457, + "learning_rate": 5.020277870389311e-09, + "logits/chosen": -7.719882965087891, + "logits/rejected": -8.041402816772461, + "logps/chosen": -2189.172119140625, + "logps/rejected": -2295.2314453125, + "loss": 0.2457, + "nll_loss": 5.236344337463379, + "rewards/accuracies": 0.875, + "rewards/chosen": -36.655120849609375, + "rewards/margins": 2.5539703369140625, + "rewards/rejected": -39.20909118652344, + "step": 538 + }, + { + "epoch": 0.9094736842105263, + "grad_norm": 79.48759546011723, + "learning_rate": 4.664619405395265e-09, + "logits/chosen": -6.635951995849609, + "logits/rejected": -8.384746551513672, + "logps/chosen": -1718.5721435546875, + "logps/rejected": -2246.451904296875, + "loss": 0.1078, + "nll_loss": 5.581665992736816, + "rewards/accuracies": 1.0, + "rewards/chosen": -27.542470932006836, + "rewards/margins": 12.282429695129395, + "rewards/rejected": -39.82490158081055, + "step": 540 + }, + { + "epoch": 0.9128421052631579, + "grad_norm": 229.40394912686224, + "learning_rate": 4.321725602900472e-09, + "logits/chosen": -7.116404056549072, + "logits/rejected": -8.396138191223145, + "logps/chosen": -1757.71484375, + "logps/rejected": -2282.0869140625, + "loss": 0.1307, + "nll_loss": 4.875128269195557, + "rewards/accuracies": 0.875, + "rewards/chosen": -27.32893180847168, + "rewards/margins": 12.180450439453125, + "rewards/rejected": -39.50938415527344, + "step": 542 + }, + { + "epoch": 0.9162105263157895, + "grad_norm": 289.16292174568304, + "learning_rate": 3.991642373705695e-09, + "logits/chosen": -6.898987770080566, + "logits/rejected": -8.000998497009277, + "logps/chosen": -1806.2984619140625, + "logps/rejected": -2228.53076171875, + "loss": 0.1709, + "nll_loss": 5.912762641906738, + "rewards/accuracies": 0.875, + "rewards/chosen": -30.397703170776367, + "rewards/margins": 9.081558227539062, + "rewards/rejected": -39.4792594909668, + "step": 544 + }, + { + "epoch": 0.919578947368421, + "grad_norm": 239.60349733987607, + "learning_rate": 3.6744139133759956e-09, + "logits/chosen": -7.233107089996338, + "logits/rejected": -8.430794715881348, + "logps/chosen": -1778.7083740234375, + "logps/rejected": -2200.23876953125, + "loss": 0.171, + "nll_loss": 5.608141899108887, + "rewards/accuracies": 1.0, + "rewards/chosen": -28.902019500732422, + "rewards/margins": 10.116137504577637, + "rewards/rejected": -39.018157958984375, + "step": 546 + }, + { + "epoch": 0.9229473684210526, + "grad_norm": 203.79868091759081, + "learning_rate": 3.370082696323373e-09, + "logits/chosen": -7.195704460144043, + "logits/rejected": -8.166756629943848, + "logps/chosen": -1911.45361328125, + "logps/rejected": -2244.798095703125, + "loss": 0.151, + "nll_loss": 5.114628314971924, + "rewards/accuracies": 1.0, + "rewards/chosen": -30.203014373779297, + "rewards/margins": 8.864540100097656, + "rewards/rejected": -39.06755447387695, + "step": 548 + }, + { + "epoch": 0.9263157894736842, + "grad_norm": 120.3056248035602, + "learning_rate": 3.0786894701196775e-09, + "logits/chosen": -7.326858997344971, + "logits/rejected": -8.087577819824219, + "logps/chosen": -2035.1636962890625, + "logps/rejected": -2246.181884765625, + "loss": 0.0989, + "nll_loss": 5.451916694641113, + "rewards/accuracies": 1.0, + "rewards/chosen": -33.032073974609375, + "rewards/margins": 5.780745506286621, + "rewards/rejected": -38.81281661987305, + "step": 550 + }, + { + "epoch": 0.9296842105263158, + "grad_norm": 275.770327333272, + "learning_rate": 2.800273250040952e-09, + "logits/chosen": -7.597753524780273, + "logits/rejected": -7.824631690979004, + "logps/chosen": -2004.7808837890625, + "logps/rejected": -2244.8212890625, + "loss": 0.1247, + "nll_loss": 4.721861839294434, + "rewards/accuracies": 1.0, + "rewards/chosen": -31.433671951293945, + "rewards/margins": 7.14993143081665, + "rewards/rejected": -38.58360290527344, + "step": 552 + }, + { + "epoch": 0.9330526315789474, + "grad_norm": 422.0746379492613, + "learning_rate": 2.5348713138434565e-09, + "logits/chosen": -7.181792259216309, + "logits/rejected": -8.049543380737305, + "logps/chosen": -1707.939208984375, + "logps/rejected": -2208.77734375, + "loss": 0.2438, + "nll_loss": 5.61822509765625, + "rewards/accuracies": 1.0, + "rewards/chosen": -27.62373161315918, + "rewards/margins": 11.296683311462402, + "rewards/rejected": -38.92041015625, + "step": 554 + }, + { + "epoch": 0.9364210526315789, + "grad_norm": 37.31015421332207, + "learning_rate": 2.28251919677257e-09, + "logits/chosen": -6.2887396812438965, + "logits/rejected": -8.343204498291016, + "logps/chosen": -1522.6220703125, + "logps/rejected": -2214.747802734375, + "loss": 0.024, + "nll_loss": 5.970060348510742, + "rewards/accuracies": 1.0, + "rewards/chosen": -23.73328971862793, + "rewards/margins": 15.055632591247559, + "rewards/rejected": -38.78892135620117, + "step": 556 + }, + { + "epoch": 0.9397894736842105, + "grad_norm": 310.37839808852675, + "learning_rate": 2.043250686804865e-09, + "logits/chosen": -7.500852108001709, + "logits/rejected": -7.99564266204834, + "logps/chosen": -1894.286376953125, + "logps/rejected": -2235.044921875, + "loss": 0.2862, + "nll_loss": 5.685905933380127, + "rewards/accuracies": 0.875, + "rewards/chosen": -31.6041202545166, + "rewards/margins": 7.6095404624938965, + "rewards/rejected": -39.21365737915039, + "step": 558 + }, + { + "epoch": 0.9431578947368421, + "grad_norm": 294.5349485842045, + "learning_rate": 1.817097820124147e-09, + "logits/chosen": -6.809659957885742, + "logits/rejected": -8.295966148376465, + "logps/chosen": -1886.675537109375, + "logps/rejected": -2271.10986328125, + "loss": 0.2189, + "nll_loss": 5.538888931274414, + "rewards/accuracies": 0.875, + "rewards/chosen": -30.958091735839844, + "rewards/margins": 7.502249240875244, + "rewards/rejected": -38.4603385925293, + "step": 560 + }, + { + "epoch": 0.9465263157894737, + "grad_norm": 390.96399290727527, + "learning_rate": 1.604090876832087e-09, + "logits/chosen": -7.394793510437012, + "logits/rejected": -7.84267520904541, + "logps/chosen": -1881.2449951171875, + "logps/rejected": -2232.389404296875, + "loss": 0.2396, + "nll_loss": 5.126293659210205, + "rewards/accuracies": 0.875, + "rewards/chosen": -31.569351196289062, + "rewards/margins": 7.6439924240112305, + "rewards/rejected": -39.213340759277344, + "step": 562 + }, + { + "epoch": 0.9498947368421052, + "grad_norm": 310.3158547149984, + "learning_rate": 1.4042583768939299e-09, + "logits/chosen": -6.972746849060059, + "logits/rejected": -8.371156692504883, + "logps/chosen": -1714.3399658203125, + "logps/rejected": -2206.8310546875, + "loss": 0.1199, + "nll_loss": 5.605197906494141, + "rewards/accuracies": 0.875, + "rewards/chosen": -27.692014694213867, + "rewards/margins": 11.318819046020508, + "rewards/rejected": -39.010833740234375, + "step": 564 + }, + { + "epoch": 0.9532631578947368, + "grad_norm": 184.23275233044205, + "learning_rate": 1.2176270763198826e-09, + "logits/chosen": -7.448263168334961, + "logits/rejected": -8.297872543334961, + "logps/chosen": -1999.669189453125, + "logps/rejected": -2279.085693359375, + "loss": 0.1391, + "nll_loss": 5.474849224090576, + "rewards/accuracies": 1.0, + "rewards/chosen": -33.233360290527344, + "rewards/margins": 6.506428241729736, + "rewards/rejected": -39.73978805541992, + "step": 566 + }, + { + "epoch": 0.9566315789473684, + "grad_norm": 212.45164891099338, + "learning_rate": 1.0442219635827587e-09, + "logits/chosen": -6.379326820373535, + "logits/rejected": -8.392101287841797, + "logps/chosen": -1411.836669921875, + "logps/rejected": -2236.45556640625, + "loss": 0.1531, + "nll_loss": 5.742588520050049, + "rewards/accuracies": 1.0, + "rewards/chosen": -22.78182601928711, + "rewards/margins": 15.909248352050781, + "rewards/rejected": -38.69107437133789, + "step": 568 + }, + { + "epoch": 0.96, + "grad_norm": 176.85132902561122, + "learning_rate": 8.840662562721313e-10, + "logits/chosen": -7.5018157958984375, + "logits/rejected": -8.164826393127441, + "logps/chosen": -2023.5302734375, + "logps/rejected": -2211.3251953125, + "loss": 0.1309, + "nll_loss": 5.339632034301758, + "rewards/accuracies": 1.0, + "rewards/chosen": -33.29636764526367, + "rewards/margins": 5.157810211181641, + "rewards/rejected": -38.45417404174805, + "step": 570 + }, + { + "epoch": 0.9633684210526315, + "grad_norm": 175.63839472941274, + "learning_rate": 7.371813979857311e-10, + "logits/chosen": -7.2445173263549805, + "logits/rejected": -8.058394432067871, + "logps/chosen": -1971.2177734375, + "logps/rejected": -2261.788818359375, + "loss": 0.1188, + "nll_loss": 4.997542381286621, + "rewards/accuracies": 0.875, + "rewards/chosen": -31.539989471435547, + "rewards/margins": 7.252119064331055, + "rewards/rejected": -38.792110443115234, + "step": 572 + }, + { + "epoch": 0.9667368421052631, + "grad_norm": 190.37162464177797, + "learning_rate": 6.035870554582989e-10, + "logits/chosen": -6.96453332901001, + "logits/rejected": -8.222492218017578, + "logps/chosen": -1799.5657958984375, + "logps/rejected": -2261.03857421875, + "loss": 0.1363, + "nll_loss": 5.406635284423828, + "rewards/accuracies": 0.875, + "rewards/chosen": -29.174453735351562, + "rewards/margins": 9.996255874633789, + "rewards/rejected": -39.170711517333984, + "step": 574 + }, + { + "epoch": 0.9701052631578947, + "grad_norm": 179.27962403393755, + "learning_rate": 4.833011159284028e-10, + "logits/chosen": -7.209228515625, + "logits/rejected": -7.93266487121582, + "logps/chosen": -2057.040771484375, + "logps/rejected": -2237.497314453125, + "loss": 0.0949, + "nll_loss": 5.6815595626831055, + "rewards/accuracies": 0.875, + "rewards/chosen": -33.02680969238281, + "rewards/margins": 6.363800048828125, + "rewards/rejected": -39.3906135559082, + "step": 576 + }, + { + "epoch": 0.9734736842105263, + "grad_norm": 182.8402649855879, + "learning_rate": 3.763396847433875e-10, + "logits/chosen": -7.02055549621582, + "logits/rejected": -8.420388221740723, + "logps/chosen": -1753.1495361328125, + "logps/rejected": -2262.48388671875, + "loss": 0.2172, + "nll_loss": 5.674689292907715, + "rewards/accuracies": 1.0, + "rewards/chosen": -28.21030044555664, + "rewards/margins": 11.214584350585938, + "rewards/rejected": -39.42488479614258, + "step": 578 + }, + { + "epoch": 0.9768421052631578, + "grad_norm": 59.175625376263454, + "learning_rate": 2.8271708320309893e-10, + "logits/chosen": -7.082157135009766, + "logits/rejected": -7.969208240509033, + "logps/chosen": -1872.389404296875, + "logps/rejected": -2212.38916015625, + "loss": 0.0631, + "nll_loss": 5.4750566482543945, + "rewards/accuracies": 1.0, + "rewards/chosen": -30.0280704498291, + "rewards/margins": 8.51849365234375, + "rewards/rejected": -38.54656219482422, + "step": 580 + }, + { + "epoch": 0.9802105263157894, + "grad_norm": 31.130838237276375, + "learning_rate": 2.0244584664229624e-10, + "logits/chosen": -6.935577392578125, + "logits/rejected": -8.142889022827148, + "logps/chosen": -1751.22998046875, + "logps/rejected": -2215.97900390625, + "loss": 0.1858, + "nll_loss": 5.484932899475098, + "rewards/accuracies": 1.0, + "rewards/chosen": -27.302139282226562, + "rewards/margins": 10.821523666381836, + "rewards/rejected": -38.123661041259766, + "step": 582 + }, + { + "epoch": 0.983578947368421, + "grad_norm": 300.9076244256699, + "learning_rate": 1.3553672275230522e-10, + "logits/chosen": -6.667942047119141, + "logits/rejected": -8.240449905395508, + "logps/chosen": -1669.30908203125, + "logps/rejected": -2250.7109375, + "loss": 0.2112, + "nll_loss": 5.46246337890625, + "rewards/accuracies": 0.875, + "rewards/chosen": -26.676097869873047, + "rewards/margins": 11.850287437438965, + "rewards/rejected": -38.526390075683594, + "step": 584 + }, + { + "epoch": 0.9869473684210527, + "grad_norm": 205.37779593349424, + "learning_rate": 8.199867014198059e-11, + "logits/chosen": -7.167195796966553, + "logits/rejected": -8.354471206665039, + "logps/chosen": -1912.6531982421875, + "logps/rejected": -2276.97509765625, + "loss": 0.1158, + "nll_loss": 5.5597991943359375, + "rewards/accuracies": 1.0, + "rewards/chosen": -30.361347198486328, + "rewards/margins": 7.688362121582031, + "rewards/rejected": -38.049713134765625, + "step": 586 + }, + { + "epoch": 0.9903157894736843, + "grad_norm": 276.43754945737743, + "learning_rate": 4.1838857138221105e-11, + "logits/chosen": -6.897299766540527, + "logits/rejected": -8.224220275878906, + "logps/chosen": -1726.572265625, + "logps/rejected": -2215.0009765625, + "loss": 0.3376, + "nll_loss": 5.524778842926025, + "rewards/accuracies": 0.875, + "rewards/chosen": -27.695968627929688, + "rewards/margins": 10.981616973876953, + "rewards/rejected": -38.677589416503906, + "step": 588 + }, + { + "epoch": 0.9936842105263158, + "grad_norm": 101.87640432314295, + "learning_rate": 1.506266082615948e-11, + "logits/chosen": -6.690047264099121, + "logits/rejected": -8.358760833740234, + "logps/chosen": -1721.9786376953125, + "logps/rejected": -2250.2998046875, + "loss": 0.1519, + "nll_loss": 5.525182247161865, + "rewards/accuracies": 0.875, + "rewards/chosen": -28.625076293945312, + "rewards/margins": 10.398566246032715, + "rewards/rejected": -39.02363967895508, + "step": 590 + }, + { + "epoch": 0.9970526315789474, + "grad_norm": 109.18438747710223, + "learning_rate": 1.6736663292604703e-12, + "logits/chosen": -7.776650428771973, + "logits/rejected": -8.136452674865723, + "logps/chosen": -2102.580810546875, + "logps/rejected": -2248.21337890625, + "loss": 0.1902, + "nll_loss": 5.207364559173584, + "rewards/accuracies": 0.875, + "rewards/chosen": -34.941654205322266, + "rewards/margins": 4.452713489532471, + "rewards/rejected": -39.39436721801758, + "step": 592 + } + ], + "logging_steps": 2, + "max_steps": 593, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}