|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9990762978015888, |
|
"eval_steps": 400, |
|
"global_step": 507, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001970564689943962, |
|
"grad_norm": 3.444017553990766, |
|
"learning_rate": 9.803921568627451e-09, |
|
"logits/chosen": -0.23276051878929138, |
|
"logits/rejected": -0.43208426237106323, |
|
"logps/chosen": -95.95150756835938, |
|
"logps/rejected": -103.43749237060547, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00985282344971981, |
|
"grad_norm": 3.522148904992214, |
|
"learning_rate": 4.901960784313725e-08, |
|
"logits/chosen": -0.07696018368005753, |
|
"logits/rejected": -0.3334544003009796, |
|
"logps/chosen": -106.13592529296875, |
|
"logps/rejected": -98.82791137695312, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.3828125, |
|
"rewards/chosen": -0.000650706235319376, |
|
"rewards/margins": 0.00010106083936989307, |
|
"rewards/rejected": -0.00075176713289693, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01970564689943962, |
|
"grad_norm": 3.219795158164747, |
|
"learning_rate": 9.80392156862745e-08, |
|
"logits/chosen": -0.06049077585339546, |
|
"logits/rejected": -0.260353147983551, |
|
"logps/chosen": -98.03811645507812, |
|
"logps/rejected": -97.61465454101562, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0005443535046651959, |
|
"rewards/margins": -0.0005626807105727494, |
|
"rewards/rejected": 1.8327215002500452e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02955847034915943, |
|
"grad_norm": 3.446656700249776, |
|
"learning_rate": 1.4705882352941175e-07, |
|
"logits/chosen": -0.12729588150978088, |
|
"logits/rejected": -0.295417845249176, |
|
"logps/chosen": -99.15818786621094, |
|
"logps/rejected": -97.37001037597656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -7.940361683722585e-05, |
|
"rewards/margins": -0.0002843155525624752, |
|
"rewards/rejected": 0.00020491195027716458, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03941129379887924, |
|
"grad_norm": 3.2961842224501425, |
|
"learning_rate": 1.96078431372549e-07, |
|
"logits/chosen": -0.11793007701635361, |
|
"logits/rejected": -0.29867538809776306, |
|
"logps/chosen": -103.20733642578125, |
|
"logps/rejected": -97.68354797363281, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0002625386696308851, |
|
"rewards/margins": 0.0006132128764875233, |
|
"rewards/rejected": -0.0003506741486489773, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.049264117248599054, |
|
"grad_norm": 3.486650848831331, |
|
"learning_rate": 2.4509803921568627e-07, |
|
"logits/chosen": -0.17689576745033264, |
|
"logits/rejected": -0.34940794110298157, |
|
"logps/chosen": -106.73783874511719, |
|
"logps/rejected": -104.61811828613281, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.00013853741984348744, |
|
"rewards/margins": 0.001048876903951168, |
|
"rewards/rejected": -0.0009103395859710872, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05911694069831886, |
|
"grad_norm": 3.326203197649224, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": -0.07684741169214249, |
|
"logits/rejected": -0.3008119761943817, |
|
"logps/chosen": -102.79545593261719, |
|
"logps/rejected": -101.55641174316406, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0009806936141103506, |
|
"rewards/margins": 0.003102914895862341, |
|
"rewards/rejected": -0.0021222210489213467, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06896976414803867, |
|
"grad_norm": 3.3448990755681747, |
|
"learning_rate": 3.431372549019608e-07, |
|
"logits/chosen": -0.05913068726658821, |
|
"logits/rejected": -0.357162743806839, |
|
"logps/chosen": -105.96171569824219, |
|
"logps/rejected": -97.8105697631836, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 9.926412894856185e-05, |
|
"rewards/margins": 0.0057004219852387905, |
|
"rewards/rejected": -0.005601157899945974, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07882258759775848, |
|
"grad_norm": 3.507668256246831, |
|
"learning_rate": 3.92156862745098e-07, |
|
"logits/chosen": -0.08776526153087616, |
|
"logits/rejected": -0.24237962067127228, |
|
"logps/chosen": -90.3080825805664, |
|
"logps/rejected": -92.34758758544922, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.000393406196963042, |
|
"rewards/margins": 0.00874106865376234, |
|
"rewards/rejected": -0.009134475141763687, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0886754110474783, |
|
"grad_norm": 3.4196928203304737, |
|
"learning_rate": 4.4117647058823526e-07, |
|
"logits/chosen": -0.10117539018392563, |
|
"logits/rejected": -0.3224117159843445, |
|
"logps/chosen": -109.0184326171875, |
|
"logps/rejected": -108.93272399902344, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.003931170795112848, |
|
"rewards/margins": 0.021240899339318275, |
|
"rewards/rejected": -0.025172073394060135, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09852823449719811, |
|
"grad_norm": 3.5401415663183173, |
|
"learning_rate": 4.901960784313725e-07, |
|
"logits/chosen": -0.15071377158164978, |
|
"logits/rejected": -0.3232310116291046, |
|
"logps/chosen": -104.10140228271484, |
|
"logps/rejected": -111.49967193603516, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.015169525519013405, |
|
"rewards/margins": 0.028989236801862717, |
|
"rewards/rejected": -0.04415876418352127, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10838105794691791, |
|
"grad_norm": 3.5990117325571944, |
|
"learning_rate": 4.999050767562379e-07, |
|
"logits/chosen": -0.08029767870903015, |
|
"logits/rejected": -0.28052276372909546, |
|
"logps/chosen": -99.97483825683594, |
|
"logps/rejected": -106.4577407836914, |
|
"loss": 0.6768, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.052169084548950195, |
|
"rewards/margins": 0.041819095611572266, |
|
"rewards/rejected": -0.09398818016052246, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.11823388139663772, |
|
"grad_norm": 3.764415157715389, |
|
"learning_rate": 4.99519574616467e-07, |
|
"logits/chosen": -0.13724075257778168, |
|
"logits/rejected": -0.307682603597641, |
|
"logps/chosen": -112.85994720458984, |
|
"logps/rejected": -114.5948715209961, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.08050791174173355, |
|
"rewards/margins": 0.06749774515628815, |
|
"rewards/rejected": -0.1480056643486023, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12808670484635754, |
|
"grad_norm": 4.280188058589697, |
|
"learning_rate": 4.988380179235842e-07, |
|
"logits/chosen": -0.04185126721858978, |
|
"logits/rejected": -0.2287481129169464, |
|
"logps/chosen": -116.09354400634766, |
|
"logps/rejected": -124.45157623291016, |
|
"loss": 0.6566, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.12864899635314941, |
|
"rewards/margins": 0.07999014109373093, |
|
"rewards/rejected": -0.20863911509513855, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.13793952829607734, |
|
"grad_norm": 4.477584197323834, |
|
"learning_rate": 4.978612153434526e-07, |
|
"logits/chosen": -0.06294523924589157, |
|
"logits/rejected": -0.2265748679637909, |
|
"logps/chosen": -115.08953857421875, |
|
"logps/rejected": -129.55136108398438, |
|
"loss": 0.6387, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2160721719264984, |
|
"rewards/margins": 0.10703370720148087, |
|
"rewards/rejected": -0.32310590147972107, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14779235174579716, |
|
"grad_norm": 14.949806540573308, |
|
"learning_rate": 4.965903258506806e-07, |
|
"logits/chosen": -0.07772421091794968, |
|
"logits/rejected": -0.21028895676136017, |
|
"logps/chosen": -133.30584716796875, |
|
"logps/rejected": -150.30581665039062, |
|
"loss": 0.6349, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.30895400047302246, |
|
"rewards/margins": 0.1755586862564087, |
|
"rewards/rejected": -0.48451265692710876, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.15764517519551696, |
|
"grad_norm": 4.905767972397329, |
|
"learning_rate": 4.950268573535011e-07, |
|
"logits/chosen": -0.04325466603040695, |
|
"logits/rejected": -0.2531152367591858, |
|
"logps/chosen": -144.97561645507812, |
|
"logps/rejected": -158.16122436523438, |
|
"loss": 0.6042, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.4763612151145935, |
|
"rewards/margins": 0.16494214534759521, |
|
"rewards/rejected": -0.6413034200668335, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16749799864523676, |
|
"grad_norm": 5.265890759040485, |
|
"learning_rate": 4.93172664904641e-07, |
|
"logits/chosen": -0.11157502233982086, |
|
"logits/rejected": -0.26035866141319275, |
|
"logps/chosen": -159.24551391601562, |
|
"logps/rejected": -204.39495849609375, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.5800860524177551, |
|
"rewards/margins": 0.4450142979621887, |
|
"rewards/rejected": -1.0251003503799438, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1773508220949566, |
|
"grad_norm": 5.725906040480978, |
|
"learning_rate": 4.910299485003033e-07, |
|
"logits/chosen": -0.07629499584436417, |
|
"logits/rejected": -0.29128947854042053, |
|
"logps/chosen": -174.70559692382812, |
|
"logps/rejected": -207.3603515625, |
|
"loss": 0.5548, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.7107561230659485, |
|
"rewards/margins": 0.3972470760345459, |
|
"rewards/rejected": -1.10800302028656, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1872036455446764, |
|
"grad_norm": 6.403573729253865, |
|
"learning_rate": 4.886012504698769e-07, |
|
"logits/chosen": -0.1544032096862793, |
|
"logits/rejected": -0.37451326847076416, |
|
"logps/chosen": -184.72915649414062, |
|
"logps/rejected": -229.79541015625, |
|
"loss": 0.5305, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.8728511929512024, |
|
"rewards/margins": 0.4681348204612732, |
|
"rewards/rejected": -1.3409860134124756, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.19705646899439622, |
|
"grad_norm": 7.412632948891829, |
|
"learning_rate": 4.858894524594652e-07, |
|
"logits/chosen": -0.15470778942108154, |
|
"logits/rejected": -0.4357427656650543, |
|
"logps/chosen": -234.9979705810547, |
|
"logps/rejected": -306.1597595214844, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.2750121355056763, |
|
"rewards/margins": 0.8118740916252136, |
|
"rewards/rejected": -2.086885929107666, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20690929244411602, |
|
"grad_norm": 8.591568819157517, |
|
"learning_rate": 4.828977720128198e-07, |
|
"logits/chosen": -0.13351579010486603, |
|
"logits/rejected": -0.47733497619628906, |
|
"logps/chosen": -268.23260498046875, |
|
"logps/rejected": -370.4355163574219, |
|
"loss": 0.5061, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7161176204681396, |
|
"rewards/margins": 1.0792567729949951, |
|
"rewards/rejected": -2.7953743934631348, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.21676211589383582, |
|
"grad_norm": 9.939985555056147, |
|
"learning_rate": 4.796297587537285e-07, |
|
"logits/chosen": -0.2192394733428955, |
|
"logits/rejected": -0.42173558473587036, |
|
"logps/chosen": -294.60870361328125, |
|
"logps/rejected": -394.9258117675781, |
|
"loss": 0.4457, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9103400707244873, |
|
"rewards/margins": 1.0444176197052002, |
|
"rewards/rejected": -2.9547572135925293, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.22661493934355564, |
|
"grad_norm": 9.525042212102928, |
|
"learning_rate": 4.760892901743944e-07, |
|
"logits/chosen": -0.20480632781982422, |
|
"logits/rejected": -0.46547192335128784, |
|
"logps/chosen": -295.8061218261719, |
|
"logps/rejected": -422.6036071777344, |
|
"loss": 0.4309, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.9569038152694702, |
|
"rewards/margins": 1.2766072750091553, |
|
"rewards/rejected": -3.233510971069336, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.23646776279327544, |
|
"grad_norm": 12.501263690005059, |
|
"learning_rate": 4.7228056703479626e-07, |
|
"logits/chosen": -0.21756932139396667, |
|
"logits/rejected": -0.45747238397598267, |
|
"logps/chosen": -335.4225158691406, |
|
"logps/rejected": -476.47637939453125, |
|
"loss": 0.4602, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.304030179977417, |
|
"rewards/margins": 1.4334566593170166, |
|
"rewards/rejected": -3.7374866008758545, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.24632058624299527, |
|
"grad_norm": 10.295571989073236, |
|
"learning_rate": 4.6820810837849535e-07, |
|
"logits/chosen": -0.24834315478801727, |
|
"logits/rejected": -0.47704511880874634, |
|
"logps/chosen": -360.4291687011719, |
|
"logps/rejected": -489.4334411621094, |
|
"loss": 0.408, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.5335352420806885, |
|
"rewards/margins": 1.3203860521316528, |
|
"rewards/rejected": -3.8539211750030518, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.25617340969271507, |
|
"grad_norm": 10.797289668730418, |
|
"learning_rate": 4.63876746170797e-07, |
|
"logits/chosen": -0.2283620834350586, |
|
"logits/rejected": -0.45370951294898987, |
|
"logps/chosen": -362.5042724609375, |
|
"logps/rejected": -505.86968994140625, |
|
"loss": 0.4348, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.5926971435546875, |
|
"rewards/margins": 1.4749071598052979, |
|
"rewards/rejected": -4.067604064941406, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2660262331424349, |
|
"grad_norm": 12.27688423712984, |
|
"learning_rate": 4.592916195656321e-07, |
|
"logits/chosen": -0.20323459804058075, |
|
"logits/rejected": -0.4668886065483093, |
|
"logps/chosen": -317.35345458984375, |
|
"logps/rejected": -467.3150329589844, |
|
"loss": 0.4105, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.133204460144043, |
|
"rewards/margins": 1.5337626934051514, |
|
"rewards/rejected": -3.6669669151306152, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.27587905659215467, |
|
"grad_norm": 15.818054338660462, |
|
"learning_rate": 4.544581688079602e-07, |
|
"logits/chosen": -0.18876026570796967, |
|
"logits/rejected": -0.47572746872901917, |
|
"logps/chosen": -343.0673828125, |
|
"logps/rejected": -514.2898559570312, |
|
"loss": 0.3999, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.457926034927368, |
|
"rewards/margins": 1.7443277835845947, |
|
"rewards/rejected": -4.2022528648376465, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2857318800418745, |
|
"grad_norm": 10.354898946060759, |
|
"learning_rate": 4.493821287789272e-07, |
|
"logits/chosen": -0.2576290965080261, |
|
"logits/rejected": -0.4858337938785553, |
|
"logps/chosen": -385.8844299316406, |
|
"logps/rejected": -581.2125244140625, |
|
"loss": 0.3801, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.86683988571167, |
|
"rewards/margins": 1.994633436203003, |
|
"rewards/rejected": -4.861473083496094, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2955847034915943, |
|
"grad_norm": 14.110695862987818, |
|
"learning_rate": 4.4406952219143934e-07, |
|
"logits/chosen": -0.2310657501220703, |
|
"logits/rejected": -0.44463711977005005, |
|
"logps/chosen": -385.70623779296875, |
|
"logps/rejected": -559.1126708984375, |
|
"loss": 0.3314, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.8108325004577637, |
|
"rewards/margins": 1.7815577983856201, |
|
"rewards/rejected": -4.592390060424805, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3054375269413141, |
|
"grad_norm": 15.545527108248088, |
|
"learning_rate": 4.38526652444224e-07, |
|
"logits/chosen": -0.26962828636169434, |
|
"logits/rejected": -0.41183385252952576, |
|
"logps/chosen": -400.4664611816406, |
|
"logps/rejected": -608.4261474609375, |
|
"loss": 0.3854, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.9346907138824463, |
|
"rewards/margins": 2.0727694034576416, |
|
"rewards/rejected": -5.007460594177246, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3152903503910339, |
|
"grad_norm": 14.215413027429468, |
|
"learning_rate": 4.3276009614285824e-07, |
|
"logits/chosen": -0.22768807411193848, |
|
"logits/rejected": -0.4115552306175232, |
|
"logps/chosen": -361.54876708984375, |
|
"logps/rejected": -561.5390625, |
|
"loss": 0.3959, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.6572659015655518, |
|
"rewards/margins": 1.9841495752334595, |
|
"rewards/rejected": -4.641415596008301, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.32514317384075375, |
|
"grad_norm": 15.569517834142987, |
|
"learning_rate": 4.2677669529663686e-07, |
|
"logits/chosen": -0.20264258980751038, |
|
"logits/rejected": -0.40150555968284607, |
|
"logps/chosen": -376.89971923828125, |
|
"logps/rejected": -603.7943115234375, |
|
"loss": 0.3664, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -2.7810330390930176, |
|
"rewards/margins": 2.239793300628662, |
|
"rewards/rejected": -5.02082633972168, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3349959972904735, |
|
"grad_norm": 12.496148003358819, |
|
"learning_rate": 4.2058354920054043e-07, |
|
"logits/chosen": -0.28801003098487854, |
|
"logits/rejected": -0.4120853543281555, |
|
"logps/chosen": -348.6145324707031, |
|
"logps/rejected": -565.19482421875, |
|
"loss": 0.3559, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.543839454650879, |
|
"rewards/margins": 2.167828321456909, |
|
"rewards/rejected": -4.711668014526367, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.34484882074019335, |
|
"grad_norm": 14.212926345332207, |
|
"learning_rate": 4.141880060119336e-07, |
|
"logits/chosen": -0.2191300094127655, |
|
"logits/rejected": -0.4566856324672699, |
|
"logps/chosen": -396.66876220703125, |
|
"logps/rejected": -567.9503784179688, |
|
"loss": 0.3842, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.9396305084228516, |
|
"rewards/margins": 1.7750612497329712, |
|
"rewards/rejected": -4.714691162109375, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3547016441899132, |
|
"grad_norm": 12.696873162753882, |
|
"learning_rate": 4.0759765403198877e-07, |
|
"logits/chosen": -0.28399690985679626, |
|
"logits/rejected": -0.4555039405822754, |
|
"logps/chosen": -343.26263427734375, |
|
"logps/rejected": -577.4595336914062, |
|
"loss": 0.3109, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -2.4329833984375, |
|
"rewards/margins": 2.351548671722412, |
|
"rewards/rejected": -4.784532070159912, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.364554467639633, |
|
"grad_norm": 15.682818507341654, |
|
"learning_rate": 4.008203127021797e-07, |
|
"logits/chosen": -0.30414339900016785, |
|
"logits/rejected": -0.49127548933029175, |
|
"logps/chosen": -401.3171081542969, |
|
"logps/rejected": -638.1029663085938, |
|
"loss": 0.3526, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.013474702835083, |
|
"rewards/margins": 2.391366720199585, |
|
"rewards/rejected": -5.404841899871826, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.3744072910893528, |
|
"grad_norm": 15.137404252638685, |
|
"learning_rate": 3.9386402332652754e-07, |
|
"logits/chosen": -0.3183102011680603, |
|
"logits/rejected": -0.478738397359848, |
|
"logps/chosen": -362.82513427734375, |
|
"logps/rejected": -609.9462890625, |
|
"loss": 0.3586, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.608335018157959, |
|
"rewards/margins": 2.479790687561035, |
|
"rewards/rejected": -5.088125705718994, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3842601145390726, |
|
"grad_norm": 14.191789849116672, |
|
"learning_rate": 3.867370395306068e-07, |
|
"logits/chosen": -0.2816595137119293, |
|
"logits/rejected": -0.5143749117851257, |
|
"logps/chosen": -354.79388427734375, |
|
"logps/rejected": -573.3958740234375, |
|
"loss": 0.3561, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -2.5655016899108887, |
|
"rewards/margins": 2.193372964859009, |
|
"rewards/rejected": -4.758874416351318, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.39411293798879243, |
|
"grad_norm": 16.13473953627841, |
|
"learning_rate": 3.794478174686328e-07, |
|
"logits/chosen": -0.30765408277511597, |
|
"logits/rejected": -0.5184319615364075, |
|
"logps/chosen": -380.7965087890625, |
|
"logps/rejected": -623.9815063476562, |
|
"loss": 0.3343, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -2.7764999866485596, |
|
"rewards/margins": 2.448732614517212, |
|
"rewards/rejected": -5.2252326011657715, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4039657614385122, |
|
"grad_norm": 13.799406477566153, |
|
"learning_rate": 3.720050057902495e-07, |
|
"logits/chosen": -0.28575196862220764, |
|
"logits/rejected": -0.5561090707778931, |
|
"logps/chosen": -412.14154052734375, |
|
"logps/rejected": -629.4691772460938, |
|
"loss": 0.3917, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.0631420612335205, |
|
"rewards/margins": 2.176858425140381, |
|
"rewards/rejected": -5.2400007247924805, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.41381858488823203, |
|
"grad_norm": 13.376596964391368, |
|
"learning_rate": 3.644174353789204e-07, |
|
"logits/chosen": -0.3736252188682556, |
|
"logits/rejected": -0.514769434928894, |
|
"logps/chosen": -383.2430725097656, |
|
"logps/rejected": -640.6544189453125, |
|
"loss": 0.334, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.76784086227417, |
|
"rewards/margins": 2.553081512451172, |
|
"rewards/rejected": -5.320921897888184, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.42367140833795186, |
|
"grad_norm": 11.451472713393272, |
|
"learning_rate": 3.566941088741009e-07, |
|
"logits/chosen": -0.3407908082008362, |
|
"logits/rejected": -0.6174032688140869, |
|
"logps/chosen": -347.8931884765625, |
|
"logps/rejected": -581.584228515625, |
|
"loss": 0.3268, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.4030208587646484, |
|
"rewards/margins": 2.3824656009674072, |
|
"rewards/rejected": -4.785486221313477, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.43352423178767163, |
|
"grad_norm": 17.46089866269522, |
|
"learning_rate": 3.488441899896217e-07, |
|
"logits/chosen": -0.3514137864112854, |
|
"logits/rejected": -0.49381551146507263, |
|
"logps/chosen": -360.62103271484375, |
|
"logps/rejected": -615.2965087890625, |
|
"loss": 0.3361, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -2.5678088665008545, |
|
"rewards/margins": 2.5628271102905273, |
|
"rewards/rejected": -5.130635738372803, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.44337705523739146, |
|
"grad_norm": 30.418216991487416, |
|
"learning_rate": 3.408769926409574e-07, |
|
"logits/chosen": -0.32667240500450134, |
|
"logits/rejected": -0.46130138635635376, |
|
"logps/chosen": -370.0388488769531, |
|
"logps/rejected": -607.0093994140625, |
|
"loss": 0.3309, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -2.7761330604553223, |
|
"rewards/margins": 2.300311803817749, |
|
"rewards/rejected": -5.076444149017334, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4532298786871113, |
|
"grad_norm": 22.496163988303977, |
|
"learning_rate": 3.3280196989428263e-07, |
|
"logits/chosen": -0.23927278816699982, |
|
"logits/rejected": -0.4683164060115814, |
|
"logps/chosen": -415.4649963378906, |
|
"logps/rejected": -715.1581420898438, |
|
"loss": 0.399, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.170175790786743, |
|
"rewards/margins": 3.04463529586792, |
|
"rewards/rejected": -6.214810848236084, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.46308270213683106, |
|
"grad_norm": 15.358280098952724, |
|
"learning_rate": 3.2462870275042367e-07, |
|
"logits/chosen": -0.2206590175628662, |
|
"logits/rejected": -0.4772756099700928, |
|
"logps/chosen": -437.8194274902344, |
|
"logps/rejected": -676.9967041015625, |
|
"loss": 0.3334, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.432298183441162, |
|
"rewards/margins": 2.3835928440093994, |
|
"rewards/rejected": -5.815890789031982, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.4729355255865509, |
|
"grad_norm": 14.636033595582514, |
|
"learning_rate": 3.1636688877701806e-07, |
|
"logits/chosen": -0.379282683134079, |
|
"logits/rejected": -0.5302340388298035, |
|
"logps/chosen": -425.94024658203125, |
|
"logps/rejected": -697.734130859375, |
|
"loss": 0.2963, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.2339348793029785, |
|
"rewards/margins": 2.747666835784912, |
|
"rewards/rejected": -5.981600761413574, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4827883490362707, |
|
"grad_norm": 15.394330260666242, |
|
"learning_rate": 3.080263306023669e-07, |
|
"logits/chosen": -0.2971007823944092, |
|
"logits/rejected": -0.4823763370513916, |
|
"logps/chosen": -433.9507751464844, |
|
"logps/rejected": -664.0755615234375, |
|
"loss": 0.2877, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.3310959339141846, |
|
"rewards/margins": 2.319425344467163, |
|
"rewards/rejected": -5.650521278381348, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.49264117248599054, |
|
"grad_norm": 14.169015562499252, |
|
"learning_rate": 2.996169242846328e-07, |
|
"logits/chosen": -0.3509058952331543, |
|
"logits/rejected": -0.5016965270042419, |
|
"logps/chosen": -401.8634948730469, |
|
"logps/rejected": -719.4425048828125, |
|
"loss": 0.2637, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.0134997367858887, |
|
"rewards/margins": 3.148432970046997, |
|
"rewards/rejected": -6.161932945251465, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5024939959357103, |
|
"grad_norm": 15.084142231117205, |
|
"learning_rate": 2.911486475701835e-07, |
|
"logits/chosen": -0.2690550684928894, |
|
"logits/rejected": -0.4977152943611145, |
|
"logps/chosen": -400.71649169921875, |
|
"logps/rejected": -628.1175537109375, |
|
"loss": 0.322, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.0442709922790527, |
|
"rewards/margins": 2.318587064743042, |
|
"rewards/rejected": -5.362858772277832, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5123468193854301, |
|
"grad_norm": 10.556248114155261, |
|
"learning_rate": 2.826315480550129e-07, |
|
"logits/chosen": -0.3703750967979431, |
|
"logits/rejected": -0.4834713041782379, |
|
"logps/chosen": -379.0237121582031, |
|
"logps/rejected": -712.1115112304688, |
|
"loss": 0.2737, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.82161283493042, |
|
"rewards/margins": 3.283346652984619, |
|
"rewards/rejected": -6.104959011077881, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.52219964283515, |
|
"grad_norm": 16.00385439318665, |
|
"learning_rate": 2.740757312632854e-07, |
|
"logits/chosen": -0.2965068221092224, |
|
"logits/rejected": -0.4728400707244873, |
|
"logps/chosen": -432.16650390625, |
|
"logps/rejected": -744.01806640625, |
|
"loss": 0.2898, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -3.3073620796203613, |
|
"rewards/margins": 3.125457286834717, |
|
"rewards/rejected": -6.432818412780762, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5320524662848698, |
|
"grad_norm": 15.37457746102107, |
|
"learning_rate": 2.654913486571487e-07, |
|
"logits/chosen": -0.26870328187942505, |
|
"logits/rejected": -0.5099757313728333, |
|
"logps/chosen": -461.7821350097656, |
|
"logps/rejected": -804.3317260742188, |
|
"loss": 0.281, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.477935791015625, |
|
"rewards/margins": 3.4908695220947266, |
|
"rewards/rejected": -6.968804836273193, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5419052897345896, |
|
"grad_norm": 17.39033230160854, |
|
"learning_rate": 2.5688858559204053e-07, |
|
"logits/chosen": -0.2892334461212158, |
|
"logits/rejected": -0.5267232656478882, |
|
"logps/chosen": -488.491943359375, |
|
"logps/rejected": -848.1380615234375, |
|
"loss": 0.2953, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.8831119537353516, |
|
"rewards/margins": 3.5666236877441406, |
|
"rewards/rejected": -7.44973611831665, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5517581131843093, |
|
"grad_norm": 19.079707368930343, |
|
"learning_rate": 2.4827764923178246e-07, |
|
"logits/chosen": -0.2866331934928894, |
|
"logits/rejected": -0.5004242062568665, |
|
"logps/chosen": -503.42572021484375, |
|
"logps/rejected": -888.0465698242188, |
|
"loss": 0.2978, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.999812602996826, |
|
"rewards/margins": 3.85322904586792, |
|
"rewards/rejected": -7.853041648864746, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5616109366340292, |
|
"grad_norm": 19.106895081330208, |
|
"learning_rate": 2.3966875643779667e-07, |
|
"logits/chosen": -0.3676909804344177, |
|
"logits/rejected": -0.5896965861320496, |
|
"logps/chosen": -404.20770263671875, |
|
"logps/rejected": -752.9081420898438, |
|
"loss": 0.288, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.025970935821533, |
|
"rewards/margins": 3.454861879348755, |
|
"rewards/rejected": -6.480832576751709, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.571463760083749, |
|
"grad_norm": 12.765348172490336, |
|
"learning_rate": 2.3107212164681774e-07, |
|
"logits/chosen": -0.3087966740131378, |
|
"logits/rejected": -0.5579283833503723, |
|
"logps/chosen": -426.75372314453125, |
|
"logps/rejected": -739.0400390625, |
|
"loss": 0.3286, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.3023364543914795, |
|
"rewards/margins": 3.1090188026428223, |
|
"rewards/rejected": -6.411355495452881, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5813165835334688, |
|
"grad_norm": 16.231816408507715, |
|
"learning_rate": 2.2249794475148019e-07, |
|
"logits/chosen": -0.4001474976539612, |
|
"logits/rejected": -0.5297967195510864, |
|
"logps/chosen": -417.8780822753906, |
|
"logps/rejected": -676.3338623046875, |
|
"loss": 0.3072, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.1984708309173584, |
|
"rewards/margins": 2.528751850128174, |
|
"rewards/rejected": -5.727222919464111, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.5911694069831886, |
|
"grad_norm": 17.338695196365542, |
|
"learning_rate": 2.1395639899816332e-07, |
|
"logits/chosen": -0.4291343688964844, |
|
"logits/rejected": -0.556428849697113, |
|
"logps/chosen": -390.78582763671875, |
|
"logps/rejected": -669.5917358398438, |
|
"loss": 0.3313, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -2.9017252922058105, |
|
"rewards/margins": 2.7559847831726074, |
|
"rewards/rejected": -5.657710075378418, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6010222304329085, |
|
"grad_norm": 12.126664974793542, |
|
"learning_rate": 2.0545761891645177e-07, |
|
"logits/chosen": -0.37195321917533875, |
|
"logits/rejected": -0.5651625394821167, |
|
"logps/chosen": -414.18927001953125, |
|
"logps/rejected": -704.6055297851562, |
|
"loss": 0.2784, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.175144910812378, |
|
"rewards/margins": 2.9191794395446777, |
|
"rewards/rejected": -6.094325065612793, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6108750538826282, |
|
"grad_norm": 22.304119797273902, |
|
"learning_rate": 1.9701168829453305e-07, |
|
"logits/chosen": -0.3536795973777771, |
|
"logits/rejected": -0.5591118931770325, |
|
"logps/chosen": -393.33905029296875, |
|
"logps/rejected": -676.2900390625, |
|
"loss": 0.303, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -2.915839672088623, |
|
"rewards/margins": 2.8338570594787598, |
|
"rewards/rejected": -5.749696731567383, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.620727877332348, |
|
"grad_norm": 14.597142317559607, |
|
"learning_rate": 1.886286282148002e-07, |
|
"logits/chosen": -0.36113765835762024, |
|
"logits/rejected": -0.5111299157142639, |
|
"logps/chosen": -416.2958984375, |
|
"logps/rejected": -689.5440673828125, |
|
"loss": 0.2944, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.1453917026519775, |
|
"rewards/margins": 2.7569806575775146, |
|
"rewards/rejected": -5.90237283706665, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6305807007820678, |
|
"grad_norm": 14.66404722177273, |
|
"learning_rate": 1.8031838516385422e-07, |
|
"logits/chosen": -0.29976287484169006, |
|
"logits/rejected": -0.5753315687179565, |
|
"logps/chosen": -426.15081787109375, |
|
"logps/rejected": -668.0872802734375, |
|
"loss": 0.3077, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.2156474590301514, |
|
"rewards/margins": 2.5099997520446777, |
|
"rewards/rejected": -5.725647926330566, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6404335242317877, |
|
"grad_norm": 24.997450008790477, |
|
"learning_rate": 1.7209081923101472e-07, |
|
"logits/chosen": -0.3522949814796448, |
|
"logits/rejected": -0.571318507194519, |
|
"logps/chosen": -428.62615966796875, |
|
"logps/rejected": -739.1427001953125, |
|
"loss": 0.2803, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.299776554107666, |
|
"rewards/margins": 3.1473708152770996, |
|
"rewards/rejected": -6.447146415710449, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6502863476815075, |
|
"grad_norm": 21.077844248854888, |
|
"learning_rate": 1.639556924093404e-07, |
|
"logits/chosen": -0.3864585757255554, |
|
"logits/rejected": -0.5924688577651978, |
|
"logps/chosen": -460.22955322265625, |
|
"logps/rejected": -780.5281372070312, |
|
"loss": 0.29, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -3.5728142261505127, |
|
"rewards/margins": 3.2330241203308105, |
|
"rewards/rejected": -6.805838584899902, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6601391711312273, |
|
"grad_norm": 13.281944316685474, |
|
"learning_rate": 1.5592265701304114e-07, |
|
"logits/chosen": -0.31550538539886475, |
|
"logits/rejected": -0.5574745535850525, |
|
"logps/chosen": -432.3729553222656, |
|
"logps/rejected": -738.777587890625, |
|
"loss": 0.2802, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.270681858062744, |
|
"rewards/margins": 3.0939764976501465, |
|
"rewards/rejected": -6.364658832550049, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.669991994580947, |
|
"grad_norm": 13.14308400069569, |
|
"learning_rate": 1.4800124422502334e-07, |
|
"logits/chosen": -0.35619470477104187, |
|
"logits/rejected": -0.5215466618537903, |
|
"logps/chosen": -402.02227783203125, |
|
"logps/rejected": -689.3594360351562, |
|
"loss": 0.2762, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.078434944152832, |
|
"rewards/margins": 2.8699941635131836, |
|
"rewards/rejected": -5.948429107666016, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6798448180306669, |
|
"grad_norm": 13.958916358154339, |
|
"learning_rate": 1.4020085278815743e-07, |
|
"logits/chosen": -0.348470538854599, |
|
"logits/rejected": -0.554540753364563, |
|
"logps/chosen": -393.90362548828125, |
|
"logps/rejected": -701.527099609375, |
|
"loss": 0.2845, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.926769733428955, |
|
"rewards/margins": 3.087486743927002, |
|
"rewards/rejected": -6.014256477355957, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.6896976414803867, |
|
"grad_norm": 17.268899996459307, |
|
"learning_rate": 1.3253073785368545e-07, |
|
"logits/chosen": -0.37151604890823364, |
|
"logits/rejected": -0.5311695337295532, |
|
"logps/chosen": -426.25567626953125, |
|
"logps/rejected": -754.6126708984375, |
|
"loss": 0.2906, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.2909317016601562, |
|
"rewards/margins": 3.2550930976867676, |
|
"rewards/rejected": -6.546025276184082, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6995504649301065, |
|
"grad_norm": 23.463103741079866, |
|
"learning_rate": 1.2500000000000005e-07, |
|
"logits/chosen": -0.29037579894065857, |
|
"logits/rejected": -0.5232888460159302, |
|
"logps/chosen": -434.14862060546875, |
|
"logps/rejected": -760.4425048828125, |
|
"loss": 0.3051, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.306781053543091, |
|
"rewards/margins": 3.264423370361328, |
|
"rewards/rejected": -6.571203708648682, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7094032883798264, |
|
"grad_norm": 15.524231108582358, |
|
"learning_rate": 1.1761757443482285e-07, |
|
"logits/chosen": -0.34106117486953735, |
|
"logits/rejected": -0.5115201473236084, |
|
"logps/chosen": -463.2628479003906, |
|
"logps/rejected": -801.8025512695312, |
|
"loss": 0.3022, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.6255767345428467, |
|
"rewards/margins": 3.3763046264648438, |
|
"rewards/rejected": -7.0018815994262695, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7192561118295462, |
|
"grad_norm": 13.880093445950616, |
|
"learning_rate": 1.1039222039359644e-07, |
|
"logits/chosen": -0.3453301787376404, |
|
"logits/rejected": -0.5254852175712585, |
|
"logps/chosen": -411.9518127441406, |
|
"logps/rejected": -712.6190795898438, |
|
"loss": 0.2746, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.0844180583953857, |
|
"rewards/margins": 3.044656991958618, |
|
"rewards/rejected": -6.129075527191162, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.729108935279266, |
|
"grad_norm": 14.175670082816483, |
|
"learning_rate": 1.0333251074666608e-07, |
|
"logits/chosen": -0.3751557469367981, |
|
"logits/rejected": -0.5939264297485352, |
|
"logps/chosen": -429.10809326171875, |
|
"logps/rejected": -725.9818115234375, |
|
"loss": 0.3023, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.204840898513794, |
|
"rewards/margins": 3.0460567474365234, |
|
"rewards/rejected": -6.2508978843688965, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7389617587289857, |
|
"grad_norm": 16.474329546472436, |
|
"learning_rate": 9.644682182758304e-08, |
|
"logits/chosen": -0.35652074217796326, |
|
"logits/rejected": -0.5406717658042908, |
|
"logps/chosen": -442.0631408691406, |
|
"logps/rejected": -735.47412109375, |
|
"loss": 0.2696, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.3726630210876465, |
|
"rewards/margins": 2.9257071018218994, |
|
"rewards/rejected": -6.298369407653809, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7488145821787056, |
|
"grad_norm": 18.213192833293316, |
|
"learning_rate": 8.974332349459992e-08, |
|
"logits/chosen": -0.3362307846546173, |
|
"logits/rejected": -0.5135927796363831, |
|
"logps/chosen": -418.939453125, |
|
"logps/rejected": -711.0570068359375, |
|
"loss": 0.2748, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.2188174724578857, |
|
"rewards/margins": 2.915469169616699, |
|
"rewards/rejected": -6.134286403656006, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7586674056284254, |
|
"grad_norm": 15.38124786698222, |
|
"learning_rate": 8.322996943714672e-08, |
|
"logits/chosen": -0.3672960698604584, |
|
"logits/rejected": -0.5296192765235901, |
|
"logps/chosen": -425.13427734375, |
|
"logps/rejected": -741.996337890625, |
|
"loss": 0.2826, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.2366461753845215, |
|
"rewards/margins": 3.166731119155884, |
|
"rewards/rejected": -6.403376579284668, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7685202290781452, |
|
"grad_norm": 20.521680628902402, |
|
"learning_rate": 7.691448773879256e-08, |
|
"logits/chosen": -0.38358789682388306, |
|
"logits/rejected": -0.5248023867607117, |
|
"logps/chosen": -465.2935485839844, |
|
"logps/rejected": -843.0035400390625, |
|
"loss": 0.3038, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -3.6291823387145996, |
|
"rewards/margins": 3.776301145553589, |
|
"rewards/rejected": -7.405484199523926, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.778373052527865, |
|
"grad_norm": 22.670752535686123, |
|
"learning_rate": 7.080437170788722e-08, |
|
"logits/chosen": -0.3909732699394226, |
|
"logits/rejected": -0.46815380454063416, |
|
"logps/chosen": -467.8148498535156, |
|
"logps/rejected": -813.1572265625, |
|
"loss": 0.2818, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.7561402320861816, |
|
"rewards/margins": 3.3577582836151123, |
|
"rewards/rejected": -7.113898277282715, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7882258759775849, |
|
"grad_norm": 21.305345425176412, |
|
"learning_rate": 6.490687098676332e-08, |
|
"logits/chosen": -0.40551847219467163, |
|
"logits/rejected": -0.5719416737556458, |
|
"logps/chosen": -435.22393798828125, |
|
"logps/rejected": -831.7904052734375, |
|
"loss": 0.2563, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.3291544914245605, |
|
"rewards/margins": 3.9090828895568848, |
|
"rewards/rejected": -7.2382378578186035, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7882258759775849, |
|
"eval_logits/chosen": -1.0068713426589966, |
|
"eval_logits/rejected": -0.773371696472168, |
|
"eval_logps/chosen": -503.0605773925781, |
|
"eval_logps/rejected": -703.8283081054688, |
|
"eval_loss": 0.7016588449478149, |
|
"eval_rewards/accuracies": 0.7020000219345093, |
|
"eval_rewards/chosen": -4.100825786590576, |
|
"eval_rewards/margins": 1.7457716464996338, |
|
"eval_rewards/rejected": -5.846597671508789, |
|
"eval_runtime": 224.226, |
|
"eval_samples_per_second": 8.915, |
|
"eval_steps_per_second": 1.115, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7980786994273046, |
|
"grad_norm": 16.817512778396765, |
|
"learning_rate": 5.9228982950048414e-08, |
|
"logits/chosen": -0.34627681970596313, |
|
"logits/rejected": -0.648546576499939, |
|
"logps/chosen": -444.9021911621094, |
|
"logps/rejected": -793.8383178710938, |
|
"loss": 0.3108, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.4342200756073, |
|
"rewards/margins": 3.5523598194122314, |
|
"rewards/rejected": -6.986579895019531, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8079315228770244, |
|
"grad_norm": 18.65876742845436, |
|
"learning_rate": 5.3777444402291345e-08, |
|
"logits/chosen": -0.3808293342590332, |
|
"logits/rejected": -0.5627844929695129, |
|
"logps/chosen": -447.5360412597656, |
|
"logps/rejected": -714.4208374023438, |
|
"loss": 0.3028, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.4805076122283936, |
|
"rewards/margins": 2.6875407695770264, |
|
"rewards/rejected": -6.168048858642578, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8177843463267442, |
|
"grad_norm": 21.614577011578998, |
|
"learning_rate": 4.855872358475546e-08, |
|
"logits/chosen": -0.44765299558639526, |
|
"logits/rejected": -0.5962406992912292, |
|
"logps/chosen": -454.58135986328125, |
|
"logps/rejected": -746.631103515625, |
|
"loss": 0.3112, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.5401859283447266, |
|
"rewards/margins": 2.886667251586914, |
|
"rewards/rejected": -6.426853179931641, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8276371697764641, |
|
"grad_norm": 18.508069249718975, |
|
"learning_rate": 4.357901250086107e-08, |
|
"logits/chosen": -0.3780784606933594, |
|
"logits/rejected": -0.6282440423965454, |
|
"logps/chosen": -464.607666015625, |
|
"logps/rejected": -732.405029296875, |
|
"loss": 0.2871, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.5666542053222656, |
|
"rewards/margins": 2.74709153175354, |
|
"rewards/rejected": -6.313745498657227, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8374899932261839, |
|
"grad_norm": 21.590957086592525, |
|
"learning_rate": 3.884421956938377e-08, |
|
"logits/chosen": -0.38022860884666443, |
|
"logits/rejected": -0.6045624017715454, |
|
"logps/chosen": -429.6410217285156, |
|
"logps/rejected": -730.5736083984375, |
|
"loss": 0.2923, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.25273060798645, |
|
"rewards/margins": 3.082298994064331, |
|
"rewards/rejected": -6.335029125213623, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.8473428166759037, |
|
"grad_norm": 15.197391870273057, |
|
"learning_rate": 3.435996261412591e-08, |
|
"logits/chosen": -0.4843681752681732, |
|
"logits/rejected": -0.5831128358840942, |
|
"logps/chosen": -393.3730773925781, |
|
"logps/rejected": -707.8683471679688, |
|
"loss": 0.2692, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.8446130752563477, |
|
"rewards/margins": 3.154205799102783, |
|
"rewards/rejected": -5.998819351196289, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8571956401256235, |
|
"grad_norm": 15.288363962234385, |
|
"learning_rate": 3.013156219837776e-08, |
|
"logits/chosen": -0.4089592397212982, |
|
"logits/rejected": -0.5905001759529114, |
|
"logps/chosen": -391.95831298828125, |
|
"logps/rejected": -707.7792358398438, |
|
"loss": 0.2841, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -2.9444174766540527, |
|
"rewards/margins": 3.1579596996307373, |
|
"rewards/rejected": -6.102376937866211, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.8670484635753433, |
|
"grad_norm": 17.671998119316797, |
|
"learning_rate": 2.6164035312078447e-08, |
|
"logits/chosen": -0.42121395468711853, |
|
"logits/rejected": -0.5983023047447205, |
|
"logps/chosen": -422.3511657714844, |
|
"logps/rejected": -786.9813232421875, |
|
"loss": 0.3052, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.2139759063720703, |
|
"rewards/margins": 3.617478847503662, |
|
"rewards/rejected": -6.831455230712891, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8769012870250631, |
|
"grad_norm": 15.463536800192488, |
|
"learning_rate": 2.2462089419165776e-08, |
|
"logits/chosen": -0.39272046089172363, |
|
"logits/rejected": -0.5592643618583679, |
|
"logps/chosen": -421.66619873046875, |
|
"logps/rejected": -675.9927368164062, |
|
"loss": 0.3122, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.1869752407073975, |
|
"rewards/margins": 2.581495761871338, |
|
"rewards/rejected": -5.7684712409973145, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.8867541104747829, |
|
"grad_norm": 17.36075063464836, |
|
"learning_rate": 1.9030116872178314e-08, |
|
"logits/chosen": -0.42874231934547424, |
|
"logits/rejected": -0.5679312944412231, |
|
"logps/chosen": -416.0025939941406, |
|
"logps/rejected": -734.6575317382812, |
|
"loss": 0.289, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.1724095344543457, |
|
"rewards/margins": 3.161487102508545, |
|
"rewards/rejected": -6.333896160125732, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8966069339245027, |
|
"grad_norm": 15.642265040018296, |
|
"learning_rate": 1.5872189700736337e-08, |
|
"logits/chosen": -0.4477892518043518, |
|
"logits/rejected": -0.579742431640625, |
|
"logps/chosen": -446.33349609375, |
|
"logps/rejected": -779.8458862304688, |
|
"loss": 0.2806, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.4629955291748047, |
|
"rewards/margins": 3.308091640472412, |
|
"rewards/rejected": -6.771086692810059, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9064597573742226, |
|
"grad_norm": 14.160026400581446, |
|
"learning_rate": 1.2992054780085692e-08, |
|
"logits/chosen": -0.3982602059841156, |
|
"logits/rejected": -0.5977696776390076, |
|
"logps/chosen": -418.07061767578125, |
|
"logps/rejected": -713.667236328125, |
|
"loss": 0.2735, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.1576714515686035, |
|
"rewards/margins": 2.992490768432617, |
|
"rewards/rejected": -6.150162696838379, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9163125808239424, |
|
"grad_norm": 19.50813975803542, |
|
"learning_rate": 1.0393129385436823e-08, |
|
"logits/chosen": -0.364255428314209, |
|
"logits/rejected": -0.6485485434532166, |
|
"logps/chosen": -426.3414001464844, |
|
"logps/rejected": -729.6509399414062, |
|
"loss": 0.2897, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.199294328689575, |
|
"rewards/margins": 3.0993099212646484, |
|
"rewards/rejected": -6.2986040115356445, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9261654042736621, |
|
"grad_norm": 19.508164166434455, |
|
"learning_rate": 8.078497137373242e-09, |
|
"logits/chosen": -0.46434831619262695, |
|
"logits/rejected": -0.5418060421943665, |
|
"logps/chosen": -456.2374572753906, |
|
"logps/rejected": -767.5300903320312, |
|
"loss": 0.2769, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.5864601135253906, |
|
"rewards/margins": 3.058790922164917, |
|
"rewards/rejected": -6.645251274108887, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.936018227723382, |
|
"grad_norm": 14.71074195682132, |
|
"learning_rate": 6.0509043431410945e-09, |
|
"logits/chosen": -0.36522990465164185, |
|
"logits/rejected": -0.5609266757965088, |
|
"logps/chosen": -441.4654235839844, |
|
"logps/rejected": -724.3133544921875, |
|
"loss": 0.2906, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.432781934738159, |
|
"rewards/margins": 2.8102941513061523, |
|
"rewards/rejected": -6.243076324462891, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.9458710511731018, |
|
"grad_norm": 15.31419630581113, |
|
"learning_rate": 4.312756738160145e-09, |
|
"logits/chosen": -0.4177488386631012, |
|
"logits/rejected": -0.6274289488792419, |
|
"logps/chosen": -437.7715759277344, |
|
"logps/rejected": -771.16552734375, |
|
"loss": 0.2431, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.4045562744140625, |
|
"rewards/margins": 3.3424010276794434, |
|
"rewards/rejected": -6.746957302093506, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9557238746228216, |
|
"grad_norm": 17.527039297327423, |
|
"learning_rate": 2.8661166316229223e-09, |
|
"logits/chosen": -0.4048340320587158, |
|
"logits/rejected": -0.5873134732246399, |
|
"logps/chosen": -422.97003173828125, |
|
"logps/rejected": -773.1441650390625, |
|
"loss": 0.2806, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.2094051837921143, |
|
"rewards/margins": 3.512725830078125, |
|
"rewards/rejected": -6.722131252288818, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.9655766980725414, |
|
"grad_norm": 14.493022786248968, |
|
"learning_rate": 1.7127004595681727e-09, |
|
"logits/chosen": -0.39268267154693604, |
|
"logits/rejected": -0.5996861457824707, |
|
"logps/chosen": -428.91986083984375, |
|
"logps/rejected": -711.802001953125, |
|
"loss": 0.3532, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.3111557960510254, |
|
"rewards/margins": 2.841479539871216, |
|
"rewards/rejected": -6.152635097503662, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.9754295215222613, |
|
"grad_norm": 18.440387807689753, |
|
"learning_rate": 8.538767483325383e-10, |
|
"logits/chosen": -0.4720977246761322, |
|
"logits/rejected": -0.5540111064910889, |
|
"logps/chosen": -421.6551208496094, |
|
"logps/rejected": -734.9032592773438, |
|
"loss": 0.2881, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.2461752891540527, |
|
"rewards/margins": 3.103208065032959, |
|
"rewards/rejected": -6.349383354187012, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.9852823449719811, |
|
"grad_norm": 24.055730221765614, |
|
"learning_rate": 2.9066449079634404e-10, |
|
"logits/chosen": -0.3708307147026062, |
|
"logits/rejected": -0.6160975694656372, |
|
"logps/chosen": -466.91717529296875, |
|
"logps/rejected": -752.3226318359375, |
|
"loss": 0.2672, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.598177671432495, |
|
"rewards/margins": 2.9196102619171143, |
|
"rewards/rejected": -6.517787933349609, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9951351684217008, |
|
"grad_norm": 16.011780285680448, |
|
"learning_rate": 2.3731937350224273e-11, |
|
"logits/chosen": -0.3636520802974701, |
|
"logits/rejected": -0.6337639689445496, |
|
"logps/chosen": -420.154052734375, |
|
"logps/rejected": -775.5059814453125, |
|
"loss": 0.2825, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.1779532432556152, |
|
"rewards/margins": 3.574568510055542, |
|
"rewards/rejected": -6.752521514892578, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.9990762978015888, |
|
"step": 507, |
|
"total_flos": 0.0, |
|
"train_loss": 0.3847499547390308, |
|
"train_runtime": 27570.709, |
|
"train_samples_per_second": 2.356, |
|
"train_steps_per_second": 0.018 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 507, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|