|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.986666666666667, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.047407407407407405, |
|
"grad_norm": 1808.0, |
|
"learning_rate": 7.8125e-06, |
|
"log_odds_chosen": -1.008344054222107, |
|
"log_odds_ratio": -10.17955493927002, |
|
"logps/chosen": -21.696313858032227, |
|
"logps/rejected": -20.68819808959961, |
|
"loss": 736.0327, |
|
"nll_loss": 9.675058364868164, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -4.339262962341309, |
|
"rewards/margins": -0.20162281394004822, |
|
"rewards/rejected": -4.137639999389648, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09481481481481481, |
|
"grad_norm": 1256.0, |
|
"learning_rate": 1.5625e-05, |
|
"log_odds_chosen": -2.8046412467956543, |
|
"log_odds_ratio": -10.168278694152832, |
|
"logps/chosen": -21.001956939697266, |
|
"logps/rejected": -18.19767951965332, |
|
"loss": 738.0496, |
|
"nll_loss": 8.701889038085938, |
|
"rewards/accuracies": 0.4468750059604645, |
|
"rewards/chosen": -4.200392246246338, |
|
"rewards/margins": -0.5608552694320679, |
|
"rewards/rejected": -3.6395363807678223, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 5440.0, |
|
"learning_rate": 2.34375e-05, |
|
"log_odds_chosen": -1.2525489330291748, |
|
"log_odds_ratio": -10.522050857543945, |
|
"logps/chosen": -21.009998321533203, |
|
"logps/rejected": -19.756052017211914, |
|
"loss": 763.5602, |
|
"nll_loss": 8.136326789855957, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -4.201999664306641, |
|
"rewards/margins": -0.2507893443107605, |
|
"rewards/rejected": -3.9512104988098145, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.18962962962962962, |
|
"grad_norm": 6496.0, |
|
"learning_rate": 3.125e-05, |
|
"log_odds_chosen": -4.4795145988464355, |
|
"log_odds_ratio": -9.770153999328613, |
|
"logps/chosen": -18.06368064880371, |
|
"logps/rejected": -13.585962295532227, |
|
"loss": 685.1017, |
|
"nll_loss": 7.039858818054199, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -3.612736463546753, |
|
"rewards/margins": -0.8955442309379578, |
|
"rewards/rejected": -2.7171921730041504, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.23703703703703705, |
|
"grad_norm": 460.0, |
|
"learning_rate": 3.90625e-05, |
|
"log_odds_chosen": -0.08603362739086151, |
|
"log_odds_ratio": -2.429269313812256, |
|
"logps/chosen": -5.611455917358398, |
|
"logps/rejected": -5.512633323669434, |
|
"loss": 191.7091, |
|
"nll_loss": 3.994724988937378, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": -1.1222912073135376, |
|
"rewards/margins": -0.019764503464102745, |
|
"rewards/rejected": -1.1025266647338867, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.28444444444444444, |
|
"grad_norm": 290.0, |
|
"learning_rate": 4.6875e-05, |
|
"log_odds_chosen": 0.013890685513615608, |
|
"log_odds_ratio": -0.9068824052810669, |
|
"logps/chosen": -1.946455717086792, |
|
"logps/rejected": -1.9621555805206299, |
|
"loss": 62.3461, |
|
"nll_loss": 2.7580060958862305, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3892911374568939, |
|
"rewards/margins": 0.003139972686767578, |
|
"rewards/rejected": -0.3924311101436615, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33185185185185184, |
|
"grad_norm": 156.0, |
|
"learning_rate": 4.998613757348784e-05, |
|
"log_odds_chosen": 0.1850312501192093, |
|
"log_odds_ratio": -0.8029718399047852, |
|
"logps/chosen": -1.6453851461410522, |
|
"logps/rejected": -1.8108527660369873, |
|
"loss": 52.5707, |
|
"nll_loss": 2.74991512298584, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.32907700538635254, |
|
"rewards/margins": 0.03309354558587074, |
|
"rewards/rejected": -0.362170547246933, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.37925925925925924, |
|
"grad_norm": 434.0, |
|
"learning_rate": 4.990147841143462e-05, |
|
"log_odds_chosen": 0.26716217398643494, |
|
"log_odds_ratio": -0.7125700116157532, |
|
"logps/chosen": -1.4205152988433838, |
|
"logps/rejected": -1.643204927444458, |
|
"loss": 45.4019, |
|
"nll_loss": 2.5210635662078857, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -0.28410303592681885, |
|
"rewards/margins": 0.04453796148300171, |
|
"rewards/rejected": -0.32864099740982056, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 149.0, |
|
"learning_rate": 4.97401218720448e-05, |
|
"log_odds_chosen": 0.1843370497226715, |
|
"log_odds_ratio": -0.7205697894096375, |
|
"logps/chosen": -1.357447862625122, |
|
"logps/rejected": -1.5186808109283447, |
|
"loss": 43.4227, |
|
"nll_loss": 2.3052825927734375, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.27148956060409546, |
|
"rewards/margins": 0.03224659711122513, |
|
"rewards/rejected": -0.30373615026474, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4740740740740741, |
|
"grad_norm": 89.5, |
|
"learning_rate": 4.9502564938797946e-05, |
|
"log_odds_chosen": 0.21526531875133514, |
|
"log_odds_ratio": -0.7007580995559692, |
|
"logps/chosen": -1.2479262351989746, |
|
"logps/rejected": -1.4284145832061768, |
|
"loss": 39.9249, |
|
"nll_loss": 2.381633996963501, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.24958527088165283, |
|
"rewards/margins": 0.036097653210163116, |
|
"rewards/rejected": -0.28568291664123535, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5214814814814814, |
|
"grad_norm": 127.0, |
|
"learning_rate": 4.918953929490768e-05, |
|
"log_odds_chosen": 0.19697749614715576, |
|
"log_odds_ratio": -0.7165523171424866, |
|
"logps/chosen": -1.2462198734283447, |
|
"logps/rejected": -1.4008221626281738, |
|
"loss": 39.8666, |
|
"nll_loss": 2.3082547187805176, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": -0.24924394488334656, |
|
"rewards/margins": 0.03092046082019806, |
|
"rewards/rejected": -0.2801644206047058, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5688888888888889, |
|
"grad_norm": 258.0, |
|
"learning_rate": 4.88020090697132e-05, |
|
"log_odds_chosen": 0.19977203011512756, |
|
"log_odds_ratio": -0.6954725384712219, |
|
"logps/chosen": -1.2253552675247192, |
|
"logps/rejected": -1.3942426443099976, |
|
"loss": 39.1969, |
|
"nll_loss": 2.4659817218780518, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": -0.24507102370262146, |
|
"rewards/margins": 0.03377751260995865, |
|
"rewards/rejected": -0.2788485586643219, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6162962962962963, |
|
"grad_norm": 98.0, |
|
"learning_rate": 4.834116786912897e-05, |
|
"log_odds_chosen": 0.24036189913749695, |
|
"log_odds_ratio": -0.67494797706604, |
|
"logps/chosen": -1.202803373336792, |
|
"logps/rejected": -1.386717438697815, |
|
"loss": 38.4748, |
|
"nll_loss": 2.561748504638672, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.24056068062782288, |
|
"rewards/margins": 0.03678280860185623, |
|
"rewards/rejected": -0.2773435115814209, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6637037037037037, |
|
"grad_norm": 156.0, |
|
"learning_rate": 4.7808435099299045e-05, |
|
"log_odds_chosen": 0.2758210301399231, |
|
"log_odds_ratio": -0.6614188551902771, |
|
"logps/chosen": -1.177128553390503, |
|
"logps/rejected": -1.3960373401641846, |
|
"loss": 37.6547, |
|
"nll_loss": 2.348580837249756, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -0.23542571067810059, |
|
"rewards/margins": 0.043781764805316925, |
|
"rewards/rejected": -0.2792074978351593, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 296.0, |
|
"learning_rate": 4.720545159477922e-05, |
|
"log_odds_chosen": 0.283970445394516, |
|
"log_odds_ratio": -0.6692668199539185, |
|
"logps/chosen": -1.140987515449524, |
|
"logps/rejected": -1.3577347993850708, |
|
"loss": 36.4491, |
|
"nll_loss": 2.3564374446868896, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.22819750010967255, |
|
"rewards/margins": 0.04334944486618042, |
|
"rewards/rejected": -0.2715469300746918, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7585185185185185, |
|
"grad_norm": 110.0, |
|
"learning_rate": 4.653407456471222e-05, |
|
"log_odds_chosen": 0.16520562767982483, |
|
"log_odds_ratio": -0.7094644904136658, |
|
"logps/chosen": -1.1379332542419434, |
|
"logps/rejected": -1.269300103187561, |
|
"loss": 36.4075, |
|
"nll_loss": 2.159475803375244, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.22758665680885315, |
|
"rewards/margins": 0.026273369789123535, |
|
"rewards/rejected": -0.2538600564002991, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8059259259259259, |
|
"grad_norm": 164.0, |
|
"learning_rate": 4.579637187256222e-05, |
|
"log_odds_chosen": 0.16680458188056946, |
|
"log_odds_ratio": -0.691378653049469, |
|
"logps/chosen": -1.1199719905853271, |
|
"logps/rejected": -1.2509021759033203, |
|
"loss": 35.8328, |
|
"nll_loss": 2.1975584030151367, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -0.2239944040775299, |
|
"rewards/margins": 0.02618604339659214, |
|
"rewards/rejected": -0.2501804232597351, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 65.5, |
|
"learning_rate": 4.499461566702685e-05, |
|
"log_odds_chosen": 0.25345996022224426, |
|
"log_odds_ratio": -0.6629332900047302, |
|
"logps/chosen": -1.052328109741211, |
|
"logps/rejected": -1.2316312789916992, |
|
"loss": 33.6699, |
|
"nll_loss": 2.1638712882995605, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.21046562492847443, |
|
"rewards/margins": 0.03586065024137497, |
|
"rewards/rejected": -0.2463262975215912, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9007407407407407, |
|
"grad_norm": 78.0, |
|
"learning_rate": 4.413127538374411e-05, |
|
"log_odds_chosen": 0.13198286294937134, |
|
"log_odds_ratio": -0.7187220454216003, |
|
"logps/chosen": -1.1173431873321533, |
|
"logps/rejected": -1.2194410562515259, |
|
"loss": 35.7508, |
|
"nll_loss": 2.091909170150757, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.223468616604805, |
|
"rewards/margins": 0.020419595763087273, |
|
"rewards/rejected": -0.24388821423053741, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 66.0, |
|
"learning_rate": 4.320901013934887e-05, |
|
"log_odds_chosen": 0.233420729637146, |
|
"log_odds_ratio": -0.6643728017807007, |
|
"logps/chosen": -1.0819002389907837, |
|
"logps/rejected": -1.2531683444976807, |
|
"loss": 34.6098, |
|
"nll_loss": 2.0589497089385986, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.2163800448179245, |
|
"rewards/margins": 0.03425363451242447, |
|
"rewards/rejected": -0.2506336569786072, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 152.0, |
|
"learning_rate": 4.223066054130568e-05, |
|
"log_odds_chosen": 0.25719505548477173, |
|
"log_odds_ratio": -0.6791940927505493, |
|
"logps/chosen": -1.0752637386322021, |
|
"logps/rejected": -1.2887135744094849, |
|
"loss": 34.3747, |
|
"nll_loss": 2.0260989665985107, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -0.2150527536869049, |
|
"rewards/margins": 0.04268994182348251, |
|
"rewards/rejected": -0.257742702960968, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.0429629629629629, |
|
"grad_norm": 57.5, |
|
"learning_rate": 4.1199239938743797e-05, |
|
"log_odds_chosen": 0.5201781988143921, |
|
"log_odds_ratio": -0.564489483833313, |
|
"logps/chosen": -0.9281116724014282, |
|
"logps/rejected": -1.2665040493011475, |
|
"loss": 29.6815, |
|
"nll_loss": 1.8919875621795654, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.18562233448028564, |
|
"rewards/margins": 0.06767849624156952, |
|
"rewards/rejected": -0.25330081582069397, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0903703703703704, |
|
"grad_norm": 69.5, |
|
"learning_rate": 4.0117925141242174e-05, |
|
"log_odds_chosen": 0.7453327178955078, |
|
"log_odds_ratio": -0.5092401504516602, |
|
"logps/chosen": -0.8455835580825806, |
|
"logps/rejected": -1.3220586776733398, |
|
"loss": 26.9978, |
|
"nll_loss": 2.0071778297424316, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.16911670565605164, |
|
"rewards/margins": 0.09529503434896469, |
|
"rewards/rejected": -0.2644117474555969, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.1377777777777778, |
|
"grad_norm": 114.5, |
|
"learning_rate": 3.899004663415084e-05, |
|
"log_odds_chosen": 0.6257216334342957, |
|
"log_odds_ratio": -0.525432288646698, |
|
"logps/chosen": -0.8546761274337769, |
|
"logps/rejected": -1.2405774593353271, |
|
"loss": 27.3343, |
|
"nll_loss": 2.060844898223877, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.1709352433681488, |
|
"rewards/margins": 0.07718025892972946, |
|
"rewards/rejected": -0.24811549484729767, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.1851851851851851, |
|
"grad_norm": 202.0, |
|
"learning_rate": 3.781907832058587e-05, |
|
"log_odds_chosen": 0.6118601560592651, |
|
"log_odds_ratio": -0.5294589996337891, |
|
"logps/chosen": -0.907837986946106, |
|
"logps/rejected": -1.2936393022537231, |
|
"loss": 29.0283, |
|
"nll_loss": 1.9723001718521118, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.18156759440898895, |
|
"rewards/margins": 0.07716026157140732, |
|
"rewards/rejected": -0.25872787833213806, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.2325925925925927, |
|
"grad_norm": 47.25, |
|
"learning_rate": 3.660862682169282e-05, |
|
"log_odds_chosen": 0.7508286237716675, |
|
"log_odds_ratio": -0.5130770206451416, |
|
"logps/chosen": -0.8467851877212524, |
|
"logps/rejected": -1.3315311670303345, |
|
"loss": 26.9969, |
|
"nll_loss": 1.963587760925293, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.169357031583786, |
|
"rewards/margins": 0.09694920480251312, |
|
"rewards/rejected": -0.26630622148513794, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 219.0, |
|
"learning_rate": 3.5362420368134356e-05, |
|
"log_odds_chosen": 0.6324235796928406, |
|
"log_odds_ratio": -0.5143457055091858, |
|
"logps/chosen": -0.8541671633720398, |
|
"logps/rejected": -1.2207109928131104, |
|
"loss": 27.3238, |
|
"nll_loss": 1.9547522068023682, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.17083343863487244, |
|
"rewards/margins": 0.07330875098705292, |
|
"rewards/rejected": -0.24414214491844177, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.3274074074074074, |
|
"grad_norm": 219.0, |
|
"learning_rate": 3.408429731701635e-05, |
|
"log_odds_chosen": 0.7118546366691589, |
|
"log_odds_ratio": -0.5201038122177124, |
|
"logps/chosen": -0.8679434061050415, |
|
"logps/rejected": -1.3246322870254517, |
|
"loss": 27.7072, |
|
"nll_loss": 1.9330047369003296, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.17358867824077606, |
|
"rewards/margins": 0.09133778512477875, |
|
"rewards/rejected": -0.2649264931678772, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.374814814814815, |
|
"grad_norm": 66.0, |
|
"learning_rate": 3.2778194329621104e-05, |
|
"log_odds_chosen": 0.6516977548599243, |
|
"log_odds_ratio": -0.5285124778747559, |
|
"logps/chosen": -0.8772061467170715, |
|
"logps/rejected": -1.2873995304107666, |
|
"loss": 28.0346, |
|
"nll_loss": 2.029435873031616, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.17544123530387878, |
|
"rewards/margins": 0.08203869313001633, |
|
"rewards/rejected": -0.2574799358844757, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.4222222222222223, |
|
"grad_norm": 162.0, |
|
"learning_rate": 3.144813424636031e-05, |
|
"log_odds_chosen": 0.7266349792480469, |
|
"log_odds_ratio": -0.5039714574813843, |
|
"logps/chosen": -0.8121232986450195, |
|
"logps/rejected": -1.273530125617981, |
|
"loss": 25.94, |
|
"nll_loss": 2.0966086387634277, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.16242465376853943, |
|
"rewards/margins": 0.09228137135505676, |
|
"rewards/rejected": -0.2547060549259186, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4696296296296296, |
|
"grad_norm": 143.0, |
|
"learning_rate": 3.0098213696293542e-05, |
|
"log_odds_chosen": 0.650794506072998, |
|
"log_odds_ratio": -0.5278457403182983, |
|
"logps/chosen": -0.8365495800971985, |
|
"logps/rejected": -1.2551826238632202, |
|
"loss": 26.7323, |
|
"nll_loss": 2.240175485610962, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.16730991005897522, |
|
"rewards/margins": 0.08372663706541061, |
|
"rewards/rejected": -0.25103655457496643, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.5170370370370372, |
|
"grad_norm": 77.5, |
|
"learning_rate": 2.8732590479375165e-05, |
|
"log_odds_chosen": 0.6227356195449829, |
|
"log_odds_ratio": -0.5516515970230103, |
|
"logps/chosen": -0.828398585319519, |
|
"logps/rejected": -1.2222946882247925, |
|
"loss": 26.4676, |
|
"nll_loss": 2.183659076690674, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.1656797230243683, |
|
"rewards/margins": 0.0787791982293129, |
|
"rewards/rejected": -0.24445891380310059, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.5644444444444443, |
|
"grad_norm": 63.5, |
|
"learning_rate": 2.7355470760292956e-05, |
|
"log_odds_chosen": 0.7487412691116333, |
|
"log_odds_ratio": -0.49377554655075073, |
|
"logps/chosen": -0.7827351689338684, |
|
"logps/rejected": -1.2390353679656982, |
|
"loss": 25.0132, |
|
"nll_loss": 2.1968765258789062, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.15654703974723816, |
|
"rewards/margins": 0.09126004576683044, |
|
"rewards/rejected": -0.2478071004152298, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.6118518518518519, |
|
"grad_norm": 81.0, |
|
"learning_rate": 2.597109611334169e-05, |
|
"log_odds_chosen": 0.6999706029891968, |
|
"log_odds_ratio": -0.514404833316803, |
|
"logps/chosen": -0.8419672846794128, |
|
"logps/rejected": -1.2977701425552368, |
|
"loss": 26.9121, |
|
"nll_loss": 2.095829963684082, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.16839346289634705, |
|
"rewards/margins": 0.09116056561470032, |
|
"rewards/rejected": -0.25955405831336975, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.6592592592592592, |
|
"grad_norm": 60.5, |
|
"learning_rate": 2.458373045823404e-05, |
|
"log_odds_chosen": 0.7051068544387817, |
|
"log_odds_ratio": -0.5076509714126587, |
|
"logps/chosen": -0.8126438856124878, |
|
"logps/rejected": -1.2429084777832031, |
|
"loss": 25.972, |
|
"nll_loss": 2.211460828781128, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.16252879798412323, |
|
"rewards/margins": 0.08605290949344635, |
|
"rewards/rejected": -0.2485816925764084, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.7066666666666666, |
|
"grad_norm": 68.0, |
|
"learning_rate": 2.3197646927086697e-05, |
|
"log_odds_chosen": 0.6526662707328796, |
|
"log_odds_ratio": -0.5229703783988953, |
|
"logps/chosen": -0.8144344091415405, |
|
"logps/rejected": -1.2128846645355225, |
|
"loss": 26.0409, |
|
"nll_loss": 2.0978920459747314, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.16288688778877258, |
|
"rewards/margins": 0.07969003915786743, |
|
"rewards/rejected": -0.24257692694664001, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.7540740740740741, |
|
"grad_norm": 61.0, |
|
"learning_rate": 2.1817114703032176e-05, |
|
"log_odds_chosen": 0.5944602489471436, |
|
"log_odds_ratio": -0.5618599057197571, |
|
"logps/chosen": -0.8705334663391113, |
|
"logps/rejected": -1.2557179927825928, |
|
"loss": 27.8395, |
|
"nll_loss": 1.9792810678482056, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.1741066873073578, |
|
"rewards/margins": 0.07703690975904465, |
|
"rewards/rejected": -0.25114360451698303, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.8014814814814815, |
|
"grad_norm": 97.0, |
|
"learning_rate": 2.0446385870993467e-05, |
|
"log_odds_chosen": 0.6862818002700806, |
|
"log_odds_ratio": -0.5245167016983032, |
|
"logps/chosen": -0.8145660161972046, |
|
"logps/rejected": -1.232753038406372, |
|
"loss": 26.0206, |
|
"nll_loss": 2.093113422393799, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.16291318833827972, |
|
"rewards/margins": 0.08363740146160126, |
|
"rewards/rejected": -0.24655060470104218, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.8488888888888888, |
|
"grad_norm": 50.0, |
|
"learning_rate": 1.9089682321121834e-05, |
|
"log_odds_chosen": 0.7545720338821411, |
|
"log_odds_ratio": -0.48210686445236206, |
|
"logps/chosen": -0.8307647705078125, |
|
"logps/rejected": -1.3077232837677002, |
|
"loss": 26.5505, |
|
"nll_loss": 2.2815651893615723, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.1661529690027237, |
|
"rewards/margins": 0.09539170563220978, |
|
"rewards/rejected": -0.26154467463493347, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.8962962962962964, |
|
"grad_norm": 65.0, |
|
"learning_rate": 1.775118274523545e-05, |
|
"log_odds_chosen": 0.6288995742797852, |
|
"log_odds_ratio": -0.5331851840019226, |
|
"logps/chosen": -0.8431414365768433, |
|
"logps/rejected": -1.233559489250183, |
|
"loss": 26.946, |
|
"nll_loss": 2.2366058826446533, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.16862830519676208, |
|
"rewards/margins": 0.0780835822224617, |
|
"rewards/rejected": -0.24671189486980438, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.9437037037037037, |
|
"grad_norm": 56.0, |
|
"learning_rate": 1.643500976631037e-05, |
|
"log_odds_chosen": 0.6592320203781128, |
|
"log_odds_ratio": -0.5180245041847229, |
|
"logps/chosen": -0.8212572336196899, |
|
"logps/rejected": -1.2188732624053955, |
|
"loss": 26.2562, |
|
"nll_loss": 2.1104683876037598, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.16425147652626038, |
|
"rewards/margins": 0.07952319085597992, |
|
"rewards/rejected": -0.2437746524810791, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.991111111111111, |
|
"grad_norm": 63.5, |
|
"learning_rate": 1.514521724066537e-05, |
|
"log_odds_chosen": 0.536708414554596, |
|
"log_odds_ratio": -0.5566378235816956, |
|
"logps/chosen": -0.8468238115310669, |
|
"logps/rejected": -1.1587189435958862, |
|
"loss": 27.0896, |
|
"nll_loss": 2.173583745956421, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.16936475038528442, |
|
"rewards/margins": 0.06237905099987984, |
|
"rewards/rejected": -0.23174378275871277, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0385185185185186, |
|
"grad_norm": 36.75, |
|
"learning_rate": 1.3885777771950348e-05, |
|
"log_odds_chosen": 1.0355523824691772, |
|
"log_odds_ratio": -0.41216397285461426, |
|
"logps/chosen": -0.6247184872627258, |
|
"logps/rejected": -1.1848514080047607, |
|
"loss": 19.9299, |
|
"nll_loss": 2.0519230365753174, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.12494368851184845, |
|
"rewards/margins": 0.11202657222747803, |
|
"rewards/rejected": -0.23697027564048767, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.0859259259259257, |
|
"grad_norm": 45.75, |
|
"learning_rate": 1.2660570475395683e-05, |
|
"log_odds_chosen": 1.188058614730835, |
|
"log_odds_ratio": -0.3877725303173065, |
|
"logps/chosen": -0.6814132332801819, |
|
"logps/rejected": -1.3385488986968994, |
|
"loss": 21.7134, |
|
"nll_loss": 2.186136245727539, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.13628263771533966, |
|
"rewards/margins": 0.13142715394496918, |
|
"rewards/rejected": -0.26770979166030884, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.1333333333333333, |
|
"grad_norm": 69.0, |
|
"learning_rate": 1.1473369030008974e-05, |
|
"log_odds_chosen": 1.3558170795440674, |
|
"log_odds_ratio": -0.3460015654563904, |
|
"logps/chosen": -0.6204769015312195, |
|
"logps/rejected": -1.3810127973556519, |
|
"loss": 19.7129, |
|
"nll_loss": 2.1640377044677734, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -0.1240953654050827, |
|
"rewards/margins": 0.1521071493625641, |
|
"rewards/rejected": -0.27620255947113037, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.180740740740741, |
|
"grad_norm": 60.5, |
|
"learning_rate": 1.0327830055518842e-05, |
|
"log_odds_chosen": 1.3206228017807007, |
|
"log_odds_ratio": -0.3496930003166199, |
|
"logps/chosen": -0.6283946633338928, |
|
"logps/rejected": -1.3481276035308838, |
|
"loss": 20.0634, |
|
"nll_loss": 2.163454532623291, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.1256789267063141, |
|
"rewards/margins": 0.1439466029405594, |
|
"rewards/rejected": -0.2696255147457123, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.228148148148148, |
|
"grad_norm": 46.25, |
|
"learning_rate": 9.227481849865235e-06, |
|
"log_odds_chosen": 1.3826463222503662, |
|
"log_odds_ratio": -0.34246888756752014, |
|
"logps/chosen": -0.601762056350708, |
|
"logps/rejected": -1.3406898975372314, |
|
"loss": 19.1994, |
|
"nll_loss": 2.1239700317382812, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.12035240978002548, |
|
"rewards/margins": 0.14778557419776917, |
|
"rewards/rejected": -0.26813799142837524, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.2755555555555556, |
|
"grad_norm": 52.25, |
|
"learning_rate": 8.175713521924978e-06, |
|
"log_odds_chosen": 1.3184112310409546, |
|
"log_odds_ratio": -0.36138203740119934, |
|
"logps/chosen": -0.626305878162384, |
|
"logps/rejected": -1.3914432525634766, |
|
"loss": 19.7327, |
|
"nll_loss": 2.0872886180877686, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.12526118755340576, |
|
"rewards/margins": 0.1530275046825409, |
|
"rewards/rejected": -0.27828869223594666, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.322962962962963, |
|
"grad_norm": 47.5, |
|
"learning_rate": 7.1757645529443665e-06, |
|
"log_odds_chosen": 1.342882513999939, |
|
"log_odds_ratio": -0.33094173669815063, |
|
"logps/chosen": -0.5907199382781982, |
|
"logps/rejected": -1.3384162187576294, |
|
"loss": 18.8515, |
|
"nll_loss": 2.086435556411743, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.11814399063587189, |
|
"rewards/margins": 0.1495392769575119, |
|
"rewards/rejected": -0.2676832377910614, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.3703703703703702, |
|
"grad_norm": 48.0, |
|
"learning_rate": 6.230714818829733e-06, |
|
"log_odds_chosen": 1.4440391063690186, |
|
"log_odds_ratio": -0.3371729254722595, |
|
"logps/chosen": -0.5727447867393494, |
|
"logps/rejected": -1.3450387716293335, |
|
"loss": 17.9915, |
|
"nll_loss": 2.1893529891967773, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11454895883798599, |
|
"rewards/margins": 0.15445882081985474, |
|
"rewards/rejected": -0.26900777220726013, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.417777777777778, |
|
"grad_norm": 44.0, |
|
"learning_rate": 5.343475104027743e-06, |
|
"log_odds_chosen": 1.4869956970214844, |
|
"log_odds_ratio": -0.33424651622772217, |
|
"logps/chosen": -0.5934678912162781, |
|
"logps/rejected": -1.4604480266571045, |
|
"loss": 18.3318, |
|
"nll_loss": 2.072566270828247, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.11869357526302338, |
|
"rewards/margins": 0.1733960211277008, |
|
"rewards/rejected": -0.2920895516872406, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.4651851851851854, |
|
"grad_norm": 82.0, |
|
"learning_rate": 4.516778136213037e-06, |
|
"log_odds_chosen": 1.3816086053848267, |
|
"log_odds_ratio": -0.34434396028518677, |
|
"logps/chosen": -0.5892956256866455, |
|
"logps/rejected": -1.3579511642456055, |
|
"loss": 18.3788, |
|
"nll_loss": 2.0510034561157227, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.1178591251373291, |
|
"rewards/margins": 0.15373112261295319, |
|
"rewards/rejected": -0.2715902626514435, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.5125925925925925, |
|
"grad_norm": 54.75, |
|
"learning_rate": 3.7531701693965554e-06, |
|
"log_odds_chosen": 1.349381685256958, |
|
"log_odds_ratio": -0.3549429774284363, |
|
"logps/chosen": -0.6175572276115417, |
|
"logps/rejected": -1.377739667892456, |
|
"loss": 19.2041, |
|
"nll_loss": 2.2108561992645264, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.12351144850254059, |
|
"rewards/margins": 0.15203648805618286, |
|
"rewards/rejected": -0.27554795145988464, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 46.0, |
|
"learning_rate": 3.055003141378948e-06, |
|
"log_odds_chosen": 1.4625142812728882, |
|
"log_odds_ratio": -0.33779287338256836, |
|
"logps/chosen": -0.6002839803695679, |
|
"logps/rejected": -1.4510117769241333, |
|
"loss": 18.223, |
|
"nll_loss": 2.1512744426727295, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.12005682289600372, |
|
"rewards/margins": 0.17014555633068085, |
|
"rewards/rejected": -0.2902023494243622, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.6074074074074076, |
|
"grad_norm": 41.75, |
|
"learning_rate": 2.424427429704365e-06, |
|
"log_odds_chosen": 1.3319700956344604, |
|
"log_odds_ratio": -0.35751980543136597, |
|
"logps/chosen": -0.6509729623794556, |
|
"logps/rejected": -1.444106101989746, |
|
"loss": 19.8273, |
|
"nll_loss": 1.982131004333496, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -0.13019458949565887, |
|
"rewards/margins": 0.15862663090229034, |
|
"rewards/rejected": -0.2888212203979492, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.6548148148148147, |
|
"grad_norm": 50.75, |
|
"learning_rate": 1.8633852284264508e-06, |
|
"log_odds_chosen": 1.2478935718536377, |
|
"log_odds_ratio": -0.3773137629032135, |
|
"logps/chosen": -0.6254156827926636, |
|
"logps/rejected": -1.2992069721221924, |
|
"loss": 19.9581, |
|
"nll_loss": 2.197223663330078, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.12508316338062286, |
|
"rewards/margins": 0.13475826382637024, |
|
"rewards/rejected": -0.2598413825035095, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.7022222222222223, |
|
"grad_norm": 47.5, |
|
"learning_rate": 1.3736045660864034e-06, |
|
"log_odds_chosen": 1.3165029287338257, |
|
"log_odds_ratio": -0.3543280363082886, |
|
"logps/chosen": -0.6258935332298279, |
|
"logps/rejected": -1.3820149898529053, |
|
"loss": 19.8195, |
|
"nll_loss": 2.149914264678955, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -0.12517870962619781, |
|
"rewards/margins": 0.1512243151664734, |
|
"rewards/rejected": -0.2764030396938324, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.74962962962963, |
|
"grad_norm": 51.25, |
|
"learning_rate": 9.565939833279192e-07, |
|
"log_odds_chosen": 1.294450044631958, |
|
"log_odds_ratio": -0.36391139030456543, |
|
"logps/chosen": -0.6437335014343262, |
|
"logps/rejected": -1.3571479320526123, |
|
"loss": 20.5514, |
|
"nll_loss": 2.049088716506958, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -0.12874671816825867, |
|
"rewards/margins": 0.14268288016319275, |
|
"rewards/rejected": -0.2714295983314514, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.797037037037037, |
|
"grad_norm": 43.75, |
|
"learning_rate": 6.136378865420872e-07, |
|
"log_odds_chosen": 1.2758103609085083, |
|
"log_odds_ratio": -0.36855071783065796, |
|
"logps/chosen": -0.6186591982841492, |
|
"logps/rejected": -1.315861701965332, |
|
"loss": 19.73, |
|
"nll_loss": 2.048844814300537, |
|
"rewards/accuracies": 0.8218749761581421, |
|
"rewards/chosen": -0.12373185157775879, |
|
"rewards/margins": 0.13944050669670105, |
|
"rewards/rejected": -0.26317232847213745, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.8444444444444446, |
|
"grad_norm": 51.25, |
|
"learning_rate": 3.45792591853214e-07, |
|
"log_odds_chosen": 1.4028387069702148, |
|
"log_odds_ratio": -0.34696242213249207, |
|
"logps/chosen": -0.5851832032203674, |
|
"logps/rejected": -1.3483918905258179, |
|
"loss": 18.6233, |
|
"nll_loss": 2.146777629852295, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -0.11703664064407349, |
|
"rewards/margins": 0.15264175832271576, |
|
"rewards/rejected": -0.26967838406562805, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.891851851851852, |
|
"grad_norm": 108.5, |
|
"learning_rate": 1.538830716302092e-07, |
|
"log_odds_chosen": 1.4782116413116455, |
|
"log_odds_ratio": -0.3342163860797882, |
|
"logps/chosen": -0.5890188813209534, |
|
"logps/rejected": -1.4168431758880615, |
|
"loss": 17.9475, |
|
"nll_loss": 2.091174840927124, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11780376732349396, |
|
"rewards/margins": 0.1655648797750473, |
|
"rewards/rejected": -0.28336864709854126, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.9392592592592592, |
|
"grad_norm": 40.5, |
|
"learning_rate": 3.8500413544415025e-08, |
|
"log_odds_chosen": 1.4966692924499512, |
|
"log_odds_ratio": -0.3115060031414032, |
|
"logps/chosen": -0.5653601288795471, |
|
"logps/rejected": -1.383745551109314, |
|
"loss": 17.9926, |
|
"nll_loss": 2.0637173652648926, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/chosen": -0.11307201534509659, |
|
"rewards/margins": 0.1636771261692047, |
|
"rewards/rejected": -0.2767491042613983, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.986666666666667, |
|
"grad_norm": 88.5, |
|
"learning_rate": 0.0, |
|
"log_odds_chosen": 1.2434196472167969, |
|
"log_odds_ratio": -0.36147943139076233, |
|
"logps/chosen": -0.605234682559967, |
|
"logps/rejected": -1.2406527996063232, |
|
"loss": 19.3401, |
|
"nll_loss": 2.0648436546325684, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.12104693800210953, |
|
"rewards/margins": 0.12708361446857452, |
|
"rewards/rejected": -0.24813053011894226, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.986666666666667, |
|
"step": 315, |
|
"total_flos": 0.0, |
|
"train_loss": 75.11768820626395, |
|
"train_runtime": 7196.0938, |
|
"train_samples_per_second": 2.814, |
|
"train_steps_per_second": 0.044 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|