gemma-7b-borpo-basic-5e-5-02-v4 / trainer_state.json
silviasapora's picture
Model save
ba56621 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.986666666666667,
"eval_steps": 500,
"global_step": 315,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.047407407407407405,
"grad_norm": 1808.0,
"learning_rate": 7.8125e-06,
"log_odds_chosen": -1.008344054222107,
"log_odds_ratio": -10.17955493927002,
"logps/chosen": -21.696313858032227,
"logps/rejected": -20.68819808959961,
"loss": 736.0327,
"nll_loss": 9.675058364868164,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -4.339262962341309,
"rewards/margins": -0.20162281394004822,
"rewards/rejected": -4.137639999389648,
"step": 5
},
{
"epoch": 0.09481481481481481,
"grad_norm": 1256.0,
"learning_rate": 1.5625e-05,
"log_odds_chosen": -2.8046412467956543,
"log_odds_ratio": -10.168278694152832,
"logps/chosen": -21.001956939697266,
"logps/rejected": -18.19767951965332,
"loss": 738.0496,
"nll_loss": 8.701889038085938,
"rewards/accuracies": 0.4468750059604645,
"rewards/chosen": -4.200392246246338,
"rewards/margins": -0.5608552694320679,
"rewards/rejected": -3.6395363807678223,
"step": 10
},
{
"epoch": 0.14222222222222222,
"grad_norm": 5440.0,
"learning_rate": 2.34375e-05,
"log_odds_chosen": -1.2525489330291748,
"log_odds_ratio": -10.522050857543945,
"logps/chosen": -21.009998321533203,
"logps/rejected": -19.756052017211914,
"loss": 763.5602,
"nll_loss": 8.136326789855957,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -4.201999664306641,
"rewards/margins": -0.2507893443107605,
"rewards/rejected": -3.9512104988098145,
"step": 15
},
{
"epoch": 0.18962962962962962,
"grad_norm": 6496.0,
"learning_rate": 3.125e-05,
"log_odds_chosen": -4.4795145988464355,
"log_odds_ratio": -9.770153999328613,
"logps/chosen": -18.06368064880371,
"logps/rejected": -13.585962295532227,
"loss": 685.1017,
"nll_loss": 7.039858818054199,
"rewards/accuracies": 0.4375,
"rewards/chosen": -3.612736463546753,
"rewards/margins": -0.8955442309379578,
"rewards/rejected": -2.7171921730041504,
"step": 20
},
{
"epoch": 0.23703703703703705,
"grad_norm": 460.0,
"learning_rate": 3.90625e-05,
"log_odds_chosen": -0.08603362739086151,
"log_odds_ratio": -2.429269313812256,
"logps/chosen": -5.611455917358398,
"logps/rejected": -5.512633323669434,
"loss": 191.7091,
"nll_loss": 3.994724988937378,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -1.1222912073135376,
"rewards/margins": -0.019764503464102745,
"rewards/rejected": -1.1025266647338867,
"step": 25
},
{
"epoch": 0.28444444444444444,
"grad_norm": 290.0,
"learning_rate": 4.6875e-05,
"log_odds_chosen": 0.013890685513615608,
"log_odds_ratio": -0.9068824052810669,
"logps/chosen": -1.946455717086792,
"logps/rejected": -1.9621555805206299,
"loss": 62.3461,
"nll_loss": 2.7580060958862305,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.3892911374568939,
"rewards/margins": 0.003139972686767578,
"rewards/rejected": -0.3924311101436615,
"step": 30
},
{
"epoch": 0.33185185185185184,
"grad_norm": 156.0,
"learning_rate": 4.998613757348784e-05,
"log_odds_chosen": 0.1850312501192093,
"log_odds_ratio": -0.8029718399047852,
"logps/chosen": -1.6453851461410522,
"logps/rejected": -1.8108527660369873,
"loss": 52.5707,
"nll_loss": 2.74991512298584,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.32907700538635254,
"rewards/margins": 0.03309354558587074,
"rewards/rejected": -0.362170547246933,
"step": 35
},
{
"epoch": 0.37925925925925924,
"grad_norm": 434.0,
"learning_rate": 4.990147841143462e-05,
"log_odds_chosen": 0.26716217398643494,
"log_odds_ratio": -0.7125700116157532,
"logps/chosen": -1.4205152988433838,
"logps/rejected": -1.643204927444458,
"loss": 45.4019,
"nll_loss": 2.5210635662078857,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -0.28410303592681885,
"rewards/margins": 0.04453796148300171,
"rewards/rejected": -0.32864099740982056,
"step": 40
},
{
"epoch": 0.4266666666666667,
"grad_norm": 149.0,
"learning_rate": 4.97401218720448e-05,
"log_odds_chosen": 0.1843370497226715,
"log_odds_ratio": -0.7205697894096375,
"logps/chosen": -1.357447862625122,
"logps/rejected": -1.5186808109283447,
"loss": 43.4227,
"nll_loss": 2.3052825927734375,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.27148956060409546,
"rewards/margins": 0.03224659711122513,
"rewards/rejected": -0.30373615026474,
"step": 45
},
{
"epoch": 0.4740740740740741,
"grad_norm": 89.5,
"learning_rate": 4.9502564938797946e-05,
"log_odds_chosen": 0.21526531875133514,
"log_odds_ratio": -0.7007580995559692,
"logps/chosen": -1.2479262351989746,
"logps/rejected": -1.4284145832061768,
"loss": 39.9249,
"nll_loss": 2.381633996963501,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.24958527088165283,
"rewards/margins": 0.036097653210163116,
"rewards/rejected": -0.28568291664123535,
"step": 50
},
{
"epoch": 0.5214814814814814,
"grad_norm": 127.0,
"learning_rate": 4.918953929490768e-05,
"log_odds_chosen": 0.19697749614715576,
"log_odds_ratio": -0.7165523171424866,
"logps/chosen": -1.2462198734283447,
"logps/rejected": -1.4008221626281738,
"loss": 39.8666,
"nll_loss": 2.3082547187805176,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.24924394488334656,
"rewards/margins": 0.03092046082019806,
"rewards/rejected": -0.2801644206047058,
"step": 55
},
{
"epoch": 0.5688888888888889,
"grad_norm": 258.0,
"learning_rate": 4.88020090697132e-05,
"log_odds_chosen": 0.19977203011512756,
"log_odds_ratio": -0.6954725384712219,
"logps/chosen": -1.2253552675247192,
"logps/rejected": -1.3942426443099976,
"loss": 39.1969,
"nll_loss": 2.4659817218780518,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.24507102370262146,
"rewards/margins": 0.03377751260995865,
"rewards/rejected": -0.2788485586643219,
"step": 60
},
{
"epoch": 0.6162962962962963,
"grad_norm": 98.0,
"learning_rate": 4.834116786912897e-05,
"log_odds_chosen": 0.24036189913749695,
"log_odds_ratio": -0.67494797706604,
"logps/chosen": -1.202803373336792,
"logps/rejected": -1.386717438697815,
"loss": 38.4748,
"nll_loss": 2.561748504638672,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.24056068062782288,
"rewards/margins": 0.03678280860185623,
"rewards/rejected": -0.2773435115814209,
"step": 65
},
{
"epoch": 0.6637037037037037,
"grad_norm": 156.0,
"learning_rate": 4.7808435099299045e-05,
"log_odds_chosen": 0.2758210301399231,
"log_odds_ratio": -0.6614188551902771,
"logps/chosen": -1.177128553390503,
"logps/rejected": -1.3960373401641846,
"loss": 37.6547,
"nll_loss": 2.348580837249756,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": -0.23542571067810059,
"rewards/margins": 0.043781764805316925,
"rewards/rejected": -0.2792074978351593,
"step": 70
},
{
"epoch": 0.7111111111111111,
"grad_norm": 296.0,
"learning_rate": 4.720545159477922e-05,
"log_odds_chosen": 0.283970445394516,
"log_odds_ratio": -0.6692668199539185,
"logps/chosen": -1.140987515449524,
"logps/rejected": -1.3577347993850708,
"loss": 36.4491,
"nll_loss": 2.3564374446868896,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.22819750010967255,
"rewards/margins": 0.04334944486618042,
"rewards/rejected": -0.2715469300746918,
"step": 75
},
{
"epoch": 0.7585185185185185,
"grad_norm": 110.0,
"learning_rate": 4.653407456471222e-05,
"log_odds_chosen": 0.16520562767982483,
"log_odds_ratio": -0.7094644904136658,
"logps/chosen": -1.1379332542419434,
"logps/rejected": -1.269300103187561,
"loss": 36.4075,
"nll_loss": 2.159475803375244,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.22758665680885315,
"rewards/margins": 0.026273369789123535,
"rewards/rejected": -0.2538600564002991,
"step": 80
},
{
"epoch": 0.8059259259259259,
"grad_norm": 164.0,
"learning_rate": 4.579637187256222e-05,
"log_odds_chosen": 0.16680458188056946,
"log_odds_ratio": -0.691378653049469,
"logps/chosen": -1.1199719905853271,
"logps/rejected": -1.2509021759033203,
"loss": 35.8328,
"nll_loss": 2.1975584030151367,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -0.2239944040775299,
"rewards/margins": 0.02618604339659214,
"rewards/rejected": -0.2501804232597351,
"step": 85
},
{
"epoch": 0.8533333333333334,
"grad_norm": 65.5,
"learning_rate": 4.499461566702685e-05,
"log_odds_chosen": 0.25345996022224426,
"log_odds_ratio": -0.6629332900047302,
"logps/chosen": -1.052328109741211,
"logps/rejected": -1.2316312789916992,
"loss": 33.6699,
"nll_loss": 2.1638712882995605,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.21046562492847443,
"rewards/margins": 0.03586065024137497,
"rewards/rejected": -0.2463262975215912,
"step": 90
},
{
"epoch": 0.9007407407407407,
"grad_norm": 78.0,
"learning_rate": 4.413127538374411e-05,
"log_odds_chosen": 0.13198286294937134,
"log_odds_ratio": -0.7187220454216003,
"logps/chosen": -1.1173431873321533,
"logps/rejected": -1.2194410562515259,
"loss": 35.7508,
"nll_loss": 2.091909170150757,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.223468616604805,
"rewards/margins": 0.020419595763087273,
"rewards/rejected": -0.24388821423053741,
"step": 95
},
{
"epoch": 0.9481481481481482,
"grad_norm": 66.0,
"learning_rate": 4.320901013934887e-05,
"log_odds_chosen": 0.233420729637146,
"log_odds_ratio": -0.6643728017807007,
"logps/chosen": -1.0819002389907837,
"logps/rejected": -1.2531683444976807,
"loss": 34.6098,
"nll_loss": 2.0589497089385986,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.2163800448179245,
"rewards/margins": 0.03425363451242447,
"rewards/rejected": -0.2506336569786072,
"step": 100
},
{
"epoch": 0.9955555555555555,
"grad_norm": 152.0,
"learning_rate": 4.223066054130568e-05,
"log_odds_chosen": 0.25719505548477173,
"log_odds_ratio": -0.6791940927505493,
"logps/chosen": -1.0752637386322021,
"logps/rejected": -1.2887135744094849,
"loss": 34.3747,
"nll_loss": 2.0260989665985107,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -0.2150527536869049,
"rewards/margins": 0.04268994182348251,
"rewards/rejected": -0.257742702960968,
"step": 105
},
{
"epoch": 1.0429629629629629,
"grad_norm": 57.5,
"learning_rate": 4.1199239938743797e-05,
"log_odds_chosen": 0.5201781988143921,
"log_odds_ratio": -0.564489483833313,
"logps/chosen": -0.9281116724014282,
"logps/rejected": -1.2665040493011475,
"loss": 29.6815,
"nll_loss": 1.8919875621795654,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.18562233448028564,
"rewards/margins": 0.06767849624156952,
"rewards/rejected": -0.25330081582069397,
"step": 110
},
{
"epoch": 1.0903703703703704,
"grad_norm": 69.5,
"learning_rate": 4.0117925141242174e-05,
"log_odds_chosen": 0.7453327178955078,
"log_odds_ratio": -0.5092401504516602,
"logps/chosen": -0.8455835580825806,
"logps/rejected": -1.3220586776733398,
"loss": 26.9978,
"nll_loss": 2.0071778297424316,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.16911670565605164,
"rewards/margins": 0.09529503434896469,
"rewards/rejected": -0.2644117474555969,
"step": 115
},
{
"epoch": 1.1377777777777778,
"grad_norm": 114.5,
"learning_rate": 3.899004663415084e-05,
"log_odds_chosen": 0.6257216334342957,
"log_odds_ratio": -0.525432288646698,
"logps/chosen": -0.8546761274337769,
"logps/rejected": -1.2405774593353271,
"loss": 27.3343,
"nll_loss": 2.060844898223877,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -0.1709352433681488,
"rewards/margins": 0.07718025892972946,
"rewards/rejected": -0.24811549484729767,
"step": 120
},
{
"epoch": 1.1851851851851851,
"grad_norm": 202.0,
"learning_rate": 3.781907832058587e-05,
"log_odds_chosen": 0.6118601560592651,
"log_odds_ratio": -0.5294589996337891,
"logps/chosen": -0.907837986946106,
"logps/rejected": -1.2936393022537231,
"loss": 29.0283,
"nll_loss": 1.9723001718521118,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.18156759440898895,
"rewards/margins": 0.07716026157140732,
"rewards/rejected": -0.25872787833213806,
"step": 125
},
{
"epoch": 1.2325925925925927,
"grad_norm": 47.25,
"learning_rate": 3.660862682169282e-05,
"log_odds_chosen": 0.7508286237716675,
"log_odds_ratio": -0.5130770206451416,
"logps/chosen": -0.8467851877212524,
"logps/rejected": -1.3315311670303345,
"loss": 26.9969,
"nll_loss": 1.963587760925293,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.169357031583786,
"rewards/margins": 0.09694920480251312,
"rewards/rejected": -0.26630622148513794,
"step": 130
},
{
"epoch": 1.28,
"grad_norm": 219.0,
"learning_rate": 3.5362420368134356e-05,
"log_odds_chosen": 0.6324235796928406,
"log_odds_ratio": -0.5143457055091858,
"logps/chosen": -0.8541671633720398,
"logps/rejected": -1.2207109928131104,
"loss": 27.3238,
"nll_loss": 1.9547522068023682,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.17083343863487244,
"rewards/margins": 0.07330875098705292,
"rewards/rejected": -0.24414214491844177,
"step": 135
},
{
"epoch": 1.3274074074074074,
"grad_norm": 219.0,
"learning_rate": 3.408429731701635e-05,
"log_odds_chosen": 0.7118546366691589,
"log_odds_ratio": -0.5201038122177124,
"logps/chosen": -0.8679434061050415,
"logps/rejected": -1.3246322870254517,
"loss": 27.7072,
"nll_loss": 1.9330047369003296,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -0.17358867824077606,
"rewards/margins": 0.09133778512477875,
"rewards/rejected": -0.2649264931678772,
"step": 140
},
{
"epoch": 1.374814814814815,
"grad_norm": 66.0,
"learning_rate": 3.2778194329621104e-05,
"log_odds_chosen": 0.6516977548599243,
"log_odds_ratio": -0.5285124778747559,
"logps/chosen": -0.8772061467170715,
"logps/rejected": -1.2873995304107666,
"loss": 28.0346,
"nll_loss": 2.029435873031616,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -0.17544123530387878,
"rewards/margins": 0.08203869313001633,
"rewards/rejected": -0.2574799358844757,
"step": 145
},
{
"epoch": 1.4222222222222223,
"grad_norm": 162.0,
"learning_rate": 3.144813424636031e-05,
"log_odds_chosen": 0.7266349792480469,
"log_odds_ratio": -0.5039714574813843,
"logps/chosen": -0.8121232986450195,
"logps/rejected": -1.273530125617981,
"loss": 25.94,
"nll_loss": 2.0966086387634277,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.16242465376853943,
"rewards/margins": 0.09228137135505676,
"rewards/rejected": -0.2547060549259186,
"step": 150
},
{
"epoch": 1.4696296296296296,
"grad_norm": 143.0,
"learning_rate": 3.0098213696293542e-05,
"log_odds_chosen": 0.650794506072998,
"log_odds_ratio": -0.5278457403182983,
"logps/chosen": -0.8365495800971985,
"logps/rejected": -1.2551826238632202,
"loss": 26.7323,
"nll_loss": 2.240175485610962,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.16730991005897522,
"rewards/margins": 0.08372663706541061,
"rewards/rejected": -0.25103655457496643,
"step": 155
},
{
"epoch": 1.5170370370370372,
"grad_norm": 77.5,
"learning_rate": 2.8732590479375165e-05,
"log_odds_chosen": 0.6227356195449829,
"log_odds_ratio": -0.5516515970230103,
"logps/chosen": -0.828398585319519,
"logps/rejected": -1.2222946882247925,
"loss": 26.4676,
"nll_loss": 2.183659076690674,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -0.1656797230243683,
"rewards/margins": 0.0787791982293129,
"rewards/rejected": -0.24445891380310059,
"step": 160
},
{
"epoch": 1.5644444444444443,
"grad_norm": 63.5,
"learning_rate": 2.7355470760292956e-05,
"log_odds_chosen": 0.7487412691116333,
"log_odds_ratio": -0.49377554655075073,
"logps/chosen": -0.7827351689338684,
"logps/rejected": -1.2390353679656982,
"loss": 25.0132,
"nll_loss": 2.1968765258789062,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.15654703974723816,
"rewards/margins": 0.09126004576683044,
"rewards/rejected": -0.2478071004152298,
"step": 165
},
{
"epoch": 1.6118518518518519,
"grad_norm": 81.0,
"learning_rate": 2.597109611334169e-05,
"log_odds_chosen": 0.6999706029891968,
"log_odds_ratio": -0.514404833316803,
"logps/chosen": -0.8419672846794128,
"logps/rejected": -1.2977701425552368,
"loss": 26.9121,
"nll_loss": 2.095829963684082,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.16839346289634705,
"rewards/margins": 0.09116056561470032,
"rewards/rejected": -0.25955405831336975,
"step": 170
},
{
"epoch": 1.6592592592592592,
"grad_norm": 60.5,
"learning_rate": 2.458373045823404e-05,
"log_odds_chosen": 0.7051068544387817,
"log_odds_ratio": -0.5076509714126587,
"logps/chosen": -0.8126438856124878,
"logps/rejected": -1.2429084777832031,
"loss": 25.972,
"nll_loss": 2.211460828781128,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -0.16252879798412323,
"rewards/margins": 0.08605290949344635,
"rewards/rejected": -0.2485816925764084,
"step": 175
},
{
"epoch": 1.7066666666666666,
"grad_norm": 68.0,
"learning_rate": 2.3197646927086697e-05,
"log_odds_chosen": 0.6526662707328796,
"log_odds_ratio": -0.5229703783988953,
"logps/chosen": -0.8144344091415405,
"logps/rejected": -1.2128846645355225,
"loss": 26.0409,
"nll_loss": 2.0978920459747314,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.16288688778877258,
"rewards/margins": 0.07969003915786743,
"rewards/rejected": -0.24257692694664001,
"step": 180
},
{
"epoch": 1.7540740740740741,
"grad_norm": 61.0,
"learning_rate": 2.1817114703032176e-05,
"log_odds_chosen": 0.5944602489471436,
"log_odds_ratio": -0.5618599057197571,
"logps/chosen": -0.8705334663391113,
"logps/rejected": -1.2557179927825928,
"loss": 27.8395,
"nll_loss": 1.9792810678482056,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.1741066873073578,
"rewards/margins": 0.07703690975904465,
"rewards/rejected": -0.25114360451698303,
"step": 185
},
{
"epoch": 1.8014814814814815,
"grad_norm": 97.0,
"learning_rate": 2.0446385870993467e-05,
"log_odds_chosen": 0.6862818002700806,
"log_odds_ratio": -0.5245167016983032,
"logps/chosen": -0.8145660161972046,
"logps/rejected": -1.232753038406372,
"loss": 26.0206,
"nll_loss": 2.093113422393799,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.16291318833827972,
"rewards/margins": 0.08363740146160126,
"rewards/rejected": -0.24655060470104218,
"step": 190
},
{
"epoch": 1.8488888888888888,
"grad_norm": 50.0,
"learning_rate": 1.9089682321121834e-05,
"log_odds_chosen": 0.7545720338821411,
"log_odds_ratio": -0.48210686445236206,
"logps/chosen": -0.8307647705078125,
"logps/rejected": -1.3077232837677002,
"loss": 26.5505,
"nll_loss": 2.2815651893615723,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.1661529690027237,
"rewards/margins": 0.09539170563220978,
"rewards/rejected": -0.26154467463493347,
"step": 195
},
{
"epoch": 1.8962962962962964,
"grad_norm": 65.0,
"learning_rate": 1.775118274523545e-05,
"log_odds_chosen": 0.6288995742797852,
"log_odds_ratio": -0.5331851840019226,
"logps/chosen": -0.8431414365768433,
"logps/rejected": -1.233559489250183,
"loss": 26.946,
"nll_loss": 2.2366058826446533,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.16862830519676208,
"rewards/margins": 0.0780835822224617,
"rewards/rejected": -0.24671189486980438,
"step": 200
},
{
"epoch": 1.9437037037037037,
"grad_norm": 56.0,
"learning_rate": 1.643500976631037e-05,
"log_odds_chosen": 0.6592320203781128,
"log_odds_ratio": -0.5180245041847229,
"logps/chosen": -0.8212572336196899,
"logps/rejected": -1.2188732624053955,
"loss": 26.2562,
"nll_loss": 2.1104683876037598,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.16425147652626038,
"rewards/margins": 0.07952319085597992,
"rewards/rejected": -0.2437746524810791,
"step": 205
},
{
"epoch": 1.991111111111111,
"grad_norm": 63.5,
"learning_rate": 1.514521724066537e-05,
"log_odds_chosen": 0.536708414554596,
"log_odds_ratio": -0.5566378235816956,
"logps/chosen": -0.8468238115310669,
"logps/rejected": -1.1587189435958862,
"loss": 27.0896,
"nll_loss": 2.173583745956421,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.16936475038528442,
"rewards/margins": 0.06237905099987984,
"rewards/rejected": -0.23174378275871277,
"step": 210
},
{
"epoch": 2.0385185185185186,
"grad_norm": 36.75,
"learning_rate": 1.3885777771950348e-05,
"log_odds_chosen": 1.0355523824691772,
"log_odds_ratio": -0.41216397285461426,
"logps/chosen": -0.6247184872627258,
"logps/rejected": -1.1848514080047607,
"loss": 19.9299,
"nll_loss": 2.0519230365753174,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.12494368851184845,
"rewards/margins": 0.11202657222747803,
"rewards/rejected": -0.23697027564048767,
"step": 215
},
{
"epoch": 2.0859259259259257,
"grad_norm": 45.75,
"learning_rate": 1.2660570475395683e-05,
"log_odds_chosen": 1.188058614730835,
"log_odds_ratio": -0.3877725303173065,
"logps/chosen": -0.6814132332801819,
"logps/rejected": -1.3385488986968994,
"loss": 21.7134,
"nll_loss": 2.186136245727539,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.13628263771533966,
"rewards/margins": 0.13142715394496918,
"rewards/rejected": -0.26770979166030884,
"step": 220
},
{
"epoch": 2.1333333333333333,
"grad_norm": 69.0,
"learning_rate": 1.1473369030008974e-05,
"log_odds_chosen": 1.3558170795440674,
"log_odds_ratio": -0.3460015654563904,
"logps/chosen": -0.6204769015312195,
"logps/rejected": -1.3810127973556519,
"loss": 19.7129,
"nll_loss": 2.1640377044677734,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": -0.1240953654050827,
"rewards/margins": 0.1521071493625641,
"rewards/rejected": -0.27620255947113037,
"step": 225
},
{
"epoch": 2.180740740740741,
"grad_norm": 60.5,
"learning_rate": 1.0327830055518842e-05,
"log_odds_chosen": 1.3206228017807007,
"log_odds_ratio": -0.3496930003166199,
"logps/chosen": -0.6283946633338928,
"logps/rejected": -1.3481276035308838,
"loss": 20.0634,
"nll_loss": 2.163454532623291,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.1256789267063141,
"rewards/margins": 0.1439466029405594,
"rewards/rejected": -0.2696255147457123,
"step": 230
},
{
"epoch": 2.228148148148148,
"grad_norm": 46.25,
"learning_rate": 9.227481849865235e-06,
"log_odds_chosen": 1.3826463222503662,
"log_odds_ratio": -0.34246888756752014,
"logps/chosen": -0.601762056350708,
"logps/rejected": -1.3406898975372314,
"loss": 19.1994,
"nll_loss": 2.1239700317382812,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.12035240978002548,
"rewards/margins": 0.14778557419776917,
"rewards/rejected": -0.26813799142837524,
"step": 235
},
{
"epoch": 2.2755555555555556,
"grad_norm": 52.25,
"learning_rate": 8.175713521924978e-06,
"log_odds_chosen": 1.3184112310409546,
"log_odds_ratio": -0.36138203740119934,
"logps/chosen": -0.626305878162384,
"logps/rejected": -1.3914432525634766,
"loss": 19.7327,
"nll_loss": 2.0872886180877686,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": -0.12526118755340576,
"rewards/margins": 0.1530275046825409,
"rewards/rejected": -0.27828869223594666,
"step": 240
},
{
"epoch": 2.322962962962963,
"grad_norm": 47.5,
"learning_rate": 7.1757645529443665e-06,
"log_odds_chosen": 1.342882513999939,
"log_odds_ratio": -0.33094173669815063,
"logps/chosen": -0.5907199382781982,
"logps/rejected": -1.3384162187576294,
"loss": 18.8515,
"nll_loss": 2.086435556411743,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.11814399063587189,
"rewards/margins": 0.1495392769575119,
"rewards/rejected": -0.2676832377910614,
"step": 245
},
{
"epoch": 2.3703703703703702,
"grad_norm": 48.0,
"learning_rate": 6.230714818829733e-06,
"log_odds_chosen": 1.4440391063690186,
"log_odds_ratio": -0.3371729254722595,
"logps/chosen": -0.5727447867393494,
"logps/rejected": -1.3450387716293335,
"loss": 17.9915,
"nll_loss": 2.1893529891967773,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.11454895883798599,
"rewards/margins": 0.15445882081985474,
"rewards/rejected": -0.26900777220726013,
"step": 250
},
{
"epoch": 2.417777777777778,
"grad_norm": 44.0,
"learning_rate": 5.343475104027743e-06,
"log_odds_chosen": 1.4869956970214844,
"log_odds_ratio": -0.33424651622772217,
"logps/chosen": -0.5934678912162781,
"logps/rejected": -1.4604480266571045,
"loss": 18.3318,
"nll_loss": 2.072566270828247,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.11869357526302338,
"rewards/margins": 0.1733960211277008,
"rewards/rejected": -0.2920895516872406,
"step": 255
},
{
"epoch": 2.4651851851851854,
"grad_norm": 82.0,
"learning_rate": 4.516778136213037e-06,
"log_odds_chosen": 1.3816086053848267,
"log_odds_ratio": -0.34434396028518677,
"logps/chosen": -0.5892956256866455,
"logps/rejected": -1.3579511642456055,
"loss": 18.3788,
"nll_loss": 2.0510034561157227,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.1178591251373291,
"rewards/margins": 0.15373112261295319,
"rewards/rejected": -0.2715902626514435,
"step": 260
},
{
"epoch": 2.5125925925925925,
"grad_norm": 54.75,
"learning_rate": 3.7531701693965554e-06,
"log_odds_chosen": 1.349381685256958,
"log_odds_ratio": -0.3549429774284363,
"logps/chosen": -0.6175572276115417,
"logps/rejected": -1.377739667892456,
"loss": 19.2041,
"nll_loss": 2.2108561992645264,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.12351144850254059,
"rewards/margins": 0.15203648805618286,
"rewards/rejected": -0.27554795145988464,
"step": 265
},
{
"epoch": 2.56,
"grad_norm": 46.0,
"learning_rate": 3.055003141378948e-06,
"log_odds_chosen": 1.4625142812728882,
"log_odds_ratio": -0.33779287338256836,
"logps/chosen": -0.6002839803695679,
"logps/rejected": -1.4510117769241333,
"loss": 18.223,
"nll_loss": 2.1512744426727295,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -0.12005682289600372,
"rewards/margins": 0.17014555633068085,
"rewards/rejected": -0.2902023494243622,
"step": 270
},
{
"epoch": 2.6074074074074076,
"grad_norm": 41.75,
"learning_rate": 2.424427429704365e-06,
"log_odds_chosen": 1.3319700956344604,
"log_odds_ratio": -0.35751980543136597,
"logps/chosen": -0.6509729623794556,
"logps/rejected": -1.444106101989746,
"loss": 19.8273,
"nll_loss": 1.982131004333496,
"rewards/accuracies": 0.878125011920929,
"rewards/chosen": -0.13019458949565887,
"rewards/margins": 0.15862663090229034,
"rewards/rejected": -0.2888212203979492,
"step": 275
},
{
"epoch": 2.6548148148148147,
"grad_norm": 50.75,
"learning_rate": 1.8633852284264508e-06,
"log_odds_chosen": 1.2478935718536377,
"log_odds_ratio": -0.3773137629032135,
"logps/chosen": -0.6254156827926636,
"logps/rejected": -1.2992069721221924,
"loss": 19.9581,
"nll_loss": 2.197223663330078,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.12508316338062286,
"rewards/margins": 0.13475826382637024,
"rewards/rejected": -0.2598413825035095,
"step": 280
},
{
"epoch": 2.7022222222222223,
"grad_norm": 47.5,
"learning_rate": 1.3736045660864034e-06,
"log_odds_chosen": 1.3165029287338257,
"log_odds_ratio": -0.3543280363082886,
"logps/chosen": -0.6258935332298279,
"logps/rejected": -1.3820149898529053,
"loss": 19.8195,
"nll_loss": 2.149914264678955,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": -0.12517870962619781,
"rewards/margins": 0.1512243151664734,
"rewards/rejected": -0.2764030396938324,
"step": 285
},
{
"epoch": 2.74962962962963,
"grad_norm": 51.25,
"learning_rate": 9.565939833279192e-07,
"log_odds_chosen": 1.294450044631958,
"log_odds_ratio": -0.36391139030456543,
"logps/chosen": -0.6437335014343262,
"logps/rejected": -1.3571479320526123,
"loss": 20.5514,
"nll_loss": 2.049088716506958,
"rewards/accuracies": 0.846875011920929,
"rewards/chosen": -0.12874671816825867,
"rewards/margins": 0.14268288016319275,
"rewards/rejected": -0.2714295983314514,
"step": 290
},
{
"epoch": 2.797037037037037,
"grad_norm": 43.75,
"learning_rate": 6.136378865420872e-07,
"log_odds_chosen": 1.2758103609085083,
"log_odds_ratio": -0.36855071783065796,
"logps/chosen": -0.6186591982841492,
"logps/rejected": -1.315861701965332,
"loss": 19.73,
"nll_loss": 2.048844814300537,
"rewards/accuracies": 0.8218749761581421,
"rewards/chosen": -0.12373185157775879,
"rewards/margins": 0.13944050669670105,
"rewards/rejected": -0.26317232847213745,
"step": 295
},
{
"epoch": 2.8444444444444446,
"grad_norm": 51.25,
"learning_rate": 3.45792591853214e-07,
"log_odds_chosen": 1.4028387069702148,
"log_odds_ratio": -0.34696242213249207,
"logps/chosen": -0.5851832032203674,
"logps/rejected": -1.3483918905258179,
"loss": 18.6233,
"nll_loss": 2.146777629852295,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": -0.11703664064407349,
"rewards/margins": 0.15264175832271576,
"rewards/rejected": -0.26967838406562805,
"step": 300
},
{
"epoch": 2.891851851851852,
"grad_norm": 108.5,
"learning_rate": 1.538830716302092e-07,
"log_odds_chosen": 1.4782116413116455,
"log_odds_ratio": -0.3342163860797882,
"logps/chosen": -0.5890188813209534,
"logps/rejected": -1.4168431758880615,
"loss": 17.9475,
"nll_loss": 2.091174840927124,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.11780376732349396,
"rewards/margins": 0.1655648797750473,
"rewards/rejected": -0.28336864709854126,
"step": 305
},
{
"epoch": 2.9392592592592592,
"grad_norm": 40.5,
"learning_rate": 3.8500413544415025e-08,
"log_odds_chosen": 1.4966692924499512,
"log_odds_ratio": -0.3115060031414032,
"logps/chosen": -0.5653601288795471,
"logps/rejected": -1.383745551109314,
"loss": 17.9926,
"nll_loss": 2.0637173652648926,
"rewards/accuracies": 0.909375011920929,
"rewards/chosen": -0.11307201534509659,
"rewards/margins": 0.1636771261692047,
"rewards/rejected": -0.2767491042613983,
"step": 310
},
{
"epoch": 2.986666666666667,
"grad_norm": 88.5,
"learning_rate": 0.0,
"log_odds_chosen": 1.2434196472167969,
"log_odds_ratio": -0.36147943139076233,
"logps/chosen": -0.605234682559967,
"logps/rejected": -1.2406527996063232,
"loss": 19.3401,
"nll_loss": 2.0648436546325684,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": -0.12104693800210953,
"rewards/margins": 0.12708361446857452,
"rewards/rejected": -0.24813053011894226,
"step": 315
},
{
"epoch": 2.986666666666667,
"step": 315,
"total_flos": 0.0,
"train_loss": 75.11768820626395,
"train_runtime": 7196.0938,
"train_samples_per_second": 2.814,
"train_steps_per_second": 0.044
}
],
"logging_steps": 5,
"max_steps": 315,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}