gemma-7b-cpo-noisy-5e-5-v4 / trainer_state.json
silviasapora's picture
Model save
9535c8f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9765925925925925,
"eval_steps": 500,
"global_step": 315,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.047407407407407405,
"grad_norm": 1313.8543701171875,
"learning_rate": 7.8125e-06,
"log_odds_chosen": 1.6327810287475586,
"log_odds_ratio": -11.146058082580566,
"logps/chosen": -21.960407257080078,
"logps/rejected": -23.59285545349121,
"loss": 881.0415,
"nll_loss": 8.637601852416992,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": -10.980203628540039,
"rewards/margins": 0.8162234425544739,
"rewards/rejected": -11.796427726745605,
"step": 5
},
{
"epoch": 0.09481481481481481,
"grad_norm": 958.0737915039062,
"learning_rate": 1.5625e-05,
"log_odds_chosen": 1.0805047750473022,
"log_odds_ratio": -8.258191108703613,
"logps/chosen": -19.551382064819336,
"logps/rejected": -20.631277084350586,
"loss": 757.7502,
"nll_loss": 7.813385009765625,
"rewards/accuracies": 0.528124988079071,
"rewards/chosen": -9.775691032409668,
"rewards/margins": 0.5399460792541504,
"rewards/rejected": -10.315638542175293,
"step": 10
},
{
"epoch": 0.14222222222222222,
"grad_norm": 1503.8668212890625,
"learning_rate": 2.34375e-05,
"log_odds_chosen": 4.2659807205200195,
"log_odds_ratio": -6.569916725158691,
"logps/chosen": -17.423053741455078,
"logps/rejected": -21.687484741210938,
"loss": 662.6386,
"nll_loss": 7.951455116271973,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -8.711526870727539,
"rewards/margins": 2.132215976715088,
"rewards/rejected": -10.843742370605469,
"step": 15
},
{
"epoch": 0.18962962962962962,
"grad_norm": 5612.318359375,
"learning_rate": 3.125e-05,
"log_odds_chosen": 0.5963099598884583,
"log_odds_ratio": -4.979976177215576,
"logps/chosen": -11.640253067016602,
"logps/rejected": -12.238527297973633,
"loss": 452.0445,
"nll_loss": 6.280893802642822,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -5.820126533508301,
"rewards/margins": 0.29913684725761414,
"rewards/rejected": -6.119263648986816,
"step": 20
},
{
"epoch": 0.23703703703703705,
"grad_norm": 448.8934326171875,
"learning_rate": 3.90625e-05,
"log_odds_chosen": 0.5738601684570312,
"log_odds_ratio": -0.9728918075561523,
"logps/chosen": -2.5826029777526855,
"logps/rejected": -3.132516860961914,
"loss": 97.8864,
"nll_loss": 3.112938404083252,
"rewards/accuracies": 0.59375,
"rewards/chosen": -1.2913014888763428,
"rewards/margins": 0.27495700120925903,
"rewards/rejected": -1.566258430480957,
"step": 25
},
{
"epoch": 0.28444444444444444,
"grad_norm": 193.44644165039062,
"learning_rate": 4.6875e-05,
"log_odds_chosen": 0.14733314514160156,
"log_odds_ratio": -0.8188334703445435,
"logps/chosen": -1.7538366317749023,
"logps/rejected": -1.880671739578247,
"loss": 68.7921,
"nll_loss": 2.9175949096679688,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": -0.8769183158874512,
"rewards/margins": 0.0634174793958664,
"rewards/rejected": -0.9403358697891235,
"step": 30
},
{
"epoch": 0.33185185185185184,
"grad_norm": 169.11569213867188,
"learning_rate": 4.998613757348784e-05,
"log_odds_chosen": 0.1972377598285675,
"log_odds_ratio": -0.7700116038322449,
"logps/chosen": -1.5413159132003784,
"logps/rejected": -1.7179752588272095,
"loss": 61.1452,
"nll_loss": 2.3813323974609375,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -0.7706579566001892,
"rewards/margins": 0.08832962810993195,
"rewards/rejected": -0.8589876294136047,
"step": 35
},
{
"epoch": 0.37925925925925924,
"grad_norm": 250.76620483398438,
"learning_rate": 4.990147841143462e-05,
"log_odds_chosen": 0.23389403522014618,
"log_odds_ratio": -0.7099635004997253,
"logps/chosen": -1.4198099374771118,
"logps/rejected": -1.616281270980835,
"loss": 56.518,
"nll_loss": 2.3243794441223145,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": -0.7099049687385559,
"rewards/margins": 0.09823578596115112,
"rewards/rejected": -0.8081406354904175,
"step": 40
},
{
"epoch": 0.4266666666666667,
"grad_norm": 279.9685363769531,
"learning_rate": 4.97401218720448e-05,
"log_odds_chosen": 0.1506054848432541,
"log_odds_ratio": -0.7535517811775208,
"logps/chosen": -1.3751205205917358,
"logps/rejected": -1.4878621101379395,
"loss": 55.5771,
"nll_loss": 2.230128049850464,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": -0.6875602602958679,
"rewards/margins": 0.056370723992586136,
"rewards/rejected": -0.7439310550689697,
"step": 45
},
{
"epoch": 0.4740740740740741,
"grad_norm": 382.7770080566406,
"learning_rate": 4.9502564938797946e-05,
"log_odds_chosen": 0.19314703345298767,
"log_odds_ratio": -0.7266248464584351,
"logps/chosen": -1.3842805624008179,
"logps/rejected": -1.5405880212783813,
"loss": 55.5381,
"nll_loss": 2.407309055328369,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.6921402812004089,
"rewards/margins": 0.07815368473529816,
"rewards/rejected": -0.7702940106391907,
"step": 50
},
{
"epoch": 0.5214814814814814,
"grad_norm": 130.2970733642578,
"learning_rate": 4.918953929490768e-05,
"log_odds_chosen": 0.1594429314136505,
"log_odds_ratio": -0.7218093276023865,
"logps/chosen": -1.2733328342437744,
"logps/rejected": -1.3996423482894897,
"loss": 51.8507,
"nll_loss": 2.147927761077881,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": -0.6366664171218872,
"rewards/margins": 0.06315477192401886,
"rewards/rejected": -0.6998211741447449,
"step": 55
},
{
"epoch": 0.5688888888888889,
"grad_norm": 134.74240112304688,
"learning_rate": 4.88020090697132e-05,
"log_odds_chosen": 0.2484438121318817,
"log_odds_ratio": -0.6789853572845459,
"logps/chosen": -1.2255313396453857,
"logps/rejected": -1.4233750104904175,
"loss": 49.8412,
"nll_loss": 2.2157835960388184,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -0.6127656698226929,
"rewards/margins": 0.09892191737890244,
"rewards/rejected": -0.7116875052452087,
"step": 60
},
{
"epoch": 0.6162962962962963,
"grad_norm": 118.31177520751953,
"learning_rate": 4.834116786912897e-05,
"log_odds_chosen": 0.2591857612133026,
"log_odds_ratio": -0.6711713075637817,
"logps/chosen": -1.2236008644104004,
"logps/rejected": -1.4167249202728271,
"loss": 49.7015,
"nll_loss": 2.0784411430358887,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.6118004322052002,
"rewards/margins": 0.0965619757771492,
"rewards/rejected": -0.7083624601364136,
"step": 65
},
{
"epoch": 0.6637037037037037,
"grad_norm": 102.92163848876953,
"learning_rate": 4.7808435099299045e-05,
"log_odds_chosen": 0.2674064040184021,
"log_odds_ratio": -0.675905168056488,
"logps/chosen": -1.2073343992233276,
"logps/rejected": -1.4260364770889282,
"loss": 49.125,
"nll_loss": 1.9744670391082764,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.6036671996116638,
"rewards/margins": 0.10935105383396149,
"rewards/rejected": -0.7130182385444641,
"step": 70
},
{
"epoch": 0.7111111111111111,
"grad_norm": 305.7223815917969,
"learning_rate": 4.720545159477922e-05,
"log_odds_chosen": 0.28773313760757446,
"log_odds_ratio": -0.6539745926856995,
"logps/chosen": -1.1219004392623901,
"logps/rejected": -1.327695608139038,
"loss": 46.259,
"nll_loss": 1.9466793537139893,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.5609502196311951,
"rewards/margins": 0.1028975397348404,
"rewards/rejected": -0.663847804069519,
"step": 75
},
{
"epoch": 0.7585185185185185,
"grad_norm": 444.95172119140625,
"learning_rate": 4.653407456471222e-05,
"log_odds_chosen": 0.2160220444202423,
"log_odds_ratio": -0.6895222663879395,
"logps/chosen": -1.1895593404769897,
"logps/rejected": -1.3464289903640747,
"loss": 48.854,
"nll_loss": 1.8920223712921143,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": -0.5947796702384949,
"rewards/margins": 0.07843481004238129,
"rewards/rejected": -0.6732144951820374,
"step": 80
},
{
"epoch": 0.8059259259259259,
"grad_norm": 134.1718292236328,
"learning_rate": 4.579637187256222e-05,
"log_odds_chosen": 0.31953853368759155,
"log_odds_ratio": -0.650363564491272,
"logps/chosen": -1.1080071926116943,
"logps/rejected": -1.349273681640625,
"loss": 45.7072,
"nll_loss": 1.8154582977294922,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.5540035963058472,
"rewards/margins": 0.12063322216272354,
"rewards/rejected": -0.6746368408203125,
"step": 85
},
{
"epoch": 0.8533333333333334,
"grad_norm": 288.4284362792969,
"learning_rate": 4.499461566702685e-05,
"log_odds_chosen": 0.21705381572246552,
"log_odds_ratio": -0.6870957016944885,
"logps/chosen": -1.1290249824523926,
"logps/rejected": -1.2838109731674194,
"loss": 46.8203,
"nll_loss": 1.9802055358886719,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": -0.5645124912261963,
"rewards/margins": 0.07739301770925522,
"rewards/rejected": -0.6419054865837097,
"step": 90
},
{
"epoch": 0.9007407407407407,
"grad_norm": 234.74221801757812,
"learning_rate": 4.413127538374411e-05,
"log_odds_chosen": 0.23379310965538025,
"log_odds_ratio": -0.6703908443450928,
"logps/chosen": -1.0899484157562256,
"logps/rejected": -1.263106346130371,
"loss": 45.3973,
"nll_loss": 1.8875010013580322,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.5449742078781128,
"rewards/margins": 0.08657898008823395,
"rewards/rejected": -0.6315531730651855,
"step": 95
},
{
"epoch": 0.9481481481481482,
"grad_norm": 112.56975555419922,
"learning_rate": 4.320901013934887e-05,
"log_odds_chosen": 0.16518335044384003,
"log_odds_ratio": -0.7109084129333496,
"logps/chosen": -1.1009365320205688,
"logps/rejected": -1.238239049911499,
"loss": 46.0418,
"nll_loss": 1.8534952402114868,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": -0.5504682660102844,
"rewards/margins": 0.06865125149488449,
"rewards/rejected": -0.6191195249557495,
"step": 100
},
{
"epoch": 0.9955555555555555,
"grad_norm": 136.2017059326172,
"learning_rate": 4.223066054130568e-05,
"log_odds_chosen": 0.23913511633872986,
"log_odds_ratio": -0.6585836410522461,
"logps/chosen": -1.0759801864624023,
"logps/rejected": -1.2537977695465088,
"loss": 44.7828,
"nll_loss": 1.795069694519043,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.5379900932312012,
"rewards/margins": 0.08890879154205322,
"rewards/rejected": -0.6268988847732544,
"step": 105
},
{
"epoch": 1.037925925925926,
"grad_norm": 67.5390625,
"learning_rate": 4.1199239938743797e-05,
"log_odds_chosen": 0.6602018475532532,
"log_odds_ratio": -0.5334572196006775,
"logps/chosen": -0.9451561570167542,
"logps/rejected": -1.404151439666748,
"loss": 35.0306,
"nll_loss": 1.7614768743515015,
"rewards/accuracies": 0.7307692170143127,
"rewards/chosen": -0.4725780785083771,
"rewards/margins": 0.22949755191802979,
"rewards/rejected": -0.702075719833374,
"step": 110
},
{
"epoch": 1.0853333333333333,
"grad_norm": 89.68997192382812,
"learning_rate": 4.0117925141242174e-05,
"log_odds_chosen": 0.8264390230178833,
"log_odds_ratio": -0.46329426765441895,
"logps/chosen": -0.9025434255599976,
"logps/rejected": -1.4511505365371704,
"loss": 37.0855,
"nll_loss": 1.7627713680267334,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.4512717127799988,
"rewards/margins": 0.27430346608161926,
"rewards/rejected": -0.7255752682685852,
"step": 115
},
{
"epoch": 1.1327407407407408,
"grad_norm": 85.83277130126953,
"learning_rate": 3.899004663415084e-05,
"log_odds_chosen": 0.888095498085022,
"log_odds_ratio": -0.45123091340065,
"logps/chosen": -0.8698997497558594,
"logps/rejected": -1.456084966659546,
"loss": 35.8665,
"nll_loss": 2.0131936073303223,
"rewards/accuracies": 0.7906249761581421,
"rewards/chosen": -0.4349498748779297,
"rewards/margins": 0.2930925786495209,
"rewards/rejected": -0.728042483329773,
"step": 120
},
{
"epoch": 1.1801481481481482,
"grad_norm": 75.02178955078125,
"learning_rate": 3.781907832058587e-05,
"log_odds_chosen": 0.8515494465827942,
"log_odds_ratio": -0.46829432249069214,
"logps/chosen": -0.8703139424324036,
"logps/rejected": -1.4456019401550293,
"loss": 36.0252,
"nll_loss": 1.8564857244491577,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.4351569712162018,
"rewards/margins": 0.28764399886131287,
"rewards/rejected": -0.7228009700775146,
"step": 125
},
{
"epoch": 1.2275555555555555,
"grad_norm": 93.19149017333984,
"learning_rate": 3.660862682169282e-05,
"log_odds_chosen": 0.7404316663742065,
"log_odds_ratio": -0.49976396560668945,
"logps/chosen": -0.8461529016494751,
"logps/rejected": -1.305490255355835,
"loss": 35.7775,
"nll_loss": 1.7167637348175049,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.42307645082473755,
"rewards/margins": 0.22966866195201874,
"rewards/rejected": -0.6527451276779175,
"step": 130
},
{
"epoch": 1.274962962962963,
"grad_norm": 79.68480682373047,
"learning_rate": 3.5362420368134356e-05,
"log_odds_chosen": 0.8560611009597778,
"log_odds_ratio": -0.4482923150062561,
"logps/chosen": -0.8573166728019714,
"logps/rejected": -1.4259978532791138,
"loss": 35.4271,
"nll_loss": 1.7739051580429077,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": -0.4286583364009857,
"rewards/margins": 0.2843405604362488,
"rewards/rejected": -0.7129989266395569,
"step": 135
},
{
"epoch": 1.3223703703703704,
"grad_norm": 97.05864715576172,
"learning_rate": 3.408429731701635e-05,
"log_odds_chosen": 0.7707556486129761,
"log_odds_ratio": -0.5003089308738708,
"logps/chosen": -0.8969907760620117,
"logps/rejected": -1.404831886291504,
"loss": 37.3019,
"nll_loss": 1.8181276321411133,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -0.44849538803100586,
"rewards/margins": 0.2539205849170685,
"rewards/rejected": -0.702415943145752,
"step": 140
},
{
"epoch": 1.3697777777777778,
"grad_norm": 94.64373016357422,
"learning_rate": 3.2778194329621104e-05,
"log_odds_chosen": 0.9273589253425598,
"log_odds_ratio": -0.4503125548362732,
"logps/chosen": -0.8693191409111023,
"logps/rejected": -1.50619375705719,
"loss": 35.7555,
"nll_loss": 1.7783292531967163,
"rewards/accuracies": 0.7906249761581421,
"rewards/chosen": -0.43465957045555115,
"rewards/margins": 0.31843727827072144,
"rewards/rejected": -0.753096878528595,
"step": 145
},
{
"epoch": 1.417185185185185,
"grad_norm": 112.79119110107422,
"learning_rate": 3.144813424636031e-05,
"log_odds_chosen": 0.733902096748352,
"log_odds_ratio": -0.49021005630493164,
"logps/chosen": -0.8194792866706848,
"logps/rejected": -1.2824211120605469,
"loss": 34.8116,
"nll_loss": 1.736271619796753,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.4097396433353424,
"rewards/margins": 0.23147086799144745,
"rewards/rejected": -0.6412105560302734,
"step": 150
},
{
"epoch": 1.4645925925925927,
"grad_norm": 120.8626480102539,
"learning_rate": 3.0098213696293542e-05,
"log_odds_chosen": 0.8592801094055176,
"log_odds_ratio": -0.46693143248558044,
"logps/chosen": -0.8628988265991211,
"logps/rejected": -1.428763508796692,
"loss": 35.8416,
"nll_loss": 1.7393659353256226,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.43144941329956055,
"rewards/margins": 0.2829323410987854,
"rewards/rejected": -0.714381754398346,
"step": 155
},
{
"epoch": 1.512,
"grad_norm": 128.55426025390625,
"learning_rate": 2.8732590479375165e-05,
"log_odds_chosen": 0.7666479349136353,
"log_odds_ratio": -0.48797711730003357,
"logps/chosen": -0.8693684339523315,
"logps/rejected": -1.35294771194458,
"loss": 36.3933,
"nll_loss": 1.7232725620269775,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.43468421697616577,
"rewards/margins": 0.2417895793914795,
"rewards/rejected": -0.67647385597229,
"step": 160
},
{
"epoch": 1.5594074074074074,
"grad_norm": 123.65845489501953,
"learning_rate": 2.7355470760292956e-05,
"log_odds_chosen": 0.9002155065536499,
"log_odds_ratio": -0.4618147909641266,
"logps/chosen": -0.85200035572052,
"logps/rejected": -1.435723066329956,
"loss": 35.4562,
"nll_loss": 1.7452001571655273,
"rewards/accuracies": 0.7906249761581421,
"rewards/chosen": -0.42600017786026,
"rewards/margins": 0.291861355304718,
"rewards/rejected": -0.717861533164978,
"step": 165
},
{
"epoch": 1.6068148148148147,
"grad_norm": 77.17584228515625,
"learning_rate": 2.597109611334169e-05,
"log_odds_chosen": 0.8772485852241516,
"log_odds_ratio": -0.4688163697719574,
"logps/chosen": -0.8404110074043274,
"logps/rejected": -1.4133893251419067,
"loss": 35.1622,
"nll_loss": 1.7097526788711548,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.4202055037021637,
"rewards/margins": 0.28648921847343445,
"rewards/rejected": -0.7066946625709534,
"step": 170
},
{
"epoch": 1.6542222222222223,
"grad_norm": 91.94951629638672,
"learning_rate": 2.458373045823404e-05,
"log_odds_chosen": 0.859915554523468,
"log_odds_ratio": -0.4548751711845398,
"logps/chosen": -0.8396957516670227,
"logps/rejected": -1.4033467769622803,
"loss": 34.974,
"nll_loss": 1.7587263584136963,
"rewards/accuracies": 0.7906249761581421,
"rewards/chosen": -0.41984787583351135,
"rewards/margins": 0.2818255126476288,
"rewards/rejected": -0.7016733884811401,
"step": 175
},
{
"epoch": 1.7016296296296296,
"grad_norm": 75.54816436767578,
"learning_rate": 2.3197646927086697e-05,
"log_odds_chosen": 0.7710874080657959,
"log_odds_ratio": -0.4820574223995209,
"logps/chosen": -0.8513079881668091,
"logps/rejected": -1.341399073600769,
"loss": 35.7398,
"nll_loss": 1.7565553188323975,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.42565399408340454,
"rewards/margins": 0.24504557251930237,
"rewards/rejected": -0.6706995368003845,
"step": 180
},
{
"epoch": 1.749037037037037,
"grad_norm": 60.633155822753906,
"learning_rate": 2.1817114703032176e-05,
"log_odds_chosen": 0.9204598665237427,
"log_odds_ratio": -0.4521242678165436,
"logps/chosen": -0.8442584276199341,
"logps/rejected": -1.4724090099334717,
"loss": 35.0333,
"nll_loss": 1.7535591125488281,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.42212921380996704,
"rewards/margins": 0.3140752613544464,
"rewards/rejected": -0.7362045049667358,
"step": 185
},
{
"epoch": 1.7964444444444445,
"grad_norm": 58.7163200378418,
"learning_rate": 2.0446385870993467e-05,
"log_odds_chosen": 0.6730726361274719,
"log_odds_ratio": -0.5413838624954224,
"logps/chosen": -0.9189823865890503,
"logps/rejected": -1.3468341827392578,
"loss": 38.5132,
"nll_loss": 1.7065455913543701,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.45949119329452515,
"rewards/margins": 0.21392583847045898,
"rewards/rejected": -0.6734170913696289,
"step": 190
},
{
"epoch": 1.8438518518518519,
"grad_norm": 60.383541107177734,
"learning_rate": 1.9089682321121834e-05,
"log_odds_chosen": 0.9475343823432922,
"log_odds_ratio": -0.4484768509864807,
"logps/chosen": -0.8385717272758484,
"logps/rejected": -1.4635182619094849,
"loss": 34.8245,
"nll_loss": 1.713822603225708,
"rewards/accuracies": 0.784375011920929,
"rewards/chosen": -0.4192858636379242,
"rewards/margins": 0.31247326731681824,
"rewards/rejected": -0.7317591309547424,
"step": 195
},
{
"epoch": 1.8912592592592592,
"grad_norm": 66.11405944824219,
"learning_rate": 1.775118274523545e-05,
"log_odds_chosen": 0.8329303860664368,
"log_odds_ratio": -0.49859505891799927,
"logps/chosen": -0.893582820892334,
"logps/rejected": -1.451395034790039,
"loss": 37.0308,
"nll_loss": 1.732862114906311,
"rewards/accuracies": 0.784375011920929,
"rewards/chosen": -0.446791410446167,
"rewards/margins": 0.27890610694885254,
"rewards/rejected": -0.7256975173950195,
"step": 200
},
{
"epoch": 1.9386666666666668,
"grad_norm": 54.821868896484375,
"learning_rate": 1.643500976631037e-05,
"log_odds_chosen": 0.6610826849937439,
"log_odds_ratio": -0.5171926617622375,
"logps/chosen": -0.8425942659378052,
"logps/rejected": -1.258576512336731,
"loss": 35.923,
"nll_loss": 1.6366369724273682,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.4212971329689026,
"rewards/margins": 0.20799115300178528,
"rewards/rejected": -0.6292882561683655,
"step": 205
},
{
"epoch": 1.986074074074074,
"grad_norm": 410.4480895996094,
"learning_rate": 1.514521724066537e-05,
"log_odds_chosen": 0.773653507232666,
"log_odds_ratio": -0.4857940673828125,
"logps/chosen": -0.8226664662361145,
"logps/rejected": -1.3210365772247314,
"loss": 34.8322,
"nll_loss": 1.650650978088379,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.41133323311805725,
"rewards/margins": 0.24918513000011444,
"rewards/rejected": -0.6605182886123657,
"step": 210
},
{
"epoch": 2.0284444444444443,
"grad_norm": 81.6287841796875,
"learning_rate": 1.3885777771950348e-05,
"log_odds_chosen": 1.237162470817566,
"log_odds_ratio": -0.37088167667388916,
"logps/chosen": -0.7396840453147888,
"logps/rejected": -1.494255542755127,
"loss": 27.6202,
"nll_loss": 1.6762900352478027,
"rewards/accuracies": 0.8531468510627747,
"rewards/chosen": -0.3698420226573944,
"rewards/margins": 0.3772856593132019,
"rewards/rejected": -0.7471277713775635,
"step": 215
},
{
"epoch": 2.075851851851852,
"grad_norm": 63.87202453613281,
"learning_rate": 1.2660570475395683e-05,
"log_odds_chosen": 1.8257486820220947,
"log_odds_ratio": -0.26709312200546265,
"logps/chosen": -0.6293801665306091,
"logps/rejected": -1.7724393606185913,
"loss": 25.9158,
"nll_loss": 1.6067278385162354,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.31469008326530457,
"rewards/margins": 0.5715296268463135,
"rewards/rejected": -0.8862196803092957,
"step": 220
},
{
"epoch": 2.1232592592592594,
"grad_norm": 63.67515563964844,
"learning_rate": 1.1473369030008974e-05,
"log_odds_chosen": 1.9475319385528564,
"log_odds_ratio": -0.22811241447925568,
"logps/chosen": -0.6124777793884277,
"logps/rejected": -1.8340566158294678,
"loss": 24.8892,
"nll_loss": 1.6936416625976562,
"rewards/accuracies": 0.953125,
"rewards/chosen": -0.30623888969421387,
"rewards/margins": 0.61078941822052,
"rewards/rejected": -0.9170283079147339,
"step": 225
},
{
"epoch": 2.1706666666666665,
"grad_norm": 73.4637222290039,
"learning_rate": 1.0327830055518842e-05,
"log_odds_chosen": 1.9068591594696045,
"log_odds_ratio": -0.23920920491218567,
"logps/chosen": -0.6302188634872437,
"logps/rejected": -1.859368085861206,
"loss": 25.4943,
"nll_loss": 1.6373430490493774,
"rewards/accuracies": 0.9468749761581421,
"rewards/chosen": -0.3151094317436218,
"rewards/margins": 0.6145747900009155,
"rewards/rejected": -0.929684042930603,
"step": 230
},
{
"epoch": 2.218074074074074,
"grad_norm": 94.13182067871094,
"learning_rate": 9.227481849865235e-06,
"log_odds_chosen": 1.905515432357788,
"log_odds_ratio": -0.2525004744529724,
"logps/chosen": -0.6512196063995361,
"logps/rejected": -1.868032455444336,
"loss": 26.2984,
"nll_loss": 1.6863908767700195,
"rewards/accuracies": 0.921875,
"rewards/chosen": -0.32560980319976807,
"rewards/margins": 0.6084063649177551,
"rewards/rejected": -0.934016227722168,
"step": 235
},
{
"epoch": 2.2654814814814817,
"grad_norm": 61.78620147705078,
"learning_rate": 8.175713521924978e-06,
"log_odds_chosen": 1.8288015127182007,
"log_odds_ratio": -0.2607673108577728,
"logps/chosen": -0.6412376165390015,
"logps/rejected": -1.772962212562561,
"loss": 26.2439,
"nll_loss": 1.679369568824768,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.32061880826950073,
"rewards/margins": 0.565862238407135,
"rewards/rejected": -0.8864811062812805,
"step": 240
},
{
"epoch": 2.3128888888888888,
"grad_norm": 72.63899993896484,
"learning_rate": 7.1757645529443665e-06,
"log_odds_chosen": 1.9564971923828125,
"log_odds_ratio": -0.222591370344162,
"logps/chosen": -0.6008509397506714,
"logps/rejected": -1.823952078819275,
"loss": 24.3832,
"nll_loss": 1.634280800819397,
"rewards/accuracies": 0.953125,
"rewards/chosen": -0.3004254698753357,
"rewards/margins": 0.6115506291389465,
"rewards/rejected": -0.9119760394096375,
"step": 245
},
{
"epoch": 2.3602962962962963,
"grad_norm": 59.229347229003906,
"learning_rate": 6.230714818829733e-06,
"log_odds_chosen": 2.0835893154144287,
"log_odds_ratio": -0.2164476215839386,
"logps/chosen": -0.6128490567207336,
"logps/rejected": -1.9331867694854736,
"loss": 24.5665,
"nll_loss": 1.6675183773040771,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.3064245283603668,
"rewards/margins": 0.6601688861846924,
"rewards/rejected": -0.9665933847427368,
"step": 250
},
{
"epoch": 2.407703703703704,
"grad_norm": 62.527137756347656,
"learning_rate": 5.343475104027743e-06,
"log_odds_chosen": 2.1743245124816895,
"log_odds_ratio": -0.21075662970542908,
"logps/chosen": -0.5725008249282837,
"logps/rejected": -1.979318380355835,
"loss": 23.177,
"nll_loss": 1.6942886114120483,
"rewards/accuracies": 0.9593750238418579,
"rewards/chosen": -0.28625041246414185,
"rewards/margins": 0.7034087777137756,
"rewards/rejected": -0.9896591901779175,
"step": 255
},
{
"epoch": 2.455111111111111,
"grad_norm": 83.86973571777344,
"learning_rate": 4.516778136213037e-06,
"log_odds_chosen": 2.1156704425811768,
"log_odds_ratio": -0.22012558579444885,
"logps/chosen": -0.6010316610336304,
"logps/rejected": -1.9568220376968384,
"loss": 24.2239,
"nll_loss": 1.6713542938232422,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.3005158305168152,
"rewards/margins": 0.677895188331604,
"rewards/rejected": -0.9784110188484192,
"step": 260
},
{
"epoch": 2.5025185185185186,
"grad_norm": 78.56597900390625,
"learning_rate": 3.7531701693965554e-06,
"log_odds_chosen": 1.9914929866790771,
"log_odds_ratio": -0.2501711845397949,
"logps/chosen": -0.6486467123031616,
"logps/rejected": -1.941457748413086,
"loss": 26.055,
"nll_loss": 1.6230090856552124,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.3243233561515808,
"rewards/margins": 0.6464055776596069,
"rewards/rejected": -0.970728874206543,
"step": 265
},
{
"epoch": 2.549925925925926,
"grad_norm": 63.49893569946289,
"learning_rate": 3.055003141378948e-06,
"log_odds_chosen": 2.094613552093506,
"log_odds_ratio": -0.2102334052324295,
"logps/chosen": -0.5994306802749634,
"logps/rejected": -1.9197509288787842,
"loss": 24.16,
"nll_loss": 1.6677443981170654,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.2997153401374817,
"rewards/margins": 0.6601601839065552,
"rewards/rejected": -0.9598754644393921,
"step": 270
},
{
"epoch": 2.5973333333333333,
"grad_norm": 79.37230682373047,
"learning_rate": 2.424427429704365e-06,
"log_odds_chosen": 2.1252670288085938,
"log_odds_ratio": -0.2258034646511078,
"logps/chosen": -0.638454794883728,
"logps/rejected": -2.0011186599731445,
"loss": 25.4827,
"nll_loss": 1.6783549785614014,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.319227397441864,
"rewards/margins": 0.6813319325447083,
"rewards/rejected": -1.0005593299865723,
"step": 275
},
{
"epoch": 2.644740740740741,
"grad_norm": 66.57933807373047,
"learning_rate": 1.8633852284264508e-06,
"log_odds_chosen": 2.1056222915649414,
"log_odds_ratio": -0.2241026908159256,
"logps/chosen": -0.5848366022109985,
"logps/rejected": -1.9145119190216064,
"loss": 23.795,
"nll_loss": 1.665006399154663,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.29241830110549927,
"rewards/margins": 0.664837658405304,
"rewards/rejected": -0.9572559595108032,
"step": 280
},
{
"epoch": 2.6921481481481484,
"grad_norm": 72.5078353881836,
"learning_rate": 1.3736045660864034e-06,
"log_odds_chosen": 2.1603641510009766,
"log_odds_ratio": -0.212470144033432,
"logps/chosen": -0.6109951138496399,
"logps/rejected": -2.0010976791381836,
"loss": 24.4558,
"nll_loss": 1.7145074605941772,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.30549755692481995,
"rewards/margins": 0.695051372051239,
"rewards/rejected": -1.0005488395690918,
"step": 285
},
{
"epoch": 2.7395555555555555,
"grad_norm": 66.63565063476562,
"learning_rate": 9.565939833279192e-07,
"log_odds_chosen": 2.2116315364837646,
"log_odds_ratio": -0.216557115316391,
"logps/chosen": -0.6101894378662109,
"logps/rejected": -2.0354068279266357,
"loss": 24.4504,
"nll_loss": 1.7398754358291626,
"rewards/accuracies": 0.9468749761581421,
"rewards/chosen": -0.30509471893310547,
"rewards/margins": 0.7126085758209229,
"rewards/rejected": -1.0177034139633179,
"step": 290
},
{
"epoch": 2.786962962962963,
"grad_norm": 72.43724060058594,
"learning_rate": 6.136378865420872e-07,
"log_odds_chosen": 2.0436155796051025,
"log_odds_ratio": -0.25939661264419556,
"logps/chosen": -0.6374012231826782,
"logps/rejected": -1.9478752613067627,
"loss": 25.9435,
"nll_loss": 1.6763683557510376,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": -0.3187006115913391,
"rewards/margins": 0.6552368998527527,
"rewards/rejected": -0.9739376306533813,
"step": 295
},
{
"epoch": 2.83437037037037,
"grad_norm": 63.837345123291016,
"learning_rate": 3.45792591853214e-07,
"log_odds_chosen": 2.2019195556640625,
"log_odds_ratio": -0.22682932019233704,
"logps/chosen": -0.6202256679534912,
"logps/rejected": -2.0658164024353027,
"loss": 24.8148,
"nll_loss": 1.7285759449005127,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": -0.3101128339767456,
"rewards/margins": 0.7227953672409058,
"rewards/rejected": -1.0329082012176514,
"step": 300
},
{
"epoch": 2.8817777777777778,
"grad_norm": 70.43053436279297,
"learning_rate": 1.538830716302092e-07,
"log_odds_chosen": 2.2171826362609863,
"log_odds_ratio": -0.2098480463027954,
"logps/chosen": -0.5961582064628601,
"logps/rejected": -2.024376392364502,
"loss": 23.9484,
"nll_loss": 1.663637399673462,
"rewards/accuracies": 0.9468749761581421,
"rewards/chosen": -0.29807910323143005,
"rewards/margins": 0.7141090631484985,
"rewards/rejected": -1.012188196182251,
"step": 305
},
{
"epoch": 2.9291851851851853,
"grad_norm": 95.00191497802734,
"learning_rate": 3.8500413544415025e-08,
"log_odds_chosen": 2.249077081680298,
"log_odds_ratio": -0.1899929940700531,
"logps/chosen": -0.6121601462364197,
"logps/rejected": -2.043703317642212,
"loss": 24.1381,
"nll_loss": 1.6788402795791626,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.30608007311820984,
"rewards/margins": 0.7157715559005737,
"rewards/rejected": -1.021851658821106,
"step": 310
},
{
"epoch": 2.9765925925925925,
"grad_norm": 54.38016891479492,
"learning_rate": 0.0,
"log_odds_chosen": 2.183474063873291,
"log_odds_ratio": -0.20934459567070007,
"logps/chosen": -0.6058934926986694,
"logps/rejected": -2.00223445892334,
"loss": 24.3016,
"nll_loss": 1.6638948917388916,
"rewards/accuracies": 0.971875011920929,
"rewards/chosen": -0.3029467463493347,
"rewards/margins": 0.69817054271698,
"rewards/rejected": -1.00111722946167,
"step": 315
},
{
"epoch": 2.9765925925925925,
"step": 315,
"total_flos": 0.0,
"train_loss": 78.5789802187965,
"train_runtime": 9244.4601,
"train_samples_per_second": 2.191,
"train_steps_per_second": 0.034
}
],
"logging_steps": 5,
"max_steps": 315,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}