suri-i-orpo / trainer_state.json
chtmp223's picture
First commit
fbcff2d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.008,
"grad_norm": 0.4211425877451838,
"learning_rate": 4.999912270696202e-05,
"log_odds_chosen": -0.0004254445375408977,
"log_odds_ratio": -0.6933605670928955,
"logits/chosen": -2.876610279083252,
"logits/chosen_prompt": -2.844738245010376,
"logits/rejected": -2.8758692741394043,
"logits/rejected_prompt": -2.8239073753356934,
"logps/chosen": -1.9094527959823608,
"logps/chosen_both": -1.9286587238311768,
"logps/chosen_prompt": -3.189321756362915,
"logps/rejected": -1.9090824127197266,
"logps/rejected_both": -1.9364073276519775,
"logps/rejected_prompt": -3.4751086235046387,
"loss": 2.325,
"nll_loss": 1.928330421447754,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.7637811899185181,
"rewards/margins": -0.00014820098294876516,
"rewards/rejected": -0.7636328935623169,
"step": 10
},
{
"epoch": 0.016,
"grad_norm": 0.19485166995413405,
"learning_rate": 4.9996490889419514e-05,
"log_odds_chosen": 0.0011974871158599854,
"log_odds_ratio": -0.6925489902496338,
"logits/chosen": -2.9591917991638184,
"logits/chosen_prompt": -2.8109309673309326,
"logits/rejected": -2.9579415321350098,
"logits/rejected_prompt": -2.789308547973633,
"logps/chosen": -2.084634304046631,
"logps/chosen_both": -2.0863680839538574,
"logps/chosen_prompt": -2.1795780658721924,
"logps/rejected": -2.0856688022613525,
"logps/rejected_both": -2.0941364765167236,
"logps/rejected_prompt": -2.347795009613037,
"loss": 2.2922,
"nll_loss": 2.08614182472229,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.8338537216186523,
"rewards/margins": 0.0004138052463531494,
"rewards/rejected": -0.8342674970626831,
"step": 20
},
{
"epoch": 0.024,
"grad_norm": 0.16144893961648712,
"learning_rate": 4.99921047320825e-05,
"log_odds_chosen": 0.003194092307239771,
"log_odds_ratio": -0.6915546655654907,
"logits/chosen": -2.9421558380126953,
"logits/chosen_prompt": -2.7285828590393066,
"logits/rejected": -2.939770221710205,
"logits/rejected_prompt": -2.70296311378479,
"logps/chosen": -2.0509393215179443,
"logps/chosen_both": -2.0457570552825928,
"logps/chosen_prompt": -1.5747671127319336,
"logps/rejected": -2.0534369945526123,
"logps/rejected_both": -2.0497002601623535,
"logps/rejected_prompt": -1.6531193256378174,
"loss": 2.2795,
"nll_loss": 2.04412841796875,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.8203758001327515,
"rewards/margins": 0.0009990095859393477,
"rewards/rejected": -0.821374773979187,
"step": 30
},
{
"epoch": 0.032,
"grad_norm": 0.16210904759452727,
"learning_rate": 4.9985964542786614e-05,
"log_odds_chosen": 0.0012136728037148714,
"log_odds_ratio": -0.6925405859947205,
"logits/chosen": -2.92653226852417,
"logits/chosen_prompt": -2.7136194705963135,
"logits/rejected": -2.925443172454834,
"logits/rejected_prompt": -2.700766086578369,
"logps/chosen": -2.0835628509521484,
"logps/chosen_both": -2.070845365524292,
"logps/chosen_prompt": -1.1743593215942383,
"logps/rejected": -2.084618330001831,
"logps/rejected_both": -2.076547384262085,
"logps/rejected_prompt": -1.2668603658676147,
"loss": 2.2852,
"nll_loss": 2.070385694503784,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.8334251642227173,
"rewards/margins": 0.00042223333730362356,
"rewards/rejected": -0.8338474035263062,
"step": 40
},
{
"epoch": 0.04,
"grad_norm": 0.1829717877342827,
"learning_rate": 4.997807075247146e-05,
"log_odds_chosen": 0.000906852656044066,
"log_odds_ratio": -0.6926941871643066,
"logits/chosen": -2.8913445472717285,
"logits/chosen_prompt": -2.6892333030700684,
"logits/rejected": -2.8896098136901855,
"logits/rejected_prompt": -2.6766159534454346,
"logps/chosen": -2.009531259536743,
"logps/chosen_both": -1.9982995986938477,
"logps/chosen_prompt": -1.053348422050476,
"logps/rejected": -2.0103189945220947,
"logps/rejected_both": -2.0013835430145264,
"logps/rejected_prompt": -1.2616751194000244,
"loss": 2.2716,
"nll_loss": 1.996681571006775,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.8038125038146973,
"rewards/margins": 0.0003150761185679585,
"rewards/rejected": -0.80412757396698,
"step": 50
},
{
"epoch": 0.048,
"grad_norm": 0.1926273569998765,
"learning_rate": 4.996842391515044e-05,
"log_odds_chosen": 0.0007017262396402657,
"log_odds_ratio": -0.6927965879440308,
"logits/chosen": -2.9328999519348145,
"logits/chosen_prompt": -2.684788227081299,
"logits/rejected": -2.93101167678833,
"logits/rejected_prompt": -2.659271240234375,
"logps/chosen": -1.9513660669326782,
"logps/chosen_both": -1.93800950050354,
"logps/chosen_prompt": -0.95411616563797,
"logps/rejected": -1.9519250392913818,
"logps/rejected_both": -1.9419523477554321,
"logps/rejected_prompt": -1.0883800983428955,
"loss": 2.2492,
"nll_loss": 1.9371274709701538,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.7805464863777161,
"rewards/margins": 0.00022354423708748072,
"rewards/rejected": -0.7807700634002686,
"step": 60
},
{
"epoch": 0.056,
"grad_norm": 0.1815660976282933,
"learning_rate": 4.9957024707871806e-05,
"log_odds_chosen": 0.0007978074136190116,
"log_odds_ratio": -0.6927486062049866,
"logits/chosen": -3.0125765800476074,
"logits/chosen_prompt": -2.6774511337280273,
"logits/rejected": -3.0124025344848633,
"logits/rejected_prompt": -2.6662356853485107,
"logps/chosen": -2.0494558811187744,
"logps/chosen_both": -2.0350148677825928,
"logps/chosen_prompt": -0.9741342663764954,
"logps/rejected": -2.050143003463745,
"logps/rejected_both": -2.042119264602661,
"logps/rejected_prompt": -1.1199967861175537,
"loss": 2.2682,
"nll_loss": 2.0335299968719482,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.8197824358940125,
"rewards/margins": 0.00027483105077408254,
"rewards/rejected": -0.820057213306427,
"step": 70
},
{
"epoch": 0.064,
"grad_norm": 0.18993029983534432,
"learning_rate": 4.994387393067117e-05,
"log_odds_chosen": 0.0014978877734392881,
"log_odds_ratio": -0.6923991441726685,
"logits/chosen": -2.9860825538635254,
"logits/chosen_prompt": -2.6699416637420654,
"logits/rejected": -2.9854748249053955,
"logits/rejected_prompt": -2.6453309059143066,
"logps/chosen": -2.025066614151001,
"logps/chosen_both": -2.0116593837738037,
"logps/chosen_prompt": -1.0876951217651367,
"logps/rejected": -2.0263991355895996,
"logps/rejected_both": -2.0129716396331787,
"logps/rejected_prompt": -1.1680071353912354,
"loss": 2.2805,
"nll_loss": 2.008460283279419,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8100266456604004,
"rewards/margins": 0.0005330622079782188,
"rewards/rejected": -0.8105596303939819,
"step": 80
},
{
"epoch": 0.072,
"grad_norm": 0.19392806669970095,
"learning_rate": 4.992897250651535e-05,
"log_odds_chosen": 0.0007344387704506516,
"log_odds_ratio": -0.6927801370620728,
"logits/chosen": -2.998304605484009,
"logits/chosen_prompt": -2.7530579566955566,
"logits/rejected": -2.9966633319854736,
"logits/rejected_prompt": -2.726839542388916,
"logps/chosen": -1.9492180347442627,
"logps/chosen_both": -1.9305731058120728,
"logps/chosen_prompt": -0.871951699256897,
"logps/rejected": -1.9498412609100342,
"logps/rejected_both": -1.9371519088745117,
"logps/rejected_prompt": -1.0174219608306885,
"loss": 2.2152,
"nll_loss": 1.929351806640625,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.7796871662139893,
"rewards/margins": 0.000249391800025478,
"rewards/rejected": -0.779936671257019,
"step": 90
},
{
"epoch": 0.08,
"grad_norm": 0.18477584362829488,
"learning_rate": 4.991232148123761e-05,
"log_odds_chosen": 0.0013153791660442948,
"log_odds_ratio": -0.6924898624420166,
"logits/chosen": -2.959036350250244,
"logits/chosen_prompt": -2.6582894325256348,
"logits/rejected": -2.959897518157959,
"logits/rejected_prompt": -2.656588077545166,
"logps/chosen": -1.980985403060913,
"logps/chosen_both": -1.965191125869751,
"logps/chosen_prompt": -0.8711269497871399,
"logps/rejected": -1.9821256399154663,
"logps/rejected_both": -1.9721254110336304,
"logps/rejected_prompt": -0.9294773936271667,
"loss": 2.2517,
"nll_loss": 1.964665412902832,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7923941612243652,
"rewards/margins": 0.00045606493949890137,
"rewards/rejected": -0.7928503155708313,
"step": 100
},
{
"epoch": 0.088,
"grad_norm": 0.19924379886100949,
"learning_rate": 4.9893922023464236e-05,
"log_odds_chosen": 0.002966083586215973,
"log_odds_ratio": -0.6916661858558655,
"logits/chosen": -3.0152981281280518,
"logits/chosen_prompt": -2.685716152191162,
"logits/rejected": -3.0145790576934814,
"logits/rejected_prompt": -2.6468653678894043,
"logps/chosen": -1.8295310735702515,
"logps/chosen_both": -1.8159011602401733,
"logps/chosen_prompt": -1.0153570175170898,
"logps/rejected": -1.8320270776748657,
"logps/rejected_both": -1.8261594772338867,
"logps/rejected_prompt": -1.1217412948608398,
"loss": 2.2814,
"nll_loss": 1.815495491027832,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7318124175071716,
"rewards/margins": 0.0009983479976654053,
"rewards/rejected": -0.7328108549118042,
"step": 110
},
{
"epoch": 0.096,
"grad_norm": 0.2009899005827714,
"learning_rate": 4.987377542453251e-05,
"log_odds_chosen": 0.0022429400123655796,
"log_odds_ratio": -0.6920267939567566,
"logits/chosen": -2.9447622299194336,
"logits/chosen_prompt": -2.632648468017578,
"logits/rejected": -2.9442973136901855,
"logits/rejected_prompt": -2.6101832389831543,
"logps/chosen": -2.0063014030456543,
"logps/chosen_both": -1.991539716720581,
"logps/chosen_prompt": -0.9827820658683777,
"logps/rejected": -2.0082459449768066,
"logps/rejected_both": -1.9994781017303467,
"logps/rejected_prompt": -1.0614566802978516,
"loss": 2.2719,
"nll_loss": 1.99040949344635,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8025206327438354,
"rewards/margins": 0.0007776618003845215,
"rewards/rejected": -0.8032983541488647,
"step": 120
},
{
"epoch": 0.104,
"grad_norm": 0.18861397575558203,
"learning_rate": 4.985188309840012e-05,
"log_odds_chosen": 0.001361916190944612,
"log_odds_ratio": -0.692466676235199,
"logits/chosen": -2.95689058303833,
"logits/chosen_prompt": -2.6187005043029785,
"logits/rejected": -2.95717191696167,
"logits/rejected_prompt": -2.592301607131958,
"logps/chosen": -2.0394482612609863,
"logps/chosen_both": -2.02314829826355,
"logps/chosen_prompt": -0.9008905291557312,
"logps/rejected": -2.040587902069092,
"logps/rejected_both": -2.0329113006591797,
"logps/rejected_prompt": -1.0704509019851685,
"loss": 2.2882,
"nll_loss": 2.023050546646118,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.8157793283462524,
"rewards/margins": 0.00045590996160171926,
"rewards/rejected": -0.8162351846694946,
"step": 130
},
{
"epoch": 0.112,
"grad_norm": 0.2030737765327122,
"learning_rate": 4.982824658154589e-05,
"log_odds_chosen": 0.0003186427056789398,
"log_odds_ratio": -0.6929879188537598,
"logits/chosen": -2.934846878051758,
"logits/chosen_prompt": -2.6593239307403564,
"logits/rejected": -2.9346649646759033,
"logits/rejected_prompt": -2.637718677520752,
"logps/chosen": -2.066263437271118,
"logps/chosen_both": -2.0494155883789062,
"logps/chosen_prompt": -0.9298864603042603,
"logps/rejected": -2.066551685333252,
"logps/rejected_both": -2.0526323318481445,
"logps/rejected_prompt": -1.0461074113845825,
"loss": 2.2784,
"nll_loss": 2.048583984375,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.8265053629875183,
"rewards/margins": 0.0001151919350377284,
"rewards/rejected": -0.8266205787658691,
"step": 140
},
{
"epoch": 0.12,
"grad_norm": 0.17845448491542337,
"learning_rate": 4.980286753286195e-05,
"log_odds_chosen": 0.0020511746406555176,
"log_odds_ratio": -0.6921236515045166,
"logits/chosen": -2.9423627853393555,
"logits/chosen_prompt": -2.6544814109802246,
"logits/rejected": -2.9413440227508545,
"logits/rejected_prompt": -2.6495890617370605,
"logps/chosen": -2.0567996501922607,
"logps/chosen_both": -2.0376124382019043,
"logps/chosen_prompt": -0.8456690907478333,
"logps/rejected": -2.058603525161743,
"logps/rejected_both": -2.0455679893493652,
"logps/rejected_prompt": -1.0780448913574219,
"loss": 2.2474,
"nll_loss": 2.036198616027832,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.822719931602478,
"rewards/margins": 0.0007214724901132286,
"rewards/rejected": -0.8234413862228394,
"step": 150
},
{
"epoch": 0.128,
"grad_norm": 0.18228364635340788,
"learning_rate": 4.977574773353732e-05,
"log_odds_chosen": 0.0005785167450085282,
"log_odds_ratio": -0.6928580403327942,
"logits/chosen": -2.906240940093994,
"logits/chosen_prompt": -2.656862735748291,
"logits/rejected": -2.906233072280884,
"logits/rejected_prompt": -2.658569812774658,
"logps/chosen": -1.8988163471221924,
"logps/chosen_both": -1.8861125707626343,
"logps/chosen_prompt": -0.9287108182907104,
"logps/rejected": -1.8993009328842163,
"logps/rejected_both": -1.890856146812439,
"logps/rejected_prompt": -1.113793134689331,
"loss": 2.2658,
"nll_loss": 1.8859831094741821,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.7595265507698059,
"rewards/margins": 0.00019387007341720164,
"rewards/rejected": -0.7597203850746155,
"step": 160
},
{
"epoch": 0.136,
"grad_norm": 0.21059375256598528,
"learning_rate": 4.9746889086932895e-05,
"log_odds_chosen": 0.0012606128584593534,
"log_odds_ratio": -0.6925175786018372,
"logits/chosen": -2.9255146980285645,
"logits/chosen_prompt": -2.681833505630493,
"logits/rejected": -2.9241907596588135,
"logits/rejected_prompt": -2.6375930309295654,
"logps/chosen": -2.018401861190796,
"logps/chosen_both": -2.0020346641540527,
"logps/chosen_prompt": -0.8163633346557617,
"logps/rejected": -2.0194990634918213,
"logps/rejected_both": -2.0088753700256348,
"logps/rejected_prompt": -1.024702787399292,
"loss": 2.2545,
"nll_loss": 2.0013086795806885,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.8073607683181763,
"rewards/margins": 0.00043891073437407613,
"rewards/rejected": -0.8077996373176575,
"step": 170
},
{
"epoch": 0.144,
"grad_norm": 0.2593749816883702,
"learning_rate": 4.971629361844785e-05,
"log_odds_chosen": 0.000588211405556649,
"log_odds_ratio": -0.6928532123565674,
"logits/chosen": -2.9365015029907227,
"logits/chosen_prompt": -2.6852712631225586,
"logits/rejected": -2.9362454414367676,
"logits/rejected_prompt": -2.6527528762817383,
"logps/chosen": -2.049866199493408,
"logps/chosen_both": -2.03619122505188,
"logps/chosen_prompt": -0.8910077214241028,
"logps/rejected": -2.050372838973999,
"logps/rejected_both": -2.0393173694610596,
"logps/rejected_prompt": -1.0920004844665527,
"loss": 2.2312,
"nll_loss": 2.0342373847961426,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.8199464678764343,
"rewards/margins": 0.00020260215387679636,
"rewards/rejected": -0.8201491236686707,
"step": 180
},
{
"epoch": 0.152,
"grad_norm": 0.21239961737940086,
"learning_rate": 4.968396347537751e-05,
"log_odds_chosen": 0.0017036155331879854,
"log_odds_ratio": -0.6922971606254578,
"logits/chosen": -2.9285712242126465,
"logits/chosen_prompt": -2.637676477432251,
"logits/rejected": -2.9268641471862793,
"logits/rejected_prompt": -2.601259231567383,
"logps/chosen": -2.019813060760498,
"logps/chosen_both": -2.003007173538208,
"logps/chosen_prompt": -0.9411777257919312,
"logps/rejected": -2.0213375091552734,
"logps/rejected_both": -2.013278007507324,
"logps/rejected_prompt": -1.0966544151306152,
"loss": 2.2257,
"nll_loss": 2.003007173538208,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.8079251050949097,
"rewards/margins": 0.0006098627927713096,
"rewards/rejected": -0.8085349798202515,
"step": 190
},
{
"epoch": 0.16,
"grad_norm": 0.2296631901191577,
"learning_rate": 4.964990092676263e-05,
"log_odds_chosen": 0.002268400741741061,
"log_odds_ratio": -0.6920153498649597,
"logits/chosen": -2.9518988132476807,
"logits/chosen_prompt": -2.6878037452697754,
"logits/rejected": -2.9512124061584473,
"logits/rejected_prompt": -2.6565701961517334,
"logps/chosen": -1.69021475315094,
"logps/chosen_both": -1.6815983057022095,
"logps/chosen_prompt": -0.8377019762992859,
"logps/rejected": -1.6910902261734009,
"logps/rejected_both": -1.686661958694458,
"logps/rejected_prompt": -0.9836887121200562,
"loss": 2.2189,
"nll_loss": 1.6812556982040405,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.6760859489440918,
"rewards/margins": 0.0003500869497656822,
"rewards/rejected": -0.6764360666275024,
"step": 200
},
{
"epoch": 0.168,
"grad_norm": 0.20512599393851222,
"learning_rate": 4.9614108363230135e-05,
"log_odds_chosen": 0.0021390921901911497,
"log_odds_ratio": -0.6920791268348694,
"logits/chosen": -2.9732565879821777,
"logits/chosen_prompt": -2.6687545776367188,
"logits/rejected": -2.9718270301818848,
"logits/rejected_prompt": -2.6496801376342773,
"logps/chosen": -2.0387587547302246,
"logps/chosen_both": -2.017876148223877,
"logps/chosen_prompt": -0.897871196269989,
"logps/rejected": -2.040605068206787,
"logps/rejected_both": -2.0265369415283203,
"logps/rejected_prompt": -1.0972706079483032,
"loss": 2.2179,
"nll_loss": 2.0162312984466553,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.8155035972595215,
"rewards/margins": 0.000738424074370414,
"rewards/rejected": -0.8162419199943542,
"step": 210
},
{
"epoch": 0.176,
"grad_norm": 0.2127533742878833,
"learning_rate": 4.9576588296825386e-05,
"log_odds_chosen": 0.0020120560657233,
"log_odds_ratio": -0.6921423077583313,
"logits/chosen": -2.8992626667022705,
"logits/chosen_prompt": -2.7236571311950684,
"logits/rejected": -2.8986992835998535,
"logits/rejected_prompt": -2.676098346710205,
"logps/chosen": -2.0563912391662598,
"logps/chosen_both": -2.036818027496338,
"logps/chosen_prompt": -0.9310529828071594,
"logps/rejected": -2.058103322982788,
"logps/rejected_both": -2.0425784587860107,
"logps/rejected_prompt": -1.0257813930511475,
"loss": 2.273,
"nll_loss": 2.03584885597229,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8225564956665039,
"rewards/margins": 0.0006849050405435264,
"rewards/rejected": -0.8232414126396179,
"step": 220
},
{
"epoch": 0.184,
"grad_norm": 0.17056867832509964,
"learning_rate": 4.953734336083583e-05,
"log_odds_chosen": 0.0011583305895328522,
"log_odds_ratio": -0.6925683617591858,
"logits/chosen": -3.0050501823425293,
"logits/chosen_prompt": -2.7037124633789062,
"logits/rejected": -3.0038866996765137,
"logits/rejected_prompt": -2.6890504360198975,
"logps/chosen": -2.0860724449157715,
"logps/chosen_both": -2.067084550857544,
"logps/chosen_prompt": -0.8457021713256836,
"logps/rejected": -2.087078332901001,
"logps/rejected_both": -2.0733180046081543,
"logps/rejected_prompt": -1.0261476039886475,
"loss": 2.2779,
"nll_loss": 2.065519094467163,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8344290852546692,
"rewards/margins": 0.00040218234062194824,
"rewards/rejected": -0.8348312377929688,
"step": 230
},
{
"epoch": 0.192,
"grad_norm": 0.2058632754394824,
"learning_rate": 4.949637630960617e-05,
"log_odds_chosen": 0.0013900771737098694,
"log_odds_ratio": -0.6924527883529663,
"logits/chosen": -2.966139316558838,
"logits/chosen_prompt": -2.7504935264587402,
"logits/rejected": -2.965026378631592,
"logits/rejected_prompt": -2.7268807888031006,
"logps/chosen": -1.945728063583374,
"logps/chosen_both": -1.9301140308380127,
"logps/chosen_prompt": -0.9403144717216492,
"logps/rejected": -1.946915626525879,
"logps/rejected_both": -1.936022162437439,
"logps/rejected_prompt": -1.0291379690170288,
"loss": 2.2775,
"nll_loss": 1.9295330047607422,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.7782912254333496,
"rewards/margins": 0.000475037086289376,
"rewards/rejected": -0.7787662744522095,
"step": 240
},
{
"epoch": 0.2,
"grad_norm": 0.18030355585658703,
"learning_rate": 4.9453690018345144e-05,
"log_odds_chosen": 0.0017323314677923918,
"log_odds_ratio": -0.6922817826271057,
"logits/chosen": -2.9892709255218506,
"logits/chosen_prompt": -2.7419209480285645,
"logits/rejected": -2.9878451824188232,
"logits/rejected_prompt": -2.706714391708374,
"logps/chosen": -2.0075595378875732,
"logps/chosen_both": -1.9899797439575195,
"logps/chosen_prompt": -0.8903474807739258,
"logps/rejected": -2.0090558528900146,
"logps/rejected_both": -1.998038649559021,
"logps/rejected_prompt": -1.0070338249206543,
"loss": 2.2079,
"nll_loss": 1.9889189004898071,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.8030239343643188,
"rewards/margins": 0.0005984127637930214,
"rewards/rejected": -0.8036222457885742,
"step": 250
},
{
"epoch": 0.208,
"grad_norm": 0.18677326033959232,
"learning_rate": 4.940928748292363e-05,
"log_odds_chosen": 0.0003323271812405437,
"log_odds_ratio": -0.6929812431335449,
"logits/chosen": -2.8448781967163086,
"logits/chosen_prompt": -2.6570119857788086,
"logits/rejected": -2.844160795211792,
"logits/rejected_prompt": -2.6436538696289062,
"logps/chosen": -2.090553045272827,
"logps/chosen_both": -2.077347993850708,
"logps/chosen_prompt": -0.8073711395263672,
"logps/rejected": -2.090845823287964,
"logps/rejected_both": -2.077338695526123,
"logps/rejected_prompt": -0.9910534024238586,
"loss": 2.2579,
"nll_loss": 2.0748660564422607,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.8362210988998413,
"rewards/margins": 0.00011717081360984594,
"rewards/rejected": -0.8363384008407593,
"step": 260
},
{
"epoch": 0.216,
"grad_norm": 0.19524819903076443,
"learning_rate": 4.9363171819664434e-05,
"log_odds_chosen": 0.001574930502101779,
"log_odds_ratio": -0.6923605799674988,
"logits/chosen": -2.9072844982147217,
"logits/chosen_prompt": -2.6988863945007324,
"logits/rejected": -2.9070873260498047,
"logits/rejected_prompt": -2.6662864685058594,
"logps/chosen": -1.8586593866348267,
"logps/chosen_both": -1.847161889076233,
"logps/chosen_prompt": -0.8614280819892883,
"logps/rejected": -1.8599656820297241,
"logps/rejected_both": -1.8520950078964233,
"logps/rejected_prompt": -1.0004897117614746,
"loss": 2.2122,
"nll_loss": 1.8460156917572021,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.7434637546539307,
"rewards/margins": 0.0005225300556048751,
"rewards/rejected": -0.7439862489700317,
"step": 270
},
{
"epoch": 0.224,
"grad_norm": 0.17891708421025293,
"learning_rate": 4.9315346265123594e-05,
"log_odds_chosen": 0.0014710575342178345,
"log_odds_ratio": -0.6924123764038086,
"logits/chosen": -2.893035888671875,
"logits/chosen_prompt": -2.6818959712982178,
"logits/rejected": -2.8925375938415527,
"logits/rejected_prompt": -2.6510303020477295,
"logps/chosen": -1.959538221359253,
"logps/chosen_both": -1.9448583126068115,
"logps/chosen_prompt": -0.8354212641716003,
"logps/rejected": -1.9608103036880493,
"logps/rejected_both": -1.9502098560333252,
"logps/rejected_prompt": -0.9869192838668823,
"loss": 2.2903,
"nll_loss": 1.94313645362854,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7838152647018433,
"rewards/margins": 0.0005089103942736983,
"rewards/rejected": -0.7843241691589355,
"step": 280
},
{
"epoch": 0.232,
"grad_norm": 0.21094148380709188,
"learning_rate": 4.9265814175863186e-05,
"log_odds_chosen": 0.0009952529799193144,
"log_odds_ratio": -0.6926498413085938,
"logits/chosen": -2.9005274772644043,
"logits/chosen_prompt": -2.71238374710083,
"logits/rejected": -2.8991751670837402,
"logits/rejected_prompt": -2.6699583530426025,
"logps/chosen": -2.1492276191711426,
"logps/chosen_both": -2.1339974403381348,
"logps/chosen_prompt": -0.9373821020126343,
"logps/rejected": -2.1501176357269287,
"logps/rejected_both": -2.1395199298858643,
"logps/rejected_prompt": -1.100056529045105,
"loss": 2.2923,
"nll_loss": 2.1338019371032715,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.8596910238265991,
"rewards/margins": 0.00035610198392532766,
"rewards/rejected": -0.8600472211837769,
"step": 290
},
{
"epoch": 0.24,
"grad_norm": 0.19361551914630554,
"learning_rate": 4.9214579028215776e-05,
"log_odds_chosen": 0.0016762830782681704,
"log_odds_ratio": -0.6923099160194397,
"logits/chosen": -2.9360158443450928,
"logits/chosen_prompt": -2.7480220794677734,
"logits/rejected": -2.9349968433380127,
"logits/rejected_prompt": -2.733687400817871,
"logps/chosen": -1.8898597955703735,
"logps/chosen_both": -1.874415636062622,
"logps/chosen_prompt": -0.8352281451225281,
"logps/rejected": -1.8912776708602905,
"logps/rejected_both": -1.8779733180999756,
"logps/rejected_prompt": -0.9313365817070007,
"loss": 2.2525,
"nll_loss": 1.8733183145523071,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.7559438943862915,
"rewards/margins": 0.0005672037368640304,
"rewards/rejected": -0.7565110921859741,
"step": 300
},
{
"epoch": 0.248,
"grad_norm": 0.19645084360565487,
"learning_rate": 4.916164441804044e-05,
"log_odds_chosen": 0.0019232749473303556,
"log_odds_ratio": -0.692186176776886,
"logits/chosen": -2.9699971675872803,
"logits/chosen_prompt": -2.7393062114715576,
"logits/rejected": -2.9690558910369873,
"logits/rejected_prompt": -2.7017319202423096,
"logps/chosen": -1.9972589015960693,
"logps/chosen_both": -1.981871247291565,
"logps/chosen_prompt": -0.8229547739028931,
"logps/rejected": -1.9988943338394165,
"logps/rejected_both": -1.9911056756973267,
"logps/rejected_prompt": -0.9741779565811157,
"loss": 2.2527,
"nll_loss": 1.981127381324768,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7989035844802856,
"rewards/margins": 0.000654196715913713,
"rewards/rejected": -0.7995578050613403,
"step": 310
},
{
"epoch": 0.256,
"grad_norm": 0.17696578224649318,
"learning_rate": 4.910701406047037e-05,
"log_odds_chosen": 0.0012397505342960358,
"log_odds_ratio": -0.6925276517868042,
"logits/chosen": -2.9160306453704834,
"logits/chosen_prompt": -2.7327325344085693,
"logits/rejected": -2.915261745452881,
"logits/rejected_prompt": -2.701322078704834,
"logps/chosen": -1.9081172943115234,
"logps/chosen_both": -1.892844557762146,
"logps/chosen_prompt": -0.8174566030502319,
"logps/rejected": -1.9091819524765015,
"logps/rejected_both": -1.9010097980499268,
"logps/rejected_prompt": -1.0786253213882446,
"loss": 2.2602,
"nll_loss": 1.8927319049835205,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7632468938827515,
"rewards/margins": 0.00042594075785018504,
"rewards/rejected": -0.7636728286743164,
"step": 320
},
{
"epoch": 0.264,
"grad_norm": 0.17292787330822676,
"learning_rate": 4.905069178965215e-05,
"log_odds_chosen": 0.0019163743127137423,
"log_odds_ratio": -0.692189633846283,
"logits/chosen": -2.9151923656463623,
"logits/chosen_prompt": -2.7165563106536865,
"logits/rejected": -2.914482593536377,
"logits/rejected_prompt": -2.6829206943511963,
"logps/chosen": -1.8700447082519531,
"logps/chosen_both": -1.8556480407714844,
"logps/chosen_prompt": -0.8194649815559387,
"logps/rejected": -1.8716179132461548,
"logps/rejected_both": -1.864458680152893,
"logps/rejected_prompt": -1.1078553199768066,
"loss": 2.1808,
"nll_loss": 1.8551757335662842,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7480179071426392,
"rewards/margins": 0.0006292253965511918,
"rewards/rejected": -0.7486470937728882,
"step": 330
},
{
"epoch": 0.272,
"grad_norm": 0.19147435771992855,
"learning_rate": 4.899268155847667e-05,
"log_odds_chosen": 0.002677363809198141,
"log_odds_ratio": -0.6918100118637085,
"logits/chosen": -3.017524242401123,
"logits/chosen_prompt": -2.756082534790039,
"logits/rejected": -3.016745090484619,
"logits/rejected_prompt": -2.7283802032470703,
"logps/chosen": -1.8907134532928467,
"logps/chosen_both": -1.8744417428970337,
"logps/chosen_prompt": -0.8424029350280762,
"logps/rejected": -1.8929758071899414,
"logps/rejected_both": -1.8843475580215454,
"logps/rejected_prompt": -1.0425379276275635,
"loss": 2.225,
"nll_loss": 1.8739697933197021,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7562853693962097,
"rewards/margins": 0.000904941582120955,
"rewards/rejected": -0.7571902275085449,
"step": 340
},
{
"epoch": 0.28,
"grad_norm": 0.19050905162566348,
"learning_rate": 4.893298743830168e-05,
"log_odds_chosen": 0.0014245070051401854,
"log_odds_ratio": -0.6924355626106262,
"logits/chosen": -2.881587028503418,
"logits/chosen_prompt": -2.7358975410461426,
"logits/rejected": -2.8803658485412598,
"logits/rejected_prompt": -2.693080186843872,
"logps/chosen": -2.135007381439209,
"logps/chosen_both": -2.115304470062256,
"logps/chosen_prompt": -0.8588684797286987,
"logps/rejected": -2.1362690925598145,
"logps/rejected_both": -2.1251254081726074,
"logps/rejected_prompt": -1.0595465898513794,
"loss": 2.3085,
"nll_loss": 2.1149659156799316,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8540030717849731,
"rewards/margins": 0.0005046069854870439,
"rewards/rejected": -0.8545076251029968,
"step": 350
},
{
"epoch": 0.288,
"grad_norm": 0.2409286506380079,
"learning_rate": 4.887161361866608e-05,
"log_odds_chosen": 0.0026388473343104124,
"log_odds_ratio": -0.6918294429779053,
"logits/chosen": -2.983471632003784,
"logits/chosen_prompt": -2.755098819732666,
"logits/rejected": -2.982506513595581,
"logits/rejected_prompt": -2.7400355339050293,
"logps/chosen": -1.9234100580215454,
"logps/chosen_both": -1.904706597328186,
"logps/chosen_prompt": -0.8400828242301941,
"logps/rejected": -1.9256340265274048,
"logps/rejected_both": -1.912940263748169,
"logps/rejected_prompt": -0.9321552515029907,
"loss": 2.2324,
"nll_loss": 1.9039466381072998,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.769364058971405,
"rewards/margins": 0.0008895128848962486,
"rewards/rejected": -0.7702535390853882,
"step": 360
},
{
"epoch": 0.296,
"grad_norm": 0.16083280812237927,
"learning_rate": 4.880856440699582e-05,
"log_odds_chosen": 0.0021248466800898314,
"log_odds_ratio": -0.6920855641365051,
"logits/chosen": -2.9351096153259277,
"logits/chosen_prompt": -2.723745107650757,
"logits/rejected": -2.93329119682312,
"logits/rejected_prompt": -2.689175844192505,
"logps/chosen": -2.005812644958496,
"logps/chosen_both": -1.9874347448349,
"logps/chosen_prompt": -0.8169828653335571,
"logps/rejected": -2.0076451301574707,
"logps/rejected_both": -1.9974247217178345,
"logps/rejected_prompt": -0.9817326664924622,
"loss": 2.2565,
"nll_loss": 1.9868465662002563,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8023250699043274,
"rewards/margins": 0.0007329642539843917,
"rewards/rejected": -0.8030580282211304,
"step": 370
},
{
"epoch": 0.304,
"grad_norm": 0.22470013003589273,
"learning_rate": 4.874384422830167e-05,
"log_odds_chosen": 0.0011979244882240891,
"log_odds_ratio": -0.6925488710403442,
"logits/chosen": -2.9063477516174316,
"logits/chosen_prompt": -2.607713222503662,
"logits/rejected": -2.905827760696411,
"logits/rejected_prompt": -2.5853092670440674,
"logps/chosen": -1.9979126453399658,
"logps/chosen_both": -1.982242226600647,
"logps/chosen_prompt": -0.8234804272651672,
"logps/rejected": -1.9988930225372314,
"logps/rejected_both": -1.9891548156738281,
"logps/rejected_prompt": -0.9966527223587036,
"loss": 2.266,
"nll_loss": 1.9814211130142212,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.7991650104522705,
"rewards/margins": 0.0003922194300685078,
"rewards/rejected": -0.7995571494102478,
"step": 380
},
{
"epoch": 0.312,
"grad_norm": 0.16501384834156196,
"learning_rate": 4.867745762486861e-05,
"log_odds_chosen": 0.0010735094547271729,
"log_odds_ratio": -0.6926108598709106,
"logits/chosen": -2.9659483432769775,
"logits/chosen_prompt": -2.684511661529541,
"logits/rejected": -2.9646358489990234,
"logits/rejected_prompt": -2.6466262340545654,
"logps/chosen": -1.8777449131011963,
"logps/chosen_both": -1.8621854782104492,
"logps/chosen_prompt": -0.8326584100723267,
"logps/rejected": -1.8786296844482422,
"logps/rejected_both": -1.8694502115249634,
"logps/rejected_prompt": -1.119554042816162,
"loss": 2.2551,
"nll_loss": 1.8609716892242432,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.7510979771614075,
"rewards/margins": 0.0003538370074238628,
"rewards/rejected": -0.751451849937439,
"step": 390
},
{
"epoch": 0.32,
"grad_norm": 0.18496197696993874,
"learning_rate": 4.860940925593703e-05,
"log_odds_chosen": 0.0022099569905549288,
"log_odds_ratio": -0.6920434832572937,
"logits/chosen": -2.8903660774230957,
"logits/chosen_prompt": -2.6781816482543945,
"logits/rejected": -2.890045166015625,
"logits/rejected_prompt": -2.6534364223480225,
"logps/chosen": -1.969386339187622,
"logps/chosen_both": -1.954185128211975,
"logps/chosen_prompt": -0.7636314630508423,
"logps/rejected": -1.9712820053100586,
"logps/rejected_both": -1.9598472118377686,
"logps/rejected_prompt": -0.9155877232551575,
"loss": 2.243,
"nll_loss": 1.9532957077026367,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7877545356750488,
"rewards/margins": 0.0007582366233691573,
"rewards/rejected": -0.7885128259658813,
"step": 400
},
{
"epoch": 0.328,
"grad_norm": 0.22859080108494093,
"learning_rate": 4.8539703897375755e-05,
"log_odds_chosen": 0.004624041263014078,
"log_odds_ratio": -0.690842866897583,
"logits/chosen": -2.9258294105529785,
"logits/chosen_prompt": -2.6813464164733887,
"logits/rejected": -2.9250378608703613,
"logits/rejected_prompt": -2.6571106910705566,
"logps/chosen": -2.0521552562713623,
"logps/chosen_both": -2.034921646118164,
"logps/chosen_prompt": -0.8797234296798706,
"logps/rejected": -2.056114673614502,
"logps/rejected_both": -2.044158935546875,
"logps/rejected_prompt": -0.9540025591850281,
"loss": 2.2663,
"nll_loss": 2.0334911346435547,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.8208619952201843,
"rewards/margins": 0.0015838384861126542,
"rewards/rejected": -0.8224459886550903,
"step": 410
},
{
"epoch": 0.336,
"grad_norm": 0.2070567132691218,
"learning_rate": 4.846834644134686e-05,
"log_odds_chosen": 0.001986052840948105,
"log_odds_ratio": -0.6921548843383789,
"logits/chosen": -2.9888834953308105,
"logits/chosen_prompt": -2.6887311935424805,
"logits/rejected": -2.989170789718628,
"logits/rejected_prompt": -2.694418430328369,
"logps/chosen": -1.9955952167510986,
"logps/chosen_both": -1.9792373180389404,
"logps/chosen_prompt": -0.8381233215332031,
"logps/rejected": -1.997323751449585,
"logps/rejected_both": -1.9859631061553955,
"logps/rejected_prompt": -0.9913262128829956,
"loss": 2.2321,
"nll_loss": 1.9785674810409546,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7982381582260132,
"rewards/margins": 0.0006913721445016563,
"rewards/rejected": -0.7989295721054077,
"step": 420
},
{
"epoch": 0.344,
"grad_norm": 0.19605531509972363,
"learning_rate": 4.839534189596228e-05,
"log_odds_chosen": 0.0027246386744081974,
"log_odds_ratio": -0.6917861104011536,
"logits/chosen": -2.912360429763794,
"logits/chosen_prompt": -2.653672218322754,
"logits/rejected": -2.910978317260742,
"logits/rejected_prompt": -2.627488613128662,
"logps/chosen": -2.060957908630371,
"logps/chosen_both": -2.043726921081543,
"logps/chosen_prompt": -0.7695341110229492,
"logps/rejected": -2.0633223056793213,
"logps/rejected_both": -2.051257371902466,
"logps/rejected_prompt": -1.0156570672988892,
"loss": 2.2675,
"nll_loss": 2.042490005493164,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.8243831396102905,
"rewards/margins": 0.0009458243730477989,
"rewards/rejected": -0.8253289461135864,
"step": 430
},
{
"epoch": 0.352,
"grad_norm": 0.44833865318290117,
"learning_rate": 4.832069538493237e-05,
"log_odds_chosen": 0.04500371962785721,
"log_odds_ratio": -0.6715863943099976,
"logits/chosen": -2.9302279949188232,
"logits/chosen_prompt": -2.6701042652130127,
"logits/rejected": -2.9281227588653564,
"logits/rejected_prompt": -2.666865587234497,
"logps/chosen": -1.9099162817001343,
"logps/chosen_both": -1.898306131362915,
"logps/chosen_prompt": -0.702593982219696,
"logps/rejected": -1.948999047279358,
"logps/rejected_both": -1.9378995895385742,
"logps/rejected_prompt": -0.971504807472229,
"loss": 2.2392,
"nll_loss": 1.895094633102417,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.76396644115448,
"rewards/margins": 0.015633201226592064,
"rewards/rejected": -0.7795997262001038,
"step": 440
},
{
"epoch": 0.36,
"grad_norm": 0.21226948111262156,
"learning_rate": 4.8244412147206284e-05,
"log_odds_chosen": 2.9653515815734863,
"log_odds_ratio": -0.40015140175819397,
"logits/chosen": -2.9068620204925537,
"logits/chosen_prompt": -2.6536412239074707,
"logits/rejected": -2.1202731132507324,
"logits/rejected_prompt": -2.6555583477020264,
"logps/chosen": -2.0414326190948486,
"logps/chosen_both": -2.0248727798461914,
"logps/chosen_prompt": -0.8300280570983887,
"logps/rejected": -4.945545196533203,
"logps/rejected_both": -4.884528160095215,
"logps/rejected_prompt": -0.9442939758300781,
"loss": 2.1853,
"nll_loss": 2.0240979194641113,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.8165730237960815,
"rewards/margins": 1.1616451740264893,
"rewards/rejected": -1.9782178401947021,
"step": 450
},
{
"epoch": 0.368,
"grad_norm": 0.1736633646525648,
"learning_rate": 4.81664975366043e-05,
"log_odds_chosen": 7.59240198135376,
"log_odds_ratio": -0.1370885670185089,
"logits/chosen": -2.9020304679870605,
"logits/chosen_prompt": -2.6753904819488525,
"logits/rejected": -0.7233905792236328,
"logits/rejected_prompt": -2.637943983078003,
"logps/chosen": -1.8611255884170532,
"logps/chosen_both": -1.8469617366790771,
"logps/chosen_prompt": -0.8501307368278503,
"logps/rejected": -9.215188026428223,
"logps/rejected_both": -9.101489067077637,
"logps/rejected_prompt": -1.2299854755401611,
"loss": 2.0244,
"nll_loss": 1.8459827899932861,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7444502115249634,
"rewards/margins": 2.9416251182556152,
"rewards/rejected": -3.686075210571289,
"step": 460
},
{
"epoch": 0.376,
"grad_norm": 0.17769599500435684,
"learning_rate": 4.808695702144206e-05,
"log_odds_chosen": 5.727511882781982,
"log_odds_ratio": -0.2772656977176666,
"logits/chosen": -2.879725694656372,
"logits/chosen_prompt": -2.642578125,
"logits/rejected": -1.0399138927459717,
"logits/rejected_prompt": -2.6099534034729004,
"logps/chosen": -2.0047779083251953,
"logps/chosen_both": -1.9910427331924438,
"logps/chosen_prompt": -0.8587312698364258,
"logps/rejected": -7.64484167098999,
"logps/rejected_both": -7.5631890296936035,
"logps/rejected_prompt": -1.0231356620788574,
"loss": 2.0507,
"nll_loss": 1.990276575088501,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.8019111752510071,
"rewards/margins": 2.2560253143310547,
"rewards/rejected": -3.057936429977417,
"step": 470
},
{
"epoch": 0.384,
"grad_norm": 0.18584682979949957,
"learning_rate": 4.800579618414676e-05,
"log_odds_chosen": 4.071249961853027,
"log_odds_ratio": -0.34571754932403564,
"logits/chosen": -2.903729200363159,
"logits/chosen_prompt": -2.7958900928497314,
"logits/rejected": -3.239121198654175,
"logits/rejected_prompt": -2.7663371562957764,
"logps/chosen": -1.9373371601104736,
"logps/chosen_both": -1.921233892440796,
"logps/chosen_prompt": -0.9925417900085449,
"logps/rejected": -5.936069488525391,
"logps/rejected_both": -5.837677955627441,
"logps/rejected_prompt": -1.1928670406341553,
"loss": 2.4809,
"nll_loss": 1.919942855834961,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7749348282814026,
"rewards/margins": 1.5994927883148193,
"rewards/rejected": -2.3744280338287354,
"step": 480
},
{
"epoch": 0.392,
"grad_norm": 0.1641543403493392,
"learning_rate": 4.7923020720865414e-05,
"log_odds_chosen": 3.001093626022339,
"log_odds_ratio": -0.484192430973053,
"logits/chosen": -2.983025312423706,
"logits/chosen_prompt": -3.0399768352508545,
"logits/rejected": -4.017498970031738,
"logits/rejected_prompt": -3.0394999980926514,
"logps/chosen": -2.209317922592163,
"logps/chosen_both": -2.1894264221191406,
"logps/chosen_prompt": -0.8747655153274536,
"logps/rejected": -5.173645496368408,
"logps/rejected_both": -5.1185221672058105,
"logps/rejected_prompt": -1.2934271097183228,
"loss": 2.1178,
"nll_loss": 2.188310384750366,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8837271928787231,
"rewards/margins": 1.185731053352356,
"rewards/rejected": -2.069458484649658,
"step": 490
},
{
"epoch": 0.4,
"grad_norm": 4.569078846846734,
"learning_rate": 4.783863644106502e-05,
"log_odds_chosen": 6.397196292877197,
"log_odds_ratio": -0.20790621638298035,
"logits/chosen": -2.8709733486175537,
"logits/chosen_prompt": -2.905733585357666,
"logits/rejected": -4.449090480804443,
"logits/rejected_prompt": -2.8762049674987793,
"logps/chosen": -1.861519455909729,
"logps/chosen_both": -1.8485714197158813,
"logps/chosen_prompt": -0.7894952893257141,
"logps/rejected": -8.093868255615234,
"logps/rejected_both": -7.9878997802734375,
"logps/rejected_prompt": -1.098191499710083,
"loss": 2.2466,
"nll_loss": 1.847815752029419,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7446077466011047,
"rewards/margins": 2.4929394721984863,
"rewards/rejected": -3.2375473976135254,
"step": 500
},
{
"epoch": 0.408,
"grad_norm": 26.906300876077555,
"learning_rate": 4.775264926712489e-05,
"log_odds_chosen": 5.443802833557129,
"log_odds_ratio": -0.13954684138298035,
"logits/chosen": -2.9360134601593018,
"logits/chosen_prompt": -2.6900744438171387,
"logits/rejected": -3.0484580993652344,
"logits/rejected_prompt": -2.612032890319824,
"logps/chosen": -1.974119782447815,
"logps/chosen_both": -1.958168625831604,
"logps/chosen_prompt": -0.8577529788017273,
"logps/rejected": -7.293883323669434,
"logps/rejected_both": -7.204199314117432,
"logps/rejected_prompt": -1.3446273803710938,
"loss": 2.518,
"nll_loss": 1.9573103189468384,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7896479368209839,
"rewards/margins": 2.1279053688049316,
"rewards/rejected": -2.917553424835205,
"step": 510
},
{
"epoch": 0.416,
"grad_norm": 1.0236738821403857,
"learning_rate": 4.7665065233920945e-05,
"log_odds_chosen": 4.726571559906006,
"log_odds_ratio": -0.14057810604572296,
"logits/chosen": -2.9554474353790283,
"logits/chosen_prompt": -3.076146364212036,
"logits/rejected": -3.131758689880371,
"logits/rejected_prompt": -3.045212507247925,
"logps/chosen": -1.9218995571136475,
"logps/chosen_both": -1.910244345664978,
"logps/chosen_prompt": -0.8790926933288574,
"logps/rejected": -6.504288673400879,
"logps/rejected_both": -6.445836544036865,
"logps/rejected_prompt": -1.288549542427063,
"loss": 2.0423,
"nll_loss": 1.909478783607483,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7687598466873169,
"rewards/margins": 1.8329557180404663,
"rewards/rejected": -2.601715564727783,
"step": 520
},
{
"epoch": 0.424,
"grad_norm": 0.6288533211783596,
"learning_rate": 4.7575890488402185e-05,
"log_odds_chosen": 4.645321846008301,
"log_odds_ratio": -0.14102457463741302,
"logits/chosen": -2.9634203910827637,
"logits/chosen_prompt": -3.0218586921691895,
"logits/rejected": -3.2898871898651123,
"logits/rejected_prompt": -3.0139455795288086,
"logps/chosen": -1.9550220966339111,
"logps/chosen_both": -1.9388656616210938,
"logps/chosen_prompt": -0.826554000377655,
"logps/rejected": -6.471889495849609,
"logps/rejected_both": -6.390293121337891,
"logps/rejected_prompt": -1.0643904209136963,
"loss": 2.2513,
"nll_loss": 1.9378074407577515,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7820087671279907,
"rewards/margins": 1.8067471981048584,
"rewards/rejected": -2.5887560844421387,
"step": 530
},
{
"epoch": 0.432,
"grad_norm": 2.277583713971035,
"learning_rate": 4.7485131289159276e-05,
"log_odds_chosen": 4.095762252807617,
"log_odds_ratio": -0.15678586065769196,
"logits/chosen": -2.9781079292297363,
"logits/chosen_prompt": -3.05256986618042,
"logits/rejected": -2.9668664932250977,
"logits/rejected_prompt": -3.041161060333252,
"logps/chosen": -1.9822967052459717,
"logps/chosen_both": -1.9686206579208374,
"logps/chosen_prompt": -0.9377325177192688,
"logps/rejected": -5.9602532386779785,
"logps/rejected_both": -5.897341728210449,
"logps/rejected_prompt": -1.051451563835144,
"loss": 2.0657,
"nll_loss": 1.9684457778930664,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7929186820983887,
"rewards/margins": 1.5911824703216553,
"rewards/rejected": -2.384101390838623,
"step": 540
},
{
"epoch": 0.44,
"grad_norm": 0.17707808472563716,
"learning_rate": 4.7392794005985326e-05,
"log_odds_chosen": 4.996828556060791,
"log_odds_ratio": -0.1402866542339325,
"logits/chosen": -2.9852428436279297,
"logits/chosen_prompt": -3.1000924110412598,
"logits/rejected": -3.4309897422790527,
"logits/rejected_prompt": -3.088724374771118,
"logps/chosen": -1.9283807277679443,
"logps/chosen_both": -1.913000464439392,
"logps/chosen_prompt": -0.7973994612693787,
"logps/rejected": -6.7942705154418945,
"logps/rejected_both": -6.711949348449707,
"logps/rejected_prompt": -1.098016619682312,
"loss": 2.2189,
"nll_loss": 1.9121148586273193,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.7713521718978882,
"rewards/margins": 1.946356177330017,
"rewards/rejected": -2.7177083492279053,
"step": 550
},
{
"epoch": 0.448,
"grad_norm": 5.519018494250257,
"learning_rate": 4.7298885119428773e-05,
"log_odds_chosen": 5.843784332275391,
"log_odds_ratio": -0.07069602608680725,
"logits/chosen": -3.0550990104675293,
"logits/chosen_prompt": -3.058029890060425,
"logits/rejected": -3.9521071910858154,
"logits/rejected_prompt": -3.025411367416382,
"logps/chosen": -1.8835957050323486,
"logps/chosen_both": -1.8681533336639404,
"logps/chosen_prompt": -0.8553426861763,
"logps/rejected": -7.572214603424072,
"logps/rejected_both": -7.47025203704834,
"logps/rejected_prompt": -1.0323774814605713,
"loss": 2.077,
"nll_loss": 1.8675563335418701,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7534382939338684,
"rewards/margins": 2.275447368621826,
"rewards/rejected": -3.02888560295105,
"step": 560
},
{
"epoch": 0.456,
"grad_norm": 0.6103366310438396,
"learning_rate": 4.720341122033862e-05,
"log_odds_chosen": 5.190781593322754,
"log_odds_ratio": -0.4892934262752533,
"logits/chosen": -2.9757232666015625,
"logits/chosen_prompt": -3.0236659049987793,
"logits/rejected": -3.8188633918762207,
"logits/rejected_prompt": -3.0117480754852295,
"logps/chosen": -2.410020351409912,
"logps/chosen_both": -2.387420415878296,
"logps/chosen_prompt": -0.8877968788146973,
"logps/rejected": -7.459628105163574,
"logps/rejected_both": -7.362242698669434,
"logps/rejected_prompt": -1.1302134990692139,
"loss": 2.4112,
"nll_loss": 2.3871912956237793,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.9640080332756042,
"rewards/margins": 2.019843339920044,
"rewards/rejected": -2.983851432800293,
"step": 570
},
{
"epoch": 0.464,
"grad_norm": 0.151774002914212,
"learning_rate": 4.710637900940181e-05,
"log_odds_chosen": 3.729964017868042,
"log_odds_ratio": -0.2660212516784668,
"logits/chosen": -2.9713380336761475,
"logits/chosen_prompt": -2.968736410140991,
"logits/rejected": -3.0788886547088623,
"logits/rejected_prompt": -2.944664478302002,
"logps/chosen": -1.842739462852478,
"logps/chosen_both": -1.829923391342163,
"logps/chosen_prompt": -0.7877852320671082,
"logps/rejected": -5.440505504608154,
"logps/rejected_both": -5.388018608093262,
"logps/rejected_prompt": -1.0643196105957031,
"loss": 2.2685,
"nll_loss": 1.8282448053359985,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7370957732200623,
"rewards/margins": 1.4391063451766968,
"rewards/rejected": -2.1762022972106934,
"step": 580
},
{
"epoch": 0.472,
"grad_norm": 0.19312538023716122,
"learning_rate": 4.7007795296673006e-05,
"log_odds_chosen": 3.5488052368164062,
"log_odds_ratio": -0.27949827909469604,
"logits/chosen": -2.9776198863983154,
"logits/chosen_prompt": -3.0068747997283936,
"logits/rejected": -3.2581207752227783,
"logits/rejected_prompt": -2.980543613433838,
"logps/chosen": -1.9455007314682007,
"logps/chosen_both": -1.929386854171753,
"logps/chosen_prompt": -0.7683624625205994,
"logps/rejected": -5.4047675132751465,
"logps/rejected_both": -5.334201812744141,
"logps/rejected_prompt": -1.0063989162445068,
"loss": 2.0098,
"nll_loss": 1.927821159362793,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7782004475593567,
"rewards/margins": 1.3837066888809204,
"rewards/rejected": -2.161907196044922,
"step": 590
},
{
"epoch": 0.48,
"grad_norm": 0.6876461909667617,
"learning_rate": 4.690766700109659e-05,
"log_odds_chosen": 3.753337860107422,
"log_odds_ratio": -0.21310639381408691,
"logits/chosen": -2.983619213104248,
"logits/chosen_prompt": -3.056485652923584,
"logits/rejected": -3.4968714714050293,
"logits/rejected_prompt": -3.052788496017456,
"logps/chosen": -2.029822587966919,
"logps/chosen_both": -2.0120925903320312,
"logps/chosen_prompt": -0.8819751739501953,
"logps/rejected": -5.680521488189697,
"logps/rejected_both": -5.621560096740723,
"logps/rejected_prompt": -1.1547878980636597,
"loss": 2.2063,
"nll_loss": 2.011672258377075,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8119290471076965,
"rewards/margins": 1.4602794647216797,
"rewards/rejected": -2.2722086906433105,
"step": 600
},
{
"epoch": 0.488,
"grad_norm": 0.19286504559147355,
"learning_rate": 4.68060011500211e-05,
"log_odds_chosen": 4.486660957336426,
"log_odds_ratio": -0.16551145911216736,
"logits/chosen": -2.9143826961517334,
"logits/chosen_prompt": -3.077587366104126,
"logits/rejected": -3.641350507736206,
"logits/rejected_prompt": -3.062753677368164,
"logps/chosen": -1.9688940048217773,
"logps/chosen_both": -1.954045295715332,
"logps/chosen_prompt": -0.6965051293373108,
"logps/rejected": -6.356810569763184,
"logps/rejected_both": -6.294190406799316,
"logps/rejected_prompt": -0.9163694381713867,
"loss": 2.0169,
"nll_loss": 1.953741431236267,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7875575423240662,
"rewards/margins": 1.7551662921905518,
"rewards/rejected": -2.542724132537842,
"step": 610
},
{
"epoch": 0.496,
"grad_norm": 0.19049752930142771,
"learning_rate": 4.670280487870598e-05,
"log_odds_chosen": 4.947572708129883,
"log_odds_ratio": -0.14103658497333527,
"logits/chosen": -2.8884735107421875,
"logits/chosen_prompt": -3.0340023040771484,
"logits/rejected": -3.598095655441284,
"logits/rejected_prompt": -3.0135154724121094,
"logps/chosen": -2.0803651809692383,
"logps/chosen_both": -2.065659284591675,
"logps/chosen_prompt": -0.7768818140029907,
"logps/rejected": -6.917575836181641,
"logps/rejected_both": -6.847512245178223,
"logps/rejected_prompt": -1.0173327922821045,
"loss": 2.4222,
"nll_loss": 2.0645294189453125,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.832146167755127,
"rewards/margins": 1.9348840713500977,
"rewards/rejected": -2.7670302391052246,
"step": 620
},
{
"epoch": 0.504,
"grad_norm": 0.17010508801078386,
"learning_rate": 4.659808542982088e-05,
"log_odds_chosen": 4.44757604598999,
"log_odds_ratio": -0.07313639670610428,
"logits/chosen": -2.8788280487060547,
"logits/chosen_prompt": -2.848573923110962,
"logits/rejected": -2.6464812755584717,
"logits/rejected_prompt": -2.814408540725708,
"logps/chosen": -2.0289366245269775,
"logps/chosen_both": -2.014009952545166,
"logps/chosen_prompt": -0.7678987979888916,
"logps/rejected": -6.3392744064331055,
"logps/rejected_both": -6.2660441398620605,
"logps/rejected_prompt": -1.0411919355392456,
"loss": 2.0605,
"nll_loss": 2.0118680000305176,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8115746378898621,
"rewards/margins": 1.724135160446167,
"rewards/rejected": -2.535709857940674,
"step": 630
},
{
"epoch": 0.512,
"grad_norm": 102.20159023426744,
"learning_rate": 4.649185015293728e-05,
"log_odds_chosen": 5.305100440979004,
"log_odds_ratio": -0.02886788547039032,
"logits/chosen": -2.934922456741333,
"logits/chosen_prompt": -2.8038196563720703,
"logits/rejected": -2.483616828918457,
"logits/rejected_prompt": -2.801661491394043,
"logps/chosen": -1.7393245697021484,
"logps/chosen_both": -1.728514313697815,
"logps/chosen_prompt": -0.882293701171875,
"logps/rejected": -6.811369895935059,
"logps/rejected_both": -6.727609157562256,
"logps/rejected_prompt": -1.0623975992202759,
"loss": 2.1612,
"nll_loss": 1.7267690896987915,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.6957297921180725,
"rewards/margins": 2.028818130493164,
"rewards/rejected": -2.724547863006592,
"step": 640
},
{
"epoch": 0.52,
"grad_norm": 2.577120716963003,
"learning_rate": 4.638410650401267e-05,
"log_odds_chosen": 5.029098033905029,
"log_odds_ratio": -0.0729464739561081,
"logits/chosen": -2.946472644805908,
"logits/chosen_prompt": -2.7987747192382812,
"logits/rejected": -2.31748628616333,
"logits/rejected_prompt": -2.7790069580078125,
"logps/chosen": -1.9928621053695679,
"logps/chosen_both": -1.97336745262146,
"logps/chosen_prompt": -0.8152757883071899,
"logps/rejected": -6.8893632888793945,
"logps/rejected_both": -6.791792392730713,
"logps/rejected_prompt": -1.0174424648284912,
"loss": 2.0913,
"nll_loss": 1.9712880849838257,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7971449494361877,
"rewards/margins": 1.9586002826690674,
"rewards/rejected": -2.7557451725006104,
"step": 650
},
{
"epoch": 0.528,
"grad_norm": 0.15356571620190568,
"learning_rate": 4.6274862044867304e-05,
"log_odds_chosen": 4.515711307525635,
"log_odds_ratio": -0.14140725135803223,
"logits/chosen": -2.93347430229187,
"logits/chosen_prompt": -2.790188789367676,
"logits/rejected": -2.197619915008545,
"logits/rejected_prompt": -2.7709336280822754,
"logps/chosen": -1.9486901760101318,
"logps/chosen_both": -1.936274766921997,
"logps/chosen_prompt": -0.9808751940727234,
"logps/rejected": -6.346037864685059,
"logps/rejected_both": -6.276151180267334,
"logps/rejected_prompt": -1.2042269706726074,
"loss": 2.0583,
"nll_loss": 1.9354143142700195,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7794761657714844,
"rewards/margins": 1.7589390277862549,
"rewards/rejected": -2.5384154319763184,
"step": 660
},
{
"epoch": 0.536,
"grad_norm": 0.18548636024672094,
"learning_rate": 4.616412444265345e-05,
"log_odds_chosen": 5.104066371917725,
"log_odds_ratio": -0.0724453255534172,
"logits/chosen": -2.9771525859832764,
"logits/chosen_prompt": -2.8243517875671387,
"logits/rejected": -2.083482265472412,
"logits/rejected_prompt": -2.8059630393981934,
"logps/chosen": -2.0861048698425293,
"logps/chosen_both": -2.068869113922119,
"logps/chosen_prompt": -0.8699228167533875,
"logps/rejected": -7.067320823669434,
"logps/rejected_both": -6.978930473327637,
"logps/rejected_prompt": -1.0220625400543213,
"loss": 2.1363,
"nll_loss": 2.0682852268218994,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8344419598579407,
"rewards/margins": 1.9924862384796143,
"rewards/rejected": -2.8269283771514893,
"step": 670
},
{
"epoch": 0.544,
"grad_norm": 59.01092604551995,
"learning_rate": 4.605190146931731e-05,
"log_odds_chosen": 4.40061092376709,
"log_odds_ratio": -0.1419232189655304,
"logits/chosen": -2.9263124465942383,
"logits/chosen_prompt": -2.8417701721191406,
"logits/rejected": -2.351675510406494,
"logits/rejected_prompt": -2.8414313793182373,
"logps/chosen": -2.124084711074829,
"logps/chosen_both": -2.102914571762085,
"logps/chosen_prompt": -0.8957809209823608,
"logps/rejected": -6.422041893005371,
"logps/rejected_both": -6.32672643661499,
"logps/rejected_prompt": -1.0718226432800293,
"loss": 2.1268,
"nll_loss": 2.1024184226989746,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8496338725090027,
"rewards/margins": 1.7191829681396484,
"rewards/rejected": -2.568816661834717,
"step": 680
},
{
"epoch": 0.552,
"grad_norm": 0.1936209207703668,
"learning_rate": 4.593820100105355e-05,
"log_odds_chosen": 4.4033403396606445,
"log_odds_ratio": -0.1418362557888031,
"logits/chosen": -2.947152614593506,
"logits/chosen_prompt": -2.8191583156585693,
"logits/rejected": -2.3703582286834717,
"logits/rejected_prompt": -2.8038182258605957,
"logps/chosen": -1.993703842163086,
"logps/chosen_both": -1.9738051891326904,
"logps/chosen_prompt": -0.8131387829780579,
"logps/rejected": -6.278976917266846,
"logps/rejected_both": -6.194762229919434,
"logps/rejected_prompt": -0.9806526303291321,
"loss": 2.0429,
"nll_loss": 1.9733550548553467,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7974814772605896,
"rewards/margins": 1.7141094207763672,
"rewards/rejected": -2.5115909576416016,
"step": 690
},
{
"epoch": 0.56,
"grad_norm": 0.21779512193360956,
"learning_rate": 4.5823031017752485e-05,
"log_odds_chosen": 4.373869895935059,
"log_odds_ratio": -0.1618097722530365,
"logits/chosen": -2.9762911796569824,
"logits/chosen_prompt": -2.787757396697998,
"logits/rejected": -2.3213400840759277,
"logits/rejected_prompt": -2.7804551124572754,
"logps/chosen": -1.8093370199203491,
"logps/chosen_both": -1.7962630987167358,
"logps/chosen_prompt": -0.7294620871543884,
"logps/rejected": -6.035723686218262,
"logps/rejected_both": -5.961843490600586,
"logps/rejected_prompt": -0.9543176889419556,
"loss": 2.0382,
"nll_loss": 1.7948728799819946,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7237349152565002,
"rewards/margins": 1.6905548572540283,
"rewards/rejected": -2.414289712905884,
"step": 700
},
{
"epoch": 0.568,
"grad_norm": 1.9769361000953782,
"learning_rate": 4.5706399602440106e-05,
"log_odds_chosen": 4.656636714935303,
"log_odds_ratio": -0.1408310979604721,
"logits/chosen": -2.916656255722046,
"logits/chosen_prompt": -2.787416458129883,
"logits/rejected": -2.190491199493408,
"logits/rejected_prompt": -2.754542589187622,
"logps/chosen": -2.000397205352783,
"logps/chosen_both": -1.983769416809082,
"logps/chosen_prompt": -0.7894454002380371,
"logps/rejected": -6.537571907043457,
"logps/rejected_both": -6.459201812744141,
"logps/rejected_prompt": -1.0599520206451416,
"loss": 2.098,
"nll_loss": 1.9831438064575195,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8001587986946106,
"rewards/margins": 1.8148695230484009,
"rewards/rejected": -2.6150283813476562,
"step": 710
},
{
"epoch": 0.576,
"grad_norm": 0.18745727904701265,
"learning_rate": 4.558831494071069e-05,
"log_odds_chosen": 4.969104290008545,
"log_odds_ratio": -0.14006975293159485,
"logits/chosen": -2.9004273414611816,
"logits/chosen_prompt": -2.7481789588928223,
"logits/rejected": -1.9203866720199585,
"logits/rejected_prompt": -2.7317967414855957,
"logps/chosen": -2.000072479248047,
"logps/chosen_both": -1.9829126596450806,
"logps/chosen_prompt": -0.9659306406974792,
"logps/rejected": -6.8479132652282715,
"logps/rejected_both": -6.743927955627441,
"logps/rejected_prompt": -1.1112347841262817,
"loss": 2.0041,
"nll_loss": 1.982696533203125,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.800028920173645,
"rewards/margins": 1.9391365051269531,
"rewards/rejected": -2.7391655445098877,
"step": 720
},
{
"epoch": 0.584,
"grad_norm": 44.61607145258881,
"learning_rate": 4.5468785320152365e-05,
"log_odds_chosen": 4.449766635894775,
"log_odds_ratio": -0.20899026095867157,
"logits/chosen": -3.0241429805755615,
"logits/chosen_prompt": -2.746372699737549,
"logits/rejected": -2.07698917388916,
"logits/rejected_prompt": -2.746025562286377,
"logps/chosen": -1.9495675563812256,
"logps/chosen_both": -1.9276573657989502,
"logps/chosen_prompt": -0.8301995992660522,
"logps/rejected": -6.287846565246582,
"logps/rejected_both": -6.172031402587891,
"logps/rejected_prompt": -0.9652963876724243,
"loss": 2.1169,
"nll_loss": 1.9262176752090454,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7798271179199219,
"rewards/margins": 1.735311508178711,
"rewards/rejected": -2.515138626098633,
"step": 730
},
{
"epoch": 0.592,
"grad_norm": 0.39108071766635244,
"learning_rate": 4.534781912976546e-05,
"log_odds_chosen": 3.2947051525115967,
"log_odds_ratio": -0.2812163829803467,
"logits/chosen": -2.989047050476074,
"logits/chosen_prompt": -2.7699084281921387,
"logits/rejected": -2.4307093620300293,
"logits/rejected_prompt": -2.756155014038086,
"logps/chosen": -1.9651190042495728,
"logps/chosen_both": -1.9502513408660889,
"logps/chosen_prompt": -0.7651479840278625,
"logps/rejected": -5.176846981048584,
"logps/rejected_both": -5.1231608390808105,
"logps/rejected_prompt": -0.8976105451583862,
"loss": 2.0946,
"nll_loss": 1.949180245399475,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7860475778579712,
"rewards/margins": 1.2846912145614624,
"rewards/rejected": -2.070739269256592,
"step": 740
},
{
"epoch": 0.6,
"grad_norm": 4.489644128912903,
"learning_rate": 4.522542485937369e-05,
"log_odds_chosen": 4.886274337768555,
"log_odds_ratio": -0.14077258110046387,
"logits/chosen": -2.948451519012451,
"logits/chosen_prompt": -2.7478134632110596,
"logits/rejected": -2.1101903915405273,
"logits/rejected_prompt": -2.7366366386413574,
"logps/chosen": -1.992583990097046,
"logps/chosen_both": -1.9766371250152588,
"logps/chosen_prompt": -0.8634021878242493,
"logps/rejected": -6.756987571716309,
"logps/rejected_both": -6.673755645751953,
"logps/rejected_prompt": -1.0165636539459229,
"loss": 2.1241,
"nll_loss": 1.9759677648544312,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7970336675643921,
"rewards/margins": 1.90576171875,
"rewards/rejected": -2.7027952671051025,
"step": 750
},
{
"epoch": 0.608,
"grad_norm": 0.4533909423122121,
"learning_rate": 4.510161109902837e-05,
"log_odds_chosen": 3.120637893676758,
"log_odds_ratio": -0.6285208463668823,
"logits/chosen": -2.909808397293091,
"logits/chosen_prompt": -2.8316149711608887,
"logits/rejected": -2.377187490463257,
"logits/rejected_prompt": -2.823117971420288,
"logps/chosen": -2.327125072479248,
"logps/chosen_both": -2.3096871376037598,
"logps/chosen_prompt": -0.868097186088562,
"logps/rejected": -5.366008281707764,
"logps/rejected_both": -5.30277681350708,
"logps/rejected_prompt": -1.0501350164413452,
"loss": 2.1836,
"nll_loss": 2.3085296154022217,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.9308500289916992,
"rewards/margins": 1.2155535221099854,
"rewards/rejected": -2.1464035511016846,
"step": 760
},
{
"epoch": 0.616,
"grad_norm": 0.19141971158001736,
"learning_rate": 4.4976386538405495e-05,
"log_odds_chosen": 2.943345546722412,
"log_odds_ratio": -0.2832263708114624,
"logits/chosen": -2.926583766937256,
"logits/chosen_prompt": -2.8340327739715576,
"logits/rejected": -2.5858168601989746,
"logits/rejected_prompt": -2.8149476051330566,
"logps/chosen": -2.0653610229492188,
"logps/chosen_both": -2.0445759296417236,
"logps/chosen_prompt": -0.8157526254653931,
"logps/rejected": -4.919131278991699,
"logps/rejected_both": -4.849064350128174,
"logps/rejected_prompt": -1.005324125289917,
"loss": 2.0024,
"nll_loss": 2.0445759296417236,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8261443972587585,
"rewards/margins": 1.1415081024169922,
"rewards/rejected": -1.9676525592803955,
"step": 770
},
{
"epoch": 0.624,
"grad_norm": 0.299820230370255,
"learning_rate": 4.484975996619589e-05,
"log_odds_chosen": 4.539975166320801,
"log_odds_ratio": -0.11812126636505127,
"logits/chosen": -2.87815523147583,
"logits/chosen_prompt": -2.8412280082702637,
"logits/rejected": -2.3637688159942627,
"logits/rejected_prompt": -2.8588156700134277,
"logps/chosen": -2.4759485721588135,
"logps/chosen_both": -2.454190731048584,
"logps/chosen_prompt": -0.7899399995803833,
"logps/rejected": -6.8973388671875,
"logps/rejected_both": -6.819916725158691,
"logps/rejected_prompt": -1.066646695137024,
"loss": 2.3702,
"nll_loss": 2.454133987426758,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.9903793334960938,
"rewards/margins": 1.7685562372207642,
"rewards/rejected": -2.7589354515075684,
"step": 780
},
{
"epoch": 0.632,
"grad_norm": 5.192675922080671,
"learning_rate": 4.4721740269488355e-05,
"log_odds_chosen": 2.496995210647583,
"log_odds_ratio": -0.32391008734703064,
"logits/chosen": -2.966625213623047,
"logits/chosen_prompt": -2.795879602432251,
"logits/rejected": -2.514392137527466,
"logits/rejected_prompt": -2.783583164215088,
"logps/chosen": -2.563605546951294,
"logps/chosen_both": -2.541128635406494,
"logps/chosen_prompt": -0.9771214723587036,
"logps/rejected": -4.989082336425781,
"logps/rejected_both": -4.936980724334717,
"logps/rejected_prompt": -1.0889393091201782,
"loss": 2.1847,
"nll_loss": 2.5405211448669434,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.025442123413086,
"rewards/margins": 0.9701908230781555,
"rewards/rejected": -1.9956328868865967,
"step": 790
},
{
"epoch": 0.64,
"grad_norm": 0.4695325524437554,
"learning_rate": 4.4592336433146e-05,
"log_odds_chosen": 5.124607563018799,
"log_odds_ratio": -0.018428776413202286,
"logits/chosen": -3.051105260848999,
"logits/chosen_prompt": -2.8179726600646973,
"logits/rejected": -1.909102201461792,
"logits/rejected_prompt": -2.7916340827941895,
"logps/chosen": -1.8969109058380127,
"logps/chosen_both": -1.8779878616333008,
"logps/chosen_prompt": -0.8452935218811035,
"logps/rejected": -6.845399379730225,
"logps/rejected_both": -6.747313022613525,
"logps/rejected_prompt": -0.9934666752815247,
"loss": 2.0368,
"nll_loss": 1.8772528171539307,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.758764386177063,
"rewards/margins": 1.9793955087661743,
"rewards/rejected": -2.7381598949432373,
"step": 800
},
{
"epoch": 0.648,
"grad_norm": 0.21280813340257887,
"learning_rate": 4.4461557539175594e-05,
"log_odds_chosen": 5.451117515563965,
"log_odds_ratio": -0.07145892083644867,
"logits/chosen": -2.9378345012664795,
"logits/chosen_prompt": -2.762908458709717,
"logits/rejected": -1.6283600330352783,
"logits/rejected_prompt": -2.7498764991760254,
"logps/chosen": -2.0257043838500977,
"logps/chosen_both": -2.008737087249756,
"logps/chosen_prompt": -0.8673852682113647,
"logps/rejected": -7.346819877624512,
"logps/rejected_both": -7.247427940368652,
"logps/rejected_prompt": -1.0632621049880981,
"loss": 2.0447,
"nll_loss": 2.0078537464141846,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8102817535400391,
"rewards/margins": 2.128446340560913,
"rewards/rejected": -2.938728094100952,
"step": 810
},
{
"epoch": 0.656,
"grad_norm": 0.209653515397789,
"learning_rate": 4.432941276609018e-05,
"log_odds_chosen": 5.421745777130127,
"log_odds_ratio": -0.07243818789720535,
"logits/chosen": -2.9660727977752686,
"logits/chosen_prompt": -2.805607318878174,
"logits/rejected": -1.6398050785064697,
"logits/rejected_prompt": -2.7811026573181152,
"logps/chosen": -2.0751829147338867,
"logps/chosen_both": -2.0558664798736572,
"logps/chosen_prompt": -0.7402461767196655,
"logps/rejected": -7.376537322998047,
"logps/rejected_both": -7.285178184509277,
"logps/rejected_prompt": -0.9955169558525085,
"loss": 2.1673,
"nll_loss": 2.05536150932312,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8300731778144836,
"rewards/margins": 2.1205410957336426,
"rewards/rejected": -2.9506144523620605,
"step": 820
},
{
"epoch": 0.664,
"grad_norm": 0.2932004663372407,
"learning_rate": 4.4195911388264946e-05,
"log_odds_chosen": 3.337216854095459,
"log_odds_ratio": -0.28040507435798645,
"logits/chosen": -3.0083236694335938,
"logits/chosen_prompt": -2.7438673973083496,
"logits/rejected": -2.2188708782196045,
"logits/rejected_prompt": -2.710932970046997,
"logps/chosen": -1.7532163858413696,
"logps/chosen_both": -1.7392990589141846,
"logps/chosen_prompt": -0.881622314453125,
"logps/rejected": -4.988051891326904,
"logps/rejected_both": -4.921896934509277,
"logps/rejected_prompt": -0.8814730644226074,
"loss": 2.0387,
"nll_loss": 1.7385940551757812,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7012865543365479,
"rewards/margins": 1.2939343452453613,
"rewards/rejected": -1.9952208995819092,
"step": 830
},
{
"epoch": 0.672,
"grad_norm": 2.9403489436512475,
"learning_rate": 4.40610627752862e-05,
"log_odds_chosen": 5.995909690856934,
"log_odds_ratio": -0.07048363983631134,
"logits/chosen": -2.951843738555908,
"logits/chosen_prompt": -2.657824993133545,
"logits/rejected": -1.3483891487121582,
"logits/rejected_prompt": -2.6459240913391113,
"logps/chosen": -2.0297625064849854,
"logps/chosen_both": -2.011107921600342,
"logps/chosen_prompt": -0.8041833639144897,
"logps/rejected": -7.886776924133301,
"logps/rejected_both": -7.784094333648682,
"logps/rejected_prompt": -0.9874393343925476,
"loss": 2.0868,
"nll_loss": 2.0107545852661133,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8119049072265625,
"rewards/margins": 2.342806100845337,
"rewards/rejected": -3.1547107696533203,
"step": 840
},
{
"epoch": 0.68,
"grad_norm": 0.1951986041864062,
"learning_rate": 4.3924876391293915e-05,
"log_odds_chosen": 5.405202865600586,
"log_odds_ratio": -0.4933692514896393,
"logits/chosen": -2.8229470252990723,
"logits/chosen_prompt": -2.70353102684021,
"logits/rejected": -1.516230821609497,
"logits/rejected_prompt": -2.682372570037842,
"logps/chosen": -2.4473724365234375,
"logps/chosen_both": -2.4278030395507812,
"logps/chosen_prompt": -0.8016360402107239,
"logps/rejected": -7.731281280517578,
"logps/rejected_both": -7.645183563232422,
"logps/rejected_prompt": -0.9825652241706848,
"loss": 2.2426,
"nll_loss": 2.427164316177368,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.9789490699768066,
"rewards/margins": 2.113563299179077,
"rewards/rejected": -3.092512369155884,
"step": 850
},
{
"epoch": 0.688,
"grad_norm": 0.19812900890844543,
"learning_rate": 4.3787361794317405e-05,
"log_odds_chosen": 3.4184670448303223,
"log_odds_ratio": -0.22132563591003418,
"logits/chosen": -2.9762589931488037,
"logits/chosen_prompt": -2.764681816101074,
"logits/rejected": -2.4695773124694824,
"logits/rejected_prompt": -2.739607095718384,
"logps/chosen": -1.889784812927246,
"logps/chosen_both": -1.8726049661636353,
"logps/chosen_prompt": -0.8000418543815613,
"logps/rejected": -5.191944122314453,
"logps/rejected_both": -5.127084732055664,
"logps/rejected_prompt": -0.973870575428009,
"loss": 2.0017,
"nll_loss": 1.8721071481704712,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7559138536453247,
"rewards/margins": 1.3208638429641724,
"rewards/rejected": -2.076777935028076,
"step": 860
},
{
"epoch": 0.696,
"grad_norm": 1.1208289382374679,
"learning_rate": 4.3648528635604556e-05,
"log_odds_chosen": 4.736769199371338,
"log_odds_ratio": -0.07410699129104614,
"logits/chosen": -2.9047577381134033,
"logits/chosen_prompt": -2.7688372135162354,
"logits/rejected": -2.297377824783325,
"logits/rejected_prompt": -2.7379658222198486,
"logps/chosen": -2.166656017303467,
"logps/chosen_both": -2.149369955062866,
"logps/chosen_prompt": -0.7613478899002075,
"logps/rejected": -6.790528774261475,
"logps/rejected_both": -6.711920738220215,
"logps/rejected_prompt": -0.9217512011528015,
"loss": 2.19,
"nll_loss": 2.1481828689575195,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8666625022888184,
"rewards/margins": 1.8495492935180664,
"rewards/rejected": -2.7162115573883057,
"step": 870
},
{
"epoch": 0.704,
"grad_norm": 0.18802597714184358,
"learning_rate": 4.350838665894446e-05,
"log_odds_chosen": 3.573579788208008,
"log_odds_ratio": -0.2119835913181305,
"logits/chosen": -2.9564337730407715,
"logits/chosen_prompt": -2.8878400325775146,
"logits/rejected": -2.7999844551086426,
"logits/rejected_prompt": -2.8850619792938232,
"logps/chosen": -2.041067361831665,
"logps/chosen_both": -2.0219027996063232,
"logps/chosen_prompt": -0.7945634126663208,
"logps/rejected": -5.52020788192749,
"logps/rejected_both": -5.447958946228027,
"logps/rejected_prompt": -0.9404302835464478,
"loss": 2.1522,
"nll_loss": 2.0212433338165283,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8164268732070923,
"rewards/margins": 1.3916563987731934,
"rewards/rejected": -2.208083152770996,
"step": 880
},
{
"epoch": 0.712,
"grad_norm": 1.3417035590493764,
"learning_rate": 4.336694569998354e-05,
"log_odds_chosen": 4.419407367706299,
"log_odds_ratio": -0.07842884957790375,
"logits/chosen": -2.980591297149658,
"logits/chosen_prompt": -2.9254255294799805,
"logits/rejected": -2.7680697441101074,
"logits/rejected_prompt": -2.905561923980713,
"logps/chosen": -2.0169148445129395,
"logps/chosen_both": -2.0003621578216553,
"logps/chosen_prompt": -0.8039913177490234,
"logps/rejected": -6.302676200866699,
"logps/rejected_both": -6.233563423156738,
"logps/rejected_prompt": -0.9547332525253296,
"loss": 2.0996,
"nll_loss": 2.000209331512451,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8067659139633179,
"rewards/margins": 1.7143046855926514,
"rewards/rejected": -2.5210704803466797,
"step": 890
},
{
"epoch": 0.72,
"grad_norm": 0.17015695407576262,
"learning_rate": 4.3224215685535294e-05,
"log_odds_chosen": 3.736863613128662,
"log_odds_ratio": -0.21099340915679932,
"logits/chosen": -2.9480998516082764,
"logits/chosen_prompt": -2.909301519393921,
"logits/rejected": -2.5860133171081543,
"logits/rejected_prompt": -2.8961730003356934,
"logps/chosen": -1.99604070186615,
"logps/chosen_both": -1.9824683666229248,
"logps/chosen_prompt": -0.8537474870681763,
"logps/rejected": -5.6191020011901855,
"logps/rejected_both": -5.559712886810303,
"logps/rejected_prompt": -1.0109044313430786,
"loss": 2.0333,
"nll_loss": 1.9815161228179932,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.798416256904602,
"rewards/margins": 1.449224591255188,
"rewards/rejected": -2.247641086578369,
"step": 900
},
{
"epoch": 0.728,
"grad_norm": 0.1938256131016386,
"learning_rate": 4.3080206632883554e-05,
"log_odds_chosen": 4.993983745574951,
"log_odds_ratio": -0.07278299331665039,
"logits/chosen": -2.9305057525634766,
"logits/chosen_prompt": -2.8883767127990723,
"logits/rejected": -2.744293212890625,
"logits/rejected_prompt": -2.865830183029175,
"logps/chosen": -1.9137989282608032,
"logps/chosen_both": -1.897878646850586,
"logps/chosen_prompt": -0.8952886462211609,
"logps/rejected": -6.773948669433594,
"logps/rejected_both": -6.680284023284912,
"logps/rejected_prompt": -1.1111478805541992,
"loss": 2.072,
"nll_loss": 1.896592378616333,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7655196189880371,
"rewards/margins": 1.9440600872039795,
"rewards/rejected": -2.7095799446105957,
"step": 910
},
{
"epoch": 0.736,
"grad_norm": 0.19422924079693882,
"learning_rate": 4.293492864907947e-05,
"log_odds_chosen": 4.982480049133301,
"log_odds_ratio": -0.07303477078676224,
"logits/chosen": -2.897078275680542,
"logits/chosen_prompt": -2.8844199180603027,
"logits/rejected": -2.5853612422943115,
"logits/rejected_prompt": -2.896810531616211,
"logps/chosen": -2.046506404876709,
"logps/chosen_both": -2.027215003967285,
"logps/chosen_prompt": -0.8521916270256042,
"logps/rejected": -6.898811340332031,
"logps/rejected_both": -6.797191619873047,
"logps/rejected_prompt": -1.0783166885375977,
"loss": 2.0343,
"nll_loss": 2.025817394256592,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8186025619506836,
"rewards/margins": 1.9409217834472656,
"rewards/rejected": -2.7595245838165283,
"step": 920
},
{
"epoch": 0.744,
"grad_norm": 0.17503233577716112,
"learning_rate": 4.278839193023214e-05,
"log_odds_chosen": 5.051764011383057,
"log_odds_ratio": -0.07269078493118286,
"logits/chosen": -2.968621015548706,
"logits/chosen_prompt": -2.8850250244140625,
"logits/rejected": -2.575244426727295,
"logits/rejected_prompt": -2.879965305328369,
"logps/chosen": -2.0476856231689453,
"logps/chosen_both": -2.0287888050079346,
"logps/chosen_prompt": -0.8320780992507935,
"logps/rejected": -6.972892761230469,
"logps/rejected_both": -6.875253200531006,
"logps/rejected_prompt": -0.9857944250106812,
"loss": 2.4164,
"nll_loss": 2.027635335922241,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8190741539001465,
"rewards/margins": 1.9700825214385986,
"rewards/rejected": -2.7891571521759033,
"step": 930
},
{
"epoch": 0.752,
"grad_norm": 47.44652080135077,
"learning_rate": 4.264060676079302e-05,
"log_odds_chosen": 3.4615960121154785,
"log_odds_ratio": -0.25266528129577637,
"logits/chosen": -2.9501328468322754,
"logits/chosen_prompt": -2.8721659183502197,
"logits/rejected": -3.1557369232177734,
"logits/rejected_prompt": -2.854639768600464,
"logps/chosen": -2.153719425201416,
"logps/chosen_both": -2.135387897491455,
"logps/chosen_prompt": -0.9698511958122253,
"logps/rejected": -5.52289342880249,
"logps/rejected_both": -5.454329490661621,
"logps/rejected_prompt": -1.0520834922790527,
"loss": 2.1268,
"nll_loss": 2.1349105834960938,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.8614877462387085,
"rewards/margins": 1.3476698398590088,
"rewards/rejected": -2.2091574668884277,
"step": 940
},
{
"epoch": 0.76,
"grad_norm": 0.19086614631182744,
"learning_rate": 4.249158351283414e-05,
"log_odds_chosen": 4.672451496124268,
"log_odds_ratio": -0.14073383808135986,
"logits/chosen": -3.003997325897217,
"logits/chosen_prompt": -2.9195713996887207,
"logits/rejected": -3.2987685203552246,
"logits/rejected_prompt": -2.9031708240509033,
"logps/chosen": -2.006805896759033,
"logps/chosen_both": -1.9856882095336914,
"logps/chosen_prompt": -0.8608209490776062,
"logps/rejected": -6.554454803466797,
"logps/rejected_both": -6.446510314941406,
"logps/rejected_prompt": -1.0303418636322021,
"loss": 2.0534,
"nll_loss": 1.9856884479522705,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8027224540710449,
"rewards/margins": 1.8190593719482422,
"rewards/rejected": -2.621781826019287,
"step": 950
},
{
"epoch": 0.768,
"grad_norm": 0.16090899053372315,
"learning_rate": 4.234133264532012e-05,
"log_odds_chosen": 6.077364444732666,
"log_odds_ratio": -0.004217286594212055,
"logits/chosen": -2.842454433441162,
"logits/chosen_prompt": -2.8957276344299316,
"logits/rejected": -3.5180137157440186,
"logits/rejected_prompt": -2.9135992527008057,
"logps/chosen": -1.9932161569595337,
"logps/chosen_both": -1.9756605625152588,
"logps/chosen_prompt": -0.8626230359077454,
"logps/rejected": -7.9156999588012695,
"logps/rejected_both": -7.813823699951172,
"logps/rejected_prompt": -1.0395594835281372,
"loss": 2.0091,
"nll_loss": 1.975542664527893,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7972863912582397,
"rewards/margins": 2.3689935207366943,
"rewards/rejected": -3.1662800312042236,
"step": 960
},
{
"epoch": 0.776,
"grad_norm": 0.4432866833057449,
"learning_rate": 4.218986470337419e-05,
"log_odds_chosen": 5.5125412940979,
"log_odds_ratio": -0.07154224812984467,
"logits/chosen": -2.9377503395080566,
"logits/chosen_prompt": -2.926082134246826,
"logits/rejected": -3.535740375518799,
"logits/rejected_prompt": -2.9182417392730713,
"logps/chosen": -1.919931411743164,
"logps/chosen_both": -1.9039018154144287,
"logps/chosen_prompt": -0.7944774627685547,
"logps/rejected": -7.288356781005859,
"logps/rejected_both": -7.193412780761719,
"logps/rejected_prompt": -0.9629098773002625,
"loss": 2.3092,
"nll_loss": 1.9036260843276978,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7679725289344788,
"rewards/margins": 2.147369861602783,
"rewards/rejected": -2.9153425693511963,
"step": 970
},
{
"epoch": 0.784,
"grad_norm": 0.19680727751711977,
"learning_rate": 4.2037190317538e-05,
"log_odds_chosen": 4.595906734466553,
"log_odds_ratio": -0.07939890027046204,
"logits/chosen": -2.9524266719818115,
"logits/chosen_prompt": -2.790818691253662,
"logits/rejected": -2.9070940017700195,
"logits/rejected_prompt": -2.781165599822998,
"logps/chosen": -1.9940401315689087,
"logps/chosen_both": -1.978316068649292,
"logps/chosen_prompt": -0.7690817713737488,
"logps/rejected": -6.455039024353027,
"logps/rejected_both": -6.385528087615967,
"logps/rejected_prompt": -0.9404104948043823,
"loss": 2.0872,
"nll_loss": 1.9778735637664795,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7976160049438477,
"rewards/margins": 1.7843996286392212,
"rewards/rejected": -2.5820157527923584,
"step": 980
},
{
"epoch": 0.792,
"grad_norm": 0.1584132780664858,
"learning_rate": 4.188332020302561e-05,
"log_odds_chosen": 4.230597496032715,
"log_odds_ratio": -0.14310847222805023,
"logits/chosen": -2.956609010696411,
"logits/chosen_prompt": -2.8512063026428223,
"logits/rejected": -2.678597927093506,
"logits/rejected_prompt": -2.8333568572998047,
"logps/chosen": -1.8776973485946655,
"logps/chosen_both": -1.8625962734222412,
"logps/chosen_prompt": -0.8090478777885437,
"logps/rejected": -5.976474761962891,
"logps/rejected_both": -5.902680397033691,
"logps/rejected_prompt": -0.9692068099975586,
"loss": 1.9999,
"nll_loss": 1.861577033996582,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7510789632797241,
"rewards/margins": 1.6395108699798584,
"rewards/rejected": -2.390589952468872,
"step": 990
},
{
"epoch": 0.8,
"grad_norm": 0.18982243368973564,
"learning_rate": 4.172826515897146e-05,
"log_odds_chosen": 4.3918375968933105,
"log_odds_ratio": -0.14247746765613556,
"logits/chosen": -2.9714953899383545,
"logits/chosen_prompt": -2.824305772781372,
"logits/rejected": -2.6518845558166504,
"logits/rejected_prompt": -2.8202338218688965,
"logps/chosen": -1.8688671588897705,
"logps/chosen_both": -1.8508541584014893,
"logps/chosen_prompt": -0.9176328778266907,
"logps/rejected": -6.10614538192749,
"logps/rejected_both": -6.007752418518066,
"logps/rejected_prompt": -1.0590510368347168,
"loss": 2.0857,
"nll_loss": 1.8497679233551025,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.747546911239624,
"rewards/margins": 1.694911241531372,
"rewards/rejected": -2.442458391189575,
"step": 1000
},
{
"epoch": 0.808,
"grad_norm": 0.15641654006436478,
"learning_rate": 4.157203606767238e-05,
"log_odds_chosen": 4.2656779289245605,
"log_odds_ratio": -0.14230065047740936,
"logits/chosen": -2.9932308197021484,
"logits/chosen_prompt": -2.830867290496826,
"logits/rejected": -2.6234424114227295,
"logits/rejected_prompt": -2.8216352462768555,
"logps/chosen": -2.024932384490967,
"logps/chosen_both": -2.0056064128875732,
"logps/chosen_prompt": -0.7936287522315979,
"logps/rejected": -6.178097724914551,
"logps/rejected_both": -6.095284938812256,
"logps/rejected_prompt": -0.9350797533988953,
"loss": 2.037,
"nll_loss": 2.0045900344848633,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.809972882270813,
"rewards/margins": 1.6612660884857178,
"rewards/rejected": -2.4712390899658203,
"step": 1010
},
{
"epoch": 0.816,
"grad_norm": 0.20983648268469735,
"learning_rate": 4.1414643893823914e-05,
"log_odds_chosen": 4.862036228179932,
"log_odds_ratio": -0.07260783016681671,
"logits/chosen": -2.9284424781799316,
"logits/chosen_prompt": -2.8569953441619873,
"logits/rejected": -2.5351157188415527,
"logits/rejected_prompt": -2.8426971435546875,
"logps/chosen": -2.1229608058929443,
"logps/chosen_both": -2.10365629196167,
"logps/chosen_prompt": -0.8154341578483582,
"logps/rejected": -6.869643211364746,
"logps/rejected_both": -6.7744574546813965,
"logps/rejected_prompt": -0.9435701370239258,
"loss": 2.1102,
"nll_loss": 2.1023664474487305,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8491843342781067,
"rewards/margins": 1.8986728191375732,
"rewards/rejected": -2.7478575706481934,
"step": 1020
},
{
"epoch": 0.824,
"grad_norm": 0.16714535237522857,
"learning_rate": 4.125609968375072e-05,
"log_odds_chosen": 5.137936115264893,
"log_odds_ratio": -0.0722423866391182,
"logits/chosen": -2.917429208755493,
"logits/chosen_prompt": -2.805572509765625,
"logits/rejected": -2.4986531734466553,
"logits/rejected_prompt": -2.7935025691986084,
"logps/chosen": -1.898790717124939,
"logps/chosen_both": -1.88314688205719,
"logps/chosen_prompt": -0.8224050402641296,
"logps/rejected": -6.880563259124756,
"logps/rejected_both": -6.7928266525268555,
"logps/rejected_prompt": -0.9875515699386597,
"loss": 2.0572,
"nll_loss": 1.8828372955322266,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7595163583755493,
"rewards/margins": 1.9927089214324951,
"rewards/rejected": -2.752225637435913,
"step": 1030
},
{
"epoch": 0.832,
"grad_norm": 0.17116655114302515,
"learning_rate": 4.109641456463135e-05,
"log_odds_chosen": 4.716578006744385,
"log_odds_ratio": -0.05661363527178764,
"logits/chosen": -2.9051055908203125,
"logits/chosen_prompt": -2.861964702606201,
"logits/rejected": -2.489297866821289,
"logits/rejected_prompt": -2.8317601680755615,
"logps/chosen": -2.72660493850708,
"logps/chosen_both": -2.6989545822143555,
"logps/chosen_prompt": -0.786345899105072,
"logps/rejected": -7.32622766494751,
"logps/rejected_both": -7.235006809234619,
"logps/rejected_prompt": -0.9496296048164368,
"loss": 2.0544,
"nll_loss": 2.698387622833252,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.0906422138214111,
"rewards/margins": 1.8398488759994507,
"rewards/rejected": -2.9304909706115723,
"step": 1040
},
{
"epoch": 0.84,
"grad_norm": 2.0421569101702004,
"learning_rate": 4.093559974371725e-05,
"log_odds_chosen": 4.683531284332275,
"log_odds_ratio": -0.14838626980781555,
"logits/chosen": -2.983940601348877,
"logits/chosen_prompt": -2.8726494312286377,
"logits/rejected": -2.683384418487549,
"logits/rejected_prompt": -2.844991683959961,
"logps/chosen": -1.7734657526016235,
"logps/chosen_both": -1.762310266494751,
"logps/chosen_prompt": -0.8980112075805664,
"logps/rejected": -6.07004976272583,
"logps/rejected_both": -5.987616062164307,
"logps/rejected_prompt": -1.1182132959365845,
"loss": 2.145,
"nll_loss": 1.7613089084625244,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7093862891197205,
"rewards/margins": 1.7186336517333984,
"rewards/rejected": -2.4280200004577637,
"step": 1050
},
{
"epoch": 0.848,
"grad_norm": 0.1891364752116159,
"learning_rate": 4.077366650754624e-05,
"log_odds_chosen": 4.3087382316589355,
"log_odds_ratio": -0.1364879608154297,
"logits/chosen": -2.9432783126831055,
"logits/chosen_prompt": -2.815147638320923,
"logits/rejected": -2.721280097961426,
"logits/rejected_prompt": -2.818236827850342,
"logps/chosen": -1.8882700204849243,
"logps/chosen_both": -1.8756290674209595,
"logps/chosen_prompt": -0.8526128530502319,
"logps/rejected": -6.065881729125977,
"logps/rejected_both": -6.0042314529418945,
"logps/rejected_prompt": -0.9744648933410645,
"loss": 2.1355,
"nll_loss": 1.8748886585235596,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7553080320358276,
"rewards/margins": 1.6710445880889893,
"rewards/rejected": -2.4263527393341064,
"step": 1060
},
{
"epoch": 0.856,
"grad_norm": 33.963511456298086,
"learning_rate": 4.0610626221150394e-05,
"log_odds_chosen": 4.251172065734863,
"log_odds_ratio": -0.09040095657110214,
"logits/chosen": -2.9414284229278564,
"logits/chosen_prompt": -2.8389973640441895,
"logits/rejected": -2.8033430576324463,
"logits/rejected_prompt": -2.82332706451416,
"logps/chosen": -1.9342035055160522,
"logps/chosen_both": -1.9176651239395142,
"logps/chosen_prompt": -0.8298524022102356,
"logps/rejected": -6.048348903656006,
"logps/rejected_both": -5.979620933532715,
"logps/rejected_prompt": -0.9826586842536926,
"loss": 2.0673,
"nll_loss": 1.9169620275497437,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7736814618110657,
"rewards/margins": 1.645658254623413,
"rewards/rejected": -2.419339656829834,
"step": 1070
},
{
"epoch": 0.864,
"grad_norm": 5.339928107993312,
"learning_rate": 4.044649032725836e-05,
"log_odds_chosen": 4.668586730957031,
"log_odds_ratio": -0.04072408378124237,
"logits/chosen": -2.9805121421813965,
"logits/chosen_prompt": -2.858212947845459,
"logits/rejected": -2.779395580291748,
"logits/rejected_prompt": -2.8353207111358643,
"logps/chosen": -2.4372153282165527,
"logps/chosen_both": -2.4168477058410645,
"logps/chosen_prompt": -0.7482016086578369,
"logps/rejected": -6.966684818267822,
"logps/rejected_both": -6.886708736419678,
"logps/rejected_prompt": -0.9111725687980652,
"loss": 2.1177,
"nll_loss": 2.4160780906677246,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.9748862981796265,
"rewards/margins": 1.8117873668670654,
"rewards/rejected": -2.7866737842559814,
"step": 1080
},
{
"epoch": 0.872,
"grad_norm": 0.2060230046824354,
"learning_rate": 4.028127034549229e-05,
"log_odds_chosen": 2.597301483154297,
"log_odds_ratio": -0.6685577630996704,
"logits/chosen": -2.9436233043670654,
"logits/chosen_prompt": -2.8545641899108887,
"logits/rejected": -2.8262507915496826,
"logits/rejected_prompt": -2.8353445529937744,
"logps/chosen": -2.3411784172058105,
"logps/chosen_both": -2.3227829933166504,
"logps/chosen_prompt": -0.7935237884521484,
"logps/rejected": -4.853774070739746,
"logps/rejected_both": -4.805240154266357,
"logps/rejected_prompt": -0.958962082862854,
"loss": 2.139,
"nll_loss": 2.3222460746765137,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.9364713430404663,
"rewards/margins": 1.0050380229949951,
"rewards/rejected": -1.941509485244751,
"step": 1090
},
{
"epoch": 0.88,
"grad_norm": 0.17774111122195055,
"learning_rate": 4.011497787155938e-05,
"log_odds_chosen": 4.53702449798584,
"log_odds_ratio": -0.02008737251162529,
"logits/chosen": -2.898667335510254,
"logits/chosen_prompt": -2.8412561416625977,
"logits/rejected": -2.799050807952881,
"logits/rejected_prompt": -2.819329023361206,
"logps/chosen": -2.120091438293457,
"logps/chosen_both": -2.0994343757629395,
"logps/chosen_prompt": -0.7898808717727661,
"logps/rejected": -6.5274176597595215,
"logps/rejected_both": -6.440402030944824,
"logps/rejected_prompt": -1.0125057697296143,
"loss": 2.0681,
"nll_loss": 2.0985283851623535,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8480366468429565,
"rewards/margins": 1.7629306316375732,
"rewards/rejected": -2.6109673976898193,
"step": 1100
},
{
"epoch": 0.888,
"grad_norm": 0.5492835402833951,
"learning_rate": 3.9947624576437975e-05,
"log_odds_chosen": 3.65099835395813,
"log_odds_ratio": -0.21185067296028137,
"logits/chosen": -2.8890416622161865,
"logits/chosen_prompt": -2.8260998725891113,
"logits/rejected": -2.8036818504333496,
"logits/rejected_prompt": -2.8174471855163574,
"logps/chosen": -2.0846400260925293,
"logps/chosen_both": -2.065948247909546,
"logps/chosen_prompt": -0.8428912162780762,
"logps/rejected": -5.634668350219727,
"logps/rejected_both": -5.555979251861572,
"logps/rejected_prompt": -1.0157763957977295,
"loss": 2.128,
"nll_loss": 2.065037488937378,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.8338559865951538,
"rewards/margins": 1.420011281967163,
"rewards/rejected": -2.2538673877716064,
"step": 1110
},
{
"epoch": 0.896,
"grad_norm": 0.2391375753226414,
"learning_rate": 3.977922220555855e-05,
"log_odds_chosen": 4.121129989624023,
"log_odds_ratio": -0.2298469990491867,
"logits/chosen": -2.969383955001831,
"logits/chosen_prompt": -2.841618061065674,
"logits/rejected": -2.8132920265197754,
"logits/rejected_prompt": -2.8176777362823486,
"logps/chosen": -2.3696742057800293,
"logps/chosen_both": -2.350247621536255,
"logps/chosen_prompt": -0.8721768260002136,
"logps/rejected": -6.348196029663086,
"logps/rejected_both": -6.277990818023682,
"logps/rejected_prompt": -1.0750401020050049,
"loss": 2.1621,
"nll_loss": 2.3494279384613037,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.9478696584701538,
"rewards/margins": 1.591408610343933,
"rewards/rejected": -2.539278268814087,
"step": 1120
},
{
"epoch": 0.904,
"grad_norm": 1.0869471605926033,
"learning_rate": 3.960978257797931e-05,
"log_odds_chosen": 3.306716203689575,
"log_odds_ratio": -0.17165422439575195,
"logits/chosen": -2.901864767074585,
"logits/chosen_prompt": -2.8563239574432373,
"logits/rejected": -2.815932273864746,
"logits/rejected_prompt": -2.829672336578369,
"logps/chosen": -2.3288769721984863,
"logps/chosen_both": -2.307668447494507,
"logps/chosen_prompt": -0.8160017132759094,
"logps/rejected": -5.531130790710449,
"logps/rejected_both": -5.466065406799316,
"logps/rejected_prompt": -0.9807281494140625,
"loss": 2.0755,
"nll_loss": 2.3062796592712402,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.9315508604049683,
"rewards/margins": 1.280901551246643,
"rewards/rejected": -2.2124524116516113,
"step": 1130
},
{
"epoch": 0.912,
"grad_norm": 0.21229431870443033,
"learning_rate": 3.943931758555669e-05,
"log_odds_chosen": 4.015295505523682,
"log_odds_ratio": -0.14405557513237,
"logits/chosen": -2.9465222358703613,
"logits/chosen_prompt": -2.830146074295044,
"logits/rejected": -2.7873902320861816,
"logits/rejected_prompt": -2.8030102252960205,
"logps/chosen": -1.9876712560653687,
"logps/chosen_both": -1.9711806774139404,
"logps/chosen_prompt": -0.8330597877502441,
"logps/rejected": -5.87436580657959,
"logps/rejected_both": -5.79966402053833,
"logps/rejected_prompt": -1.0102033615112305,
"loss": 1.9833,
"nll_loss": 1.9705440998077393,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7950685620307922,
"rewards/margins": 1.554678201675415,
"rewards/rejected": -2.3497467041015625,
"step": 1140
},
{
"epoch": 0.92,
"grad_norm": 0.18607892338713655,
"learning_rate": 3.92678391921108e-05,
"log_odds_chosen": 4.167088985443115,
"log_odds_ratio": -0.081887386739254,
"logits/chosen": -2.9688785076141357,
"logits/chosen_prompt": -2.8491876125335693,
"logits/rejected": -2.8233845233917236,
"logits/rejected_prompt": -2.836411237716675,
"logps/chosen": -2.0486931800842285,
"logps/chosen_both": -2.0284764766693115,
"logps/chosen_prompt": -0.8191589117050171,
"logps/rejected": -6.082810878753662,
"logps/rejected_both": -5.993044853210449,
"logps/rejected_prompt": -0.957076907157898,
"loss": 2.086,
"nll_loss": 2.0268213748931885,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8194772601127625,
"rewards/margins": 1.613647222518921,
"rewards/rejected": -2.433124303817749,
"step": 1150
},
{
"epoch": 0.928,
"grad_norm": 0.21278740734057763,
"learning_rate": 3.909535943258567e-05,
"log_odds_chosen": 4.548261642456055,
"log_odds_ratio": -0.07581990212202072,
"logits/chosen": -3.092094898223877,
"logits/chosen_prompt": -2.8779349327087402,
"logits/rejected": -2.840526580810547,
"logits/rejected_prompt": -2.8706183433532715,
"logps/chosen": -1.943817138671875,
"logps/chosen_both": -1.9261138439178467,
"logps/chosen_prompt": -0.8740865588188171,
"logps/rejected": -6.346927642822266,
"logps/rejected_both": -6.251557350158691,
"logps/rejected_prompt": -1.028618574142456,
"loss": 2.0516,
"nll_loss": 1.9256139993667603,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.77752685546875,
"rewards/margins": 1.7612441778182983,
"rewards/rejected": -2.538771152496338,
"step": 1160
},
{
"epoch": 0.936,
"grad_norm": 2.074191616812015,
"learning_rate": 3.8921890412204705e-05,
"log_odds_chosen": 3.9714667797088623,
"log_odds_ratio": -0.10122326761484146,
"logits/chosen": -2.9742226600646973,
"logits/chosen_prompt": -2.8603179454803467,
"logits/rejected": -2.8532581329345703,
"logits/rejected_prompt": -2.833484172821045,
"logps/chosen": -2.3508994579315186,
"logps/chosen_both": -2.333052158355713,
"logps/chosen_prompt": -0.8015215992927551,
"logps/rejected": -6.174811363220215,
"logps/rejected_both": -6.111483573913574,
"logps/rejected_prompt": -1.0183693170547485,
"loss": 2.2824,
"nll_loss": 2.3322701454162598,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.9403597712516785,
"rewards/margins": 1.5295648574829102,
"rewards/rejected": -2.4699246883392334,
"step": 1170
},
{
"epoch": 0.944,
"grad_norm": 0.2875489978173768,
"learning_rate": 3.8747444305621e-05,
"log_odds_chosen": 4.248479843139648,
"log_odds_ratio": -0.08145709335803986,
"logits/chosen": -2.950727939605713,
"logits/chosen_prompt": -2.822025775909424,
"logits/rejected": -2.663987398147583,
"logits/rejected_prompt": -2.8115882873535156,
"logps/chosen": -1.9704688787460327,
"logps/chosen_both": -1.9537798166275024,
"logps/chosen_prompt": -0.8284621238708496,
"logps/rejected": -6.081311225891113,
"logps/rejected_both": -6.007387161254883,
"logps/rejected_prompt": -1.0018432140350342,
"loss": 1.9987,
"nll_loss": 1.9535901546478271,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7881874442100525,
"rewards/margins": 1.6443370580673218,
"rewards/rejected": -2.4325246810913086,
"step": 1180
},
{
"epoch": 0.952,
"grad_norm": 6.520768567954707,
"learning_rate": 3.8572033356062943e-05,
"log_odds_chosen": 3.6630382537841797,
"log_odds_ratio": -0.1266271471977234,
"logits/chosen": -2.9928297996520996,
"logits/chosen_prompt": -2.8252012729644775,
"logits/rejected": -2.722259521484375,
"logits/rejected_prompt": -2.7941107749938965,
"logps/chosen": -2.0680882930755615,
"logps/chosen_both": -2.0539040565490723,
"logps/chosen_prompt": -0.7603567838668823,
"logps/rejected": -5.370635032653809,
"logps/rejected_both": -5.302577018737793,
"logps/rejected_prompt": -1.007256031036377,
"loss": 2.1861,
"nll_loss": 2.052879810333252,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8272353410720825,
"rewards/margins": 1.3210185766220093,
"rewards/rejected": -2.148253917694092,
"step": 1190
},
{
"epoch": 0.96,
"grad_norm": 2.68559023802143,
"learning_rate": 3.8395669874474915e-05,
"log_odds_chosen": 4.359891414642334,
"log_odds_ratio": -0.015468957833945751,
"logits/chosen": -2.91310453414917,
"logits/chosen_prompt": -2.7794852256774902,
"logits/rejected": -2.6371960639953613,
"logits/rejected_prompt": -2.7625763416290283,
"logps/chosen": -1.8540757894515991,
"logps/chosen_both": -1.839600920677185,
"logps/chosen_prompt": -0.8248388171195984,
"logps/rejected": -6.038485527038574,
"logps/rejected_both": -5.962553977966309,
"logps/rejected_prompt": -0.9856597185134888,
"loss": 2.0673,
"nll_loss": 1.8394546508789062,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7416303753852844,
"rewards/margins": 1.6737639904022217,
"rewards/rejected": -2.4153940677642822,
"step": 1200
},
{
"epoch": 0.968,
"grad_norm": 0.185073881578095,
"learning_rate": 3.821836623865329e-05,
"log_odds_chosen": 4.161174297332764,
"log_odds_ratio": -0.07971666753292084,
"logits/chosen": -2.903371572494507,
"logits/chosen_prompt": -2.778414487838745,
"logits/rejected": -2.5587830543518066,
"logits/rejected_prompt": -2.762293815612793,
"logps/chosen": -2.1283013820648193,
"logps/chosen_both": -2.1046059131622314,
"logps/chosen_prompt": -0.7429525852203369,
"logps/rejected": -6.169132232666016,
"logps/rejected_both": -6.081439018249512,
"logps/rejected_prompt": -0.9049463272094727,
"loss": 2.2118,
"nll_loss": 2.104139566421509,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8513206243515015,
"rewards/margins": 1.6163326501846313,
"rewards/rejected": -2.467653274536133,
"step": 1210
},
{
"epoch": 0.976,
"grad_norm": 0.19264797772361533,
"learning_rate": 3.80401348923777e-05,
"log_odds_chosen": 4.120739936828613,
"log_odds_ratio": -0.14354461431503296,
"logits/chosen": -2.9424567222595215,
"logits/chosen_prompt": -2.7921371459960938,
"logits/rejected": -2.5477294921875,
"logits/rejected_prompt": -2.7542147636413574,
"logps/chosen": -1.913551688194275,
"logps/chosen_both": -1.8978935480117798,
"logps/chosen_prompt": -0.8339295387268066,
"logps/rejected": -5.9061384201049805,
"logps/rejected_both": -5.837408542633057,
"logps/rejected_prompt": -0.9619489908218384,
"loss": 2.0995,
"nll_loss": 1.8977426290512085,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7654207348823547,
"rewards/margins": 1.5970344543457031,
"rewards/rejected": -2.362455129623413,
"step": 1220
},
{
"epoch": 0.984,
"grad_norm": 9.292901066306287,
"learning_rate": 3.786098834453766e-05,
"log_odds_chosen": 3.505579710006714,
"log_odds_ratio": -0.15101362764835358,
"logits/chosen": -2.910395622253418,
"logits/chosen_prompt": -2.8129782676696777,
"logits/rejected": -2.574031352996826,
"logits/rejected_prompt": -2.782696008682251,
"logps/chosen": -2.1372461318969727,
"logps/chosen_both": -2.112764835357666,
"logps/chosen_prompt": -0.8219666481018066,
"logps/rejected": -5.543887138366699,
"logps/rejected_both": -5.4572343826293945,
"logps/rejected_prompt": -0.9813167452812195,
"loss": 2.0645,
"nll_loss": 2.111912488937378,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.8548984527587891,
"rewards/margins": 1.3626563549041748,
"rewards/rejected": -2.217555046081543,
"step": 1230
},
{
"epoch": 0.992,
"grad_norm": 1.6964209385464728,
"learning_rate": 3.7680939168254733e-05,
"log_odds_chosen": 3.888018846511841,
"log_odds_ratio": -0.1449870765209198,
"logits/chosen": -2.9042837619781494,
"logits/chosen_prompt": -2.823965549468994,
"logits/rejected": -2.4834845066070557,
"logits/rejected_prompt": -2.7938156127929688,
"logps/chosen": -2.0088438987731934,
"logps/chosen_both": -1.9936256408691406,
"logps/chosen_prompt": -0.7543269395828247,
"logps/rejected": -5.783638000488281,
"logps/rejected_both": -5.7258687019348145,
"logps/rejected_prompt": -0.9668887257575989,
"loss": 2.038,
"nll_loss": 1.992997169494629,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8035375475883484,
"rewards/margins": 1.5099177360534668,
"rewards/rejected": -2.31345534324646,
"step": 1240
},
{
"epoch": 1.0,
"grad_norm": 2.478887419876043,
"learning_rate": 3.7500000000000003e-05,
"log_odds_chosen": 5.275210380554199,
"log_odds_ratio": -0.006285688374191523,
"logits/chosen": -2.9461379051208496,
"logits/chosen_prompt": -2.7684402465820312,
"logits/rejected": -2.312152147293091,
"logits/rejected_prompt": -2.7450311183929443,
"logps/chosen": -1.8539674282073975,
"logps/chosen_both": -1.839685082435608,
"logps/chosen_prompt": -0.8559527397155762,
"logps/rejected": -6.958900451660156,
"logps/rejected_both": -6.868790626525879,
"logps/rejected_prompt": -1.0536139011383057,
"loss": 2.2404,
"nll_loss": 1.8390467166900635,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7415870428085327,
"rewards/margins": 2.04197359085083,
"rewards/rejected": -2.783560276031494,
"step": 1250
},
{
"epoch": 1.008,
"grad_norm": 0.17878604739151382,
"learning_rate": 3.731818353870729e-05,
"log_odds_chosen": 4.191466331481934,
"log_odds_ratio": -0.09246650338172913,
"logits/chosen": -2.957552433013916,
"logits/chosen_prompt": -2.771613359451294,
"logits/rejected": -2.3375356197357178,
"logits/rejected_prompt": -2.7522428035736084,
"logps/chosen": -1.989243745803833,
"logps/chosen_both": -1.9734690189361572,
"logps/chosen_prompt": -0.8279644250869751,
"logps/rejected": -6.043200969696045,
"logps/rejected_both": -5.973423480987549,
"logps/rejected_prompt": -1.0317699909210205,
"loss": 2.0389,
"nll_loss": 1.9726651906967163,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7956975102424622,
"rewards/margins": 1.6215832233428955,
"rewards/rejected": -2.417280673980713,
"step": 1260
},
{
"epoch": 1.016,
"grad_norm": 23.252626998417625,
"learning_rate": 3.713550254488185e-05,
"log_odds_chosen": 3.7449231147766113,
"log_odds_ratio": -0.16642269492149353,
"logits/chosen": -2.8947479724884033,
"logits/chosen_prompt": -2.7788119316101074,
"logits/rejected": -2.3416316509246826,
"logits/rejected_prompt": -2.760896921157837,
"logps/chosen": -2.020059585571289,
"logps/chosen_both": -2.0054023265838623,
"logps/chosen_prompt": -0.8935413360595703,
"logps/rejected": -5.6518659591674805,
"logps/rejected_both": -5.590303897857666,
"logps/rejected_prompt": -1.0056589841842651,
"loss": 2.0643,
"nll_loss": 2.0046825408935547,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8080238103866577,
"rewards/margins": 1.452722430229187,
"rewards/rejected": -2.2607462406158447,
"step": 1270
},
{
"epoch": 1.024,
"grad_norm": 0.1852421213956056,
"learning_rate": 3.695196983970481e-05,
"log_odds_chosen": 5.502694129943848,
"log_odds_ratio": -0.07146742194890976,
"logits/chosen": -2.9081971645355225,
"logits/chosen_prompt": -2.745790719985962,
"logits/rejected": -2.0626957416534424,
"logits/rejected_prompt": -2.7173855304718018,
"logps/chosen": -1.7873703241348267,
"logps/chosen_both": -1.7739589214324951,
"logps/chosen_prompt": -0.8900352716445923,
"logps/rejected": -7.1119537353515625,
"logps/rejected_both": -7.017317295074463,
"logps/rejected_prompt": -1.0950191020965576,
"loss": 2.0059,
"nll_loss": 1.7733700275421143,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7149480581283569,
"rewards/margins": 2.129833698272705,
"rewards/rejected": -2.8447818756103516,
"step": 1280
},
{
"epoch": 1.032,
"grad_norm": 0.1901267311863244,
"learning_rate": 3.6767598304133324e-05,
"log_odds_chosen": 4.644869804382324,
"log_odds_ratio": -0.14166082441806793,
"logits/chosen": -2.9974873065948486,
"logits/chosen_prompt": -2.7224061489105225,
"logits/rejected": -2.2138378620147705,
"logits/rejected_prompt": -2.6832873821258545,
"logps/chosen": -1.9028959274291992,
"logps/chosen_both": -1.8842157125473022,
"logps/chosen_prompt": -0.8141298294067383,
"logps/rejected": -6.421015739440918,
"logps/rejected_both": -6.323419094085693,
"logps/rejected_prompt": -0.979651153087616,
"loss": 1.9806,
"nll_loss": 1.8838021755218506,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7611583471298218,
"rewards/margins": 1.8072481155395508,
"rewards/rejected": -2.568406581878662,
"step": 1290
},
{
"epoch": 1.04,
"grad_norm": 0.1720032056568101,
"learning_rate": 3.6582400877996546e-05,
"log_odds_chosen": 5.198369026184082,
"log_odds_ratio": -0.07235782593488693,
"logits/chosen": -2.8921890258789062,
"logits/chosen_prompt": -2.7482800483703613,
"logits/rejected": -1.9527368545532227,
"logits/rejected_prompt": -2.7276439666748047,
"logps/chosen": -2.0934653282165527,
"logps/chosen_both": -2.076221227645874,
"logps/chosen_prompt": -0.8200351595878601,
"logps/rejected": -7.170855522155762,
"logps/rejected_both": -7.079026699066162,
"logps/rejected_prompt": -0.9832828640937805,
"loss": 2.0527,
"nll_loss": 2.075456380844116,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8373861312866211,
"rewards/margins": 2.0309560298919678,
"rewards/rejected": -2.868342161178589,
"step": 1300
},
{
"epoch": 1.048,
"grad_norm": 0.18338089227039325,
"learning_rate": 3.639639055908751e-05,
"log_odds_chosen": 5.48695707321167,
"log_odds_ratio": -0.07169006019830704,
"logits/chosen": -2.874192953109741,
"logits/chosen_prompt": -2.733611583709717,
"logits/rejected": -1.8326069116592407,
"logits/rejected_prompt": -2.6982951164245605,
"logps/chosen": -2.0102884769439697,
"logps/chosen_both": -1.9914735555648804,
"logps/chosen_prompt": -0.8337292671203613,
"logps/rejected": -7.363123416900635,
"logps/rejected_both": -7.263747215270996,
"logps/rejected_prompt": -0.9874321818351746,
"loss": 1.9824,
"nll_loss": 1.9909473657608032,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.8041152954101562,
"rewards/margins": 2.141134023666382,
"rewards/rejected": -2.945249319076538,
"step": 1310
},
{
"epoch": 1.056,
"grad_norm": 0.1837356662895363,
"learning_rate": 3.6209580402250815e-05,
"log_odds_chosen": 5.6873369216918945,
"log_odds_ratio": -0.07120365649461746,
"logits/chosen": -2.9526381492614746,
"logits/chosen_prompt": -2.7081189155578613,
"logits/rejected": -1.8793054819107056,
"logits/rejected_prompt": -2.6829447746276855,
"logps/chosen": -1.9104582071304321,
"logps/chosen_both": -1.8940789699554443,
"logps/chosen_prompt": -0.8755657076835632,
"logps/rejected": -7.447749137878418,
"logps/rejected_both": -7.334907531738281,
"logps/rejected_prompt": -1.0553802251815796,
"loss": 2.1442,
"nll_loss": 1.8928571939468384,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7641832828521729,
"rewards/margins": 2.214916706085205,
"rewards/rejected": -2.979099750518799,
"step": 1320
},
{
"epoch": 1.064,
"grad_norm": 0.3951461995742214,
"learning_rate": 3.602198351846647e-05,
"log_odds_chosen": 4.024718761444092,
"log_odds_ratio": -0.5831412672996521,
"logits/chosen": -2.981672525405884,
"logits/chosen_prompt": -2.7551183700561523,
"logits/rejected": -2.1212754249572754,
"logits/rejected_prompt": -2.7351596355438232,
"logps/chosen": -2.4395077228546143,
"logps/chosen_both": -2.417250871658325,
"logps/chosen_prompt": -0.8564618825912476,
"logps/rejected": -6.365363597869873,
"logps/rejected_both": -6.2751054763793945,
"logps/rejected_prompt": -1.031884789466858,
"loss": 2.2375,
"nll_loss": 2.4155256748199463,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.9758030772209167,
"rewards/margins": 1.5703424215316772,
"rewards/rejected": -2.5461456775665283,
"step": 1330
},
{
"epoch": 1.072,
"grad_norm": 0.18983825409437058,
"learning_rate": 3.5833613073929684e-05,
"log_odds_chosen": 4.155622482299805,
"log_odds_ratio": -0.14320290088653564,
"logits/chosen": -3.005096673965454,
"logits/chosen_prompt": -2.8319993019104004,
"logits/rejected": -2.3421382904052734,
"logits/rejected_prompt": -2.8086118698120117,
"logps/chosen": -1.9423980712890625,
"logps/chosen_both": -1.9247316122055054,
"logps/chosen_prompt": -0.7214570045471191,
"logps/rejected": -5.971634864807129,
"logps/rejected_both": -5.893637657165527,
"logps/rejected_prompt": -0.9021228551864624,
"loss": 2.092,
"nll_loss": 1.9240925312042236,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7769593000411987,
"rewards/margins": 1.6116949319839478,
"rewards/rejected": -2.3886542320251465,
"step": 1340
},
{
"epoch": 1.08,
"grad_norm": 0.6083000414245734,
"learning_rate": 3.564448228912682e-05,
"log_odds_chosen": 4.163081169128418,
"log_odds_ratio": -0.10094372928142548,
"logits/chosen": -2.963536262512207,
"logits/chosen_prompt": -2.846693515777588,
"logits/rejected": -2.542693614959717,
"logits/rejected_prompt": -2.819491386413574,
"logps/chosen": -2.337949275970459,
"logps/chosen_both": -2.3158886432647705,
"logps/chosen_prompt": -0.845288872718811,
"logps/rejected": -6.368934154510498,
"logps/rejected_both": -6.2805986404418945,
"logps/rejected_prompt": -1.0301436185836792,
"loss": 2.0382,
"nll_loss": 2.3151814937591553,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.9351798295974731,
"rewards/margins": 1.6123939752578735,
"rewards/rejected": -2.5475735664367676,
"step": 1350
},
{
"epoch": 1.088,
"grad_norm": 0.2154750785789702,
"learning_rate": 3.545460443790753e-05,
"log_odds_chosen": 5.453991889953613,
"log_odds_ratio": -0.004712260328233242,
"logits/chosen": -2.908536434173584,
"logits/chosen_prompt": -2.868119716644287,
"logits/rejected": -2.40228533744812,
"logits/rejected_prompt": -2.843205451965332,
"logps/chosen": -2.089245319366455,
"logps/chosen_both": -2.072594165802002,
"logps/chosen_prompt": -0.8769745826721191,
"logps/rejected": -7.407778739929199,
"logps/rejected_both": -7.316309928894043,
"logps/rejected_prompt": -0.9720737338066101,
"loss": 2.0088,
"nll_loss": 2.071500778198242,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8356983065605164,
"rewards/margins": 2.127413749694824,
"rewards/rejected": -2.9631123542785645,
"step": 1360
},
{
"epoch": 1.096,
"grad_norm": 0.1857265942387655,
"learning_rate": 3.52639928465532e-05,
"log_odds_chosen": 4.4336113929748535,
"log_odds_ratio": -0.14170871675014496,
"logits/chosen": -3.0002169609069824,
"logits/chosen_prompt": -2.8658928871154785,
"logits/rejected": -2.558640956878662,
"logits/rejected_prompt": -2.843383550643921,
"logps/chosen": -1.8998088836669922,
"logps/chosen_both": -1.8837999105453491,
"logps/chosen_prompt": -0.8331824541091919,
"logps/rejected": -6.191910266876221,
"logps/rejected_both": -6.1067986488342285,
"logps/rejected_prompt": -0.9438120126724243,
"loss": 2.0577,
"nll_loss": 1.883371353149414,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7599235773086548,
"rewards/margins": 1.7168405055999756,
"rewards/rejected": -2.47676420211792,
"step": 1370
},
{
"epoch": 1.104,
"grad_norm": 0.22400309241356314,
"learning_rate": 3.507266089284157e-05,
"log_odds_chosen": 5.497137069702148,
"log_odds_ratio": -0.004467605613172054,
"logits/chosen": -2.9908201694488525,
"logits/chosen_prompt": -2.821722984313965,
"logits/rejected": -2.416836977005005,
"logits/rejected_prompt": -2.796220541000366,
"logps/chosen": -1.8564481735229492,
"logps/chosen_both": -1.8395103216171265,
"logps/chosen_prompt": -0.8639839291572571,
"logps/rejected": -7.180043697357178,
"logps/rejected_both": -7.0766448974609375,
"logps/rejected_prompt": -0.9959409832954407,
"loss": 2.0609,
"nll_loss": 1.8391234874725342,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7425792813301086,
"rewards/margins": 2.1294379234313965,
"rewards/rejected": -2.8720173835754395,
"step": 1380
},
{
"epoch": 1.112,
"grad_norm": 0.19384408681406856,
"learning_rate": 3.488062200510791e-05,
"log_odds_chosen": 5.338822841644287,
"log_odds_ratio": -0.00644069816917181,
"logits/chosen": -2.959766387939453,
"logits/chosen_prompt": -2.7905402183532715,
"logits/rejected": -2.3757593631744385,
"logits/rejected_prompt": -2.763526678085327,
"logps/chosen": -1.9314730167388916,
"logps/chosen_both": -1.9157222509384155,
"logps/chosen_prompt": -0.8981779217720032,
"logps/rejected": -7.111077785491943,
"logps/rejected_both": -7.018582344055176,
"logps/rejected_prompt": -0.9950772523880005,
"loss": 1.9482,
"nll_loss": 1.9154551029205322,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7725892066955566,
"rewards/margins": 2.0718419551849365,
"rewards/rejected": -2.8444314002990723,
"step": 1390
},
{
"epoch": 1.12,
"grad_norm": 60.752749653266065,
"learning_rate": 3.4687889661302576e-05,
"log_odds_chosen": 4.680363655090332,
"log_odds_ratio": -0.03717372566461563,
"logits/chosen": -2.920323610305786,
"logits/chosen_prompt": -2.8357200622558594,
"logits/rejected": -2.4031760692596436,
"logits/rejected_prompt": -2.802396535873413,
"logps/chosen": -2.005197286605835,
"logps/chosen_both": -1.9863475561141968,
"logps/chosen_prompt": -0.7522888779640198,
"logps/rejected": -6.545997619628906,
"logps/rejected_both": -6.455955505371094,
"logps/rejected_prompt": -0.965649425983429,
"loss": 2.0466,
"nll_loss": 1.985174536705017,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.802078902721405,
"rewards/margins": 1.8163198232650757,
"rewards/rejected": -2.618398904800415,
"step": 1400
},
{
"epoch": 1.1280000000000001,
"grad_norm": 0.5002021593337239,
"learning_rate": 3.4494477388045035e-05,
"log_odds_chosen": 4.483678340911865,
"log_odds_ratio": -0.028768246993422508,
"logits/chosen": -2.92014741897583,
"logits/chosen_prompt": -2.8309707641601562,
"logits/rejected": -2.486912250518799,
"logits/rejected_prompt": -2.804452419281006,
"logps/chosen": -2.067333459854126,
"logps/chosen_both": -2.0484328269958496,
"logps/chosen_prompt": -0.7646309733390808,
"logps/rejected": -6.416478157043457,
"logps/rejected_both": -6.335555076599121,
"logps/rejected_prompt": -0.9275982975959778,
"loss": 2.062,
"nll_loss": 2.047743558883667,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8269332647323608,
"rewards/margins": 1.7396576404571533,
"rewards/rejected": -2.5665910243988037,
"step": 1410
},
{
"epoch": 1.1360000000000001,
"grad_norm": 0.16513157762808564,
"learning_rate": 3.430039875967454e-05,
"log_odds_chosen": 4.668246746063232,
"log_odds_ratio": -0.07646802067756653,
"logits/chosen": -2.9350738525390625,
"logits/chosen_prompt": -2.8208534717559814,
"logits/rejected": -2.421509265899658,
"logits/rejected_prompt": -2.783437490463257,
"logps/chosen": -2.0800347328186035,
"logps/chosen_both": -2.0644993782043457,
"logps/chosen_prompt": -0.8468448519706726,
"logps/rejected": -6.625657558441162,
"logps/rejected_both": -6.545504570007324,
"logps/rejected_prompt": -1.04305100440979,
"loss": 2.0206,
"nll_loss": 2.0629351139068604,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8320137858390808,
"rewards/margins": 1.8182493448257446,
"rewards/rejected": -2.6502633094787598,
"step": 1420
},
{
"epoch": 1.144,
"grad_norm": 2.2295314469384206,
"learning_rate": 3.410566739729746e-05,
"log_odds_chosen": 5.851050853729248,
"log_odds_ratio": -0.004526123404502869,
"logits/chosen": -2.940370798110962,
"logits/chosen_prompt": -2.7820496559143066,
"logits/rejected": -2.2556514739990234,
"logits/rejected_prompt": -2.7672178745269775,
"logps/chosen": -1.8526496887207031,
"logps/chosen_both": -1.8396713733673096,
"logps/chosen_prompt": -0.8455888628959656,
"logps/rejected": -7.520164489746094,
"logps/rejected_both": -7.432145595550537,
"logps/rejected_prompt": -1.002396821975708,
"loss": 2.1827,
"nll_loss": 1.8387296199798584,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7410598993301392,
"rewards/margins": 2.267005681991577,
"rewards/rejected": -3.008065700531006,
"step": 1430
},
{
"epoch": 1.152,
"grad_norm": 4.408515203042964,
"learning_rate": 3.3910296967831266e-05,
"log_odds_chosen": 4.456727027893066,
"log_odds_ratio": -0.14154654741287231,
"logits/chosen": -2.9346349239349365,
"logits/chosen_prompt": -2.7853639125823975,
"logits/rejected": -2.2783145904541016,
"logits/rejected_prompt": -2.7635715007781982,
"logps/chosen": -1.9494521617889404,
"logps/chosen_both": -1.9318408966064453,
"logps/chosen_prompt": -0.9306742548942566,
"logps/rejected": -6.29015588760376,
"logps/rejected_both": -6.198000907897949,
"logps/rejected_prompt": -1.0760185718536377,
"loss": 2.1572,
"nll_loss": 1.931610107421875,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7797808647155762,
"rewards/margins": 1.7362816333770752,
"rewards/rejected": -2.5160624980926514,
"step": 1440
},
{
"epoch": 1.16,
"grad_norm": 0.3551432285571037,
"learning_rate": 3.3714301183045385e-05,
"log_odds_chosen": 5.155561447143555,
"log_odds_ratio": -0.07224146276712418,
"logits/chosen": -2.9873647689819336,
"logits/chosen_prompt": -2.7700507640838623,
"logits/rejected": -2.2287240028381348,
"logits/rejected_prompt": -2.7513465881347656,
"logps/chosen": -1.9037456512451172,
"logps/chosen_both": -1.8827041387557983,
"logps/chosen_prompt": -0.8036454319953918,
"logps/rejected": -6.904747009277344,
"logps/rejected_both": -6.79779052734375,
"logps/rejected_prompt": -0.9606531858444214,
"loss": 2.0135,
"nll_loss": 1.8827041387557983,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7614982724189758,
"rewards/margins": 2.0004005432128906,
"rewards/rejected": -2.7618985176086426,
"step": 1450
},
{
"epoch": 1.168,
"grad_norm": 0.23892058786604192,
"learning_rate": 3.35176937985988e-05,
"log_odds_chosen": 4.485732078552246,
"log_odds_ratio": -0.14207962155342102,
"logits/chosen": -2.945270538330078,
"logits/chosen_prompt": -2.786912441253662,
"logits/rejected": -2.270350217819214,
"logits/rejected_prompt": -2.752725124359131,
"logps/chosen": -2.024524211883545,
"logps/chosen_both": -2.0046331882476807,
"logps/chosen_prompt": -0.774206817150116,
"logps/rejected": -6.382667064666748,
"logps/rejected_both": -6.294032096862793,
"logps/rejected_prompt": -0.9491628408432007,
"loss": 2.0727,
"nll_loss": 2.003938674926758,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.809809684753418,
"rewards/margins": 1.7432572841644287,
"rewards/rejected": -2.5530669689178467,
"step": 1460
},
{
"epoch": 1.176,
"grad_norm": 0.2032800647611215,
"learning_rate": 3.332048861307467e-05,
"log_odds_chosen": 4.051968097686768,
"log_odds_ratio": -0.14674244821071625,
"logits/chosen": -2.99367094039917,
"logits/chosen_prompt": -2.802661657333374,
"logits/rejected": -2.338299512863159,
"logits/rejected_prompt": -2.7645983695983887,
"logps/chosen": -1.9771573543548584,
"logps/chosen_both": -1.9634653329849243,
"logps/chosen_prompt": -0.8673089742660522,
"logps/rejected": -5.909640789031982,
"logps/rejected_both": -5.843233585357666,
"logps/rejected_prompt": -0.918237030506134,
"loss": 2.0442,
"nll_loss": 1.9626314640045166,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.790863037109375,
"rewards/margins": 1.572993516921997,
"rewards/rejected": -2.363856554031372,
"step": 1470
},
{
"epoch": 1.184,
"grad_norm": 0.6791006786877852,
"learning_rate": 3.312269946701191e-05,
"log_odds_chosen": 5.11738395690918,
"log_odds_ratio": -0.08993680030107498,
"logits/chosen": -2.986436605453491,
"logits/chosen_prompt": -2.733582019805908,
"logits/rejected": -2.186984062194824,
"logits/rejected_prompt": -2.714433193206787,
"logps/chosen": -1.95094895362854,
"logps/chosen_both": -1.9355719089508057,
"logps/chosen_prompt": -0.9025853276252747,
"logps/rejected": -6.931356906890869,
"logps/rejected_both": -6.842989444732666,
"logps/rejected_prompt": -0.9505090713500977,
"loss": 2.0225,
"nll_loss": 1.935101866722107,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.780379593372345,
"rewards/margins": 1.992163062095642,
"rewards/rejected": -2.7725424766540527,
"step": 1480
},
{
"epoch": 1.192,
"grad_norm": 12.7002740206941,
"learning_rate": 3.29243402419338e-05,
"log_odds_chosen": 4.771432399749756,
"log_odds_ratio": -0.2453218698501587,
"logits/chosen": -2.9012749195098877,
"logits/chosen_prompt": -2.791215419769287,
"logits/rejected": -2.076328754425049,
"logits/rejected_prompt": -2.7599706649780273,
"logps/chosen": -2.869783878326416,
"logps/chosen_both": -2.8310511112213135,
"logps/chosen_prompt": -0.8819573521614075,
"logps/rejected": -7.530523777008057,
"logps/rejected_both": -7.408067226409912,
"logps/rejected_prompt": -1.0235049724578857,
"loss": 2.0981,
"nll_loss": 2.8310508728027344,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -1.1479135751724243,
"rewards/margins": 1.8642956018447876,
"rewards/rejected": -3.012209415435791,
"step": 1490
},
{
"epoch": 1.2,
"grad_norm": 0.19981467699086264,
"learning_rate": 3.272542485937369e-05,
"log_odds_chosen": 5.507603645324707,
"log_odds_ratio": -0.020156098529696465,
"logits/chosen": -2.9788875579833984,
"logits/chosen_prompt": -2.7711877822875977,
"logits/rejected": -2.0624115467071533,
"logits/rejected_prompt": -2.744807720184326,
"logps/chosen": -2.279694080352783,
"logps/chosen_both": -2.2537825107574463,
"logps/chosen_prompt": -0.8054102659225464,
"logps/rejected": -7.658332824707031,
"logps/rejected_both": -7.547041416168213,
"logps/rejected_prompt": -1.0083348751068115,
"loss": 2.1891,
"nll_loss": 2.2532057762145996,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.9118776321411133,
"rewards/margins": 2.1514554023742676,
"rewards/rejected": -3.063333034515381,
"step": 1500
},
{
"epoch": 1.208,
"grad_norm": 0.2071781414340563,
"learning_rate": 3.2525967279898015e-05,
"log_odds_chosen": 3.779675006866455,
"log_odds_ratio": -0.2771868109703064,
"logits/chosen": -2.9284844398498535,
"logits/chosen_prompt": -2.73115873336792,
"logits/rejected": -2.319711446762085,
"logits/rejected_prompt": -2.7305550575256348,
"logps/chosen": -2.069701910018921,
"logps/chosen_both": -2.0511586666107178,
"logps/chosen_prompt": -0.8410334587097168,
"logps/rejected": -5.765010356903076,
"logps/rejected_both": -5.696343421936035,
"logps/rejected_prompt": -1.032503366470337,
"loss": 2.0199,
"nll_loss": 2.050447940826416,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8278807401657104,
"rewards/margins": 1.4781235456466675,
"rewards/rejected": -2.306004762649536,
"step": 1510
},
{
"epoch": 1.216,
"grad_norm": 0.18615530258539528,
"learning_rate": 3.2325981502126433e-05,
"log_odds_chosen": 4.861352443695068,
"log_odds_ratio": -0.14049410820007324,
"logits/chosen": -2.913702964782715,
"logits/chosen_prompt": -2.647313117980957,
"logits/rejected": -2.130164623260498,
"logits/rejected_prompt": -2.638327121734619,
"logps/chosen": -1.9652678966522217,
"logps/chosen_both": -1.948897361755371,
"logps/chosen_prompt": -0.8634968996047974,
"logps/rejected": -6.705462455749512,
"logps/rejected_both": -6.624319553375244,
"logps/rejected_prompt": -1.035592794418335,
"loss": 2.042,
"nll_loss": 1.948264718055725,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7861071825027466,
"rewards/margins": 1.896078109741211,
"rewards/rejected": -2.682184934616089,
"step": 1520
},
{
"epoch": 1.224,
"grad_norm": 0.3295494652465448,
"learning_rate": 3.21254815617494e-05,
"log_odds_chosen": 5.780041694641113,
"log_odds_ratio": -0.004303447902202606,
"logits/chosen": -2.9733996391296387,
"logits/chosen_prompt": -2.7753407955169678,
"logits/rejected": -2.149972438812256,
"logits/rejected_prompt": -2.7639622688293457,
"logps/chosen": -1.992742896080017,
"logps/chosen_both": -1.975515604019165,
"logps/chosen_prompt": -0.8223434686660767,
"logps/rejected": -7.623780727386475,
"logps/rejected_both": -7.520285606384277,
"logps/rejected_prompt": -0.9390355348587036,
"loss": 2.0442,
"nll_loss": 1.974700689315796,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7970971465110779,
"rewards/margins": 2.252415180206299,
"rewards/rejected": -3.0495123863220215,
"step": 1530
},
{
"epoch": 1.232,
"grad_norm": 0.19966280929549698,
"learning_rate": 3.192448153054306e-05,
"log_odds_chosen": 3.838728427886963,
"log_odds_ratio": -0.14647504687309265,
"logits/chosen": -2.9667465686798096,
"logits/chosen_prompt": -2.8230855464935303,
"logits/rejected": -2.5847840309143066,
"logits/rejected_prompt": -2.822601795196533,
"logps/chosen": -2.122664451599121,
"logps/chosen_both": -2.0995185375213623,
"logps/chosen_prompt": -0.9422351717948914,
"logps/rejected": -5.859042644500732,
"logps/rejected_both": -5.767674446105957,
"logps/rejected_prompt": -1.1390842199325562,
"loss": 2.095,
"nll_loss": 2.0988729000091553,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8490656614303589,
"rewards/margins": 1.494551420211792,
"rewards/rejected": -2.3436172008514404,
"step": 1540
},
{
"epoch": 1.24,
"grad_norm": 0.21556247694383007,
"learning_rate": 3.172299551538164e-05,
"log_odds_chosen": 4.561056137084961,
"log_odds_ratio": -0.07612424343824387,
"logits/chosen": -2.8919880390167236,
"logits/chosen_prompt": -2.808797836303711,
"logits/rejected": -2.5644102096557617,
"logits/rejected_prompt": -2.802969455718994,
"logps/chosen": -1.9356054067611694,
"logps/chosen_both": -1.9162133932113647,
"logps/chosen_prompt": -0.7942633032798767,
"logps/rejected": -6.3366522789001465,
"logps/rejected_both": -6.251999855041504,
"logps/rejected_prompt": -0.9459937810897827,
"loss": 2.1408,
"nll_loss": 1.915776252746582,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7742422223091125,
"rewards/margins": 1.7604186534881592,
"rewards/rejected": -2.534660816192627,
"step": 1550
},
{
"epoch": 1.248,
"grad_norm": 0.19312969446080464,
"learning_rate": 3.152103765724743e-05,
"log_odds_chosen": 3.9786903858184814,
"log_odds_ratio": -0.10893861204385757,
"logits/chosen": -3.0307998657226562,
"logits/chosen_prompt": -2.7775232791900635,
"logits/rejected": -2.6540513038635254,
"logits/rejected_prompt": -2.7630362510681152,
"logps/chosen": -1.9151197671890259,
"logps/chosen_both": -1.8977829217910767,
"logps/chosen_prompt": -0.8471347689628601,
"logps/rejected": -5.757778644561768,
"logps/rejected_both": -5.6885457038879395,
"logps/rejected_prompt": -1.02475106716156,
"loss": 1.9805,
"nll_loss": 1.8967196941375732,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7660478949546814,
"rewards/margins": 1.537063717842102,
"rewards/rejected": -2.3031115531921387,
"step": 1560
},
{
"epoch": 1.256,
"grad_norm": 0.19646035725215968,
"learning_rate": 3.1318622130238236e-05,
"log_odds_chosen": 4.679540157318115,
"log_odds_ratio": -0.07853083312511444,
"logits/chosen": -2.9802026748657227,
"logits/chosen_prompt": -2.761209011077881,
"logits/rejected": -2.5600242614746094,
"logits/rejected_prompt": -2.7424654960632324,
"logps/chosen": -1.7784169912338257,
"logps/chosen_both": -1.7646305561065674,
"logps/chosen_prompt": -0.7139529585838318,
"logps/rejected": -6.263562202453613,
"logps/rejected_both": -6.190931797027588,
"logps/rejected_prompt": -0.9958028793334961,
"loss": 1.9559,
"nll_loss": 1.76325261592865,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7113668322563171,
"rewards/margins": 1.794058084487915,
"rewards/rejected": -2.505424976348877,
"step": 1570
},
{
"epoch": 1.264,
"grad_norm": 0.3866885307142738,
"learning_rate": 3.111576314057268e-05,
"log_odds_chosen": 3.801389694213867,
"log_odds_ratio": -0.20994290709495544,
"logits/chosen": -2.9368879795074463,
"logits/chosen_prompt": -2.7586987018585205,
"logits/rejected": -2.599658966064453,
"logits/rejected_prompt": -2.743234157562256,
"logps/chosen": -1.9905316829681396,
"logps/chosen_both": -1.9739116430282593,
"logps/chosen_prompt": -0.779675304889679,
"logps/rejected": -5.689120292663574,
"logps/rejected_both": -5.620154857635498,
"logps/rejected_prompt": -1.0595715045928955,
"loss": 2.0955,
"nll_loss": 1.9729188680648804,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7962126731872559,
"rewards/margins": 1.4794353246688843,
"rewards/rejected": -2.275648355484009,
"step": 1580
},
{
"epoch": 1.272,
"grad_norm": 0.2325643788869979,
"learning_rate": 3.091247492559312e-05,
"log_odds_chosen": 4.095303058624268,
"log_odds_ratio": -0.1479816436767578,
"logits/chosen": -2.9735686779022217,
"logits/chosen_prompt": -2.8000283241271973,
"logits/rejected": -2.558763027191162,
"logits/rejected_prompt": -2.7583069801330566,
"logps/chosen": -1.8590002059936523,
"logps/chosen_both": -1.8417994976043701,
"logps/chosen_prompt": -0.7681006193161011,
"logps/rejected": -5.769632816314697,
"logps/rejected_both": -5.685044288635254,
"logps/rejected_prompt": -0.97776859998703,
"loss": 2.1087,
"nll_loss": 1.8410179615020752,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.743600070476532,
"rewards/margins": 1.5642529726028442,
"rewards/rejected": -2.3078532218933105,
"step": 1590
},
{
"epoch": 1.28,
"grad_norm": 0.19381282768297478,
"learning_rate": 3.0708771752766394e-05,
"log_odds_chosen": 4.324513912200928,
"log_odds_ratio": -0.0676613599061966,
"logits/chosen": -2.9503540992736816,
"logits/chosen_prompt": -2.7982351779937744,
"logits/rejected": -2.5562634468078613,
"logits/rejected_prompt": -2.7812817096710205,
"logps/chosen": -1.9172391891479492,
"logps/chosen_both": -1.9022390842437744,
"logps/chosen_prompt": -0.7488449811935425,
"logps/rejected": -6.095970630645752,
"logps/rejected_both": -6.031794548034668,
"logps/rejected_prompt": -0.9277693033218384,
"loss": 1.9931,
"nll_loss": 1.9018001556396484,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7668957114219666,
"rewards/margins": 1.671492338180542,
"rewards/rejected": -2.4383881092071533,
"step": 1600
},
{
"epoch": 1.288,
"grad_norm": 0.23915750516620793,
"learning_rate": 3.050466791868254e-05,
"log_odds_chosen": 5.146353721618652,
"log_odds_ratio": -0.07113925367593765,
"logits/chosen": -3.0021820068359375,
"logits/chosen_prompt": -2.7346436977386475,
"logits/rejected": -2.400503635406494,
"logits/rejected_prompt": -2.715362071990967,
"logps/chosen": -1.8657314777374268,
"logps/chosen_both": -1.847728967666626,
"logps/chosen_prompt": -0.8974820375442505,
"logps/rejected": -6.850257873535156,
"logps/rejected_both": -6.73916482925415,
"logps/rejected_prompt": -0.9878479838371277,
"loss": 2.0166,
"nll_loss": 1.8474719524383545,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7462925910949707,
"rewards/margins": 1.9938108921051025,
"rewards/rejected": -2.7401034832000732,
"step": 1610
},
{
"epoch": 1.296,
"grad_norm": 0.25793388819398966,
"learning_rate": 3.0300177748051373e-05,
"log_odds_chosen": 5.57846212387085,
"log_odds_ratio": -0.0040098619647324085,
"logits/chosen": -2.921875476837158,
"logits/chosen_prompt": -2.7485337257385254,
"logits/rejected": -2.2575137615203857,
"logits/rejected_prompt": -2.729705333709717,
"logps/chosen": -2.0379016399383545,
"logps/chosen_both": -2.023336410522461,
"logps/chosen_prompt": -0.8523913621902466,
"logps/rejected": -7.4703474044799805,
"logps/rejected_both": -7.384527683258057,
"logps/rejected_prompt": -1.0912959575653076,
"loss": 2.1288,
"nll_loss": 2.021984100341797,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8151607513427734,
"rewards/margins": 2.172978639602661,
"rewards/rejected": -2.9881393909454346,
"step": 1620
},
{
"epoch": 1.304,
"grad_norm": 0.22406539118846014,
"learning_rate": 3.0095315592697126e-05,
"log_odds_chosen": 4.797575950622559,
"log_odds_ratio": -0.07414670288562775,
"logits/chosen": -2.9373860359191895,
"logits/chosen_prompt": -2.7567806243896484,
"logits/rejected": -2.339370012283325,
"logits/rejected_prompt": -2.738049030303955,
"logps/chosen": -2.040771961212158,
"logps/chosen_both": -2.022752523422241,
"logps/chosen_prompt": -0.8437407612800598,
"logps/rejected": -6.715930938720703,
"logps/rejected_both": -6.622492790222168,
"logps/rejected_prompt": -1.1104066371917725,
"loss": 2.0022,
"nll_loss": 2.021770715713501,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8163086771965027,
"rewards/margins": 1.8700635433197021,
"rewards/rejected": -2.6863722801208496,
"step": 1630
},
{
"epoch": 1.312,
"grad_norm": 0.19146540891141792,
"learning_rate": 2.9890095830551207e-05,
"log_odds_chosen": 5.205162525177002,
"log_odds_ratio": -0.015068802051246166,
"logits/chosen": -2.9850218296051025,
"logits/chosen_prompt": -2.7482991218566895,
"logits/rejected": -2.2866098880767822,
"logits/rejected_prompt": -2.7363736629486084,
"logps/chosen": -1.9450336694717407,
"logps/chosen_both": -1.9250189065933228,
"logps/chosen_prompt": -0.8316828012466431,
"logps/rejected": -6.989903450012207,
"logps/rejected_both": -6.88253927230835,
"logps/rejected_prompt": -0.9859585762023926,
"loss": 2.088,
"nll_loss": 1.924430251121521,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7780135273933411,
"rewards/margins": 2.0179476737976074,
"rewards/rejected": -2.7959611415863037,
"step": 1640
},
{
"epoch": 1.32,
"grad_norm": 0.22495066893496063,
"learning_rate": 2.9684532864643122e-05,
"log_odds_chosen": 5.308048725128174,
"log_odds_ratio": -0.00845087319612503,
"logits/chosen": -2.9742932319641113,
"logits/chosen_prompt": -2.7849392890930176,
"logits/rejected": -2.2982254028320312,
"logits/rejected_prompt": -2.7615458965301514,
"logps/chosen": -1.9874608516693115,
"logps/chosen_both": -1.9658311605453491,
"logps/chosen_prompt": -0.7408405542373657,
"logps/rejected": -7.14414119720459,
"logps/rejected_both": -7.038477897644043,
"logps/rejected_prompt": -1.045243501663208,
"loss": 2.0386,
"nll_loss": 1.9651544094085693,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7949844002723694,
"rewards/margins": 2.0626721382141113,
"rewards/rejected": -2.857656478881836,
"step": 1650
},
{
"epoch": 1.328,
"grad_norm": 0.2286734318135687,
"learning_rate": 2.9478641122089562e-05,
"log_odds_chosen": 4.840089797973633,
"log_odds_ratio": -0.07564349472522736,
"logits/chosen": -3.008890151977539,
"logits/chosen_prompt": -2.8013384342193604,
"logits/rejected": -2.394143581390381,
"logits/rejected_prompt": -2.77929425239563,
"logps/chosen": -1.9756405353546143,
"logps/chosen_both": -1.9581083059310913,
"logps/chosen_prompt": -0.7473115921020508,
"logps/rejected": -6.674158573150635,
"logps/rejected_both": -6.598573207855225,
"logps/rejected_prompt": -0.996438205242157,
"loss": 2.0632,
"nll_loss": 1.9576594829559326,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.790256142616272,
"rewards/margins": 1.8794071674346924,
"rewards/rejected": -2.669663429260254,
"step": 1660
},
{
"epoch": 1.336,
"grad_norm": 1.6039791025981895,
"learning_rate": 2.9272435053081922e-05,
"log_odds_chosen": 4.911754131317139,
"log_odds_ratio": -0.08321253210306168,
"logits/chosen": -2.912379741668701,
"logits/chosen_prompt": -2.7961792945861816,
"logits/rejected": -2.264275312423706,
"logits/rejected_prompt": -2.7643306255340576,
"logps/chosen": -1.951281189918518,
"logps/chosen_both": -1.9351087808609009,
"logps/chosen_prompt": -0.7827764749526978,
"logps/rejected": -6.725755214691162,
"logps/rejected_both": -6.646947383880615,
"logps/rejected_prompt": -1.0157705545425415,
"loss": 2.1063,
"nll_loss": 1.9346641302108765,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7805125713348389,
"rewards/margins": 1.9097894430160522,
"rewards/rejected": -2.6903018951416016,
"step": 1670
},
{
"epoch": 1.3439999999999999,
"grad_norm": 0.3698076131375805,
"learning_rate": 2.9065929129872094e-05,
"log_odds_chosen": 4.74294376373291,
"log_odds_ratio": -0.08516435325145721,
"logits/chosen": -2.9431169033050537,
"logits/chosen_prompt": -2.7804017066955566,
"logits/rejected": -2.2715518474578857,
"logits/rejected_prompt": -2.7543439865112305,
"logps/chosen": -2.047203779220581,
"logps/chosen_both": -2.028724193572998,
"logps/chosen_prompt": -0.8540178537368774,
"logps/rejected": -6.660338401794434,
"logps/rejected_both": -6.572705268859863,
"logps/rejected_prompt": -1.0315988063812256,
"loss": 2.1122,
"nll_loss": 2.0279080867767334,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.818881630897522,
"rewards/margins": 1.8452539443969727,
"rewards/rejected": -2.664135456085205,
"step": 1680
},
{
"epoch": 1.3519999999999999,
"grad_norm": 14.756635490233291,
"learning_rate": 2.8859137845756784e-05,
"log_odds_chosen": 5.338567733764648,
"log_odds_ratio": -0.07245531678199768,
"logits/chosen": -3.0019686222076416,
"logits/chosen_prompt": -2.7564592361450195,
"logits/rejected": -2.10023832321167,
"logits/rejected_prompt": -2.75854754447937,
"logps/chosen": -1.801944375038147,
"logps/chosen_both": -1.7874317169189453,
"logps/chosen_prompt": -0.7828146815299988,
"logps/rejected": -6.980807304382324,
"logps/rejected_both": -6.885933876037598,
"logps/rejected_prompt": -1.0353758335113525,
"loss": 1.9922,
"nll_loss": 1.7853384017944336,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.720777690410614,
"rewards/margins": 2.071545124053955,
"rewards/rejected": -2.792322874069214,
"step": 1690
},
{
"epoch": 1.3599999999999999,
"grad_norm": 1.0205003901521117,
"learning_rate": 2.8652075714060295e-05,
"log_odds_chosen": 4.316029071807861,
"log_odds_ratio": -0.18554985523223877,
"logits/chosen": -2.9789249897003174,
"logits/chosen_prompt": -2.7761483192443848,
"logits/rejected": -2.230045795440674,
"logits/rejected_prompt": -2.7322373390197754,
"logps/chosen": -1.9758758544921875,
"logps/chosen_both": -1.958141565322876,
"logps/chosen_prompt": -0.839580237865448,
"logps/rejected": -6.175426006317139,
"logps/rejected_both": -6.096805572509766,
"logps/rejected_prompt": -1.002239465713501,
"loss": 2.0489,
"nll_loss": 1.957658052444458,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7903503179550171,
"rewards/margins": 1.6798200607299805,
"rewards/rejected": -2.470170497894287,
"step": 1700
},
{
"epoch": 1.3679999999999999,
"grad_norm": 0.5093034024599485,
"learning_rate": 2.844475726711595e-05,
"log_odds_chosen": 5.062729835510254,
"log_odds_ratio": -0.05383139103651047,
"logits/chosen": -2.9323840141296387,
"logits/chosen_prompt": -2.757789134979248,
"logits/rejected": -2.114853620529175,
"logits/rejected_prompt": -2.740206003189087,
"logps/chosen": -1.9980299472808838,
"logps/chosen_both": -1.9810377359390259,
"logps/chosen_prompt": -0.8025790452957153,
"logps/rejected": -6.92165994644165,
"logps/rejected_both": -6.837998867034912,
"logps/rejected_prompt": -1.0708694458007812,
"loss": 2.02,
"nll_loss": 1.980063796043396,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7992119789123535,
"rewards/margins": 1.9694522619247437,
"rewards/rejected": -2.7686638832092285,
"step": 1710
},
{
"epoch": 1.376,
"grad_norm": 0.1922091365417996,
"learning_rate": 2.8237197055246172e-05,
"log_odds_chosen": 5.407708644866943,
"log_odds_ratio": -0.07208568602800369,
"logits/chosen": -2.930446147918701,
"logits/chosen_prompt": -2.7493677139282227,
"logits/rejected": -1.8252556324005127,
"logits/rejected_prompt": -2.716831684112549,
"logps/chosen": -1.99956476688385,
"logps/chosen_both": -1.9826438426971436,
"logps/chosen_prompt": -0.8026520609855652,
"logps/rejected": -7.266847133636475,
"logps/rejected_both": -7.16598653793335,
"logps/rejected_prompt": -0.9821138381958008,
"loss": 2.0055,
"nll_loss": 1.9819648265838623,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.79982590675354,
"rewards/margins": 2.1069130897521973,
"rewards/rejected": -2.9067392349243164,
"step": 1720
},
{
"epoch": 1.384,
"grad_norm": 0.19884693939871143,
"learning_rate": 2.8029409645741267e-05,
"log_odds_chosen": 5.655479907989502,
"log_odds_ratio": -0.07094166427850723,
"logits/chosen": -2.9133386611938477,
"logits/chosen_prompt": -2.7181575298309326,
"logits/rejected": -1.8967100381851196,
"logits/rejected_prompt": -2.7026288509368896,
"logps/chosen": -2.0701959133148193,
"logps/chosen_both": -2.0524401664733887,
"logps/chosen_prompt": -0.8565284609794617,
"logps/rejected": -7.606234550476074,
"logps/rejected_both": -7.5077009201049805,
"logps/rejected_prompt": -1.0423924922943115,
"loss": 2.1485,
"nll_loss": 2.0521743297576904,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8280783891677856,
"rewards/margins": 2.2144155502319336,
"rewards/rejected": -3.042494058609009,
"step": 1730
},
{
"epoch": 1.392,
"grad_norm": 0.22986043369921255,
"learning_rate": 2.782140962183704e-05,
"log_odds_chosen": 6.107487678527832,
"log_odds_ratio": -0.0026633774396032095,
"logits/chosen": -2.98026442527771,
"logits/chosen_prompt": -2.780827522277832,
"logits/rejected": -1.9798576831817627,
"logits/rejected_prompt": -2.7703700065612793,
"logps/chosen": -1.9474899768829346,
"logps/chosen_both": -1.9275726079940796,
"logps/chosen_prompt": -0.7816404700279236,
"logps/rejected": -7.895272731781006,
"logps/rejected_both": -7.769126892089844,
"logps/rejected_prompt": -0.9758648872375488,
"loss": 1.9516,
"nll_loss": 1.925616979598999,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.778995931148529,
"rewards/margins": 2.3791134357452393,
"rewards/rejected": -3.158109188079834,
"step": 1740
},
{
"epoch": 1.4,
"grad_norm": 1.3967778423182213,
"learning_rate": 2.761321158169134e-05,
"log_odds_chosen": 5.588977336883545,
"log_odds_ratio": -0.07164627313613892,
"logits/chosen": -2.942800998687744,
"logits/chosen_prompt": -2.765923023223877,
"logits/rejected": -2.1541590690612793,
"logits/rejected_prompt": -2.7391622066497803,
"logps/chosen": -1.8856910467147827,
"logps/chosen_both": -1.8705289363861084,
"logps/chosen_prompt": -0.7254279851913452,
"logps/rejected": -7.315940856933594,
"logps/rejected_both": -7.2315239906311035,
"logps/rejected_prompt": -0.9249277114868164,
"loss": 2.037,
"nll_loss": 1.8701813220977783,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7542763948440552,
"rewards/margins": 2.172100305557251,
"rewards/rejected": -2.9263763427734375,
"step": 1750
},
{
"epoch": 1.408,
"grad_norm": 0.19174060756423858,
"learning_rate": 2.7404830137359444e-05,
"log_odds_chosen": 5.684497356414795,
"log_odds_ratio": -0.03275999799370766,
"logits/chosen": -2.958325147628784,
"logits/chosen_prompt": -2.728274345397949,
"logits/rejected": -2.046318531036377,
"logits/rejected_prompt": -2.6898844242095947,
"logps/chosen": -2.253990411758423,
"logps/chosen_both": -2.2328062057495117,
"logps/chosen_prompt": -0.8659110069274902,
"logps/rejected": -7.784188747406006,
"logps/rejected_both": -7.674757480621338,
"logps/rejected_prompt": -1.1274776458740234,
"loss": 2.1275,
"nll_loss": 2.2321293354034424,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.901596188545227,
"rewards/margins": 2.212078809738159,
"rewards/rejected": -3.1136748790740967,
"step": 1760
},
{
"epoch": 1.416,
"grad_norm": 0.1908777514352998,
"learning_rate": 2.7196279913768584e-05,
"log_odds_chosen": 5.167336940765381,
"log_odds_ratio": -0.07482357323169708,
"logits/chosen": -2.9330124855041504,
"logits/chosen_prompt": -2.7444446086883545,
"logits/rejected": -2.105210065841675,
"logits/rejected_prompt": -2.721642255783081,
"logps/chosen": -2.0776610374450684,
"logps/chosen_both": -2.0597071647644043,
"logps/chosen_prompt": -0.8555063009262085,
"logps/rejected": -7.124932765960693,
"logps/rejected_both": -7.030417442321777,
"logps/rejected_prompt": -1.0413535833358765,
"loss": 1.9978,
"nll_loss": 2.058987617492676,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8310644030570984,
"rewards/margins": 2.018908739089966,
"rewards/rejected": -2.84997296333313,
"step": 1770
},
{
"epoch": 1.424,
"grad_norm": 0.17855815500184188,
"learning_rate": 2.6987575547691497e-05,
"log_odds_chosen": 4.549686908721924,
"log_odds_ratio": -0.20390887558460236,
"logits/chosen": -2.9623754024505615,
"logits/chosen_prompt": -2.74225115776062,
"logits/rejected": -2.1663219928741455,
"logits/rejected_prompt": -2.7345423698425293,
"logps/chosen": -1.9926074743270874,
"logps/chosen_both": -1.9742103815078735,
"logps/chosen_prompt": -0.7784561514854431,
"logps/rejected": -6.431072235107422,
"logps/rejected_both": -6.3410797119140625,
"logps/rejected_prompt": -0.9243408441543579,
"loss": 2.0508,
"nll_loss": 1.973905324935913,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7970430850982666,
"rewards/margins": 1.775386095046997,
"rewards/rejected": -2.5724291801452637,
"step": 1780
},
{
"epoch": 1.432,
"grad_norm": 0.41995614329947717,
"learning_rate": 2.6778731686719178e-05,
"log_odds_chosen": 6.473885536193848,
"log_odds_ratio": -0.0018433562945574522,
"logits/chosen": -2.952514410018921,
"logits/chosen_prompt": -2.7027528285980225,
"logits/rejected": -1.8595733642578125,
"logits/rejected_prompt": -2.6798789501190186,
"logps/chosen": -1.9392732381820679,
"logps/chosen_both": -1.9248685836791992,
"logps/chosen_prompt": -0.931847095489502,
"logps/rejected": -8.25381088256836,
"logps/rejected_both": -8.140459060668945,
"logps/rejected_prompt": -1.0698789358139038,
"loss": 1.9874,
"nll_loss": 1.923288106918335,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7757093906402588,
"rewards/margins": 2.5258147716522217,
"rewards/rejected": -3.3015239238739014,
"step": 1790
},
{
"epoch": 1.44,
"grad_norm": 0.7745820877648287,
"learning_rate": 2.656976298823284e-05,
"log_odds_chosen": 3.4408886432647705,
"log_odds_ratio": -0.27857550978660583,
"logits/chosen": -2.878281831741333,
"logits/chosen_prompt": -2.734473705291748,
"logits/rejected": -2.3365187644958496,
"logits/rejected_prompt": -2.7160048484802246,
"logps/chosen": -2.0569214820861816,
"logps/chosen_both": -2.0396482944488525,
"logps/chosen_prompt": -0.6810625791549683,
"logps/rejected": -5.414828300476074,
"logps/rejected_both": -5.35118293762207,
"logps/rejected_prompt": -0.8160842061042786,
"loss": 2.0419,
"nll_loss": 2.038651943206787,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.82276850938797,
"rewards/margins": 1.3431627750396729,
"rewards/rejected": -2.165931224822998,
"step": 1800
},
{
"epoch": 1.448,
"grad_norm": 0.19675956388988333,
"learning_rate": 2.636068411837523e-05,
"log_odds_chosen": 3.9148197174072266,
"log_odds_ratio": -0.23557178676128387,
"logits/chosen": -3.045487642288208,
"logits/chosen_prompt": -2.759061574935913,
"logits/rejected": -2.4077014923095703,
"logits/rejected_prompt": -2.7576231956481934,
"logps/chosen": -1.8861596584320068,
"logps/chosen_both": -1.8683302402496338,
"logps/chosen_prompt": -0.9071288108825684,
"logps/rejected": -5.683122158050537,
"logps/rejected_both": -5.601851940155029,
"logps/rejected_prompt": -1.0434454679489136,
"loss": 2.0263,
"nll_loss": 1.8671506643295288,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7544639110565186,
"rewards/margins": 1.5187851190567017,
"rewards/rejected": -2.2732491493225098,
"step": 1810
},
{
"epoch": 1.456,
"grad_norm": 6.189918533614061,
"learning_rate": 2.615150975102131e-05,
"log_odds_chosen": 6.713578701019287,
"log_odds_ratio": -0.0015258995117619634,
"logits/chosen": -3.0059263706207275,
"logits/chosen_prompt": -2.7889323234558105,
"logits/rejected": -1.839082956314087,
"logits/rejected_prompt": -2.7647995948791504,
"logps/chosen": -2.004807233810425,
"logps/chosen_both": -1.98598313331604,
"logps/chosen_prompt": -0.7677423357963562,
"logps/rejected": -8.555073738098145,
"logps/rejected_both": -8.430871963500977,
"logps/rejected_prompt": -1.011725664138794,
"loss": 2.0302,
"nll_loss": 1.9841728210449219,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8019229769706726,
"rewards/margins": 2.6201066970825195,
"rewards/rejected": -3.422029495239258,
"step": 1820
},
{
"epoch": 1.464,
"grad_norm": 0.21797873657619965,
"learning_rate": 2.594225456674837e-05,
"log_odds_chosen": 5.328610420227051,
"log_odds_ratio": -0.0812341570854187,
"logits/chosen": -2.979506731033325,
"logits/chosen_prompt": -2.792584180831909,
"logits/rejected": -2.08947491645813,
"logits/rejected_prompt": -2.781327962875366,
"logps/chosen": -1.9279931783676147,
"logps/chosen_both": -1.9127006530761719,
"logps/chosen_prompt": -0.7814801335334778,
"logps/rejected": -7.120486259460449,
"logps/rejected_both": -7.026519775390625,
"logps/rejected_prompt": -0.9352282285690308,
"loss": 2.0587,
"nll_loss": 1.9114625453948975,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7711972594261169,
"rewards/margins": 2.0769975185394287,
"rewards/rejected": -2.8481948375701904,
"step": 1830
},
{
"epoch": 1.472,
"grad_norm": 0.1871801141599041,
"learning_rate": 2.5732933251805713e-05,
"log_odds_chosen": 5.583043575286865,
"log_odds_ratio": -0.13880962133407593,
"logits/chosen": -2.9580206871032715,
"logits/chosen_prompt": -2.7731950283050537,
"logits/rejected": -2.012089490890503,
"logits/rejected_prompt": -2.75722336769104,
"logps/chosen": -1.855268120765686,
"logps/chosen_both": -1.8423293828964233,
"logps/chosen_prompt": -0.8601115942001343,
"logps/rejected": -7.305128574371338,
"logps/rejected_both": -7.2211809158325195,
"logps/rejected_prompt": -1.0132110118865967,
"loss": 1.9359,
"nll_loss": 1.8416475057601929,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7421072125434875,
"rewards/margins": 2.1799445152282715,
"rewards/rejected": -2.922051191329956,
"step": 1840
},
{
"epoch": 1.48,
"grad_norm": 0.22592416955066014,
"learning_rate": 2.5523560497083926e-05,
"log_odds_chosen": 5.949292182922363,
"log_odds_ratio": -0.07134632766246796,
"logits/chosen": -2.956613779067993,
"logits/chosen_prompt": -2.722937822341919,
"logits/rejected": -1.9237785339355469,
"logits/rejected_prompt": -2.704369068145752,
"logps/chosen": -1.9562047719955444,
"logps/chosen_both": -1.9380409717559814,
"logps/chosen_prompt": -0.7973084449768066,
"logps/rejected": -7.771543979644775,
"logps/rejected_both": -7.661837577819824,
"logps/rejected_prompt": -0.9722532033920288,
"loss": 1.9892,
"nll_loss": 1.9374074935913086,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7824817895889282,
"rewards/margins": 2.3261356353759766,
"rewards/rejected": -3.1086175441741943,
"step": 1850
},
{
"epoch": 1.488,
"grad_norm": 0.19952883102568983,
"learning_rate": 2.531415099708382e-05,
"log_odds_chosen": 5.468968868255615,
"log_odds_ratio": -0.13928017020225525,
"logits/chosen": -2.901470184326172,
"logits/chosen_prompt": -2.7253496646881104,
"logits/rejected": -1.9635553359985352,
"logits/rejected_prompt": -2.721364736557007,
"logps/chosen": -2.024766683578491,
"logps/chosen_both": -2.0091967582702637,
"logps/chosen_prompt": -0.8794494867324829,
"logps/rejected": -7.388121604919434,
"logps/rejected_both": -7.304760932922363,
"logps/rejected_prompt": -1.0697910785675049,
"loss": 2.1409,
"nll_loss": 2.0086288452148438,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8099067807197571,
"rewards/margins": 2.1453423500061035,
"rewards/rejected": -2.955249071121216,
"step": 1860
},
{
"epoch": 1.496,
"grad_norm": 0.20218369179299622,
"learning_rate": 2.51047194488851e-05,
"log_odds_chosen": 5.442208766937256,
"log_odds_ratio": -0.14097937941551208,
"logits/chosen": -2.9763107299804688,
"logits/chosen_prompt": -2.7768394947052,
"logits/rejected": -2.108531951904297,
"logits/rejected_prompt": -2.7451493740081787,
"logps/chosen": -1.79744553565979,
"logps/chosen_both": -1.7835102081298828,
"logps/chosen_prompt": -0.7872709631919861,
"logps/rejected": -7.031289577484131,
"logps/rejected_both": -6.934246063232422,
"logps/rejected_prompt": -0.9216675758361816,
"loss": 2.1195,
"nll_loss": 1.7827249765396118,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.718978226184845,
"rewards/margins": 2.0935378074645996,
"rewards/rejected": -2.8125159740448,
"step": 1870
},
{
"epoch": 1.504,
"grad_norm": 0.9652790170177806,
"learning_rate": 2.4895280551114907e-05,
"log_odds_chosen": 5.730778694152832,
"log_odds_ratio": -0.07072736322879791,
"logits/chosen": -2.950146198272705,
"logits/chosen_prompt": -2.7803640365600586,
"logits/rejected": -1.9521598815917969,
"logits/rejected_prompt": -2.764260768890381,
"logps/chosen": -2.0558481216430664,
"logps/chosen_both": -2.0352180004119873,
"logps/chosen_prompt": -0.8978110551834106,
"logps/rejected": -7.663902282714844,
"logps/rejected_both": -7.545947074890137,
"logps/rejected_prompt": -1.037939429283142,
"loss": 2.049,
"nll_loss": 2.0345263481140137,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8223392367362976,
"rewards/margins": 2.2432212829589844,
"rewards/rejected": -3.0655605792999268,
"step": 1880
},
{
"epoch": 1.512,
"grad_norm": 0.1890875725333666,
"learning_rate": 2.4685849002916183e-05,
"log_odds_chosen": 6.257909297943115,
"log_odds_ratio": -0.00222708098590374,
"logits/chosen": -2.9233384132385254,
"logits/chosen_prompt": -2.7774055004119873,
"logits/rejected": -1.9378130435943604,
"logits/rejected_prompt": -2.751840114593506,
"logps/chosen": -1.9843826293945312,
"logps/chosen_both": -1.9667317867279053,
"logps/chosen_prompt": -0.6825822591781616,
"logps/rejected": -8.092279434204102,
"logps/rejected_both": -7.992387294769287,
"logps/rejected_prompt": -0.9652584195137024,
"loss": 1.9485,
"nll_loss": 1.965959906578064,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7937530279159546,
"rewards/margins": 2.4431586265563965,
"rewards/rejected": -3.2369117736816406,
"step": 1890
},
{
"epoch": 1.52,
"grad_norm": 0.2373809038859539,
"learning_rate": 2.447643950291608e-05,
"log_odds_chosen": 6.489705562591553,
"log_odds_ratio": -0.0016050601843744516,
"logits/chosen": -2.9970052242279053,
"logits/chosen_prompt": -2.755345106124878,
"logits/rejected": -1.9105993509292603,
"logits/rejected_prompt": -2.7229576110839844,
"logps/chosen": -1.8970317840576172,
"logps/chosen_both": -1.8811533451080322,
"logps/chosen_prompt": -0.7929924726486206,
"logps/rejected": -8.21942138671875,
"logps/rejected_both": -8.108181953430176,
"logps/rejected_prompt": -0.9921186566352844,
"loss": 1.954,
"nll_loss": 1.8801666498184204,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7588127851486206,
"rewards/margins": 2.5289556980133057,
"rewards/rejected": -3.287768602371216,
"step": 1900
},
{
"epoch": 1.528,
"grad_norm": 0.1741002343821723,
"learning_rate": 2.4267066748194296e-05,
"log_odds_chosen": 5.774570941925049,
"log_odds_ratio": -0.07103729248046875,
"logits/chosen": -2.886838436126709,
"logits/chosen_prompt": -2.7209315299987793,
"logits/rejected": -2.010939836502075,
"logits/rejected_prompt": -2.7094690799713135,
"logps/chosen": -2.068047523498535,
"logps/chosen_both": -2.051417350769043,
"logps/chosen_prompt": -0.7632136940956116,
"logps/rejected": -7.714223384857178,
"logps/rejected_both": -7.628198146820068,
"logps/rejected_prompt": -0.9632788896560669,
"loss": 2.0981,
"nll_loss": 2.051051139831543,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8272191286087036,
"rewards/margins": 2.258470296859741,
"rewards/rejected": -3.085689067840576,
"step": 1910
},
{
"epoch": 1.536,
"grad_norm": 0.18057749289339498,
"learning_rate": 2.4057745433251635e-05,
"log_odds_chosen": 6.403738498687744,
"log_odds_ratio": -0.0018427784088999033,
"logits/chosen": -2.9575610160827637,
"logits/chosen_prompt": -2.7303547859191895,
"logits/rejected": -1.862630844116211,
"logits/rejected_prompt": -2.71962833404541,
"logps/chosen": -2.0046885013580322,
"logps/chosen_both": -1.9884449243545532,
"logps/chosen_prompt": -0.763080894947052,
"logps/rejected": -8.254236221313477,
"logps/rejected_both": -8.159029960632324,
"logps/rejected_prompt": -1.045041799545288,
"loss": 2.0516,
"nll_loss": 1.9879404306411743,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8018752932548523,
"rewards/margins": 2.49981951713562,
"rewards/rejected": -3.301694869995117,
"step": 1920
},
{
"epoch": 1.544,
"grad_norm": 0.20142735097076295,
"learning_rate": 2.384849024897869e-05,
"log_odds_chosen": 5.733250617980957,
"log_odds_ratio": -0.004482199437916279,
"logits/chosen": -2.9741549491882324,
"logits/chosen_prompt": -2.7055163383483887,
"logits/rejected": -2.124002456665039,
"logits/rejected_prompt": -2.688239812850952,
"logps/chosen": -1.9430478811264038,
"logps/chosen_both": -1.926995038986206,
"logps/chosen_prompt": -0.7834355235099792,
"logps/rejected": -7.518483638763428,
"logps/rejected_both": -7.4232635498046875,
"logps/rejected_prompt": -1.0878071784973145,
"loss": 2.1323,
"nll_loss": 1.9260002374649048,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7772191166877747,
"rewards/margins": 2.2301743030548096,
"rewards/rejected": -3.0073933601379395,
"step": 1930
},
{
"epoch": 1.552,
"grad_norm": 40.90864961224279,
"learning_rate": 2.3639315881624777e-05,
"log_odds_chosen": 5.306234836578369,
"log_odds_ratio": -0.00918310321867466,
"logits/chosen": -2.9237542152404785,
"logits/chosen_prompt": -2.7105278968811035,
"logits/rejected": -2.2239270210266113,
"logits/rejected_prompt": -2.686476469039917,
"logps/chosen": -1.9409538507461548,
"logps/chosen_both": -1.9275703430175781,
"logps/chosen_prompt": -0.8563373684883118,
"logps/rejected": -7.0894670486450195,
"logps/rejected_both": -7.007052421569824,
"logps/rejected_prompt": -0.9907125234603882,
"loss": 1.9112,
"nll_loss": 1.926429033279419,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7763815522193909,
"rewards/margins": 2.059405565261841,
"rewards/rejected": -2.835787296295166,
"step": 1940
},
{
"epoch": 1.56,
"grad_norm": 0.21885482692879285,
"learning_rate": 2.3430237011767167e-05,
"log_odds_chosen": 5.6596198081970215,
"log_odds_ratio": -0.023314189165830612,
"logits/chosen": -2.9358747005462646,
"logits/chosen_prompt": -2.727999687194824,
"logits/rejected": -2.0308213233947754,
"logits/rejected_prompt": -2.686753749847412,
"logps/chosen": -1.9377899169921875,
"logps/chosen_both": -1.922545075416565,
"logps/chosen_prompt": -0.8713130950927734,
"logps/rejected": -7.442534446716309,
"logps/rejected_both": -7.339343070983887,
"logps/rejected_prompt": -1.057796835899353,
"loss": 2.0015,
"nll_loss": 1.9221293926239014,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7751160264015198,
"rewards/margins": 2.2018978595733643,
"rewards/rejected": -2.9770140647888184,
"step": 1950
},
{
"epoch": 1.568,
"grad_norm": 0.5403488938261225,
"learning_rate": 2.3221268313280838e-05,
"log_odds_chosen": 5.778319358825684,
"log_odds_ratio": -0.07066681236028671,
"logits/chosen": -2.954177141189575,
"logits/chosen_prompt": -2.678536891937256,
"logits/rejected": -1.9524621963500977,
"logits/rejected_prompt": -2.6848878860473633,
"logps/chosen": -1.9211227893829346,
"logps/chosen_both": -1.902917504310608,
"logps/chosen_prompt": -0.8868004083633423,
"logps/rejected": -7.527622222900391,
"logps/rejected_both": -7.4302239418029785,
"logps/rejected_prompt": -1.1353758573532104,
"loss": 2.0128,
"nll_loss": 1.9021003246307373,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.768449068069458,
"rewards/margins": 2.2425997257232666,
"rewards/rejected": -3.0110487937927246,
"step": 1960
},
{
"epoch": 1.576,
"grad_norm": 6.334469044302015,
"learning_rate": 2.301242445230851e-05,
"log_odds_chosen": 4.549070358276367,
"log_odds_ratio": -0.10954795777797699,
"logits/chosen": -2.9302010536193848,
"logits/chosen_prompt": -2.6880440711975098,
"logits/rejected": -2.190250873565674,
"logits/rejected_prompt": -2.6803054809570312,
"logps/chosen": -2.2468152046203613,
"logps/chosen_both": -2.227410316467285,
"logps/chosen_prompt": -0.7418851852416992,
"logps/rejected": -6.677786827087402,
"logps/rejected_both": -6.601284980773926,
"logps/rejected_prompt": -0.9388518333435059,
"loss": 2.1059,
"nll_loss": 2.226693630218506,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.8987261056900024,
"rewards/margins": 1.7723888158798218,
"rewards/rejected": -2.671114683151245,
"step": 1970
},
{
"epoch": 1.584,
"grad_norm": 0.21099709481066398,
"learning_rate": 2.280372008623142e-05,
"log_odds_chosen": 4.277853488922119,
"log_odds_ratio": -0.18287745118141174,
"logits/chosen": -2.989633321762085,
"logits/chosen_prompt": -2.6874613761901855,
"logits/rejected": -2.2610902786254883,
"logits/rejected_prompt": -2.664952516555786,
"logps/chosen": -1.912766695022583,
"logps/chosen_both": -1.8961530923843384,
"logps/chosen_prompt": -0.7984111905097961,
"logps/rejected": -6.0515875816345215,
"logps/rejected_both": -5.97214412689209,
"logps/rejected_prompt": -1.0341233015060425,
"loss": 2.0542,
"nll_loss": 1.894964575767517,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.7651066780090332,
"rewards/margins": 1.6555284261703491,
"rewards/rejected": -2.4206349849700928,
"step": 1980
},
{
"epoch": 1.592,
"grad_norm": 0.23272826174313574,
"learning_rate": 2.2595169862640568e-05,
"log_odds_chosen": 6.768258094787598,
"log_odds_ratio": -0.001374961924739182,
"logits/chosen": -2.973562240600586,
"logits/chosen_prompt": -2.686769962310791,
"logits/rejected": -1.666338562965393,
"logits/rejected_prompt": -2.683814764022827,
"logps/chosen": -1.9322917461395264,
"logps/chosen_both": -1.9166603088378906,
"logps/chosen_prompt": -0.8024829626083374,
"logps/rejected": -8.528871536254883,
"logps/rejected_both": -8.413396835327148,
"logps/rejected_prompt": -1.0380266904830933,
"loss": 2.0648,
"nll_loss": 1.9158977270126343,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7729167342185974,
"rewards/margins": 2.63863205909729,
"rewards/rejected": -3.4115490913391113,
"step": 1990
},
{
"epoch": 1.6,
"grad_norm": 0.20086785213912312,
"learning_rate": 2.238678841830867e-05,
"log_odds_chosen": 6.327115058898926,
"log_odds_ratio": -0.004725167062133551,
"logits/chosen": -2.966679573059082,
"logits/chosen_prompt": -2.6999356746673584,
"logits/rejected": -1.8506364822387695,
"logits/rejected_prompt": -2.6866955757141113,
"logps/chosen": -1.8783817291259766,
"logps/chosen_both": -1.8609817028045654,
"logps/chosen_prompt": -0.7905829548835754,
"logps/rejected": -8.029566764831543,
"logps/rejected_both": -7.908673286437988,
"logps/rejected_prompt": -1.0723146200180054,
"loss": 1.9398,
"nll_loss": 1.860142469406128,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7513527274131775,
"rewards/margins": 2.4604744911193848,
"rewards/rejected": -3.211826801300049,
"step": 2000
},
{
"epoch": 1.608,
"grad_norm": 3.6061661350197456,
"learning_rate": 2.217859037816296e-05,
"log_odds_chosen": 4.772618770599365,
"log_odds_ratio": -0.14787371456623077,
"logits/chosen": -2.9939560890197754,
"logits/chosen_prompt": -2.712306499481201,
"logits/rejected": -2.120854139328003,
"logits/rejected_prompt": -2.699389934539795,
"logps/chosen": -2.0005993843078613,
"logps/chosen_both": -1.9795938730239868,
"logps/chosen_prompt": -0.7556421160697937,
"logps/rejected": -6.654515743255615,
"logps/rejected_both": -6.551595211029053,
"logps/rejected_prompt": -0.9516459703445435,
"loss": 1.9737,
"nll_loss": 1.9790796041488647,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8002398610115051,
"rewards/margins": 1.8615667819976807,
"rewards/rejected": -2.661806344985962,
"step": 2010
},
{
"epoch": 1.616,
"grad_norm": 0.3283349921571691,
"learning_rate": 2.1970590354258745e-05,
"log_odds_chosen": 6.253961086273193,
"log_odds_ratio": -0.07067908346652985,
"logits/chosen": -2.9472057819366455,
"logits/chosen_prompt": -2.6802945137023926,
"logits/rejected": -1.744763731956482,
"logits/rejected_prompt": -2.6687159538269043,
"logps/chosen": -1.955038070678711,
"logps/chosen_both": -1.9407745599746704,
"logps/chosen_prompt": -1.00258469581604,
"logps/rejected": -8.05742073059082,
"logps/rejected_both": -7.938286781311035,
"logps/rejected_prompt": -1.1584670543670654,
"loss": 2.0349,
"nll_loss": 1.940118432044983,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7820152044296265,
"rewards/margins": 2.440953254699707,
"rewards/rejected": -3.222968339920044,
"step": 2020
},
{
"epoch": 1.624,
"grad_norm": 1.8747875530540283,
"learning_rate": 2.176280294475383e-05,
"log_odds_chosen": 6.281460762023926,
"log_odds_ratio": -0.03783145174384117,
"logits/chosen": -3.011366844177246,
"logits/chosen_prompt": -2.6553094387054443,
"logits/rejected": -1.8144845962524414,
"logits/rejected_prompt": -2.649622678756714,
"logps/chosen": -1.9069626331329346,
"logps/chosen_both": -1.8888943195343018,
"logps/chosen_prompt": -0.7433997988700867,
"logps/rejected": -8.008193969726562,
"logps/rejected_both": -7.898676872253418,
"logps/rejected_prompt": -0.9908720254898071,
"loss": 1.9971,
"nll_loss": 1.8877136707305908,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.762785017490387,
"rewards/margins": 2.440492630004883,
"rewards/rejected": -3.203277587890625,
"step": 2030
},
{
"epoch": 1.6320000000000001,
"grad_norm": 2.9635896306517915,
"learning_rate": 2.155524273288405e-05,
"log_odds_chosen": 4.7696404457092285,
"log_odds_ratio": -0.2104126662015915,
"logits/chosen": -2.9527573585510254,
"logits/chosen_prompt": -2.6921048164367676,
"logits/rejected": -2.0738635063171387,
"logits/rejected_prompt": -2.67110538482666,
"logps/chosen": -1.996506690979004,
"logps/chosen_both": -1.9748737812042236,
"logps/chosen_prompt": -0.7325566411018372,
"logps/rejected": -6.6651411056518555,
"logps/rejected_both": -6.573362827301025,
"logps/rejected_prompt": -0.9392368197441101,
"loss": 1.9348,
"nll_loss": 1.9730939865112305,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7986027002334595,
"rewards/margins": 1.8674538135528564,
"rewards/rejected": -2.6660561561584473,
"step": 2040
},
{
"epoch": 1.6400000000000001,
"grad_norm": 0.19964912068774665,
"learning_rate": 2.1347924285939714e-05,
"log_odds_chosen": 6.8775224685668945,
"log_odds_ratio": -0.008257986977696419,
"logits/chosen": -2.917914867401123,
"logits/chosen_prompt": -2.6802151203155518,
"logits/rejected": -1.6495475769042969,
"logits/rejected_prompt": -2.661830186843872,
"logps/chosen": -2.0301578044891357,
"logps/chosen_both": -2.007798910140991,
"logps/chosen_prompt": -0.8403179049491882,
"logps/rejected": -8.763223648071289,
"logps/rejected_both": -8.611532211303711,
"logps/rejected_prompt": -1.09980046749115,
"loss": 2.1549,
"nll_loss": 2.006844997406006,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8120630979537964,
"rewards/margins": 2.6932263374328613,
"rewards/rejected": -3.5052895545959473,
"step": 2050
},
{
"epoch": 1.6480000000000001,
"grad_norm": 0.17460562158440138,
"learning_rate": 2.114086215424322e-05,
"log_odds_chosen": 6.110722064971924,
"log_odds_ratio": -0.023483365774154663,
"logits/chosen": -2.909790515899658,
"logits/chosen_prompt": -2.6986935138702393,
"logits/rejected": -1.758716344833374,
"logits/rejected_prompt": -2.6658692359924316,
"logps/chosen": -2.3056933879852295,
"logps/chosen_both": -2.285371780395508,
"logps/chosen_prompt": -0.833857536315918,
"logps/rejected": -8.272215843200684,
"logps/rejected_both": -8.151971817016602,
"logps/rejected_prompt": -1.0774855613708496,
"loss": 2.0557,
"nll_loss": 2.284456968307495,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.9222772717475891,
"rewards/margins": 2.3866093158721924,
"rewards/rejected": -3.308886766433716,
"step": 2060
},
{
"epoch": 1.6560000000000001,
"grad_norm": 0.19823340696579927,
"learning_rate": 2.0934070870127912e-05,
"log_odds_chosen": 5.7514495849609375,
"log_odds_ratio": -0.13919630646705627,
"logits/chosen": -2.9313971996307373,
"logits/chosen_prompt": -2.690089225769043,
"logits/rejected": -1.7628095149993896,
"logits/rejected_prompt": -2.6867289543151855,
"logps/chosen": -2.0054726600646973,
"logps/chosen_both": -1.9867470264434814,
"logps/chosen_prompt": -0.730907678604126,
"logps/rejected": -7.626162528991699,
"logps/rejected_both": -7.516133785247803,
"logps/rejected_prompt": -0.9458767771720886,
"loss": 2.0384,
"nll_loss": 1.9859052896499634,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8021891713142395,
"rewards/margins": 2.2482759952545166,
"rewards/rejected": -3.0504648685455322,
"step": 2070
},
{
"epoch": 1.6640000000000001,
"grad_norm": 0.19392027541652682,
"learning_rate": 2.0727564946918087e-05,
"log_odds_chosen": 7.237205505371094,
"log_odds_ratio": -0.001250033383257687,
"logits/chosen": -2.934305429458618,
"logits/chosen_prompt": -2.7029290199279785,
"logits/rejected": -1.5330889225006104,
"logits/rejected_prompt": -2.6817727088928223,
"logps/chosen": -2.0364651679992676,
"logps/chosen_both": -2.015903949737549,
"logps/chosen_prompt": -0.8590591549873352,
"logps/rejected": -9.122060775756836,
"logps/rejected_both": -8.987492561340332,
"logps/rejected_prompt": -1.0628540515899658,
"loss": 1.9994,
"nll_loss": 2.0151782035827637,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8145861625671387,
"rewards/margins": 2.834237575531006,
"rewards/rejected": -3.6488234996795654,
"step": 2080
},
{
"epoch": 1.6720000000000002,
"grad_norm": 0.2855392155807927,
"learning_rate": 2.0521358877910444e-05,
"log_odds_chosen": 6.342986583709717,
"log_odds_ratio": -0.07219625264406204,
"logits/chosen": -2.9752235412597656,
"logits/chosen_prompt": -2.7005088329315186,
"logits/rejected": -1.7442362308502197,
"logits/rejected_prompt": -2.693645477294922,
"logps/chosen": -1.990447759628296,
"logps/chosen_both": -1.970177412033081,
"logps/chosen_prompt": -0.7856583595275879,
"logps/rejected": -8.199989318847656,
"logps/rejected_both": -8.072303771972656,
"logps/rejected_prompt": -0.9411813020706177,
"loss": 2.021,
"nll_loss": 1.9698638916015625,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7961790561676025,
"rewards/margins": 2.483816623687744,
"rewards/rejected": -3.2799954414367676,
"step": 2090
},
{
"epoch": 1.6800000000000002,
"grad_norm": 0.36170871833517027,
"learning_rate": 2.031546713535688e-05,
"log_odds_chosen": 5.634890079498291,
"log_odds_ratio": -0.1395900696516037,
"logits/chosen": -2.93391752243042,
"logits/chosen_prompt": -2.718055248260498,
"logits/rejected": -1.7808215618133545,
"logits/rejected_prompt": -2.6867878437042236,
"logps/chosen": -2.3721437454223633,
"logps/chosen_both": -2.3435354232788086,
"logps/chosen_prompt": -0.7950377464294434,
"logps/rejected": -7.916224479675293,
"logps/rejected_both": -7.782776832580566,
"logps/rejected_prompt": -0.9661157727241516,
"loss": 2.1271,
"nll_loss": 2.341766595840454,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.9488574862480164,
"rewards/margins": 2.21763277053833,
"rewards/rejected": -3.166490077972412,
"step": 2100
},
{
"epoch": 1.688,
"grad_norm": 0.3052641697772741,
"learning_rate": 2.01099041694488e-05,
"log_odds_chosen": 5.173205375671387,
"log_odds_ratio": -0.2093629539012909,
"logits/chosen": -2.913505792617798,
"logits/chosen_prompt": -2.695497512817383,
"logits/rejected": -1.9728949069976807,
"logits/rejected_prompt": -2.681952476501465,
"logps/chosen": -1.9676679372787476,
"logps/chosen_both": -1.9531806707382202,
"logps/chosen_prompt": -0.8127241134643555,
"logps/rejected": -7.031458377838135,
"logps/rejected_both": -6.950935363769531,
"logps/rejected_prompt": -0.9248498678207397,
"loss": 2.0659,
"nll_loss": 1.9526466131210327,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.787067174911499,
"rewards/margins": 2.0255160331726074,
"rewards/rejected": -2.8125832080841064,
"step": 2110
},
{
"epoch": 1.696,
"grad_norm": 0.1918548604852694,
"learning_rate": 1.9904684407302883e-05,
"log_odds_chosen": 7.995016574859619,
"log_odds_ratio": -0.00040107182576321065,
"logits/chosen": -3.0051703453063965,
"logits/chosen_prompt": -2.7128148078918457,
"logits/rejected": -1.3667514324188232,
"logits/rejected_prompt": -2.695828676223755,
"logps/chosen": -1.9211137294769287,
"logps/chosen_both": -1.9036529064178467,
"logps/chosen_prompt": -0.8414414525032043,
"logps/rejected": -9.738038063049316,
"logps/rejected_both": -9.58409309387207,
"logps/rejected_prompt": -0.957872748374939,
"loss": 1.9882,
"nll_loss": 1.9027389287948608,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7684455513954163,
"rewards/margins": 3.126769781112671,
"rewards/rejected": -3.8952155113220215,
"step": 2120
},
{
"epoch": 1.704,
"grad_norm": 25.341642829209718,
"learning_rate": 1.969982225194864e-05,
"log_odds_chosen": 6.443746089935303,
"log_odds_ratio": -0.13866354525089264,
"logits/chosen": -2.8991589546203613,
"logits/chosen_prompt": -2.704436779022217,
"logits/rejected": -1.6840307712554932,
"logits/rejected_prompt": -2.696018695831299,
"logps/chosen": -1.971212387084961,
"logps/chosen_both": -1.9563363790512085,
"logps/chosen_prompt": -0.7664562463760376,
"logps/rejected": -8.291219711303711,
"logps/rejected_both": -8.195323944091797,
"logps/rejected_prompt": -0.8870849609375,
"loss": 2.0512,
"nll_loss": 1.955370545387268,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7884851098060608,
"rewards/margins": 2.5280027389526367,
"rewards/rejected": -3.3164875507354736,
"step": 2130
},
{
"epoch": 1.712,
"grad_norm": 0.20382071740750204,
"learning_rate": 1.9495332081317464e-05,
"log_odds_chosen": 6.890301704406738,
"log_odds_ratio": -0.009469824843108654,
"logits/chosen": -2.8794448375701904,
"logits/chosen_prompt": -2.694141387939453,
"logits/rejected": -1.638772964477539,
"logits/rejected_prompt": -2.6982343196868896,
"logps/chosen": -2.006687641143799,
"logps/chosen_both": -1.9925482273101807,
"logps/chosen_prompt": -0.8075912594795227,
"logps/rejected": -8.752016067504883,
"logps/rejected_both": -8.659661293029785,
"logps/rejected_prompt": -1.0454128980636597,
"loss": 1.9488,
"nll_loss": 1.9920895099639893,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8026750683784485,
"rewards/margins": 2.698131561279297,
"rewards/rejected": -3.5008063316345215,
"step": 2140
},
{
"epoch": 1.72,
"grad_norm": 3.232652124328266,
"learning_rate": 1.9291228247233605e-05,
"log_odds_chosen": 6.535033226013184,
"log_odds_ratio": -0.0724484771490097,
"logits/chosen": -2.8941891193389893,
"logits/chosen_prompt": -2.70381498336792,
"logits/rejected": -1.799768090248108,
"logits/rejected_prompt": -2.6814205646514893,
"logps/chosen": -1.9803783893585205,
"logps/chosen_both": -1.9626888036727905,
"logps/chosen_prompt": -0.8645817041397095,
"logps/rejected": -8.38414192199707,
"logps/rejected_both": -8.267631530761719,
"logps/rejected_prompt": -0.9822869300842285,
"loss": 1.9512,
"nll_loss": 1.9625753164291382,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7921513319015503,
"rewards/margins": 2.5615053176879883,
"rewards/rejected": -3.353656768798828,
"step": 2150
},
{
"epoch": 1.728,
"grad_norm": 0.5121046736628673,
"learning_rate": 1.908752507440689e-05,
"log_odds_chosen": 6.229867458343506,
"log_odds_ratio": -0.0752544105052948,
"logits/chosen": -2.935990571975708,
"logits/chosen_prompt": -2.68332576751709,
"logits/rejected": -1.7542794942855835,
"logits/rejected_prompt": -2.6715810298919678,
"logps/chosen": -2.238250732421875,
"logps/chosen_both": -2.217163562774658,
"logps/chosen_prompt": -0.7275692820549011,
"logps/rejected": -8.351387023925781,
"logps/rejected_both": -8.241617202758789,
"logps/rejected_prompt": -0.9444383382797241,
"loss": 2.1639,
"nll_loss": 2.2166025638580322,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8953002691268921,
"rewards/margins": 2.445254325866699,
"rewards/rejected": -3.340554714202881,
"step": 2160
},
{
"epoch": 1.736,
"grad_norm": 0.18380447787382737,
"learning_rate": 1.888423685942732e-05,
"log_odds_chosen": 7.403123378753662,
"log_odds_ratio": -0.0035772870760411024,
"logits/chosen": -2.9258389472961426,
"logits/chosen_prompt": -2.7035067081451416,
"logits/rejected": -1.6778090000152588,
"logits/rejected_prompt": -2.682382106781006,
"logps/chosen": -1.8578765392303467,
"logps/chosen_both": -1.8427069187164307,
"logps/chosen_prompt": -0.832676887512207,
"logps/rejected": -9.08339786529541,
"logps/rejected_both": -8.959403038024902,
"logps/rejected_prompt": -1.1029479503631592,
"loss": 1.9654,
"nll_loss": 1.8422781229019165,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7431506514549255,
"rewards/margins": 2.8902084827423096,
"rewards/rejected": -3.63335919380188,
"step": 2170
},
{
"epoch": 1.744,
"grad_norm": 15.42646908452697,
"learning_rate": 1.868137786976177e-05,
"log_odds_chosen": 6.83737325668335,
"log_odds_ratio": -0.09123753756284714,
"logits/chosen": -2.9604616165161133,
"logits/chosen_prompt": -2.6771702766418457,
"logits/rejected": -1.7559928894042969,
"logits/rejected_prompt": -2.6906254291534424,
"logps/chosen": -1.9559208154678345,
"logps/chosen_both": -1.9405914545059204,
"logps/chosen_prompt": -0.7949713468551636,
"logps/rejected": -8.641664505004883,
"logps/rejected_both": -8.521966934204102,
"logps/rejected_prompt": -0.9677802324295044,
"loss": 2.0939,
"nll_loss": 1.938951849937439,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7823683619499207,
"rewards/margins": 2.6742970943450928,
"rewards/rejected": -3.4566657543182373,
"step": 2180
},
{
"epoch": 1.752,
"grad_norm": 12.062069037613009,
"learning_rate": 1.8478962342752583e-05,
"log_odds_chosen": 6.820882320404053,
"log_odds_ratio": -0.07564956694841385,
"logits/chosen": -2.904177665710449,
"logits/chosen_prompt": -2.666506052017212,
"logits/rejected": -1.7927961349487305,
"logits/rejected_prompt": -2.67189884185791,
"logps/chosen": -2.0425262451171875,
"logps/chosen_both": -2.0270590782165527,
"logps/chosen_prompt": -0.8014975786209106,
"logps/rejected": -8.73670768737793,
"logps/rejected_both": -8.633912086486816,
"logps/rejected_prompt": -1.0191423892974854,
"loss": 2.0463,
"nll_loss": 2.0263657569885254,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8170105218887329,
"rewards/margins": 2.6776726245880127,
"rewards/rejected": -3.4946835041046143,
"step": 2190
},
{
"epoch": 1.76,
"grad_norm": 1.1920813557914467,
"learning_rate": 1.827700448461836e-05,
"log_odds_chosen": 7.279504299163818,
"log_odds_ratio": -0.13858437538146973,
"logits/chosen": -3.018719434738159,
"logits/chosen_prompt": -2.687682628631592,
"logits/rejected": -1.6826099157333374,
"logits/rejected_prompt": -2.678703784942627,
"logps/chosen": -1.860093355178833,
"logps/chosen_both": -1.8447208404541016,
"logps/chosen_prompt": -0.8991209268569946,
"logps/rejected": -9.011571884155273,
"logps/rejected_both": -8.870678901672363,
"logps/rejected_prompt": -1.096939206123352,
"loss": 2.012,
"nll_loss": 1.84355890750885,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7440372705459595,
"rewards/margins": 2.8605916500091553,
"rewards/rejected": -3.6046290397644043,
"step": 2200
},
{
"epoch": 1.768,
"grad_norm": 0.19530589950798477,
"learning_rate": 1.807551846945694e-05,
"log_odds_chosen": 8.2916898727417,
"log_odds_ratio": -0.06947987526655197,
"logits/chosen": -2.939237117767334,
"logits/chosen_prompt": -2.6988303661346436,
"logits/rejected": -1.6200687885284424,
"logits/rejected_prompt": -2.68789005279541,
"logps/chosen": -1.9331436157226562,
"logps/chosen_both": -1.916733741760254,
"logps/chosen_prompt": -0.7277871370315552,
"logps/rejected": -10.084833145141602,
"logps/rejected_both": -9.953168869018555,
"logps/rejected_prompt": -1.032865285873413,
"loss": 1.9735,
"nll_loss": 1.916029691696167,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7732575535774231,
"rewards/margins": 3.260676145553589,
"rewards/rejected": -4.033933639526367,
"step": 2210
},
{
"epoch": 1.776,
"grad_norm": 15.17903488212651,
"learning_rate": 1.7874518438250597e-05,
"log_odds_chosen": 9.437470436096191,
"log_odds_ratio": -0.00649250065907836,
"logits/chosen": -2.9586923122406006,
"logits/chosen_prompt": -2.700380802154541,
"logits/rejected": -1.6204473972320557,
"logits/rejected_prompt": -2.668332576751709,
"logps/chosen": -2.0388143062591553,
"logps/chosen_both": -2.017122268676758,
"logps/chosen_prompt": -0.7435789108276367,
"logps/rejected": -11.334449768066406,
"logps/rejected_both": -11.154394149780273,
"logps/rejected_prompt": -0.9411799311637878,
"loss": 2.1772,
"nll_loss": 2.0165975093841553,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8155257105827332,
"rewards/margins": 3.718254566192627,
"rewards/rejected": -4.533780097961426,
"step": 2220
},
{
"epoch": 1.784,
"grad_norm": 0.28398933589113434,
"learning_rate": 1.767401849787357e-05,
"log_odds_chosen": 6.384799957275391,
"log_odds_ratio": -0.07637131214141846,
"logits/chosen": -2.9650635719299316,
"logits/chosen_prompt": -2.6936004161834717,
"logits/rejected": -1.797628402709961,
"logits/rejected_prompt": -2.690913438796997,
"logps/chosen": -1.8709478378295898,
"logps/chosen_both": -1.856300950050354,
"logps/chosen_prompt": -0.8806565403938293,
"logps/rejected": -8.102632522583008,
"logps/rejected_both": -7.991517543792725,
"logps/rejected_prompt": -1.0237706899642944,
"loss": 2.1791,
"nll_loss": 1.8553836345672607,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.748379111289978,
"rewards/margins": 2.492673635482788,
"rewards/rejected": -3.2410526275634766,
"step": 2230
},
{
"epoch": 1.792,
"grad_norm": 7.344829678329039,
"learning_rate": 1.747403272010199e-05,
"log_odds_chosen": 4.503691673278809,
"log_odds_ratio": -0.44330325722694397,
"logits/chosen": -2.9304556846618652,
"logits/chosen_prompt": -2.7112066745758057,
"logits/rejected": -2.020601749420166,
"logits/rejected_prompt": -2.6991848945617676,
"logps/chosen": -2.2137069702148438,
"logps/chosen_both": -2.192910671234131,
"logps/chosen_prompt": -0.7757335305213928,
"logps/rejected": -6.606595039367676,
"logps/rejected_both": -6.522683143615723,
"logps/rejected_prompt": -1.0225099325180054,
"loss": 2.0432,
"nll_loss": 2.1926403045654297,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.885482668876648,
"rewards/margins": 1.7571556568145752,
"rewards/rejected": -2.6426382064819336,
"step": 2240
},
{
"epoch": 1.8,
"grad_norm": 0.2830736721750178,
"learning_rate": 1.7274575140626318e-05,
"log_odds_chosen": 6.729086399078369,
"log_odds_ratio": -0.002848730655387044,
"logits/chosen": -2.9603111743927,
"logits/chosen_prompt": -2.712522268295288,
"logits/rejected": -1.471806287765503,
"logits/rejected_prompt": -2.711698055267334,
"logps/chosen": -1.9502222537994385,
"logps/chosen_both": -1.932050347328186,
"logps/chosen_prompt": -0.7791944146156311,
"logps/rejected": -8.519399642944336,
"logps/rejected_both": -8.40225887298584,
"logps/rejected_prompt": -0.9917134046554565,
"loss": 2.004,
"nll_loss": 1.9314903020858765,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7800888419151306,
"rewards/margins": 2.627671241760254,
"rewards/rejected": -3.4077601432800293,
"step": 2250
},
{
"epoch": 1.808,
"grad_norm": 1.3701495350067383,
"learning_rate": 1.7075659758066208e-05,
"log_odds_chosen": 4.735475063323975,
"log_odds_ratio": -0.14837773144245148,
"logits/chosen": -2.9037442207336426,
"logits/chosen_prompt": -2.6904830932617188,
"logits/rejected": -1.9993311166763306,
"logits/rejected_prompt": -2.672048807144165,
"logps/chosen": -2.0128910541534424,
"logps/chosen_both": -1.9941341876983643,
"logps/chosen_prompt": -0.7718429565429688,
"logps/rejected": -6.631512641906738,
"logps/rejected_both": -6.536102294921875,
"logps/rejected_prompt": -0.9579516649246216,
"loss": 2.0311,
"nll_loss": 1.9931504726409912,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.805156409740448,
"rewards/margins": 1.8474489450454712,
"rewards/rejected": -2.6526052951812744,
"step": 2260
},
{
"epoch": 1.8159999999999998,
"grad_norm": 21.366222606488684,
"learning_rate": 1.6877300532988094e-05,
"log_odds_chosen": 7.610182762145996,
"log_odds_ratio": -0.0006168467225506902,
"logits/chosen": -2.9680118560791016,
"logits/chosen_prompt": -2.664792776107788,
"logits/rejected": -1.2061169147491455,
"logits/rejected_prompt": -2.642937183380127,
"logps/chosen": -2.109647512435913,
"logps/chosen_both": -2.0934646129608154,
"logps/chosen_prompt": -0.9366092681884766,
"logps/rejected": -9.573705673217773,
"logps/rejected_both": -9.448970794677734,
"logps/rejected_prompt": -1.088648796081543,
"loss": 2.0712,
"nll_loss": 2.0929782390594482,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.843859076499939,
"rewards/margins": 2.9856228828430176,
"rewards/rejected": -3.829482316970825,
"step": 2270
},
{
"epoch": 1.8239999999999998,
"grad_norm": 16.630798038144235,
"learning_rate": 1.6679511386925337e-05,
"log_odds_chosen": 7.555551052093506,
"log_odds_ratio": -0.0009092552354559302,
"logits/chosen": -2.9446640014648438,
"logits/chosen_prompt": -2.703678607940674,
"logits/rejected": -1.251961588859558,
"logits/rejected_prompt": -2.686135768890381,
"logps/chosen": -1.9308589696884155,
"logps/chosen_both": -1.9166322946548462,
"logps/chosen_prompt": -0.7264224290847778,
"logps/rejected": -9.319347381591797,
"logps/rejected_both": -9.213998794555664,
"logps/rejected_prompt": -0.9491874575614929,
"loss": 1.9893,
"nll_loss": 1.915757179260254,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7723435759544373,
"rewards/margins": 2.955395221710205,
"rewards/rejected": -3.727738857269287,
"step": 2280
},
{
"epoch": 1.8319999999999999,
"grad_norm": 12.241247239412013,
"learning_rate": 1.648230620140121e-05,
"log_odds_chosen": 5.702427864074707,
"log_odds_ratio": -0.08441531658172607,
"logits/chosen": -2.9145102500915527,
"logits/chosen_prompt": -2.7137434482574463,
"logits/rejected": -1.6203930377960205,
"logits/rejected_prompt": -2.7078521251678467,
"logps/chosen": -2.2361724376678467,
"logps/chosen_both": -2.2188587188720703,
"logps/chosen_prompt": -0.8718380928039551,
"logps/rejected": -7.825617790222168,
"logps/rejected_both": -7.731575012207031,
"logps/rejected_prompt": -0.9629694223403931,
"loss": 2.0784,
"nll_loss": 2.2178969383239746,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8944689631462097,
"rewards/margins": 2.2357778549194336,
"rewards/rejected": -3.130246877670288,
"step": 2290
},
{
"epoch": 1.8399999999999999,
"grad_norm": 0.2611980916177983,
"learning_rate": 1.6285698816954624e-05,
"log_odds_chosen": 5.886144638061523,
"log_odds_ratio": -0.14016158878803253,
"logits/chosen": -2.961277484893799,
"logits/chosen_prompt": -2.7103641033172607,
"logits/rejected": -1.6664111614227295,
"logits/rejected_prompt": -2.7068681716918945,
"logps/chosen": -1.891758918762207,
"logps/chosen_both": -1.877873182296753,
"logps/chosen_prompt": -0.8406246304512024,
"logps/rejected": -7.652543067932129,
"logps/rejected_both": -7.56333065032959,
"logps/rejected_prompt": -0.9318545460700989,
"loss": 1.9727,
"nll_loss": 1.877637267112732,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7567036151885986,
"rewards/margins": 2.3043136596679688,
"rewards/rejected": -3.0610175132751465,
"step": 2300
},
{
"epoch": 1.8479999999999999,
"grad_norm": 0.17316872141044676,
"learning_rate": 1.6089703032168733e-05,
"log_odds_chosen": 6.335439205169678,
"log_odds_ratio": -0.007680490612983704,
"logits/chosen": -2.9618372917175293,
"logits/chosen_prompt": -2.6908061504364014,
"logits/rejected": -1.7726625204086304,
"logits/rejected_prompt": -2.684845447540283,
"logps/chosen": -2.021721839904785,
"logps/chosen_both": -2.0070888996124268,
"logps/chosen_prompt": -0.8626869916915894,
"logps/rejected": -8.212113380432129,
"logps/rejected_both": -8.121031761169434,
"logps/rejected_prompt": -1.1338939666748047,
"loss": 2.086,
"nll_loss": 2.0067009925842285,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.808688759803772,
"rewards/margins": 2.4761569499969482,
"rewards/rejected": -3.2848453521728516,
"step": 2310
},
{
"epoch": 1.8559999999999999,
"grad_norm": 0.21046741293754637,
"learning_rate": 1.5894332602702545e-05,
"log_odds_chosen": 5.3062238693237305,
"log_odds_ratio": -0.09238220006227493,
"logits/chosen": -2.863762378692627,
"logits/chosen_prompt": -2.698549747467041,
"logits/rejected": -1.7465136051177979,
"logits/rejected_prompt": -2.68521785736084,
"logps/chosen": -2.2504518032073975,
"logps/chosen_both": -2.2304165363311768,
"logps/chosen_prompt": -0.8663703203201294,
"logps/rejected": -7.458860874176025,
"logps/rejected_both": -7.366589546203613,
"logps/rejected_prompt": -1.0120290517807007,
"loss": 2.0757,
"nll_loss": 2.2292349338531494,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.9001806974411011,
"rewards/margins": 2.0833640098571777,
"rewards/rejected": -2.9835448265075684,
"step": 2320
},
{
"epoch": 1.8639999999999999,
"grad_norm": 0.2325223892090008,
"learning_rate": 1.5699601240325474e-05,
"log_odds_chosen": 5.675802230834961,
"log_odds_ratio": -0.14025500416755676,
"logits/chosen": -2.9541871547698975,
"logits/chosen_prompt": -2.739253520965576,
"logits/rejected": -1.7137792110443115,
"logits/rejected_prompt": -2.7213757038116455,
"logps/chosen": -2.011998176574707,
"logps/chosen_both": -1.9911746978759766,
"logps/chosen_prompt": -0.7685104012489319,
"logps/rejected": -7.561570167541504,
"logps/rejected_both": -7.440642356872559,
"logps/rejected_prompt": -0.9734441041946411,
"loss": 2.1005,
"nll_loss": 1.9904701709747314,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8047992587089539,
"rewards/margins": 2.2198290824890137,
"rewards/rejected": -3.024627923965454,
"step": 2330
},
{
"epoch": 1.8719999999999999,
"grad_norm": 0.22047561828057208,
"learning_rate": 1.5505522611954975e-05,
"log_odds_chosen": 5.360434532165527,
"log_odds_ratio": -0.015295952558517456,
"logits/chosen": -2.899050235748291,
"logits/chosen_prompt": -2.718276262283325,
"logits/rejected": -2.08345365524292,
"logits/rejected_prompt": -2.6998016834259033,
"logps/chosen": -1.8844950199127197,
"logps/chosen_both": -1.8703863620758057,
"logps/chosen_prompt": -0.851974606513977,
"logps/rejected": -7.060413360595703,
"logps/rejected_both": -6.973315238952637,
"logps/rejected_prompt": -1.0805187225341797,
"loss": 2.1013,
"nll_loss": 1.8690898418426514,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7537980675697327,
"rewards/margins": 2.0703673362731934,
"rewards/rejected": -2.8241655826568604,
"step": 2340
},
{
"epoch": 1.88,
"grad_norm": 1.1925590095899927,
"learning_rate": 1.5312110338697426e-05,
"log_odds_chosen": 4.792149066925049,
"log_odds_ratio": -0.1287117898464203,
"logits/chosen": -2.9038636684417725,
"logits/chosen_prompt": -2.692437171936035,
"logits/rejected": -1.9894816875457764,
"logits/rejected_prompt": -2.6797823905944824,
"logps/chosen": -1.9725837707519531,
"logps/chosen_both": -1.9566154479980469,
"logps/chosen_prompt": -0.7425985932350159,
"logps/rejected": -6.638279914855957,
"logps/rejected_both": -6.564992427825928,
"logps/rejected_prompt": -0.9972286224365234,
"loss": 1.9786,
"nll_loss": 1.9555227756500244,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7890334725379944,
"rewards/margins": 1.8662786483764648,
"rewards/rejected": -2.6553120613098145,
"step": 2350
},
{
"epoch": 1.888,
"grad_norm": 0.2631097802741203,
"learning_rate": 1.5119377994892094e-05,
"log_odds_chosen": 7.000193119049072,
"log_odds_ratio": -0.0028563719242811203,
"logits/chosen": -3.0186381340026855,
"logits/chosen_prompt": -2.723498821258545,
"logits/rejected": -1.5227829217910767,
"logits/rejected_prompt": -2.7204127311706543,
"logps/chosen": -1.8698396682739258,
"logps/chosen_both": -1.850454330444336,
"logps/chosen_prompt": -0.7684019207954407,
"logps/rejected": -8.695045471191406,
"logps/rejected_both": -8.554825782775879,
"logps/rejected_prompt": -1.0279042720794678,
"loss": 2.0014,
"nll_loss": 1.8499305248260498,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7479358315467834,
"rewards/margins": 2.7300820350646973,
"rewards/rejected": -3.478017807006836,
"step": 2360
},
{
"epoch": 1.896,
"grad_norm": 0.20426857310467877,
"learning_rate": 1.4927339107158437e-05,
"log_odds_chosen": 8.02978515625,
"log_odds_ratio": -0.0003904960467480123,
"logits/chosen": -2.951490879058838,
"logits/chosen_prompt": -2.708991289138794,
"logits/rejected": -1.2117061614990234,
"logits/rejected_prompt": -2.6999001502990723,
"logps/chosen": -1.9645278453826904,
"logps/chosen_both": -1.9457321166992188,
"logps/chosen_prompt": -0.761443018913269,
"logps/rejected": -9.840217590332031,
"logps/rejected_both": -9.700372695922852,
"logps/rejected_prompt": -0.9850748181343079,
"loss": 1.9906,
"nll_loss": 1.9449169635772705,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7858111262321472,
"rewards/margins": 3.1502761840820312,
"rewards/rejected": -3.936087131500244,
"step": 2370
},
{
"epoch": 1.904,
"grad_norm": 8.58950626485984,
"learning_rate": 1.4736007153446801e-05,
"log_odds_chosen": 8.620465278625488,
"log_odds_ratio": -0.00021180181647650898,
"logits/chosen": -2.903035879135132,
"logits/chosen_prompt": -2.735071897506714,
"logits/rejected": -1.012452483177185,
"logits/rejected_prompt": -2.7112841606140137,
"logps/chosen": -2.025474786758423,
"logps/chosen_both": -2.007967472076416,
"logps/chosen_prompt": -0.8391423225402832,
"logps/rejected": -10.502188682556152,
"logps/rejected_both": -10.356060028076172,
"logps/rejected_prompt": -0.9537385106086731,
"loss": 2.3368,
"nll_loss": 2.0072412490844727,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8101899027824402,
"rewards/margins": 3.39068603515625,
"rewards/rejected": -4.200875282287598,
"step": 2380
},
{
"epoch": 1.912,
"grad_norm": 0.19583726689690906,
"learning_rate": 1.4545395562092468e-05,
"log_odds_chosen": 6.079274654388428,
"log_odds_ratio": -0.4031279981136322,
"logits/chosen": -2.844682455062866,
"logits/chosen_prompt": -2.8039345741271973,
"logits/rejected": -1.3123562335968018,
"logits/rejected_prompt": -2.7909157276153564,
"logps/chosen": -3.1939139366149902,
"logps/chosen_both": -3.164135217666626,
"logps/chosen_prompt": -0.8311759233474731,
"logps/rejected": -9.186834335327148,
"logps/rejected_both": -9.063508033752441,
"logps/rejected_prompt": -1.087949275970459,
"loss": 2.1303,
"nll_loss": 3.163341999053955,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -1.277565598487854,
"rewards/margins": 2.3971686363220215,
"rewards/rejected": -3.674734592437744,
"step": 2390
},
{
"epoch": 1.92,
"grad_norm": 4.277812021967688,
"learning_rate": 1.4355517710873184e-05,
"log_odds_chosen": 6.059283256530762,
"log_odds_ratio": -0.09234263747930527,
"logits/chosen": -3.0424129962921143,
"logits/chosen_prompt": -2.83634614944458,
"logits/rejected": -1.6069847345352173,
"logits/rejected_prompt": -2.818171739578247,
"logps/chosen": -1.861696481704712,
"logps/chosen_both": -1.8472903966903687,
"logps/chosen_prompt": -0.783744752407074,
"logps/rejected": -7.765946865081787,
"logps/rejected_both": -7.661751747131348,
"logps/rejected_prompt": -1.0380439758300781,
"loss": 2.0007,
"nll_loss": 1.846143126487732,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.7446784973144531,
"rewards/margins": 2.3617005348205566,
"rewards/rejected": -3.1063787937164307,
"step": 2400
},
{
"epoch": 1.928,
"grad_norm": 0.1934446201158471,
"learning_rate": 1.4166386926070322e-05,
"log_odds_chosen": 7.342792510986328,
"log_odds_ratio": -0.005115572828799486,
"logits/chosen": -2.9572060108184814,
"logits/chosen_prompt": -2.7633354663848877,
"logits/rejected": -1.3063112497329712,
"logits/rejected_prompt": -2.7578670978546143,
"logps/chosen": -1.9242970943450928,
"logps/chosen_both": -1.9093306064605713,
"logps/chosen_prompt": -0.8123539686203003,
"logps/rejected": -9.096908569335938,
"logps/rejected_both": -8.975044250488281,
"logps/rejected_prompt": -1.0593974590301514,
"loss": 1.9902,
"nll_loss": 1.9083023071289062,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7697189450263977,
"rewards/margins": 2.869044780731201,
"rewards/rejected": -3.638763904571533,
"step": 2410
},
{
"epoch": 1.936,
"grad_norm": 0.216837041093156,
"learning_rate": 1.397801648153354e-05,
"log_odds_chosen": 6.378230094909668,
"log_odds_ratio": -0.07421709597110748,
"logits/chosen": -3.0056633949279785,
"logits/chosen_prompt": -2.768573045730591,
"logits/rejected": -1.5620958805084229,
"logits/rejected_prompt": -2.7487571239471436,
"logps/chosen": -1.9807904958724976,
"logps/chosen_both": -1.9623302221298218,
"logps/chosen_prompt": -0.8482378125190735,
"logps/rejected": -8.202530860900879,
"logps/rejected_both": -8.077143669128418,
"logps/rejected_prompt": -1.0352851152420044,
"loss": 1.9778,
"nll_loss": 1.9611247777938843,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7923161387443542,
"rewards/margins": 2.488696575164795,
"rewards/rejected": -3.2810122966766357,
"step": 2420
},
{
"epoch": 1.944,
"grad_norm": 0.21162368892876318,
"learning_rate": 1.3790419597749199e-05,
"log_odds_chosen": 5.369621753692627,
"log_odds_ratio": -0.20802097022533417,
"logits/chosen": -2.925058126449585,
"logits/chosen_prompt": -2.727915048599243,
"logits/rejected": -1.7108662128448486,
"logits/rejected_prompt": -2.729671001434326,
"logps/chosen": -2.030609607696533,
"logps/chosen_both": -2.013143301010132,
"logps/chosen_prompt": -0.7951982021331787,
"logps/rejected": -7.307798862457275,
"logps/rejected_both": -7.217469692230225,
"logps/rejected_prompt": -0.9677292108535767,
"loss": 2.0275,
"nll_loss": 2.0122172832489014,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8122437596321106,
"rewards/margins": 2.1108758449554443,
"rewards/rejected": -2.92311954498291,
"step": 2430
},
{
"epoch": 1.952,
"grad_norm": 0.1882777054319625,
"learning_rate": 1.3603609440912507e-05,
"log_odds_chosen": 7.206502437591553,
"log_odds_ratio": -0.06993956863880157,
"logits/chosen": -2.9723217487335205,
"logits/chosen_prompt": -2.7605624198913574,
"logits/rejected": -1.3072056770324707,
"logits/rejected_prompt": -2.7452828884124756,
"logps/chosen": -2.0292842388153076,
"logps/chosen_both": -2.0142998695373535,
"logps/chosen_prompt": -0.8006251454353333,
"logps/rejected": -9.103940963745117,
"logps/rejected_both": -8.999374389648438,
"logps/rejected_prompt": -0.9301830530166626,
"loss": 1.9849,
"nll_loss": 2.0136048793792725,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8117138147354126,
"rewards/margins": 2.829862356185913,
"rewards/rejected": -3.6415767669677734,
"step": 2440
},
{
"epoch": 1.96,
"grad_norm": 6.854544334281628,
"learning_rate": 1.3417599122003464e-05,
"log_odds_chosen": 5.873773574829102,
"log_odds_ratio": -0.09982452541589737,
"logits/chosen": -2.8911209106445312,
"logits/chosen_prompt": -2.751624584197998,
"logits/rejected": -1.530667781829834,
"logits/rejected_prompt": -2.731210947036743,
"logps/chosen": -2.39859938621521,
"logps/chosen_both": -2.3739638328552246,
"logps/chosen_prompt": -0.818207859992981,
"logps/rejected": -8.159021377563477,
"logps/rejected_both": -8.031126976013184,
"logps/rejected_prompt": -0.9556495547294617,
"loss": 2.0669,
"nll_loss": 2.3730950355529785,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.9594398736953735,
"rewards/margins": 2.304168701171875,
"rewards/rejected": -3.263608455657959,
"step": 2450
},
{
"epoch": 1.968,
"grad_norm": 0.20527915967987695,
"learning_rate": 1.3232401695866687e-05,
"log_odds_chosen": 6.752752780914307,
"log_odds_ratio": -0.093865767121315,
"logits/chosen": -3.0047717094421387,
"logits/chosen_prompt": -2.7637996673583984,
"logits/rejected": -1.454332947731018,
"logits/rejected_prompt": -2.7430145740509033,
"logps/chosen": -1.9008424282073975,
"logps/chosen_both": -1.8828001022338867,
"logps/chosen_prompt": -0.877086341381073,
"logps/rejected": -8.502935409545898,
"logps/rejected_both": -8.374523162841797,
"logps/rejected_prompt": -1.0814439058303833,
"loss": 2.0633,
"nll_loss": 1.8823230266571045,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7603369951248169,
"rewards/margins": 2.6408374309539795,
"rewards/rejected": -3.401175022125244,
"step": 2460
},
{
"epoch": 1.976,
"grad_norm": 6.470506820654642,
"learning_rate": 1.3048030160295196e-05,
"log_odds_chosen": 6.849400520324707,
"log_odds_ratio": -0.07237619161605835,
"logits/chosen": -2.963409662246704,
"logits/chosen_prompt": -2.758953094482422,
"logits/rejected": -1.3645999431610107,
"logits/rejected_prompt": -2.7408089637756348,
"logps/chosen": -2.0132029056549072,
"logps/chosen_both": -1.9941928386688232,
"logps/chosen_prompt": -0.7748836874961853,
"logps/rejected": -8.71554183959961,
"logps/rejected_both": -8.596506118774414,
"logps/rejected_prompt": -0.936238169670105,
"loss": 1.9731,
"nll_loss": 1.9934498071670532,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8052810430526733,
"rewards/margins": 2.6809353828430176,
"rewards/rejected": -3.4862167835235596,
"step": 2470
},
{
"epoch": 1.984,
"grad_norm": 0.18875404411296617,
"learning_rate": 1.2864497455118152e-05,
"log_odds_chosen": 5.949180603027344,
"log_odds_ratio": -0.20756885409355164,
"logits/chosen": -2.90920352935791,
"logits/chosen_prompt": -2.731333017349243,
"logits/rejected": -1.5196672677993774,
"logits/rejected_prompt": -2.7116055488586426,
"logps/chosen": -2.0656113624572754,
"logps/chosen_both": -2.0481104850769043,
"logps/chosen_prompt": -0.7715897560119629,
"logps/rejected": -7.913069725036621,
"logps/rejected_both": -7.810868263244629,
"logps/rejected_prompt": -1.0343679189682007,
"loss": 2.0494,
"nll_loss": 2.0465188026428223,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8262445330619812,
"rewards/margins": 2.3389835357666016,
"rewards/rejected": -3.1652283668518066,
"step": 2480
},
{
"epoch": 1.992,
"grad_norm": 0.19965333207670072,
"learning_rate": 1.2681816461292715e-05,
"log_odds_chosen": 6.9041619300842285,
"log_odds_ratio": -0.07076757401227951,
"logits/chosen": -2.9241271018981934,
"logits/chosen_prompt": -2.7164487838745117,
"logits/rejected": -1.2974779605865479,
"logits/rejected_prompt": -2.7119083404541016,
"logps/chosen": -2.164299249649048,
"logps/chosen_both": -2.1463229656219482,
"logps/chosen_prompt": -0.8179939389228821,
"logps/rejected": -8.948786735534668,
"logps/rejected_both": -8.831026077270508,
"logps/rejected_prompt": -1.014527678489685,
"loss": 2.069,
"nll_loss": 2.1452174186706543,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8657197952270508,
"rewards/margins": 2.713794469833374,
"rewards/rejected": -3.579514265060425,
"step": 2490
},
{
"epoch": 2.0,
"grad_norm": 7.401684464890164,
"learning_rate": 1.2500000000000006e-05,
"log_odds_chosen": 7.955414772033691,
"log_odds_ratio": -0.004814439453184605,
"logits/chosen": -2.977412700653076,
"logits/chosen_prompt": -2.712825298309326,
"logits/rejected": -1.1496913433074951,
"logits/rejected_prompt": -2.6892926692962646,
"logps/chosen": -1.8996845483779907,
"logps/chosen_both": -1.8827598094940186,
"logps/chosen_prompt": -0.8927472233772278,
"logps/rejected": -9.687314987182617,
"logps/rejected_both": -9.540821075439453,
"logps/rejected_prompt": -1.00954270362854,
"loss": 2.0789,
"nll_loss": 1.882759690284729,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7598739266395569,
"rewards/margins": 3.1150519847869873,
"rewards/rejected": -3.8749260902404785,
"step": 2500
}
],
"logging_steps": 10,
"max_steps": 3750,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 3,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}