|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008, |
|
"grad_norm": 0.4211425877451838, |
|
"learning_rate": 4.999912270696202e-05, |
|
"log_odds_chosen": -0.0004254445375408977, |
|
"log_odds_ratio": -0.6933605670928955, |
|
"logits/chosen": -2.876610279083252, |
|
"logits/chosen_prompt": -2.844738245010376, |
|
"logits/rejected": -2.8758692741394043, |
|
"logits/rejected_prompt": -2.8239073753356934, |
|
"logps/chosen": -1.9094527959823608, |
|
"logps/chosen_both": -1.9286587238311768, |
|
"logps/chosen_prompt": -3.189321756362915, |
|
"logps/rejected": -1.9090824127197266, |
|
"logps/rejected_both": -1.9364073276519775, |
|
"logps/rejected_prompt": -3.4751086235046387, |
|
"loss": 2.325, |
|
"nll_loss": 1.928330421447754, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7637811899185181, |
|
"rewards/margins": -0.00014820098294876516, |
|
"rewards/rejected": -0.7636328935623169, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 0.19485166995413405, |
|
"learning_rate": 4.9996490889419514e-05, |
|
"log_odds_chosen": 0.0011974871158599854, |
|
"log_odds_ratio": -0.6925489902496338, |
|
"logits/chosen": -2.9591917991638184, |
|
"logits/chosen_prompt": -2.8109309673309326, |
|
"logits/rejected": -2.9579415321350098, |
|
"logits/rejected_prompt": -2.789308547973633, |
|
"logps/chosen": -2.084634304046631, |
|
"logps/chosen_both": -2.0863680839538574, |
|
"logps/chosen_prompt": -2.1795780658721924, |
|
"logps/rejected": -2.0856688022613525, |
|
"logps/rejected_both": -2.0941364765167236, |
|
"logps/rejected_prompt": -2.347795009613037, |
|
"loss": 2.2922, |
|
"nll_loss": 2.08614182472229, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8338537216186523, |
|
"rewards/margins": 0.0004138052463531494, |
|
"rewards/rejected": -0.8342674970626831, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.024, |
|
"grad_norm": 0.16144893961648712, |
|
"learning_rate": 4.99921047320825e-05, |
|
"log_odds_chosen": 0.003194092307239771, |
|
"log_odds_ratio": -0.6915546655654907, |
|
"logits/chosen": -2.9421558380126953, |
|
"logits/chosen_prompt": -2.7285828590393066, |
|
"logits/rejected": -2.939770221710205, |
|
"logits/rejected_prompt": -2.70296311378479, |
|
"logps/chosen": -2.0509393215179443, |
|
"logps/chosen_both": -2.0457570552825928, |
|
"logps/chosen_prompt": -1.5747671127319336, |
|
"logps/rejected": -2.0534369945526123, |
|
"logps/rejected_both": -2.0497002601623535, |
|
"logps/rejected_prompt": -1.6531193256378174, |
|
"loss": 2.2795, |
|
"nll_loss": 2.04412841796875, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8203758001327515, |
|
"rewards/margins": 0.0009990095859393477, |
|
"rewards/rejected": -0.821374773979187, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.032, |
|
"grad_norm": 0.16210904759452727, |
|
"learning_rate": 4.9985964542786614e-05, |
|
"log_odds_chosen": 0.0012136728037148714, |
|
"log_odds_ratio": -0.6925405859947205, |
|
"logits/chosen": -2.92653226852417, |
|
"logits/chosen_prompt": -2.7136194705963135, |
|
"logits/rejected": -2.925443172454834, |
|
"logits/rejected_prompt": -2.700766086578369, |
|
"logps/chosen": -2.0835628509521484, |
|
"logps/chosen_both": -2.070845365524292, |
|
"logps/chosen_prompt": -1.1743593215942383, |
|
"logps/rejected": -2.084618330001831, |
|
"logps/rejected_both": -2.076547384262085, |
|
"logps/rejected_prompt": -1.2668603658676147, |
|
"loss": 2.2852, |
|
"nll_loss": 2.070385694503784, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8334251642227173, |
|
"rewards/margins": 0.00042223333730362356, |
|
"rewards/rejected": -0.8338474035263062, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.1829717877342827, |
|
"learning_rate": 4.997807075247146e-05, |
|
"log_odds_chosen": 0.000906852656044066, |
|
"log_odds_ratio": -0.6926941871643066, |
|
"logits/chosen": -2.8913445472717285, |
|
"logits/chosen_prompt": -2.6892333030700684, |
|
"logits/rejected": -2.8896098136901855, |
|
"logits/rejected_prompt": -2.6766159534454346, |
|
"logps/chosen": -2.009531259536743, |
|
"logps/chosen_both": -1.9982995986938477, |
|
"logps/chosen_prompt": -1.053348422050476, |
|
"logps/rejected": -2.0103189945220947, |
|
"logps/rejected_both": -2.0013835430145264, |
|
"logps/rejected_prompt": -1.2616751194000244, |
|
"loss": 2.2716, |
|
"nll_loss": 1.996681571006775, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8038125038146973, |
|
"rewards/margins": 0.0003150761185679585, |
|
"rewards/rejected": -0.80412757396698, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.048, |
|
"grad_norm": 0.1926273569998765, |
|
"learning_rate": 4.996842391515044e-05, |
|
"log_odds_chosen": 0.0007017262396402657, |
|
"log_odds_ratio": -0.6927965879440308, |
|
"logits/chosen": -2.9328999519348145, |
|
"logits/chosen_prompt": -2.684788227081299, |
|
"logits/rejected": -2.93101167678833, |
|
"logits/rejected_prompt": -2.659271240234375, |
|
"logps/chosen": -1.9513660669326782, |
|
"logps/chosen_both": -1.93800950050354, |
|
"logps/chosen_prompt": -0.95411616563797, |
|
"logps/rejected": -1.9519250392913818, |
|
"logps/rejected_both": -1.9419523477554321, |
|
"logps/rejected_prompt": -1.0883800983428955, |
|
"loss": 2.2492, |
|
"nll_loss": 1.9371274709701538, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7805464863777161, |
|
"rewards/margins": 0.00022354423708748072, |
|
"rewards/rejected": -0.7807700634002686, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.056, |
|
"grad_norm": 0.1815660976282933, |
|
"learning_rate": 4.9957024707871806e-05, |
|
"log_odds_chosen": 0.0007978074136190116, |
|
"log_odds_ratio": -0.6927486062049866, |
|
"logits/chosen": -3.0125765800476074, |
|
"logits/chosen_prompt": -2.6774511337280273, |
|
"logits/rejected": -3.0124025344848633, |
|
"logits/rejected_prompt": -2.6662356853485107, |
|
"logps/chosen": -2.0494558811187744, |
|
"logps/chosen_both": -2.0350148677825928, |
|
"logps/chosen_prompt": -0.9741342663764954, |
|
"logps/rejected": -2.050143003463745, |
|
"logps/rejected_both": -2.042119264602661, |
|
"logps/rejected_prompt": -1.1199967861175537, |
|
"loss": 2.2682, |
|
"nll_loss": 2.0335299968719482, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8197824358940125, |
|
"rewards/margins": 0.00027483105077408254, |
|
"rewards/rejected": -0.820057213306427, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 0.18993029983534432, |
|
"learning_rate": 4.994387393067117e-05, |
|
"log_odds_chosen": 0.0014978877734392881, |
|
"log_odds_ratio": -0.6923991441726685, |
|
"logits/chosen": -2.9860825538635254, |
|
"logits/chosen_prompt": -2.6699416637420654, |
|
"logits/rejected": -2.9854748249053955, |
|
"logits/rejected_prompt": -2.6453309059143066, |
|
"logps/chosen": -2.025066614151001, |
|
"logps/chosen_both": -2.0116593837738037, |
|
"logps/chosen_prompt": -1.0876951217651367, |
|
"logps/rejected": -2.0263991355895996, |
|
"logps/rejected_both": -2.0129716396331787, |
|
"logps/rejected_prompt": -1.1680071353912354, |
|
"loss": 2.2805, |
|
"nll_loss": 2.008460283279419, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8100266456604004, |
|
"rewards/margins": 0.0005330622079782188, |
|
"rewards/rejected": -0.8105596303939819, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.072, |
|
"grad_norm": 0.19392806669970095, |
|
"learning_rate": 4.992897250651535e-05, |
|
"log_odds_chosen": 0.0007344387704506516, |
|
"log_odds_ratio": -0.6927801370620728, |
|
"logits/chosen": -2.998304605484009, |
|
"logits/chosen_prompt": -2.7530579566955566, |
|
"logits/rejected": -2.9966633319854736, |
|
"logits/rejected_prompt": -2.726839542388916, |
|
"logps/chosen": -1.9492180347442627, |
|
"logps/chosen_both": -1.9305731058120728, |
|
"logps/chosen_prompt": -0.871951699256897, |
|
"logps/rejected": -1.9498412609100342, |
|
"logps/rejected_both": -1.9371519088745117, |
|
"logps/rejected_prompt": -1.0174219608306885, |
|
"loss": 2.2152, |
|
"nll_loss": 1.929351806640625, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7796871662139893, |
|
"rewards/margins": 0.000249391800025478, |
|
"rewards/rejected": -0.779936671257019, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.18477584362829488, |
|
"learning_rate": 4.991232148123761e-05, |
|
"log_odds_chosen": 0.0013153791660442948, |
|
"log_odds_ratio": -0.6924898624420166, |
|
"logits/chosen": -2.959036350250244, |
|
"logits/chosen_prompt": -2.6582894325256348, |
|
"logits/rejected": -2.959897518157959, |
|
"logits/rejected_prompt": -2.656588077545166, |
|
"logps/chosen": -1.980985403060913, |
|
"logps/chosen_both": -1.965191125869751, |
|
"logps/chosen_prompt": -0.8711269497871399, |
|
"logps/rejected": -1.9821256399154663, |
|
"logps/rejected_both": -1.9721254110336304, |
|
"logps/rejected_prompt": -0.9294773936271667, |
|
"loss": 2.2517, |
|
"nll_loss": 1.964665412902832, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7923941612243652, |
|
"rewards/margins": 0.00045606493949890137, |
|
"rewards/rejected": -0.7928503155708313, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.088, |
|
"grad_norm": 0.19924379886100949, |
|
"learning_rate": 4.9893922023464236e-05, |
|
"log_odds_chosen": 0.002966083586215973, |
|
"log_odds_ratio": -0.6916661858558655, |
|
"logits/chosen": -3.0152981281280518, |
|
"logits/chosen_prompt": -2.685716152191162, |
|
"logits/rejected": -3.0145790576934814, |
|
"logits/rejected_prompt": -2.6468653678894043, |
|
"logps/chosen": -1.8295310735702515, |
|
"logps/chosen_both": -1.8159011602401733, |
|
"logps/chosen_prompt": -1.0153570175170898, |
|
"logps/rejected": -1.8320270776748657, |
|
"logps/rejected_both": -1.8261594772338867, |
|
"logps/rejected_prompt": -1.1217412948608398, |
|
"loss": 2.2814, |
|
"nll_loss": 1.815495491027832, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7318124175071716, |
|
"rewards/margins": 0.0009983479976654053, |
|
"rewards/rejected": -0.7328108549118042, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.096, |
|
"grad_norm": 0.2009899005827714, |
|
"learning_rate": 4.987377542453251e-05, |
|
"log_odds_chosen": 0.0022429400123655796, |
|
"log_odds_ratio": -0.6920267939567566, |
|
"logits/chosen": -2.9447622299194336, |
|
"logits/chosen_prompt": -2.632648468017578, |
|
"logits/rejected": -2.9442973136901855, |
|
"logits/rejected_prompt": -2.6101832389831543, |
|
"logps/chosen": -2.0063014030456543, |
|
"logps/chosen_both": -1.991539716720581, |
|
"logps/chosen_prompt": -0.9827820658683777, |
|
"logps/rejected": -2.0082459449768066, |
|
"logps/rejected_both": -1.9994781017303467, |
|
"logps/rejected_prompt": -1.0614566802978516, |
|
"loss": 2.2719, |
|
"nll_loss": 1.99040949344635, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8025206327438354, |
|
"rewards/margins": 0.0007776618003845215, |
|
"rewards/rejected": -0.8032983541488647, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.104, |
|
"grad_norm": 0.18861397575558203, |
|
"learning_rate": 4.985188309840012e-05, |
|
"log_odds_chosen": 0.001361916190944612, |
|
"log_odds_ratio": -0.692466676235199, |
|
"logits/chosen": -2.95689058303833, |
|
"logits/chosen_prompt": -2.6187005043029785, |
|
"logits/rejected": -2.95717191696167, |
|
"logits/rejected_prompt": -2.592301607131958, |
|
"logps/chosen": -2.0394482612609863, |
|
"logps/chosen_both": -2.02314829826355, |
|
"logps/chosen_prompt": -0.9008905291557312, |
|
"logps/rejected": -2.040587902069092, |
|
"logps/rejected_both": -2.0329113006591797, |
|
"logps/rejected_prompt": -1.0704509019851685, |
|
"loss": 2.2882, |
|
"nll_loss": 2.023050546646118, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8157793283462524, |
|
"rewards/margins": 0.00045590996160171926, |
|
"rewards/rejected": -0.8162351846694946, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.112, |
|
"grad_norm": 0.2030737765327122, |
|
"learning_rate": 4.982824658154589e-05, |
|
"log_odds_chosen": 0.0003186427056789398, |
|
"log_odds_ratio": -0.6929879188537598, |
|
"logits/chosen": -2.934846878051758, |
|
"logits/chosen_prompt": -2.6593239307403564, |
|
"logits/rejected": -2.9346649646759033, |
|
"logits/rejected_prompt": -2.637718677520752, |
|
"logps/chosen": -2.066263437271118, |
|
"logps/chosen_both": -2.0494155883789062, |
|
"logps/chosen_prompt": -0.9298864603042603, |
|
"logps/rejected": -2.066551685333252, |
|
"logps/rejected_both": -2.0526323318481445, |
|
"logps/rejected_prompt": -1.0461074113845825, |
|
"loss": 2.2784, |
|
"nll_loss": 2.048583984375, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8265053629875183, |
|
"rewards/margins": 0.0001151919350377284, |
|
"rewards/rejected": -0.8266205787658691, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.17845448491542337, |
|
"learning_rate": 4.980286753286195e-05, |
|
"log_odds_chosen": 0.0020511746406555176, |
|
"log_odds_ratio": -0.6921236515045166, |
|
"logits/chosen": -2.9423627853393555, |
|
"logits/chosen_prompt": -2.6544814109802246, |
|
"logits/rejected": -2.9413440227508545, |
|
"logits/rejected_prompt": -2.6495890617370605, |
|
"logps/chosen": -2.0567996501922607, |
|
"logps/chosen_both": -2.0376124382019043, |
|
"logps/chosen_prompt": -0.8456690907478333, |
|
"logps/rejected": -2.058603525161743, |
|
"logps/rejected_both": -2.0455679893493652, |
|
"logps/rejected_prompt": -1.0780448913574219, |
|
"loss": 2.2474, |
|
"nll_loss": 2.036198616027832, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.822719931602478, |
|
"rewards/margins": 0.0007214724901132286, |
|
"rewards/rejected": -0.8234413862228394, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 0.18228364635340788, |
|
"learning_rate": 4.977574773353732e-05, |
|
"log_odds_chosen": 0.0005785167450085282, |
|
"log_odds_ratio": -0.6928580403327942, |
|
"logits/chosen": -2.906240940093994, |
|
"logits/chosen_prompt": -2.656862735748291, |
|
"logits/rejected": -2.906233072280884, |
|
"logits/rejected_prompt": -2.658569812774658, |
|
"logps/chosen": -1.8988163471221924, |
|
"logps/chosen_both": -1.8861125707626343, |
|
"logps/chosen_prompt": -0.9287108182907104, |
|
"logps/rejected": -1.8993009328842163, |
|
"logps/rejected_both": -1.890856146812439, |
|
"logps/rejected_prompt": -1.113793134689331, |
|
"loss": 2.2658, |
|
"nll_loss": 1.8859831094741821, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7595265507698059, |
|
"rewards/margins": 0.00019387007341720164, |
|
"rewards/rejected": -0.7597203850746155, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.136, |
|
"grad_norm": 0.21059375256598528, |
|
"learning_rate": 4.9746889086932895e-05, |
|
"log_odds_chosen": 0.0012606128584593534, |
|
"log_odds_ratio": -0.6925175786018372, |
|
"logits/chosen": -2.9255146980285645, |
|
"logits/chosen_prompt": -2.681833505630493, |
|
"logits/rejected": -2.9241907596588135, |
|
"logits/rejected_prompt": -2.6375930309295654, |
|
"logps/chosen": -2.018401861190796, |
|
"logps/chosen_both": -2.0020346641540527, |
|
"logps/chosen_prompt": -0.8163633346557617, |
|
"logps/rejected": -2.0194990634918213, |
|
"logps/rejected_both": -2.0088753700256348, |
|
"logps/rejected_prompt": -1.024702787399292, |
|
"loss": 2.2545, |
|
"nll_loss": 2.0013086795806885, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8073607683181763, |
|
"rewards/margins": 0.00043891073437407613, |
|
"rewards/rejected": -0.8077996373176575, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.144, |
|
"grad_norm": 0.2593749816883702, |
|
"learning_rate": 4.971629361844785e-05, |
|
"log_odds_chosen": 0.000588211405556649, |
|
"log_odds_ratio": -0.6928532123565674, |
|
"logits/chosen": -2.9365015029907227, |
|
"logits/chosen_prompt": -2.6852712631225586, |
|
"logits/rejected": -2.9362454414367676, |
|
"logits/rejected_prompt": -2.6527528762817383, |
|
"logps/chosen": -2.049866199493408, |
|
"logps/chosen_both": -2.03619122505188, |
|
"logps/chosen_prompt": -0.8910077214241028, |
|
"logps/rejected": -2.050372838973999, |
|
"logps/rejected_both": -2.0393173694610596, |
|
"logps/rejected_prompt": -1.0920004844665527, |
|
"loss": 2.2312, |
|
"nll_loss": 2.0342373847961426, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8199464678764343, |
|
"rewards/margins": 0.00020260215387679636, |
|
"rewards/rejected": -0.8201491236686707, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.152, |
|
"grad_norm": 0.21239961737940086, |
|
"learning_rate": 4.968396347537751e-05, |
|
"log_odds_chosen": 0.0017036155331879854, |
|
"log_odds_ratio": -0.6922971606254578, |
|
"logits/chosen": -2.9285712242126465, |
|
"logits/chosen_prompt": -2.637676477432251, |
|
"logits/rejected": -2.9268641471862793, |
|
"logits/rejected_prompt": -2.601259231567383, |
|
"logps/chosen": -2.019813060760498, |
|
"logps/chosen_both": -2.003007173538208, |
|
"logps/chosen_prompt": -0.9411777257919312, |
|
"logps/rejected": -2.0213375091552734, |
|
"logps/rejected_both": -2.013278007507324, |
|
"logps/rejected_prompt": -1.0966544151306152, |
|
"loss": 2.2257, |
|
"nll_loss": 2.003007173538208, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8079251050949097, |
|
"rewards/margins": 0.0006098627927713096, |
|
"rewards/rejected": -0.8085349798202515, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.2296631901191577, |
|
"learning_rate": 4.964990092676263e-05, |
|
"log_odds_chosen": 0.002268400741741061, |
|
"log_odds_ratio": -0.6920153498649597, |
|
"logits/chosen": -2.9518988132476807, |
|
"logits/chosen_prompt": -2.6878037452697754, |
|
"logits/rejected": -2.9512124061584473, |
|
"logits/rejected_prompt": -2.6565701961517334, |
|
"logps/chosen": -1.69021475315094, |
|
"logps/chosen_both": -1.6815983057022095, |
|
"logps/chosen_prompt": -0.8377019762992859, |
|
"logps/rejected": -1.6910902261734009, |
|
"logps/rejected_both": -1.686661958694458, |
|
"logps/rejected_prompt": -0.9836887121200562, |
|
"loss": 2.2189, |
|
"nll_loss": 1.6812556982040405, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6760859489440918, |
|
"rewards/margins": 0.0003500869497656822, |
|
"rewards/rejected": -0.6764360666275024, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.168, |
|
"grad_norm": 0.20512599393851222, |
|
"learning_rate": 4.9614108363230135e-05, |
|
"log_odds_chosen": 0.0021390921901911497, |
|
"log_odds_ratio": -0.6920791268348694, |
|
"logits/chosen": -2.9732565879821777, |
|
"logits/chosen_prompt": -2.6687545776367188, |
|
"logits/rejected": -2.9718270301818848, |
|
"logits/rejected_prompt": -2.6496801376342773, |
|
"logps/chosen": -2.0387587547302246, |
|
"logps/chosen_both": -2.017876148223877, |
|
"logps/chosen_prompt": -0.897871196269989, |
|
"logps/rejected": -2.040605068206787, |
|
"logps/rejected_both": -2.0265369415283203, |
|
"logps/rejected_prompt": -1.0972706079483032, |
|
"loss": 2.2179, |
|
"nll_loss": 2.0162312984466553, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8155035972595215, |
|
"rewards/margins": 0.000738424074370414, |
|
"rewards/rejected": -0.8162419199943542, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.176, |
|
"grad_norm": 0.2127533742878833, |
|
"learning_rate": 4.9576588296825386e-05, |
|
"log_odds_chosen": 0.0020120560657233, |
|
"log_odds_ratio": -0.6921423077583313, |
|
"logits/chosen": -2.8992626667022705, |
|
"logits/chosen_prompt": -2.7236571311950684, |
|
"logits/rejected": -2.8986992835998535, |
|
"logits/rejected_prompt": -2.676098346710205, |
|
"logps/chosen": -2.0563912391662598, |
|
"logps/chosen_both": -2.036818027496338, |
|
"logps/chosen_prompt": -0.9310529828071594, |
|
"logps/rejected": -2.058103322982788, |
|
"logps/rejected_both": -2.0425784587860107, |
|
"logps/rejected_prompt": -1.0257813930511475, |
|
"loss": 2.273, |
|
"nll_loss": 2.03584885597229, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8225564956665039, |
|
"rewards/margins": 0.0006849050405435264, |
|
"rewards/rejected": -0.8232414126396179, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.184, |
|
"grad_norm": 0.17056867832509964, |
|
"learning_rate": 4.953734336083583e-05, |
|
"log_odds_chosen": 0.0011583305895328522, |
|
"log_odds_ratio": -0.6925683617591858, |
|
"logits/chosen": -3.0050501823425293, |
|
"logits/chosen_prompt": -2.7037124633789062, |
|
"logits/rejected": -3.0038866996765137, |
|
"logits/rejected_prompt": -2.6890504360198975, |
|
"logps/chosen": -2.0860724449157715, |
|
"logps/chosen_both": -2.067084550857544, |
|
"logps/chosen_prompt": -0.8457021713256836, |
|
"logps/rejected": -2.087078332901001, |
|
"logps/rejected_both": -2.0733180046081543, |
|
"logps/rejected_prompt": -1.0261476039886475, |
|
"loss": 2.2779, |
|
"nll_loss": 2.065519094467163, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8344290852546692, |
|
"rewards/margins": 0.00040218234062194824, |
|
"rewards/rejected": -0.8348312377929688, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 0.2058632754394824, |
|
"learning_rate": 4.949637630960617e-05, |
|
"log_odds_chosen": 0.0013900771737098694, |
|
"log_odds_ratio": -0.6924527883529663, |
|
"logits/chosen": -2.966139316558838, |
|
"logits/chosen_prompt": -2.7504935264587402, |
|
"logits/rejected": -2.965026378631592, |
|
"logits/rejected_prompt": -2.7268807888031006, |
|
"logps/chosen": -1.945728063583374, |
|
"logps/chosen_both": -1.9301140308380127, |
|
"logps/chosen_prompt": -0.9403144717216492, |
|
"logps/rejected": -1.946915626525879, |
|
"logps/rejected_both": -1.936022162437439, |
|
"logps/rejected_prompt": -1.0291379690170288, |
|
"loss": 2.2775, |
|
"nll_loss": 1.9295330047607422, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7782912254333496, |
|
"rewards/margins": 0.000475037086289376, |
|
"rewards/rejected": -0.7787662744522095, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.18030355585658703, |
|
"learning_rate": 4.9453690018345144e-05, |
|
"log_odds_chosen": 0.0017323314677923918, |
|
"log_odds_ratio": -0.6922817826271057, |
|
"logits/chosen": -2.9892709255218506, |
|
"logits/chosen_prompt": -2.7419209480285645, |
|
"logits/rejected": -2.9878451824188232, |
|
"logits/rejected_prompt": -2.706714391708374, |
|
"logps/chosen": -2.0075595378875732, |
|
"logps/chosen_both": -1.9899797439575195, |
|
"logps/chosen_prompt": -0.8903474807739258, |
|
"logps/rejected": -2.0090558528900146, |
|
"logps/rejected_both": -1.998038649559021, |
|
"logps/rejected_prompt": -1.0070338249206543, |
|
"loss": 2.2079, |
|
"nll_loss": 1.9889189004898071, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8030239343643188, |
|
"rewards/margins": 0.0005984127637930214, |
|
"rewards/rejected": -0.8036222457885742, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.208, |
|
"grad_norm": 0.18677326033959232, |
|
"learning_rate": 4.940928748292363e-05, |
|
"log_odds_chosen": 0.0003323271812405437, |
|
"log_odds_ratio": -0.6929812431335449, |
|
"logits/chosen": -2.8448781967163086, |
|
"logits/chosen_prompt": -2.6570119857788086, |
|
"logits/rejected": -2.844160795211792, |
|
"logits/rejected_prompt": -2.6436538696289062, |
|
"logps/chosen": -2.090553045272827, |
|
"logps/chosen_both": -2.077347993850708, |
|
"logps/chosen_prompt": -0.8073711395263672, |
|
"logps/rejected": -2.090845823287964, |
|
"logps/rejected_both": -2.077338695526123, |
|
"logps/rejected_prompt": -0.9910534024238586, |
|
"loss": 2.2579, |
|
"nll_loss": 2.0748660564422607, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8362210988998413, |
|
"rewards/margins": 0.00011717081360984594, |
|
"rewards/rejected": -0.8363384008407593, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.216, |
|
"grad_norm": 0.19524819903076443, |
|
"learning_rate": 4.9363171819664434e-05, |
|
"log_odds_chosen": 0.001574930502101779, |
|
"log_odds_ratio": -0.6923605799674988, |
|
"logits/chosen": -2.9072844982147217, |
|
"logits/chosen_prompt": -2.6988863945007324, |
|
"logits/rejected": -2.9070873260498047, |
|
"logits/rejected_prompt": -2.6662864685058594, |
|
"logps/chosen": -1.8586593866348267, |
|
"logps/chosen_both": -1.847161889076233, |
|
"logps/chosen_prompt": -0.8614280819892883, |
|
"logps/rejected": -1.8599656820297241, |
|
"logps/rejected_both": -1.8520950078964233, |
|
"logps/rejected_prompt": -1.0004897117614746, |
|
"loss": 2.2122, |
|
"nll_loss": 1.8460156917572021, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7434637546539307, |
|
"rewards/margins": 0.0005225300556048751, |
|
"rewards/rejected": -0.7439862489700317, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.224, |
|
"grad_norm": 0.17891708421025293, |
|
"learning_rate": 4.9315346265123594e-05, |
|
"log_odds_chosen": 0.0014710575342178345, |
|
"log_odds_ratio": -0.6924123764038086, |
|
"logits/chosen": -2.893035888671875, |
|
"logits/chosen_prompt": -2.6818959712982178, |
|
"logits/rejected": -2.8925375938415527, |
|
"logits/rejected_prompt": -2.6510303020477295, |
|
"logps/chosen": -1.959538221359253, |
|
"logps/chosen_both": -1.9448583126068115, |
|
"logps/chosen_prompt": -0.8354212641716003, |
|
"logps/rejected": -1.9608103036880493, |
|
"logps/rejected_both": -1.9502098560333252, |
|
"logps/rejected_prompt": -0.9869192838668823, |
|
"loss": 2.2903, |
|
"nll_loss": 1.94313645362854, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7838152647018433, |
|
"rewards/margins": 0.0005089103942736983, |
|
"rewards/rejected": -0.7843241691589355, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.232, |
|
"grad_norm": 0.21094148380709188, |
|
"learning_rate": 4.9265814175863186e-05, |
|
"log_odds_chosen": 0.0009952529799193144, |
|
"log_odds_ratio": -0.6926498413085938, |
|
"logits/chosen": -2.9005274772644043, |
|
"logits/chosen_prompt": -2.71238374710083, |
|
"logits/rejected": -2.8991751670837402, |
|
"logits/rejected_prompt": -2.6699583530426025, |
|
"logps/chosen": -2.1492276191711426, |
|
"logps/chosen_both": -2.1339974403381348, |
|
"logps/chosen_prompt": -0.9373821020126343, |
|
"logps/rejected": -2.1501176357269287, |
|
"logps/rejected_both": -2.1395199298858643, |
|
"logps/rejected_prompt": -1.100056529045105, |
|
"loss": 2.2923, |
|
"nll_loss": 2.1338019371032715, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8596910238265991, |
|
"rewards/margins": 0.00035610198392532766, |
|
"rewards/rejected": -0.8600472211837769, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.19361551914630554, |
|
"learning_rate": 4.9214579028215776e-05, |
|
"log_odds_chosen": 0.0016762830782681704, |
|
"log_odds_ratio": -0.6923099160194397, |
|
"logits/chosen": -2.9360158443450928, |
|
"logits/chosen_prompt": -2.7480220794677734, |
|
"logits/rejected": -2.9349968433380127, |
|
"logits/rejected_prompt": -2.733687400817871, |
|
"logps/chosen": -1.8898597955703735, |
|
"logps/chosen_both": -1.874415636062622, |
|
"logps/chosen_prompt": -0.8352281451225281, |
|
"logps/rejected": -1.8912776708602905, |
|
"logps/rejected_both": -1.8779733180999756, |
|
"logps/rejected_prompt": -0.9313365817070007, |
|
"loss": 2.2525, |
|
"nll_loss": 1.8733183145523071, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7559438943862915, |
|
"rewards/margins": 0.0005672037368640304, |
|
"rewards/rejected": -0.7565110921859741, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.248, |
|
"grad_norm": 0.19645084360565487, |
|
"learning_rate": 4.916164441804044e-05, |
|
"log_odds_chosen": 0.0019232749473303556, |
|
"log_odds_ratio": -0.692186176776886, |
|
"logits/chosen": -2.9699971675872803, |
|
"logits/chosen_prompt": -2.7393062114715576, |
|
"logits/rejected": -2.9690558910369873, |
|
"logits/rejected_prompt": -2.7017319202423096, |
|
"logps/chosen": -1.9972589015960693, |
|
"logps/chosen_both": -1.981871247291565, |
|
"logps/chosen_prompt": -0.8229547739028931, |
|
"logps/rejected": -1.9988943338394165, |
|
"logps/rejected_both": -1.9911056756973267, |
|
"logps/rejected_prompt": -0.9741779565811157, |
|
"loss": 2.2527, |
|
"nll_loss": 1.981127381324768, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7989035844802856, |
|
"rewards/margins": 0.000654196715913713, |
|
"rewards/rejected": -0.7995578050613403, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 0.17696578224649318, |
|
"learning_rate": 4.910701406047037e-05, |
|
"log_odds_chosen": 0.0012397505342960358, |
|
"log_odds_ratio": -0.6925276517868042, |
|
"logits/chosen": -2.9160306453704834, |
|
"logits/chosen_prompt": -2.7327325344085693, |
|
"logits/rejected": -2.915261745452881, |
|
"logits/rejected_prompt": -2.701322078704834, |
|
"logps/chosen": -1.9081172943115234, |
|
"logps/chosen_both": -1.892844557762146, |
|
"logps/chosen_prompt": -0.8174566030502319, |
|
"logps/rejected": -1.9091819524765015, |
|
"logps/rejected_both": -1.9010097980499268, |
|
"logps/rejected_prompt": -1.0786253213882446, |
|
"loss": 2.2602, |
|
"nll_loss": 1.8927319049835205, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7632468938827515, |
|
"rewards/margins": 0.00042594075785018504, |
|
"rewards/rejected": -0.7636728286743164, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.264, |
|
"grad_norm": 0.17292787330822676, |
|
"learning_rate": 4.905069178965215e-05, |
|
"log_odds_chosen": 0.0019163743127137423, |
|
"log_odds_ratio": -0.692189633846283, |
|
"logits/chosen": -2.9151923656463623, |
|
"logits/chosen_prompt": -2.7165563106536865, |
|
"logits/rejected": -2.914482593536377, |
|
"logits/rejected_prompt": -2.6829206943511963, |
|
"logps/chosen": -1.8700447082519531, |
|
"logps/chosen_both": -1.8556480407714844, |
|
"logps/chosen_prompt": -0.8194649815559387, |
|
"logps/rejected": -1.8716179132461548, |
|
"logps/rejected_both": -1.864458680152893, |
|
"logps/rejected_prompt": -1.1078553199768066, |
|
"loss": 2.1808, |
|
"nll_loss": 1.8551757335662842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7480179071426392, |
|
"rewards/margins": 0.0006292253965511918, |
|
"rewards/rejected": -0.7486470937728882, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.272, |
|
"grad_norm": 0.19147435771992855, |
|
"learning_rate": 4.899268155847667e-05, |
|
"log_odds_chosen": 0.002677363809198141, |
|
"log_odds_ratio": -0.6918100118637085, |
|
"logits/chosen": -3.017524242401123, |
|
"logits/chosen_prompt": -2.756082534790039, |
|
"logits/rejected": -3.016745090484619, |
|
"logits/rejected_prompt": -2.7283802032470703, |
|
"logps/chosen": -1.8907134532928467, |
|
"logps/chosen_both": -1.8744417428970337, |
|
"logps/chosen_prompt": -0.8424029350280762, |
|
"logps/rejected": -1.8929758071899414, |
|
"logps/rejected_both": -1.8843475580215454, |
|
"logps/rejected_prompt": -1.0425379276275635, |
|
"loss": 2.225, |
|
"nll_loss": 1.8739697933197021, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7562853693962097, |
|
"rewards/margins": 0.000904941582120955, |
|
"rewards/rejected": -0.7571902275085449, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.19050905162566348, |
|
"learning_rate": 4.893298743830168e-05, |
|
"log_odds_chosen": 0.0014245070051401854, |
|
"log_odds_ratio": -0.6924355626106262, |
|
"logits/chosen": -2.881587028503418, |
|
"logits/chosen_prompt": -2.7358975410461426, |
|
"logits/rejected": -2.8803658485412598, |
|
"logits/rejected_prompt": -2.693080186843872, |
|
"logps/chosen": -2.135007381439209, |
|
"logps/chosen_both": -2.115304470062256, |
|
"logps/chosen_prompt": -0.8588684797286987, |
|
"logps/rejected": -2.1362690925598145, |
|
"logps/rejected_both": -2.1251254081726074, |
|
"logps/rejected_prompt": -1.0595465898513794, |
|
"loss": 2.3085, |
|
"nll_loss": 2.1149659156799316, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8540030717849731, |
|
"rewards/margins": 0.0005046069854870439, |
|
"rewards/rejected": -0.8545076251029968, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.288, |
|
"grad_norm": 0.2409286506380079, |
|
"learning_rate": 4.887161361866608e-05, |
|
"log_odds_chosen": 0.0026388473343104124, |
|
"log_odds_ratio": -0.6918294429779053, |
|
"logits/chosen": -2.983471632003784, |
|
"logits/chosen_prompt": -2.755098819732666, |
|
"logits/rejected": -2.982506513595581, |
|
"logits/rejected_prompt": -2.7400355339050293, |
|
"logps/chosen": -1.9234100580215454, |
|
"logps/chosen_both": -1.904706597328186, |
|
"logps/chosen_prompt": -0.8400828242301941, |
|
"logps/rejected": -1.9256340265274048, |
|
"logps/rejected_both": -1.912940263748169, |
|
"logps/rejected_prompt": -0.9321552515029907, |
|
"loss": 2.2324, |
|
"nll_loss": 1.9039466381072998, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.769364058971405, |
|
"rewards/margins": 0.0008895128848962486, |
|
"rewards/rejected": -0.7702535390853882, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.296, |
|
"grad_norm": 0.16083280812237927, |
|
"learning_rate": 4.880856440699582e-05, |
|
"log_odds_chosen": 0.0021248466800898314, |
|
"log_odds_ratio": -0.6920855641365051, |
|
"logits/chosen": -2.9351096153259277, |
|
"logits/chosen_prompt": -2.723745107650757, |
|
"logits/rejected": -2.93329119682312, |
|
"logits/rejected_prompt": -2.689175844192505, |
|
"logps/chosen": -2.005812644958496, |
|
"logps/chosen_both": -1.9874347448349, |
|
"logps/chosen_prompt": -0.8169828653335571, |
|
"logps/rejected": -2.0076451301574707, |
|
"logps/rejected_both": -1.9974247217178345, |
|
"logps/rejected_prompt": -0.9817326664924622, |
|
"loss": 2.2565, |
|
"nll_loss": 1.9868465662002563, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8023250699043274, |
|
"rewards/margins": 0.0007329642539843917, |
|
"rewards/rejected": -0.8030580282211304, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.304, |
|
"grad_norm": 0.22470013003589273, |
|
"learning_rate": 4.874384422830167e-05, |
|
"log_odds_chosen": 0.0011979244882240891, |
|
"log_odds_ratio": -0.6925488710403442, |
|
"logits/chosen": -2.9063477516174316, |
|
"logits/chosen_prompt": -2.607713222503662, |
|
"logits/rejected": -2.905827760696411, |
|
"logits/rejected_prompt": -2.5853092670440674, |
|
"logps/chosen": -1.9979126453399658, |
|
"logps/chosen_both": -1.982242226600647, |
|
"logps/chosen_prompt": -0.8234804272651672, |
|
"logps/rejected": -1.9988930225372314, |
|
"logps/rejected_both": -1.9891548156738281, |
|
"logps/rejected_prompt": -0.9966527223587036, |
|
"loss": 2.266, |
|
"nll_loss": 1.9814211130142212, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7991650104522705, |
|
"rewards/margins": 0.0003922194300685078, |
|
"rewards/rejected": -0.7995571494102478, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.312, |
|
"grad_norm": 0.16501384834156196, |
|
"learning_rate": 4.867745762486861e-05, |
|
"log_odds_chosen": 0.0010735094547271729, |
|
"log_odds_ratio": -0.6926108598709106, |
|
"logits/chosen": -2.9659483432769775, |
|
"logits/chosen_prompt": -2.684511661529541, |
|
"logits/rejected": -2.9646358489990234, |
|
"logits/rejected_prompt": -2.6466262340545654, |
|
"logps/chosen": -1.8777449131011963, |
|
"logps/chosen_both": -1.8621854782104492, |
|
"logps/chosen_prompt": -0.8326584100723267, |
|
"logps/rejected": -1.8786296844482422, |
|
"logps/rejected_both": -1.8694502115249634, |
|
"logps/rejected_prompt": -1.119554042816162, |
|
"loss": 2.2551, |
|
"nll_loss": 1.8609716892242432, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7510979771614075, |
|
"rewards/margins": 0.0003538370074238628, |
|
"rewards/rejected": -0.751451849937439, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.18496197696993874, |
|
"learning_rate": 4.860940925593703e-05, |
|
"log_odds_chosen": 0.0022099569905549288, |
|
"log_odds_ratio": -0.6920434832572937, |
|
"logits/chosen": -2.8903660774230957, |
|
"logits/chosen_prompt": -2.6781816482543945, |
|
"logits/rejected": -2.890045166015625, |
|
"logits/rejected_prompt": -2.6534364223480225, |
|
"logps/chosen": -1.969386339187622, |
|
"logps/chosen_both": -1.954185128211975, |
|
"logps/chosen_prompt": -0.7636314630508423, |
|
"logps/rejected": -1.9712820053100586, |
|
"logps/rejected_both": -1.9598472118377686, |
|
"logps/rejected_prompt": -0.9155877232551575, |
|
"loss": 2.243, |
|
"nll_loss": 1.9532957077026367, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7877545356750488, |
|
"rewards/margins": 0.0007582366233691573, |
|
"rewards/rejected": -0.7885128259658813, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.328, |
|
"grad_norm": 0.22859080108494093, |
|
"learning_rate": 4.8539703897375755e-05, |
|
"log_odds_chosen": 0.004624041263014078, |
|
"log_odds_ratio": -0.690842866897583, |
|
"logits/chosen": -2.9258294105529785, |
|
"logits/chosen_prompt": -2.6813464164733887, |
|
"logits/rejected": -2.9250378608703613, |
|
"logits/rejected_prompt": -2.6571106910705566, |
|
"logps/chosen": -2.0521552562713623, |
|
"logps/chosen_both": -2.034921646118164, |
|
"logps/chosen_prompt": -0.8797234296798706, |
|
"logps/rejected": -2.056114673614502, |
|
"logps/rejected_both": -2.044158935546875, |
|
"logps/rejected_prompt": -0.9540025591850281, |
|
"loss": 2.2663, |
|
"nll_loss": 2.0334911346435547, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8208619952201843, |
|
"rewards/margins": 0.0015838384861126542, |
|
"rewards/rejected": -0.8224459886550903, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.336, |
|
"grad_norm": 0.2070567132691218, |
|
"learning_rate": 4.846834644134686e-05, |
|
"log_odds_chosen": 0.001986052840948105, |
|
"log_odds_ratio": -0.6921548843383789, |
|
"logits/chosen": -2.9888834953308105, |
|
"logits/chosen_prompt": -2.6887311935424805, |
|
"logits/rejected": -2.989170789718628, |
|
"logits/rejected_prompt": -2.694418430328369, |
|
"logps/chosen": -1.9955952167510986, |
|
"logps/chosen_both": -1.9792373180389404, |
|
"logps/chosen_prompt": -0.8381233215332031, |
|
"logps/rejected": -1.997323751449585, |
|
"logps/rejected_both": -1.9859631061553955, |
|
"logps/rejected_prompt": -0.9913262128829956, |
|
"loss": 2.2321, |
|
"nll_loss": 1.9785674810409546, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7982381582260132, |
|
"rewards/margins": 0.0006913721445016563, |
|
"rewards/rejected": -0.7989295721054077, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.344, |
|
"grad_norm": 0.19605531509972363, |
|
"learning_rate": 4.839534189596228e-05, |
|
"log_odds_chosen": 0.0027246386744081974, |
|
"log_odds_ratio": -0.6917861104011536, |
|
"logits/chosen": -2.912360429763794, |
|
"logits/chosen_prompt": -2.653672218322754, |
|
"logits/rejected": -2.910978317260742, |
|
"logits/rejected_prompt": -2.627488613128662, |
|
"logps/chosen": -2.060957908630371, |
|
"logps/chosen_both": -2.043726921081543, |
|
"logps/chosen_prompt": -0.7695341110229492, |
|
"logps/rejected": -2.0633223056793213, |
|
"logps/rejected_both": -2.051257371902466, |
|
"logps/rejected_prompt": -1.0156570672988892, |
|
"loss": 2.2675, |
|
"nll_loss": 2.042490005493164, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8243831396102905, |
|
"rewards/margins": 0.0009458243730477989, |
|
"rewards/rejected": -0.8253289461135864, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.352, |
|
"grad_norm": 0.44833865318290117, |
|
"learning_rate": 4.832069538493237e-05, |
|
"log_odds_chosen": 0.04500371962785721, |
|
"log_odds_ratio": -0.6715863943099976, |
|
"logits/chosen": -2.9302279949188232, |
|
"logits/chosen_prompt": -2.6701042652130127, |
|
"logits/rejected": -2.9281227588653564, |
|
"logits/rejected_prompt": -2.666865587234497, |
|
"logps/chosen": -1.9099162817001343, |
|
"logps/chosen_both": -1.898306131362915, |
|
"logps/chosen_prompt": -0.702593982219696, |
|
"logps/rejected": -1.948999047279358, |
|
"logps/rejected_both": -1.9378995895385742, |
|
"logps/rejected_prompt": -0.971504807472229, |
|
"loss": 2.2392, |
|
"nll_loss": 1.895094633102417, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.76396644115448, |
|
"rewards/margins": 0.015633201226592064, |
|
"rewards/rejected": -0.7795997262001038, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.21226948111262156, |
|
"learning_rate": 4.8244412147206284e-05, |
|
"log_odds_chosen": 2.9653515815734863, |
|
"log_odds_ratio": -0.40015140175819397, |
|
"logits/chosen": -2.9068620204925537, |
|
"logits/chosen_prompt": -2.6536412239074707, |
|
"logits/rejected": -2.1202731132507324, |
|
"logits/rejected_prompt": -2.6555583477020264, |
|
"logps/chosen": -2.0414326190948486, |
|
"logps/chosen_both": -2.0248727798461914, |
|
"logps/chosen_prompt": -0.8300280570983887, |
|
"logps/rejected": -4.945545196533203, |
|
"logps/rejected_both": -4.884528160095215, |
|
"logps/rejected_prompt": -0.9442939758300781, |
|
"loss": 2.1853, |
|
"nll_loss": 2.0240979194641113, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8165730237960815, |
|
"rewards/margins": 1.1616451740264893, |
|
"rewards/rejected": -1.9782178401947021, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.368, |
|
"grad_norm": 0.1736633646525648, |
|
"learning_rate": 4.81664975366043e-05, |
|
"log_odds_chosen": 7.59240198135376, |
|
"log_odds_ratio": -0.1370885670185089, |
|
"logits/chosen": -2.9020304679870605, |
|
"logits/chosen_prompt": -2.6753904819488525, |
|
"logits/rejected": -0.7233905792236328, |
|
"logits/rejected_prompt": -2.637943983078003, |
|
"logps/chosen": -1.8611255884170532, |
|
"logps/chosen_both": -1.8469617366790771, |
|
"logps/chosen_prompt": -0.8501307368278503, |
|
"logps/rejected": -9.215188026428223, |
|
"logps/rejected_both": -9.101489067077637, |
|
"logps/rejected_prompt": -1.2299854755401611, |
|
"loss": 2.0244, |
|
"nll_loss": 1.8459827899932861, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7444502115249634, |
|
"rewards/margins": 2.9416251182556152, |
|
"rewards/rejected": -3.686075210571289, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.376, |
|
"grad_norm": 0.17769599500435684, |
|
"learning_rate": 4.808695702144206e-05, |
|
"log_odds_chosen": 5.727511882781982, |
|
"log_odds_ratio": -0.2772656977176666, |
|
"logits/chosen": -2.879725694656372, |
|
"logits/chosen_prompt": -2.642578125, |
|
"logits/rejected": -1.0399138927459717, |
|
"logits/rejected_prompt": -2.6099534034729004, |
|
"logps/chosen": -2.0047779083251953, |
|
"logps/chosen_both": -1.9910427331924438, |
|
"logps/chosen_prompt": -0.8587312698364258, |
|
"logps/rejected": -7.64484167098999, |
|
"logps/rejected_both": -7.5631890296936035, |
|
"logps/rejected_prompt": -1.0231356620788574, |
|
"loss": 2.0507, |
|
"nll_loss": 1.990276575088501, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8019111752510071, |
|
"rewards/margins": 2.2560253143310547, |
|
"rewards/rejected": -3.057936429977417, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 0.18584682979949957, |
|
"learning_rate": 4.800579618414676e-05, |
|
"log_odds_chosen": 4.071249961853027, |
|
"log_odds_ratio": -0.34571754932403564, |
|
"logits/chosen": -2.903729200363159, |
|
"logits/chosen_prompt": -2.7958900928497314, |
|
"logits/rejected": -3.239121198654175, |
|
"logits/rejected_prompt": -2.7663371562957764, |
|
"logps/chosen": -1.9373371601104736, |
|
"logps/chosen_both": -1.921233892440796, |
|
"logps/chosen_prompt": -0.9925417900085449, |
|
"logps/rejected": -5.936069488525391, |
|
"logps/rejected_both": -5.837677955627441, |
|
"logps/rejected_prompt": -1.1928670406341553, |
|
"loss": 2.4809, |
|
"nll_loss": 1.919942855834961, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7749348282814026, |
|
"rewards/margins": 1.5994927883148193, |
|
"rewards/rejected": -2.3744280338287354, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.392, |
|
"grad_norm": 0.1641543403493392, |
|
"learning_rate": 4.7923020720865414e-05, |
|
"log_odds_chosen": 3.001093626022339, |
|
"log_odds_ratio": -0.484192430973053, |
|
"logits/chosen": -2.983025312423706, |
|
"logits/chosen_prompt": -3.0399768352508545, |
|
"logits/rejected": -4.017498970031738, |
|
"logits/rejected_prompt": -3.0394999980926514, |
|
"logps/chosen": -2.209317922592163, |
|
"logps/chosen_both": -2.1894264221191406, |
|
"logps/chosen_prompt": -0.8747655153274536, |
|
"logps/rejected": -5.173645496368408, |
|
"logps/rejected_both": -5.1185221672058105, |
|
"logps/rejected_prompt": -1.2934271097183228, |
|
"loss": 2.1178, |
|
"nll_loss": 2.188310384750366, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8837271928787231, |
|
"rewards/margins": 1.185731053352356, |
|
"rewards/rejected": -2.069458484649658, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 4.569078846846734, |
|
"learning_rate": 4.783863644106502e-05, |
|
"log_odds_chosen": 6.397196292877197, |
|
"log_odds_ratio": -0.20790621638298035, |
|
"logits/chosen": -2.8709733486175537, |
|
"logits/chosen_prompt": -2.905733585357666, |
|
"logits/rejected": -4.449090480804443, |
|
"logits/rejected_prompt": -2.8762049674987793, |
|
"logps/chosen": -1.861519455909729, |
|
"logps/chosen_both": -1.8485714197158813, |
|
"logps/chosen_prompt": -0.7894952893257141, |
|
"logps/rejected": -8.093868255615234, |
|
"logps/rejected_both": -7.9878997802734375, |
|
"logps/rejected_prompt": -1.098191499710083, |
|
"loss": 2.2466, |
|
"nll_loss": 1.847815752029419, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7446077466011047, |
|
"rewards/margins": 2.4929394721984863, |
|
"rewards/rejected": -3.2375473976135254, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.408, |
|
"grad_norm": 26.906300876077555, |
|
"learning_rate": 4.775264926712489e-05, |
|
"log_odds_chosen": 5.443802833557129, |
|
"log_odds_ratio": -0.13954684138298035, |
|
"logits/chosen": -2.9360134601593018, |
|
"logits/chosen_prompt": -2.6900744438171387, |
|
"logits/rejected": -3.0484580993652344, |
|
"logits/rejected_prompt": -2.612032890319824, |
|
"logps/chosen": -1.974119782447815, |
|
"logps/chosen_both": -1.958168625831604, |
|
"logps/chosen_prompt": -0.8577529788017273, |
|
"logps/rejected": -7.293883323669434, |
|
"logps/rejected_both": -7.204199314117432, |
|
"logps/rejected_prompt": -1.3446273803710938, |
|
"loss": 2.518, |
|
"nll_loss": 1.9573103189468384, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7896479368209839, |
|
"rewards/margins": 2.1279053688049316, |
|
"rewards/rejected": -2.917553424835205, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.416, |
|
"grad_norm": 1.0236738821403857, |
|
"learning_rate": 4.7665065233920945e-05, |
|
"log_odds_chosen": 4.726571559906006, |
|
"log_odds_ratio": -0.14057810604572296, |
|
"logits/chosen": -2.9554474353790283, |
|
"logits/chosen_prompt": -3.076146364212036, |
|
"logits/rejected": -3.131758689880371, |
|
"logits/rejected_prompt": -3.045212507247925, |
|
"logps/chosen": -1.9218995571136475, |
|
"logps/chosen_both": -1.910244345664978, |
|
"logps/chosen_prompt": -0.8790926933288574, |
|
"logps/rejected": -6.504288673400879, |
|
"logps/rejected_both": -6.445836544036865, |
|
"logps/rejected_prompt": -1.288549542427063, |
|
"loss": 2.0423, |
|
"nll_loss": 1.909478783607483, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7687598466873169, |
|
"rewards/margins": 1.8329557180404663, |
|
"rewards/rejected": -2.601715564727783, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.424, |
|
"grad_norm": 0.6288533211783596, |
|
"learning_rate": 4.7575890488402185e-05, |
|
"log_odds_chosen": 4.645321846008301, |
|
"log_odds_ratio": -0.14102457463741302, |
|
"logits/chosen": -2.9634203910827637, |
|
"logits/chosen_prompt": -3.0218586921691895, |
|
"logits/rejected": -3.2898871898651123, |
|
"logits/rejected_prompt": -3.0139455795288086, |
|
"logps/chosen": -1.9550220966339111, |
|
"logps/chosen_both": -1.9388656616210938, |
|
"logps/chosen_prompt": -0.826554000377655, |
|
"logps/rejected": -6.471889495849609, |
|
"logps/rejected_both": -6.390293121337891, |
|
"logps/rejected_prompt": -1.0643904209136963, |
|
"loss": 2.2513, |
|
"nll_loss": 1.9378074407577515, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7820087671279907, |
|
"rewards/margins": 1.8067471981048584, |
|
"rewards/rejected": -2.5887560844421387, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.432, |
|
"grad_norm": 2.277583713971035, |
|
"learning_rate": 4.7485131289159276e-05, |
|
"log_odds_chosen": 4.095762252807617, |
|
"log_odds_ratio": -0.15678586065769196, |
|
"logits/chosen": -2.9781079292297363, |
|
"logits/chosen_prompt": -3.05256986618042, |
|
"logits/rejected": -2.9668664932250977, |
|
"logits/rejected_prompt": -3.041161060333252, |
|
"logps/chosen": -1.9822967052459717, |
|
"logps/chosen_both": -1.9686206579208374, |
|
"logps/chosen_prompt": -0.9377325177192688, |
|
"logps/rejected": -5.9602532386779785, |
|
"logps/rejected_both": -5.897341728210449, |
|
"logps/rejected_prompt": -1.051451563835144, |
|
"loss": 2.0657, |
|
"nll_loss": 1.9684457778930664, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7929186820983887, |
|
"rewards/margins": 1.5911824703216553, |
|
"rewards/rejected": -2.384101390838623, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.17707808472563716, |
|
"learning_rate": 4.7392794005985326e-05, |
|
"log_odds_chosen": 4.996828556060791, |
|
"log_odds_ratio": -0.1402866542339325, |
|
"logits/chosen": -2.9852428436279297, |
|
"logits/chosen_prompt": -3.1000924110412598, |
|
"logits/rejected": -3.4309897422790527, |
|
"logits/rejected_prompt": -3.088724374771118, |
|
"logps/chosen": -1.9283807277679443, |
|
"logps/chosen_both": -1.913000464439392, |
|
"logps/chosen_prompt": -0.7973994612693787, |
|
"logps/rejected": -6.7942705154418945, |
|
"logps/rejected_both": -6.711949348449707, |
|
"logps/rejected_prompt": -1.098016619682312, |
|
"loss": 2.2189, |
|
"nll_loss": 1.9121148586273193, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7713521718978882, |
|
"rewards/margins": 1.946356177330017, |
|
"rewards/rejected": -2.7177083492279053, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.448, |
|
"grad_norm": 5.519018494250257, |
|
"learning_rate": 4.7298885119428773e-05, |
|
"log_odds_chosen": 5.843784332275391, |
|
"log_odds_ratio": -0.07069602608680725, |
|
"logits/chosen": -3.0550990104675293, |
|
"logits/chosen_prompt": -3.058029890060425, |
|
"logits/rejected": -3.9521071910858154, |
|
"logits/rejected_prompt": -3.025411367416382, |
|
"logps/chosen": -1.8835957050323486, |
|
"logps/chosen_both": -1.8681533336639404, |
|
"logps/chosen_prompt": -0.8553426861763, |
|
"logps/rejected": -7.572214603424072, |
|
"logps/rejected_both": -7.47025203704834, |
|
"logps/rejected_prompt": -1.0323774814605713, |
|
"loss": 2.077, |
|
"nll_loss": 1.8675563335418701, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7534382939338684, |
|
"rewards/margins": 2.275447368621826, |
|
"rewards/rejected": -3.02888560295105, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.456, |
|
"grad_norm": 0.6103366310438396, |
|
"learning_rate": 4.720341122033862e-05, |
|
"log_odds_chosen": 5.190781593322754, |
|
"log_odds_ratio": -0.4892934262752533, |
|
"logits/chosen": -2.9757232666015625, |
|
"logits/chosen_prompt": -3.0236659049987793, |
|
"logits/rejected": -3.8188633918762207, |
|
"logits/rejected_prompt": -3.0117480754852295, |
|
"logps/chosen": -2.410020351409912, |
|
"logps/chosen_both": -2.387420415878296, |
|
"logps/chosen_prompt": -0.8877968788146973, |
|
"logps/rejected": -7.459628105163574, |
|
"logps/rejected_both": -7.362242698669434, |
|
"logps/rejected_prompt": -1.1302134990692139, |
|
"loss": 2.4112, |
|
"nll_loss": 2.3871912956237793, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9640080332756042, |
|
"rewards/margins": 2.019843339920044, |
|
"rewards/rejected": -2.983851432800293, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.464, |
|
"grad_norm": 0.151774002914212, |
|
"learning_rate": 4.710637900940181e-05, |
|
"log_odds_chosen": 3.729964017868042, |
|
"log_odds_ratio": -0.2660212516784668, |
|
"logits/chosen": -2.9713380336761475, |
|
"logits/chosen_prompt": -2.968736410140991, |
|
"logits/rejected": -3.0788886547088623, |
|
"logits/rejected_prompt": -2.944664478302002, |
|
"logps/chosen": -1.842739462852478, |
|
"logps/chosen_both": -1.829923391342163, |
|
"logps/chosen_prompt": -0.7877852320671082, |
|
"logps/rejected": -5.440505504608154, |
|
"logps/rejected_both": -5.388018608093262, |
|
"logps/rejected_prompt": -1.0643196105957031, |
|
"loss": 2.2685, |
|
"nll_loss": 1.8282448053359985, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7370957732200623, |
|
"rewards/margins": 1.4391063451766968, |
|
"rewards/rejected": -2.1762022972106934, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.472, |
|
"grad_norm": 0.19312538023716122, |
|
"learning_rate": 4.7007795296673006e-05, |
|
"log_odds_chosen": 3.5488052368164062, |
|
"log_odds_ratio": -0.27949827909469604, |
|
"logits/chosen": -2.9776198863983154, |
|
"logits/chosen_prompt": -3.0068747997283936, |
|
"logits/rejected": -3.2581207752227783, |
|
"logits/rejected_prompt": -2.980543613433838, |
|
"logps/chosen": -1.9455007314682007, |
|
"logps/chosen_both": -1.929386854171753, |
|
"logps/chosen_prompt": -0.7683624625205994, |
|
"logps/rejected": -5.4047675132751465, |
|
"logps/rejected_both": -5.334201812744141, |
|
"logps/rejected_prompt": -1.0063989162445068, |
|
"loss": 2.0098, |
|
"nll_loss": 1.927821159362793, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7782004475593567, |
|
"rewards/margins": 1.3837066888809204, |
|
"rewards/rejected": -2.161907196044922, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.6876461909667617, |
|
"learning_rate": 4.690766700109659e-05, |
|
"log_odds_chosen": 3.753337860107422, |
|
"log_odds_ratio": -0.21310639381408691, |
|
"logits/chosen": -2.983619213104248, |
|
"logits/chosen_prompt": -3.056485652923584, |
|
"logits/rejected": -3.4968714714050293, |
|
"logits/rejected_prompt": -3.052788496017456, |
|
"logps/chosen": -2.029822587966919, |
|
"logps/chosen_both": -2.0120925903320312, |
|
"logps/chosen_prompt": -0.8819751739501953, |
|
"logps/rejected": -5.680521488189697, |
|
"logps/rejected_both": -5.621560096740723, |
|
"logps/rejected_prompt": -1.1547878980636597, |
|
"loss": 2.2063, |
|
"nll_loss": 2.011672258377075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8119290471076965, |
|
"rewards/margins": 1.4602794647216797, |
|
"rewards/rejected": -2.2722086906433105, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.488, |
|
"grad_norm": 0.19286504559147355, |
|
"learning_rate": 4.68060011500211e-05, |
|
"log_odds_chosen": 4.486660957336426, |
|
"log_odds_ratio": -0.16551145911216736, |
|
"logits/chosen": -2.9143826961517334, |
|
"logits/chosen_prompt": -3.077587366104126, |
|
"logits/rejected": -3.641350507736206, |
|
"logits/rejected_prompt": -3.062753677368164, |
|
"logps/chosen": -1.9688940048217773, |
|
"logps/chosen_both": -1.954045295715332, |
|
"logps/chosen_prompt": -0.6965051293373108, |
|
"logps/rejected": -6.356810569763184, |
|
"logps/rejected_both": -6.294190406799316, |
|
"logps/rejected_prompt": -0.9163694381713867, |
|
"loss": 2.0169, |
|
"nll_loss": 1.953741431236267, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7875575423240662, |
|
"rewards/margins": 1.7551662921905518, |
|
"rewards/rejected": -2.542724132537842, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.496, |
|
"grad_norm": 0.19049752930142771, |
|
"learning_rate": 4.670280487870598e-05, |
|
"log_odds_chosen": 4.947572708129883, |
|
"log_odds_ratio": -0.14103658497333527, |
|
"logits/chosen": -2.8884735107421875, |
|
"logits/chosen_prompt": -3.0340023040771484, |
|
"logits/rejected": -3.598095655441284, |
|
"logits/rejected_prompt": -3.0135154724121094, |
|
"logps/chosen": -2.0803651809692383, |
|
"logps/chosen_both": -2.065659284591675, |
|
"logps/chosen_prompt": -0.7768818140029907, |
|
"logps/rejected": -6.917575836181641, |
|
"logps/rejected_both": -6.847512245178223, |
|
"logps/rejected_prompt": -1.0173327922821045, |
|
"loss": 2.4222, |
|
"nll_loss": 2.0645294189453125, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.832146167755127, |
|
"rewards/margins": 1.9348840713500977, |
|
"rewards/rejected": -2.7670302391052246, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.504, |
|
"grad_norm": 0.17010508801078386, |
|
"learning_rate": 4.659808542982088e-05, |
|
"log_odds_chosen": 4.44757604598999, |
|
"log_odds_ratio": -0.07313639670610428, |
|
"logits/chosen": -2.8788280487060547, |
|
"logits/chosen_prompt": -2.848573923110962, |
|
"logits/rejected": -2.6464812755584717, |
|
"logits/rejected_prompt": -2.814408540725708, |
|
"logps/chosen": -2.0289366245269775, |
|
"logps/chosen_both": -2.014009952545166, |
|
"logps/chosen_prompt": -0.7678987979888916, |
|
"logps/rejected": -6.3392744064331055, |
|
"logps/rejected_both": -6.2660441398620605, |
|
"logps/rejected_prompt": -1.0411919355392456, |
|
"loss": 2.0605, |
|
"nll_loss": 2.0118680000305176, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8115746378898621, |
|
"rewards/margins": 1.724135160446167, |
|
"rewards/rejected": -2.535709857940674, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 102.20159023426744, |
|
"learning_rate": 4.649185015293728e-05, |
|
"log_odds_chosen": 5.305100440979004, |
|
"log_odds_ratio": -0.02886788547039032, |
|
"logits/chosen": -2.934922456741333, |
|
"logits/chosen_prompt": -2.8038196563720703, |
|
"logits/rejected": -2.483616828918457, |
|
"logits/rejected_prompt": -2.801661491394043, |
|
"logps/chosen": -1.7393245697021484, |
|
"logps/chosen_both": -1.728514313697815, |
|
"logps/chosen_prompt": -0.882293701171875, |
|
"logps/rejected": -6.811369895935059, |
|
"logps/rejected_both": -6.727609157562256, |
|
"logps/rejected_prompt": -1.0623975992202759, |
|
"loss": 2.1612, |
|
"nll_loss": 1.7267690896987915, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6957297921180725, |
|
"rewards/margins": 2.028818130493164, |
|
"rewards/rejected": -2.724547863006592, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.577120716963003, |
|
"learning_rate": 4.638410650401267e-05, |
|
"log_odds_chosen": 5.029098033905029, |
|
"log_odds_ratio": -0.0729464739561081, |
|
"logits/chosen": -2.946472644805908, |
|
"logits/chosen_prompt": -2.7987747192382812, |
|
"logits/rejected": -2.31748628616333, |
|
"logits/rejected_prompt": -2.7790069580078125, |
|
"logps/chosen": -1.9928621053695679, |
|
"logps/chosen_both": -1.97336745262146, |
|
"logps/chosen_prompt": -0.8152757883071899, |
|
"logps/rejected": -6.8893632888793945, |
|
"logps/rejected_both": -6.791792392730713, |
|
"logps/rejected_prompt": -1.0174424648284912, |
|
"loss": 2.0913, |
|
"nll_loss": 1.9712880849838257, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7971449494361877, |
|
"rewards/margins": 1.9586002826690674, |
|
"rewards/rejected": -2.7557451725006104, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.528, |
|
"grad_norm": 0.15356571620190568, |
|
"learning_rate": 4.6274862044867304e-05, |
|
"log_odds_chosen": 4.515711307525635, |
|
"log_odds_ratio": -0.14140725135803223, |
|
"logits/chosen": -2.93347430229187, |
|
"logits/chosen_prompt": -2.790188789367676, |
|
"logits/rejected": -2.197619915008545, |
|
"logits/rejected_prompt": -2.7709336280822754, |
|
"logps/chosen": -1.9486901760101318, |
|
"logps/chosen_both": -1.936274766921997, |
|
"logps/chosen_prompt": -0.9808751940727234, |
|
"logps/rejected": -6.346037864685059, |
|
"logps/rejected_both": -6.276151180267334, |
|
"logps/rejected_prompt": -1.2042269706726074, |
|
"loss": 2.0583, |
|
"nll_loss": 1.9354143142700195, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7794761657714844, |
|
"rewards/margins": 1.7589390277862549, |
|
"rewards/rejected": -2.5384154319763184, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.536, |
|
"grad_norm": 0.18548636024672094, |
|
"learning_rate": 4.616412444265345e-05, |
|
"log_odds_chosen": 5.104066371917725, |
|
"log_odds_ratio": -0.0724453255534172, |
|
"logits/chosen": -2.9771525859832764, |
|
"logits/chosen_prompt": -2.8243517875671387, |
|
"logits/rejected": -2.083482265472412, |
|
"logits/rejected_prompt": -2.8059630393981934, |
|
"logps/chosen": -2.0861048698425293, |
|
"logps/chosen_both": -2.068869113922119, |
|
"logps/chosen_prompt": -0.8699228167533875, |
|
"logps/rejected": -7.067320823669434, |
|
"logps/rejected_both": -6.978930473327637, |
|
"logps/rejected_prompt": -1.0220625400543213, |
|
"loss": 2.1363, |
|
"nll_loss": 2.0682852268218994, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8344419598579407, |
|
"rewards/margins": 1.9924862384796143, |
|
"rewards/rejected": -2.8269283771514893, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.544, |
|
"grad_norm": 59.01092604551995, |
|
"learning_rate": 4.605190146931731e-05, |
|
"log_odds_chosen": 4.40061092376709, |
|
"log_odds_ratio": -0.1419232189655304, |
|
"logits/chosen": -2.9263124465942383, |
|
"logits/chosen_prompt": -2.8417701721191406, |
|
"logits/rejected": -2.351675510406494, |
|
"logits/rejected_prompt": -2.8414313793182373, |
|
"logps/chosen": -2.124084711074829, |
|
"logps/chosen_both": -2.102914571762085, |
|
"logps/chosen_prompt": -0.8957809209823608, |
|
"logps/rejected": -6.422041893005371, |
|
"logps/rejected_both": -6.32672643661499, |
|
"logps/rejected_prompt": -1.0718226432800293, |
|
"loss": 2.1268, |
|
"nll_loss": 2.1024184226989746, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8496338725090027, |
|
"rewards/margins": 1.7191829681396484, |
|
"rewards/rejected": -2.568816661834717, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.552, |
|
"grad_norm": 0.1936209207703668, |
|
"learning_rate": 4.593820100105355e-05, |
|
"log_odds_chosen": 4.4033403396606445, |
|
"log_odds_ratio": -0.1418362557888031, |
|
"logits/chosen": -2.947152614593506, |
|
"logits/chosen_prompt": -2.8191583156585693, |
|
"logits/rejected": -2.3703582286834717, |
|
"logits/rejected_prompt": -2.8038182258605957, |
|
"logps/chosen": -1.993703842163086, |
|
"logps/chosen_both": -1.9738051891326904, |
|
"logps/chosen_prompt": -0.8131387829780579, |
|
"logps/rejected": -6.278976917266846, |
|
"logps/rejected_both": -6.194762229919434, |
|
"logps/rejected_prompt": -0.9806526303291321, |
|
"loss": 2.0429, |
|
"nll_loss": 1.9733550548553467, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7974814772605896, |
|
"rewards/margins": 1.7141094207763672, |
|
"rewards/rejected": -2.5115909576416016, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.21779512193360956, |
|
"learning_rate": 4.5823031017752485e-05, |
|
"log_odds_chosen": 4.373869895935059, |
|
"log_odds_ratio": -0.1618097722530365, |
|
"logits/chosen": -2.9762911796569824, |
|
"logits/chosen_prompt": -2.787757396697998, |
|
"logits/rejected": -2.3213400840759277, |
|
"logits/rejected_prompt": -2.7804551124572754, |
|
"logps/chosen": -1.8093370199203491, |
|
"logps/chosen_both": -1.7962630987167358, |
|
"logps/chosen_prompt": -0.7294620871543884, |
|
"logps/rejected": -6.035723686218262, |
|
"logps/rejected_both": -5.961843490600586, |
|
"logps/rejected_prompt": -0.9543176889419556, |
|
"loss": 2.0382, |
|
"nll_loss": 1.7948728799819946, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7237349152565002, |
|
"rewards/margins": 1.6905548572540283, |
|
"rewards/rejected": -2.414289712905884, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.568, |
|
"grad_norm": 1.9769361000953782, |
|
"learning_rate": 4.5706399602440106e-05, |
|
"log_odds_chosen": 4.656636714935303, |
|
"log_odds_ratio": -0.1408310979604721, |
|
"logits/chosen": -2.916656255722046, |
|
"logits/chosen_prompt": -2.787416458129883, |
|
"logits/rejected": -2.190491199493408, |
|
"logits/rejected_prompt": -2.754542589187622, |
|
"logps/chosen": -2.000397205352783, |
|
"logps/chosen_both": -1.983769416809082, |
|
"logps/chosen_prompt": -0.7894454002380371, |
|
"logps/rejected": -6.537571907043457, |
|
"logps/rejected_both": -6.459201812744141, |
|
"logps/rejected_prompt": -1.0599520206451416, |
|
"loss": 2.098, |
|
"nll_loss": 1.9831438064575195, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8001587986946106, |
|
"rewards/margins": 1.8148695230484009, |
|
"rewards/rejected": -2.6150283813476562, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 0.18745727904701265, |
|
"learning_rate": 4.558831494071069e-05, |
|
"log_odds_chosen": 4.969104290008545, |
|
"log_odds_ratio": -0.14006975293159485, |
|
"logits/chosen": -2.9004273414611816, |
|
"logits/chosen_prompt": -2.7481789588928223, |
|
"logits/rejected": -1.9203866720199585, |
|
"logits/rejected_prompt": -2.7317967414855957, |
|
"logps/chosen": -2.000072479248047, |
|
"logps/chosen_both": -1.9829126596450806, |
|
"logps/chosen_prompt": -0.9659306406974792, |
|
"logps/rejected": -6.8479132652282715, |
|
"logps/rejected_both": -6.743927955627441, |
|
"logps/rejected_prompt": -1.1112347841262817, |
|
"loss": 2.0041, |
|
"nll_loss": 1.982696533203125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.800028920173645, |
|
"rewards/margins": 1.9391365051269531, |
|
"rewards/rejected": -2.7391655445098877, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.584, |
|
"grad_norm": 44.61607145258881, |
|
"learning_rate": 4.5468785320152365e-05, |
|
"log_odds_chosen": 4.449766635894775, |
|
"log_odds_ratio": -0.20899026095867157, |
|
"logits/chosen": -3.0241429805755615, |
|
"logits/chosen_prompt": -2.746372699737549, |
|
"logits/rejected": -2.07698917388916, |
|
"logits/rejected_prompt": -2.746025562286377, |
|
"logps/chosen": -1.9495675563812256, |
|
"logps/chosen_both": -1.9276573657989502, |
|
"logps/chosen_prompt": -0.8301995992660522, |
|
"logps/rejected": -6.287846565246582, |
|
"logps/rejected_both": -6.172031402587891, |
|
"logps/rejected_prompt": -0.9652963876724243, |
|
"loss": 2.1169, |
|
"nll_loss": 1.9262176752090454, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7798271179199219, |
|
"rewards/margins": 1.735311508178711, |
|
"rewards/rejected": -2.515138626098633, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.592, |
|
"grad_norm": 0.39108071766635244, |
|
"learning_rate": 4.534781912976546e-05, |
|
"log_odds_chosen": 3.2947051525115967, |
|
"log_odds_ratio": -0.2812163829803467, |
|
"logits/chosen": -2.989047050476074, |
|
"logits/chosen_prompt": -2.7699084281921387, |
|
"logits/rejected": -2.4307093620300293, |
|
"logits/rejected_prompt": -2.756155014038086, |
|
"logps/chosen": -1.9651190042495728, |
|
"logps/chosen_both": -1.9502513408660889, |
|
"logps/chosen_prompt": -0.7651479840278625, |
|
"logps/rejected": -5.176846981048584, |
|
"logps/rejected_both": -5.1231608390808105, |
|
"logps/rejected_prompt": -0.8976105451583862, |
|
"loss": 2.0946, |
|
"nll_loss": 1.949180245399475, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7860475778579712, |
|
"rewards/margins": 1.2846912145614624, |
|
"rewards/rejected": -2.070739269256592, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 4.489644128912903, |
|
"learning_rate": 4.522542485937369e-05, |
|
"log_odds_chosen": 4.886274337768555, |
|
"log_odds_ratio": -0.14077258110046387, |
|
"logits/chosen": -2.948451519012451, |
|
"logits/chosen_prompt": -2.7478134632110596, |
|
"logits/rejected": -2.1101903915405273, |
|
"logits/rejected_prompt": -2.7366366386413574, |
|
"logps/chosen": -1.992583990097046, |
|
"logps/chosen_both": -1.9766371250152588, |
|
"logps/chosen_prompt": -0.8634021878242493, |
|
"logps/rejected": -6.756987571716309, |
|
"logps/rejected_both": -6.673755645751953, |
|
"logps/rejected_prompt": -1.0165636539459229, |
|
"loss": 2.1241, |
|
"nll_loss": 1.9759677648544312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7970336675643921, |
|
"rewards/margins": 1.90576171875, |
|
"rewards/rejected": -2.7027952671051025, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.608, |
|
"grad_norm": 0.4533909423122121, |
|
"learning_rate": 4.510161109902837e-05, |
|
"log_odds_chosen": 3.120637893676758, |
|
"log_odds_ratio": -0.6285208463668823, |
|
"logits/chosen": -2.909808397293091, |
|
"logits/chosen_prompt": -2.8316149711608887, |
|
"logits/rejected": -2.377187490463257, |
|
"logits/rejected_prompt": -2.823117971420288, |
|
"logps/chosen": -2.327125072479248, |
|
"logps/chosen_both": -2.3096871376037598, |
|
"logps/chosen_prompt": -0.868097186088562, |
|
"logps/rejected": -5.366008281707764, |
|
"logps/rejected_both": -5.30277681350708, |
|
"logps/rejected_prompt": -1.0501350164413452, |
|
"loss": 2.1836, |
|
"nll_loss": 2.3085296154022217, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9308500289916992, |
|
"rewards/margins": 1.2155535221099854, |
|
"rewards/rejected": -2.1464035511016846, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.616, |
|
"grad_norm": 0.19141971158001736, |
|
"learning_rate": 4.4976386538405495e-05, |
|
"log_odds_chosen": 2.943345546722412, |
|
"log_odds_ratio": -0.2832263708114624, |
|
"logits/chosen": -2.926583766937256, |
|
"logits/chosen_prompt": -2.8340327739715576, |
|
"logits/rejected": -2.5858168601989746, |
|
"logits/rejected_prompt": -2.8149476051330566, |
|
"logps/chosen": -2.0653610229492188, |
|
"logps/chosen_both": -2.0445759296417236, |
|
"logps/chosen_prompt": -0.8157526254653931, |
|
"logps/rejected": -4.919131278991699, |
|
"logps/rejected_both": -4.849064350128174, |
|
"logps/rejected_prompt": -1.005324125289917, |
|
"loss": 2.0024, |
|
"nll_loss": 2.0445759296417236, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8261443972587585, |
|
"rewards/margins": 1.1415081024169922, |
|
"rewards/rejected": -1.9676525592803955, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.624, |
|
"grad_norm": 0.299820230370255, |
|
"learning_rate": 4.484975996619589e-05, |
|
"log_odds_chosen": 4.539975166320801, |
|
"log_odds_ratio": -0.11812126636505127, |
|
"logits/chosen": -2.87815523147583, |
|
"logits/chosen_prompt": -2.8412280082702637, |
|
"logits/rejected": -2.3637688159942627, |
|
"logits/rejected_prompt": -2.8588156700134277, |
|
"logps/chosen": -2.4759485721588135, |
|
"logps/chosen_both": -2.454190731048584, |
|
"logps/chosen_prompt": -0.7899399995803833, |
|
"logps/rejected": -6.8973388671875, |
|
"logps/rejected_both": -6.819916725158691, |
|
"logps/rejected_prompt": -1.066646695137024, |
|
"loss": 2.3702, |
|
"nll_loss": 2.454133987426758, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9903793334960938, |
|
"rewards/margins": 1.7685562372207642, |
|
"rewards/rejected": -2.7589354515075684, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.632, |
|
"grad_norm": 5.192675922080671, |
|
"learning_rate": 4.4721740269488355e-05, |
|
"log_odds_chosen": 2.496995210647583, |
|
"log_odds_ratio": -0.32391008734703064, |
|
"logits/chosen": -2.966625213623047, |
|
"logits/chosen_prompt": -2.795879602432251, |
|
"logits/rejected": -2.514392137527466, |
|
"logits/rejected_prompt": -2.783583164215088, |
|
"logps/chosen": -2.563605546951294, |
|
"logps/chosen_both": -2.541128635406494, |
|
"logps/chosen_prompt": -0.9771214723587036, |
|
"logps/rejected": -4.989082336425781, |
|
"logps/rejected_both": -4.936980724334717, |
|
"logps/rejected_prompt": -1.0889393091201782, |
|
"loss": 2.1847, |
|
"nll_loss": 2.5405211448669434, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.025442123413086, |
|
"rewards/margins": 0.9701908230781555, |
|
"rewards/rejected": -1.9956328868865967, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.4695325524437554, |
|
"learning_rate": 4.4592336433146e-05, |
|
"log_odds_chosen": 5.124607563018799, |
|
"log_odds_ratio": -0.018428776413202286, |
|
"logits/chosen": -3.051105260848999, |
|
"logits/chosen_prompt": -2.8179726600646973, |
|
"logits/rejected": -1.909102201461792, |
|
"logits/rejected_prompt": -2.7916340827941895, |
|
"logps/chosen": -1.8969109058380127, |
|
"logps/chosen_both": -1.8779878616333008, |
|
"logps/chosen_prompt": -0.8452935218811035, |
|
"logps/rejected": -6.845399379730225, |
|
"logps/rejected_both": -6.747313022613525, |
|
"logps/rejected_prompt": -0.9934666752815247, |
|
"loss": 2.0368, |
|
"nll_loss": 1.8772528171539307, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.758764386177063, |
|
"rewards/margins": 1.9793955087661743, |
|
"rewards/rejected": -2.7381598949432373, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.648, |
|
"grad_norm": 0.21280813340257887, |
|
"learning_rate": 4.4461557539175594e-05, |
|
"log_odds_chosen": 5.451117515563965, |
|
"log_odds_ratio": -0.07145892083644867, |
|
"logits/chosen": -2.9378345012664795, |
|
"logits/chosen_prompt": -2.762908458709717, |
|
"logits/rejected": -1.6283600330352783, |
|
"logits/rejected_prompt": -2.7498764991760254, |
|
"logps/chosen": -2.0257043838500977, |
|
"logps/chosen_both": -2.008737087249756, |
|
"logps/chosen_prompt": -0.8673852682113647, |
|
"logps/rejected": -7.346819877624512, |
|
"logps/rejected_both": -7.247427940368652, |
|
"logps/rejected_prompt": -1.0632621049880981, |
|
"loss": 2.0447, |
|
"nll_loss": 2.0078537464141846, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8102817535400391, |
|
"rewards/margins": 2.128446340560913, |
|
"rewards/rejected": -2.938728094100952, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.656, |
|
"grad_norm": 0.209653515397789, |
|
"learning_rate": 4.432941276609018e-05, |
|
"log_odds_chosen": 5.421745777130127, |
|
"log_odds_ratio": -0.07243818789720535, |
|
"logits/chosen": -2.9660727977752686, |
|
"logits/chosen_prompt": -2.805607318878174, |
|
"logits/rejected": -1.6398050785064697, |
|
"logits/rejected_prompt": -2.7811026573181152, |
|
"logps/chosen": -2.0751829147338867, |
|
"logps/chosen_both": -2.0558664798736572, |
|
"logps/chosen_prompt": -0.7402461767196655, |
|
"logps/rejected": -7.376537322998047, |
|
"logps/rejected_both": -7.285178184509277, |
|
"logps/rejected_prompt": -0.9955169558525085, |
|
"loss": 2.1673, |
|
"nll_loss": 2.05536150932312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8300731778144836, |
|
"rewards/margins": 2.1205410957336426, |
|
"rewards/rejected": -2.9506144523620605, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.664, |
|
"grad_norm": 0.2932004663372407, |
|
"learning_rate": 4.4195911388264946e-05, |
|
"log_odds_chosen": 3.337216854095459, |
|
"log_odds_ratio": -0.28040507435798645, |
|
"logits/chosen": -3.0083236694335938, |
|
"logits/chosen_prompt": -2.7438673973083496, |
|
"logits/rejected": -2.2188708782196045, |
|
"logits/rejected_prompt": -2.710932970046997, |
|
"logps/chosen": -1.7532163858413696, |
|
"logps/chosen_both": -1.7392990589141846, |
|
"logps/chosen_prompt": -0.881622314453125, |
|
"logps/rejected": -4.988051891326904, |
|
"logps/rejected_both": -4.921896934509277, |
|
"logps/rejected_prompt": -0.8814730644226074, |
|
"loss": 2.0387, |
|
"nll_loss": 1.7385940551757812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7012865543365479, |
|
"rewards/margins": 1.2939343452453613, |
|
"rewards/rejected": -1.9952208995819092, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.672, |
|
"grad_norm": 2.9403489436512475, |
|
"learning_rate": 4.40610627752862e-05, |
|
"log_odds_chosen": 5.995909690856934, |
|
"log_odds_ratio": -0.07048363983631134, |
|
"logits/chosen": -2.951843738555908, |
|
"logits/chosen_prompt": -2.657824993133545, |
|
"logits/rejected": -1.3483891487121582, |
|
"logits/rejected_prompt": -2.6459240913391113, |
|
"logps/chosen": -2.0297625064849854, |
|
"logps/chosen_both": -2.011107921600342, |
|
"logps/chosen_prompt": -0.8041833639144897, |
|
"logps/rejected": -7.886776924133301, |
|
"logps/rejected_both": -7.784094333648682, |
|
"logps/rejected_prompt": -0.9874393343925476, |
|
"loss": 2.0868, |
|
"nll_loss": 2.0107545852661133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8119049072265625, |
|
"rewards/margins": 2.342806100845337, |
|
"rewards/rejected": -3.1547107696533203, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.1951986041864062, |
|
"learning_rate": 4.3924876391293915e-05, |
|
"log_odds_chosen": 5.405202865600586, |
|
"log_odds_ratio": -0.4933692514896393, |
|
"logits/chosen": -2.8229470252990723, |
|
"logits/chosen_prompt": -2.70353102684021, |
|
"logits/rejected": -1.516230821609497, |
|
"logits/rejected_prompt": -2.682372570037842, |
|
"logps/chosen": -2.4473724365234375, |
|
"logps/chosen_both": -2.4278030395507812, |
|
"logps/chosen_prompt": -0.8016360402107239, |
|
"logps/rejected": -7.731281280517578, |
|
"logps/rejected_both": -7.645183563232422, |
|
"logps/rejected_prompt": -0.9825652241706848, |
|
"loss": 2.2426, |
|
"nll_loss": 2.427164316177368, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9789490699768066, |
|
"rewards/margins": 2.113563299179077, |
|
"rewards/rejected": -3.092512369155884, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.688, |
|
"grad_norm": 0.19812900890844543, |
|
"learning_rate": 4.3787361794317405e-05, |
|
"log_odds_chosen": 3.4184670448303223, |
|
"log_odds_ratio": -0.22132563591003418, |
|
"logits/chosen": -2.9762589931488037, |
|
"logits/chosen_prompt": -2.764681816101074, |
|
"logits/rejected": -2.4695773124694824, |
|
"logits/rejected_prompt": -2.739607095718384, |
|
"logps/chosen": -1.889784812927246, |
|
"logps/chosen_both": -1.8726049661636353, |
|
"logps/chosen_prompt": -0.8000418543815613, |
|
"logps/rejected": -5.191944122314453, |
|
"logps/rejected_both": -5.127084732055664, |
|
"logps/rejected_prompt": -0.973870575428009, |
|
"loss": 2.0017, |
|
"nll_loss": 1.8721071481704712, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7559138536453247, |
|
"rewards/margins": 1.3208638429641724, |
|
"rewards/rejected": -2.076777935028076, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.696, |
|
"grad_norm": 1.1208289382374679, |
|
"learning_rate": 4.3648528635604556e-05, |
|
"log_odds_chosen": 4.736769199371338, |
|
"log_odds_ratio": -0.07410699129104614, |
|
"logits/chosen": -2.9047577381134033, |
|
"logits/chosen_prompt": -2.7688372135162354, |
|
"logits/rejected": -2.297377824783325, |
|
"logits/rejected_prompt": -2.7379658222198486, |
|
"logps/chosen": -2.166656017303467, |
|
"logps/chosen_both": -2.149369955062866, |
|
"logps/chosen_prompt": -0.7613478899002075, |
|
"logps/rejected": -6.790528774261475, |
|
"logps/rejected_both": -6.711920738220215, |
|
"logps/rejected_prompt": -0.9217512011528015, |
|
"loss": 2.19, |
|
"nll_loss": 2.1481828689575195, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8666625022888184, |
|
"rewards/margins": 1.8495492935180664, |
|
"rewards/rejected": -2.7162115573883057, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.704, |
|
"grad_norm": 0.18802597714184358, |
|
"learning_rate": 4.350838665894446e-05, |
|
"log_odds_chosen": 3.573579788208008, |
|
"log_odds_ratio": -0.2119835913181305, |
|
"logits/chosen": -2.9564337730407715, |
|
"logits/chosen_prompt": -2.8878400325775146, |
|
"logits/rejected": -2.7999844551086426, |
|
"logits/rejected_prompt": -2.8850619792938232, |
|
"logps/chosen": -2.041067361831665, |
|
"logps/chosen_both": -2.0219027996063232, |
|
"logps/chosen_prompt": -0.7945634126663208, |
|
"logps/rejected": -5.52020788192749, |
|
"logps/rejected_both": -5.447958946228027, |
|
"logps/rejected_prompt": -0.9404302835464478, |
|
"loss": 2.1522, |
|
"nll_loss": 2.0212433338165283, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8164268732070923, |
|
"rewards/margins": 1.3916563987731934, |
|
"rewards/rejected": -2.208083152770996, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.712, |
|
"grad_norm": 1.3417035590493764, |
|
"learning_rate": 4.336694569998354e-05, |
|
"log_odds_chosen": 4.419407367706299, |
|
"log_odds_ratio": -0.07842884957790375, |
|
"logits/chosen": -2.980591297149658, |
|
"logits/chosen_prompt": -2.9254255294799805, |
|
"logits/rejected": -2.7680697441101074, |
|
"logits/rejected_prompt": -2.905561923980713, |
|
"logps/chosen": -2.0169148445129395, |
|
"logps/chosen_both": -2.0003621578216553, |
|
"logps/chosen_prompt": -0.8039913177490234, |
|
"logps/rejected": -6.302676200866699, |
|
"logps/rejected_both": -6.233563423156738, |
|
"logps/rejected_prompt": -0.9547332525253296, |
|
"loss": 2.0996, |
|
"nll_loss": 2.000209331512451, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8067659139633179, |
|
"rewards/margins": 1.7143046855926514, |
|
"rewards/rejected": -2.5210704803466797, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.17015695407576262, |
|
"learning_rate": 4.3224215685535294e-05, |
|
"log_odds_chosen": 3.736863613128662, |
|
"log_odds_ratio": -0.21099340915679932, |
|
"logits/chosen": -2.9480998516082764, |
|
"logits/chosen_prompt": -2.909301519393921, |
|
"logits/rejected": -2.5860133171081543, |
|
"logits/rejected_prompt": -2.8961730003356934, |
|
"logps/chosen": -1.99604070186615, |
|
"logps/chosen_both": -1.9824683666229248, |
|
"logps/chosen_prompt": -0.8537474870681763, |
|
"logps/rejected": -5.6191020011901855, |
|
"logps/rejected_both": -5.559712886810303, |
|
"logps/rejected_prompt": -1.0109044313430786, |
|
"loss": 2.0333, |
|
"nll_loss": 1.9815161228179932, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.798416256904602, |
|
"rewards/margins": 1.449224591255188, |
|
"rewards/rejected": -2.247641086578369, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.728, |
|
"grad_norm": 0.1938256131016386, |
|
"learning_rate": 4.3080206632883554e-05, |
|
"log_odds_chosen": 4.993983745574951, |
|
"log_odds_ratio": -0.07278299331665039, |
|
"logits/chosen": -2.9305057525634766, |
|
"logits/chosen_prompt": -2.8883767127990723, |
|
"logits/rejected": -2.744293212890625, |
|
"logits/rejected_prompt": -2.865830183029175, |
|
"logps/chosen": -1.9137989282608032, |
|
"logps/chosen_both": -1.897878646850586, |
|
"logps/chosen_prompt": -0.8952886462211609, |
|
"logps/rejected": -6.773948669433594, |
|
"logps/rejected_both": -6.680284023284912, |
|
"logps/rejected_prompt": -1.1111478805541992, |
|
"loss": 2.072, |
|
"nll_loss": 1.896592378616333, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7655196189880371, |
|
"rewards/margins": 1.9440600872039795, |
|
"rewards/rejected": -2.7095799446105957, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.736, |
|
"grad_norm": 0.19422924079693882, |
|
"learning_rate": 4.293492864907947e-05, |
|
"log_odds_chosen": 4.982480049133301, |
|
"log_odds_ratio": -0.07303477078676224, |
|
"logits/chosen": -2.897078275680542, |
|
"logits/chosen_prompt": -2.8844199180603027, |
|
"logits/rejected": -2.5853612422943115, |
|
"logits/rejected_prompt": -2.896810531616211, |
|
"logps/chosen": -2.046506404876709, |
|
"logps/chosen_both": -2.027215003967285, |
|
"logps/chosen_prompt": -0.8521916270256042, |
|
"logps/rejected": -6.898811340332031, |
|
"logps/rejected_both": -6.797191619873047, |
|
"logps/rejected_prompt": -1.0783166885375977, |
|
"loss": 2.0343, |
|
"nll_loss": 2.025817394256592, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8186025619506836, |
|
"rewards/margins": 1.9409217834472656, |
|
"rewards/rejected": -2.7595245838165283, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.744, |
|
"grad_norm": 0.17503233577716112, |
|
"learning_rate": 4.278839193023214e-05, |
|
"log_odds_chosen": 5.051764011383057, |
|
"log_odds_ratio": -0.07269078493118286, |
|
"logits/chosen": -2.968621015548706, |
|
"logits/chosen_prompt": -2.8850250244140625, |
|
"logits/rejected": -2.575244426727295, |
|
"logits/rejected_prompt": -2.879965305328369, |
|
"logps/chosen": -2.0476856231689453, |
|
"logps/chosen_both": -2.0287888050079346, |
|
"logps/chosen_prompt": -0.8320780992507935, |
|
"logps/rejected": -6.972892761230469, |
|
"logps/rejected_both": -6.875253200531006, |
|
"logps/rejected_prompt": -0.9857944250106812, |
|
"loss": 2.4164, |
|
"nll_loss": 2.027635335922241, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8190741539001465, |
|
"rewards/margins": 1.9700825214385986, |
|
"rewards/rejected": -2.7891571521759033, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.752, |
|
"grad_norm": 47.44652080135077, |
|
"learning_rate": 4.264060676079302e-05, |
|
"log_odds_chosen": 3.4615960121154785, |
|
"log_odds_ratio": -0.25266528129577637, |
|
"logits/chosen": -2.9501328468322754, |
|
"logits/chosen_prompt": -2.8721659183502197, |
|
"logits/rejected": -3.1557369232177734, |
|
"logits/rejected_prompt": -2.854639768600464, |
|
"logps/chosen": -2.153719425201416, |
|
"logps/chosen_both": -2.135387897491455, |
|
"logps/chosen_prompt": -0.9698511958122253, |
|
"logps/rejected": -5.52289342880249, |
|
"logps/rejected_both": -5.454329490661621, |
|
"logps/rejected_prompt": -1.0520834922790527, |
|
"loss": 2.1268, |
|
"nll_loss": 2.1349105834960938, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8614877462387085, |
|
"rewards/margins": 1.3476698398590088, |
|
"rewards/rejected": -2.2091574668884277, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.19086614631182744, |
|
"learning_rate": 4.249158351283414e-05, |
|
"log_odds_chosen": 4.672451496124268, |
|
"log_odds_ratio": -0.14073383808135986, |
|
"logits/chosen": -3.003997325897217, |
|
"logits/chosen_prompt": -2.9195713996887207, |
|
"logits/rejected": -3.2987685203552246, |
|
"logits/rejected_prompt": -2.9031708240509033, |
|
"logps/chosen": -2.006805896759033, |
|
"logps/chosen_both": -1.9856882095336914, |
|
"logps/chosen_prompt": -0.8608209490776062, |
|
"logps/rejected": -6.554454803466797, |
|
"logps/rejected_both": -6.446510314941406, |
|
"logps/rejected_prompt": -1.0303418636322021, |
|
"loss": 2.0534, |
|
"nll_loss": 1.9856884479522705, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8027224540710449, |
|
"rewards/margins": 1.8190593719482422, |
|
"rewards/rejected": -2.621781826019287, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 0.16090899053372315, |
|
"learning_rate": 4.234133264532012e-05, |
|
"log_odds_chosen": 6.077364444732666, |
|
"log_odds_ratio": -0.004217286594212055, |
|
"logits/chosen": -2.842454433441162, |
|
"logits/chosen_prompt": -2.8957276344299316, |
|
"logits/rejected": -3.5180137157440186, |
|
"logits/rejected_prompt": -2.9135992527008057, |
|
"logps/chosen": -1.9932161569595337, |
|
"logps/chosen_both": -1.9756605625152588, |
|
"logps/chosen_prompt": -0.8626230359077454, |
|
"logps/rejected": -7.9156999588012695, |
|
"logps/rejected_both": -7.813823699951172, |
|
"logps/rejected_prompt": -1.0395594835281372, |
|
"loss": 2.0091, |
|
"nll_loss": 1.975542664527893, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7972863912582397, |
|
"rewards/margins": 2.3689935207366943, |
|
"rewards/rejected": -3.1662800312042236, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.776, |
|
"grad_norm": 0.4432866833057449, |
|
"learning_rate": 4.218986470337419e-05, |
|
"log_odds_chosen": 5.5125412940979, |
|
"log_odds_ratio": -0.07154224812984467, |
|
"logits/chosen": -2.9377503395080566, |
|
"logits/chosen_prompt": -2.926082134246826, |
|
"logits/rejected": -3.535740375518799, |
|
"logits/rejected_prompt": -2.9182417392730713, |
|
"logps/chosen": -1.919931411743164, |
|
"logps/chosen_both": -1.9039018154144287, |
|
"logps/chosen_prompt": -0.7944774627685547, |
|
"logps/rejected": -7.288356781005859, |
|
"logps/rejected_both": -7.193412780761719, |
|
"logps/rejected_prompt": -0.9629098773002625, |
|
"loss": 2.3092, |
|
"nll_loss": 1.9036260843276978, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7679725289344788, |
|
"rewards/margins": 2.147369861602783, |
|
"rewards/rejected": -2.9153425693511963, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.784, |
|
"grad_norm": 0.19680727751711977, |
|
"learning_rate": 4.2037190317538e-05, |
|
"log_odds_chosen": 4.595906734466553, |
|
"log_odds_ratio": -0.07939890027046204, |
|
"logits/chosen": -2.9524266719818115, |
|
"logits/chosen_prompt": -2.790818691253662, |
|
"logits/rejected": -2.9070940017700195, |
|
"logits/rejected_prompt": -2.781165599822998, |
|
"logps/chosen": -1.9940401315689087, |
|
"logps/chosen_both": -1.978316068649292, |
|
"logps/chosen_prompt": -0.7690817713737488, |
|
"logps/rejected": -6.455039024353027, |
|
"logps/rejected_both": -6.385528087615967, |
|
"logps/rejected_prompt": -0.9404104948043823, |
|
"loss": 2.0872, |
|
"nll_loss": 1.9778735637664795, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7976160049438477, |
|
"rewards/margins": 1.7843996286392212, |
|
"rewards/rejected": -2.5820157527923584, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.792, |
|
"grad_norm": 0.1584132780664858, |
|
"learning_rate": 4.188332020302561e-05, |
|
"log_odds_chosen": 4.230597496032715, |
|
"log_odds_ratio": -0.14310847222805023, |
|
"logits/chosen": -2.956609010696411, |
|
"logits/chosen_prompt": -2.8512063026428223, |
|
"logits/rejected": -2.678597927093506, |
|
"logits/rejected_prompt": -2.8333568572998047, |
|
"logps/chosen": -1.8776973485946655, |
|
"logps/chosen_both": -1.8625962734222412, |
|
"logps/chosen_prompt": -0.8090478777885437, |
|
"logps/rejected": -5.976474761962891, |
|
"logps/rejected_both": -5.902680397033691, |
|
"logps/rejected_prompt": -0.9692068099975586, |
|
"loss": 1.9999, |
|
"nll_loss": 1.861577033996582, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7510789632797241, |
|
"rewards/margins": 1.6395108699798584, |
|
"rewards/rejected": -2.390589952468872, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.18982243368973564, |
|
"learning_rate": 4.172826515897146e-05, |
|
"log_odds_chosen": 4.3918375968933105, |
|
"log_odds_ratio": -0.14247746765613556, |
|
"logits/chosen": -2.9714953899383545, |
|
"logits/chosen_prompt": -2.824305772781372, |
|
"logits/rejected": -2.6518845558166504, |
|
"logits/rejected_prompt": -2.8202338218688965, |
|
"logps/chosen": -1.8688671588897705, |
|
"logps/chosen_both": -1.8508541584014893, |
|
"logps/chosen_prompt": -0.9176328778266907, |
|
"logps/rejected": -6.10614538192749, |
|
"logps/rejected_both": -6.007752418518066, |
|
"logps/rejected_prompt": -1.0590510368347168, |
|
"loss": 2.0857, |
|
"nll_loss": 1.8497679233551025, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.747546911239624, |
|
"rewards/margins": 1.694911241531372, |
|
"rewards/rejected": -2.442458391189575, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.808, |
|
"grad_norm": 0.15641654006436478, |
|
"learning_rate": 4.157203606767238e-05, |
|
"log_odds_chosen": 4.2656779289245605, |
|
"log_odds_ratio": -0.14230065047740936, |
|
"logits/chosen": -2.9932308197021484, |
|
"logits/chosen_prompt": -2.830867290496826, |
|
"logits/rejected": -2.6234424114227295, |
|
"logits/rejected_prompt": -2.8216352462768555, |
|
"logps/chosen": -2.024932384490967, |
|
"logps/chosen_both": -2.0056064128875732, |
|
"logps/chosen_prompt": -0.7936287522315979, |
|
"logps/rejected": -6.178097724914551, |
|
"logps/rejected_both": -6.095284938812256, |
|
"logps/rejected_prompt": -0.9350797533988953, |
|
"loss": 2.037, |
|
"nll_loss": 2.0045900344848633, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.809972882270813, |
|
"rewards/margins": 1.6612660884857178, |
|
"rewards/rejected": -2.4712390899658203, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.816, |
|
"grad_norm": 0.20983648268469735, |
|
"learning_rate": 4.1414643893823914e-05, |
|
"log_odds_chosen": 4.862036228179932, |
|
"log_odds_ratio": -0.07260783016681671, |
|
"logits/chosen": -2.9284424781799316, |
|
"logits/chosen_prompt": -2.8569953441619873, |
|
"logits/rejected": -2.5351157188415527, |
|
"logits/rejected_prompt": -2.8426971435546875, |
|
"logps/chosen": -2.1229608058929443, |
|
"logps/chosen_both": -2.10365629196167, |
|
"logps/chosen_prompt": -0.8154341578483582, |
|
"logps/rejected": -6.869643211364746, |
|
"logps/rejected_both": -6.7744574546813965, |
|
"logps/rejected_prompt": -0.9435701370239258, |
|
"loss": 2.1102, |
|
"nll_loss": 2.1023664474487305, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8491843342781067, |
|
"rewards/margins": 1.8986728191375732, |
|
"rewards/rejected": -2.7478575706481934, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.824, |
|
"grad_norm": 0.16714535237522857, |
|
"learning_rate": 4.125609968375072e-05, |
|
"log_odds_chosen": 5.137936115264893, |
|
"log_odds_ratio": -0.0722423866391182, |
|
"logits/chosen": -2.917429208755493, |
|
"logits/chosen_prompt": -2.805572509765625, |
|
"logits/rejected": -2.4986531734466553, |
|
"logits/rejected_prompt": -2.7935025691986084, |
|
"logps/chosen": -1.898790717124939, |
|
"logps/chosen_both": -1.88314688205719, |
|
"logps/chosen_prompt": -0.8224050402641296, |
|
"logps/rejected": -6.880563259124756, |
|
"logps/rejected_both": -6.7928266525268555, |
|
"logps/rejected_prompt": -0.9875515699386597, |
|
"loss": 2.0572, |
|
"nll_loss": 1.8828372955322266, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7595163583755493, |
|
"rewards/margins": 1.9927089214324951, |
|
"rewards/rejected": -2.752225637435913, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.832, |
|
"grad_norm": 0.17116655114302515, |
|
"learning_rate": 4.109641456463135e-05, |
|
"log_odds_chosen": 4.716578006744385, |
|
"log_odds_ratio": -0.05661363527178764, |
|
"logits/chosen": -2.9051055908203125, |
|
"logits/chosen_prompt": -2.861964702606201, |
|
"logits/rejected": -2.489297866821289, |
|
"logits/rejected_prompt": -2.8317601680755615, |
|
"logps/chosen": -2.72660493850708, |
|
"logps/chosen_both": -2.6989545822143555, |
|
"logps/chosen_prompt": -0.786345899105072, |
|
"logps/rejected": -7.32622766494751, |
|
"logps/rejected_both": -7.235006809234619, |
|
"logps/rejected_prompt": -0.9496296048164368, |
|
"loss": 2.0544, |
|
"nll_loss": 2.698387622833252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0906422138214111, |
|
"rewards/margins": 1.8398488759994507, |
|
"rewards/rejected": -2.9304909706115723, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 2.0421569101702004, |
|
"learning_rate": 4.093559974371725e-05, |
|
"log_odds_chosen": 4.683531284332275, |
|
"log_odds_ratio": -0.14838626980781555, |
|
"logits/chosen": -2.983940601348877, |
|
"logits/chosen_prompt": -2.8726494312286377, |
|
"logits/rejected": -2.683384418487549, |
|
"logits/rejected_prompt": -2.844991683959961, |
|
"logps/chosen": -1.7734657526016235, |
|
"logps/chosen_both": -1.762310266494751, |
|
"logps/chosen_prompt": -0.8980112075805664, |
|
"logps/rejected": -6.07004976272583, |
|
"logps/rejected_both": -5.987616062164307, |
|
"logps/rejected_prompt": -1.1182132959365845, |
|
"loss": 2.145, |
|
"nll_loss": 1.7613089084625244, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7093862891197205, |
|
"rewards/margins": 1.7186336517333984, |
|
"rewards/rejected": -2.4280200004577637, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.848, |
|
"grad_norm": 0.1891364752116159, |
|
"learning_rate": 4.077366650754624e-05, |
|
"log_odds_chosen": 4.3087382316589355, |
|
"log_odds_ratio": -0.1364879608154297, |
|
"logits/chosen": -2.9432783126831055, |
|
"logits/chosen_prompt": -2.815147638320923, |
|
"logits/rejected": -2.721280097961426, |
|
"logits/rejected_prompt": -2.818236827850342, |
|
"logps/chosen": -1.8882700204849243, |
|
"logps/chosen_both": -1.8756290674209595, |
|
"logps/chosen_prompt": -0.8526128530502319, |
|
"logps/rejected": -6.065881729125977, |
|
"logps/rejected_both": -6.0042314529418945, |
|
"logps/rejected_prompt": -0.9744648933410645, |
|
"loss": 2.1355, |
|
"nll_loss": 1.8748886585235596, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7553080320358276, |
|
"rewards/margins": 1.6710445880889893, |
|
"rewards/rejected": -2.4263527393341064, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.856, |
|
"grad_norm": 33.963511456298086, |
|
"learning_rate": 4.0610626221150394e-05, |
|
"log_odds_chosen": 4.251172065734863, |
|
"log_odds_ratio": -0.09040095657110214, |
|
"logits/chosen": -2.9414284229278564, |
|
"logits/chosen_prompt": -2.8389973640441895, |
|
"logits/rejected": -2.8033430576324463, |
|
"logits/rejected_prompt": -2.82332706451416, |
|
"logps/chosen": -1.9342035055160522, |
|
"logps/chosen_both": -1.9176651239395142, |
|
"logps/chosen_prompt": -0.8298524022102356, |
|
"logps/rejected": -6.048348903656006, |
|
"logps/rejected_both": -5.979620933532715, |
|
"logps/rejected_prompt": -0.9826586842536926, |
|
"loss": 2.0673, |
|
"nll_loss": 1.9169620275497437, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7736814618110657, |
|
"rewards/margins": 1.645658254623413, |
|
"rewards/rejected": -2.419339656829834, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.864, |
|
"grad_norm": 5.339928107993312, |
|
"learning_rate": 4.044649032725836e-05, |
|
"log_odds_chosen": 4.668586730957031, |
|
"log_odds_ratio": -0.04072408378124237, |
|
"logits/chosen": -2.9805121421813965, |
|
"logits/chosen_prompt": -2.858212947845459, |
|
"logits/rejected": -2.779395580291748, |
|
"logits/rejected_prompt": -2.8353207111358643, |
|
"logps/chosen": -2.4372153282165527, |
|
"logps/chosen_both": -2.4168477058410645, |
|
"logps/chosen_prompt": -0.7482016086578369, |
|
"logps/rejected": -6.966684818267822, |
|
"logps/rejected_both": -6.886708736419678, |
|
"logps/rejected_prompt": -0.9111725687980652, |
|
"loss": 2.1177, |
|
"nll_loss": 2.4160780906677246, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9748862981796265, |
|
"rewards/margins": 1.8117873668670654, |
|
"rewards/rejected": -2.7866737842559814, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.872, |
|
"grad_norm": 0.2060230046824354, |
|
"learning_rate": 4.028127034549229e-05, |
|
"log_odds_chosen": 2.597301483154297, |
|
"log_odds_ratio": -0.6685577630996704, |
|
"logits/chosen": -2.9436233043670654, |
|
"logits/chosen_prompt": -2.8545641899108887, |
|
"logits/rejected": -2.8262507915496826, |
|
"logits/rejected_prompt": -2.8353445529937744, |
|
"logps/chosen": -2.3411784172058105, |
|
"logps/chosen_both": -2.3227829933166504, |
|
"logps/chosen_prompt": -0.7935237884521484, |
|
"logps/rejected": -4.853774070739746, |
|
"logps/rejected_both": -4.805240154266357, |
|
"logps/rejected_prompt": -0.958962082862854, |
|
"loss": 2.139, |
|
"nll_loss": 2.3222460746765137, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9364713430404663, |
|
"rewards/margins": 1.0050380229949951, |
|
"rewards/rejected": -1.941509485244751, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.17774111122195055, |
|
"learning_rate": 4.011497787155938e-05, |
|
"log_odds_chosen": 4.53702449798584, |
|
"log_odds_ratio": -0.02008737251162529, |
|
"logits/chosen": -2.898667335510254, |
|
"logits/chosen_prompt": -2.8412561416625977, |
|
"logits/rejected": -2.799050807952881, |
|
"logits/rejected_prompt": -2.819329023361206, |
|
"logps/chosen": -2.120091438293457, |
|
"logps/chosen_both": -2.0994343757629395, |
|
"logps/chosen_prompt": -0.7898808717727661, |
|
"logps/rejected": -6.5274176597595215, |
|
"logps/rejected_both": -6.440402030944824, |
|
"logps/rejected_prompt": -1.0125057697296143, |
|
"loss": 2.0681, |
|
"nll_loss": 2.0985283851623535, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8480366468429565, |
|
"rewards/margins": 1.7629306316375732, |
|
"rewards/rejected": -2.6109673976898193, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.888, |
|
"grad_norm": 0.5492835402833951, |
|
"learning_rate": 3.9947624576437975e-05, |
|
"log_odds_chosen": 3.65099835395813, |
|
"log_odds_ratio": -0.21185067296028137, |
|
"logits/chosen": -2.8890416622161865, |
|
"logits/chosen_prompt": -2.8260998725891113, |
|
"logits/rejected": -2.8036818504333496, |
|
"logits/rejected_prompt": -2.8174471855163574, |
|
"logps/chosen": -2.0846400260925293, |
|
"logps/chosen_both": -2.065948247909546, |
|
"logps/chosen_prompt": -0.8428912162780762, |
|
"logps/rejected": -5.634668350219727, |
|
"logps/rejected_both": -5.555979251861572, |
|
"logps/rejected_prompt": -1.0157763957977295, |
|
"loss": 2.128, |
|
"nll_loss": 2.065037488937378, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8338559865951538, |
|
"rewards/margins": 1.420011281967163, |
|
"rewards/rejected": -2.2538673877716064, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 0.2391375753226414, |
|
"learning_rate": 3.977922220555855e-05, |
|
"log_odds_chosen": 4.121129989624023, |
|
"log_odds_ratio": -0.2298469990491867, |
|
"logits/chosen": -2.969383955001831, |
|
"logits/chosen_prompt": -2.841618061065674, |
|
"logits/rejected": -2.8132920265197754, |
|
"logits/rejected_prompt": -2.8176777362823486, |
|
"logps/chosen": -2.3696742057800293, |
|
"logps/chosen_both": -2.350247621536255, |
|
"logps/chosen_prompt": -0.8721768260002136, |
|
"logps/rejected": -6.348196029663086, |
|
"logps/rejected_both": -6.277990818023682, |
|
"logps/rejected_prompt": -1.0750401020050049, |
|
"loss": 2.1621, |
|
"nll_loss": 2.3494279384613037, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9478696584701538, |
|
"rewards/margins": 1.591408610343933, |
|
"rewards/rejected": -2.539278268814087, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.904, |
|
"grad_norm": 1.0869471605926033, |
|
"learning_rate": 3.960978257797931e-05, |
|
"log_odds_chosen": 3.306716203689575, |
|
"log_odds_ratio": -0.17165422439575195, |
|
"logits/chosen": -2.901864767074585, |
|
"logits/chosen_prompt": -2.8563239574432373, |
|
"logits/rejected": -2.815932273864746, |
|
"logits/rejected_prompt": -2.829672336578369, |
|
"logps/chosen": -2.3288769721984863, |
|
"logps/chosen_both": -2.307668447494507, |
|
"logps/chosen_prompt": -0.8160017132759094, |
|
"logps/rejected": -5.531130790710449, |
|
"logps/rejected_both": -5.466065406799316, |
|
"logps/rejected_prompt": -0.9807281494140625, |
|
"loss": 2.0755, |
|
"nll_loss": 2.3062796592712402, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9315508604049683, |
|
"rewards/margins": 1.280901551246643, |
|
"rewards/rejected": -2.2124524116516113, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.912, |
|
"grad_norm": 0.21229431870443033, |
|
"learning_rate": 3.943931758555669e-05, |
|
"log_odds_chosen": 4.015295505523682, |
|
"log_odds_ratio": -0.14405557513237, |
|
"logits/chosen": -2.9465222358703613, |
|
"logits/chosen_prompt": -2.830146074295044, |
|
"logits/rejected": -2.7873902320861816, |
|
"logits/rejected_prompt": -2.8030102252960205, |
|
"logps/chosen": -1.9876712560653687, |
|
"logps/chosen_both": -1.9711806774139404, |
|
"logps/chosen_prompt": -0.8330597877502441, |
|
"logps/rejected": -5.87436580657959, |
|
"logps/rejected_both": -5.79966402053833, |
|
"logps/rejected_prompt": -1.0102033615112305, |
|
"loss": 1.9833, |
|
"nll_loss": 1.9705440998077393, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7950685620307922, |
|
"rewards/margins": 1.554678201675415, |
|
"rewards/rejected": -2.3497467041015625, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.18607892338713655, |
|
"learning_rate": 3.92678391921108e-05, |
|
"log_odds_chosen": 4.167088985443115, |
|
"log_odds_ratio": -0.081887386739254, |
|
"logits/chosen": -2.9688785076141357, |
|
"logits/chosen_prompt": -2.8491876125335693, |
|
"logits/rejected": -2.8233845233917236, |
|
"logits/rejected_prompt": -2.836411237716675, |
|
"logps/chosen": -2.0486931800842285, |
|
"logps/chosen_both": -2.0284764766693115, |
|
"logps/chosen_prompt": -0.8191589117050171, |
|
"logps/rejected": -6.082810878753662, |
|
"logps/rejected_both": -5.993044853210449, |
|
"logps/rejected_prompt": -0.957076907157898, |
|
"loss": 2.086, |
|
"nll_loss": 2.0268213748931885, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8194772601127625, |
|
"rewards/margins": 1.613647222518921, |
|
"rewards/rejected": -2.433124303817749, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.928, |
|
"grad_norm": 0.21278740734057763, |
|
"learning_rate": 3.909535943258567e-05, |
|
"log_odds_chosen": 4.548261642456055, |
|
"log_odds_ratio": -0.07581990212202072, |
|
"logits/chosen": -3.092094898223877, |
|
"logits/chosen_prompt": -2.8779349327087402, |
|
"logits/rejected": -2.840526580810547, |
|
"logits/rejected_prompt": -2.8706183433532715, |
|
"logps/chosen": -1.943817138671875, |
|
"logps/chosen_both": -1.9261138439178467, |
|
"logps/chosen_prompt": -0.8740865588188171, |
|
"logps/rejected": -6.346927642822266, |
|
"logps/rejected_both": -6.251557350158691, |
|
"logps/rejected_prompt": -1.028618574142456, |
|
"loss": 2.0516, |
|
"nll_loss": 1.9256139993667603, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.77752685546875, |
|
"rewards/margins": 1.7612441778182983, |
|
"rewards/rejected": -2.538771152496338, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.936, |
|
"grad_norm": 2.074191616812015, |
|
"learning_rate": 3.8921890412204705e-05, |
|
"log_odds_chosen": 3.9714667797088623, |
|
"log_odds_ratio": -0.10122326761484146, |
|
"logits/chosen": -2.9742226600646973, |
|
"logits/chosen_prompt": -2.8603179454803467, |
|
"logits/rejected": -2.8532581329345703, |
|
"logits/rejected_prompt": -2.833484172821045, |
|
"logps/chosen": -2.3508994579315186, |
|
"logps/chosen_both": -2.333052158355713, |
|
"logps/chosen_prompt": -0.8015215992927551, |
|
"logps/rejected": -6.174811363220215, |
|
"logps/rejected_both": -6.111483573913574, |
|
"logps/rejected_prompt": -1.0183693170547485, |
|
"loss": 2.2824, |
|
"nll_loss": 2.3322701454162598, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9403597712516785, |
|
"rewards/margins": 1.5295648574829102, |
|
"rewards/rejected": -2.4699246883392334, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.944, |
|
"grad_norm": 0.2875489978173768, |
|
"learning_rate": 3.8747444305621e-05, |
|
"log_odds_chosen": 4.248479843139648, |
|
"log_odds_ratio": -0.08145709335803986, |
|
"logits/chosen": -2.950727939605713, |
|
"logits/chosen_prompt": -2.822025775909424, |
|
"logits/rejected": -2.663987398147583, |
|
"logits/rejected_prompt": -2.8115882873535156, |
|
"logps/chosen": -1.9704688787460327, |
|
"logps/chosen_both": -1.9537798166275024, |
|
"logps/chosen_prompt": -0.8284621238708496, |
|
"logps/rejected": -6.081311225891113, |
|
"logps/rejected_both": -6.007387161254883, |
|
"logps/rejected_prompt": -1.0018432140350342, |
|
"loss": 1.9987, |
|
"nll_loss": 1.9535901546478271, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7881874442100525, |
|
"rewards/margins": 1.6443370580673218, |
|
"rewards/rejected": -2.4325246810913086, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.952, |
|
"grad_norm": 6.520768567954707, |
|
"learning_rate": 3.8572033356062943e-05, |
|
"log_odds_chosen": 3.6630382537841797, |
|
"log_odds_ratio": -0.1266271471977234, |
|
"logits/chosen": -2.9928297996520996, |
|
"logits/chosen_prompt": -2.8252012729644775, |
|
"logits/rejected": -2.722259521484375, |
|
"logits/rejected_prompt": -2.7941107749938965, |
|
"logps/chosen": -2.0680882930755615, |
|
"logps/chosen_both": -2.0539040565490723, |
|
"logps/chosen_prompt": -0.7603567838668823, |
|
"logps/rejected": -5.370635032653809, |
|
"logps/rejected_both": -5.302577018737793, |
|
"logps/rejected_prompt": -1.007256031036377, |
|
"loss": 2.1861, |
|
"nll_loss": 2.052879810333252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8272353410720825, |
|
"rewards/margins": 1.3210185766220093, |
|
"rewards/rejected": -2.148253917694092, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 2.68559023802143, |
|
"learning_rate": 3.8395669874474915e-05, |
|
"log_odds_chosen": 4.359891414642334, |
|
"log_odds_ratio": -0.015468957833945751, |
|
"logits/chosen": -2.91310453414917, |
|
"logits/chosen_prompt": -2.7794852256774902, |
|
"logits/rejected": -2.6371960639953613, |
|
"logits/rejected_prompt": -2.7625763416290283, |
|
"logps/chosen": -1.8540757894515991, |
|
"logps/chosen_both": -1.839600920677185, |
|
"logps/chosen_prompt": -0.8248388171195984, |
|
"logps/rejected": -6.038485527038574, |
|
"logps/rejected_both": -5.962553977966309, |
|
"logps/rejected_prompt": -0.9856597185134888, |
|
"loss": 2.0673, |
|
"nll_loss": 1.8394546508789062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7416303753852844, |
|
"rewards/margins": 1.6737639904022217, |
|
"rewards/rejected": -2.4153940677642822, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.968, |
|
"grad_norm": 0.185073881578095, |
|
"learning_rate": 3.821836623865329e-05, |
|
"log_odds_chosen": 4.161174297332764, |
|
"log_odds_ratio": -0.07971666753292084, |
|
"logits/chosen": -2.903371572494507, |
|
"logits/chosen_prompt": -2.778414487838745, |
|
"logits/rejected": -2.5587830543518066, |
|
"logits/rejected_prompt": -2.762293815612793, |
|
"logps/chosen": -2.1283013820648193, |
|
"logps/chosen_both": -2.1046059131622314, |
|
"logps/chosen_prompt": -0.7429525852203369, |
|
"logps/rejected": -6.169132232666016, |
|
"logps/rejected_both": -6.081439018249512, |
|
"logps/rejected_prompt": -0.9049463272094727, |
|
"loss": 2.2118, |
|
"nll_loss": 2.104139566421509, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8513206243515015, |
|
"rewards/margins": 1.6163326501846313, |
|
"rewards/rejected": -2.467653274536133, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.976, |
|
"grad_norm": 0.19264797772361533, |
|
"learning_rate": 3.80401348923777e-05, |
|
"log_odds_chosen": 4.120739936828613, |
|
"log_odds_ratio": -0.14354461431503296, |
|
"logits/chosen": -2.9424567222595215, |
|
"logits/chosen_prompt": -2.7921371459960938, |
|
"logits/rejected": -2.5477294921875, |
|
"logits/rejected_prompt": -2.7542147636413574, |
|
"logps/chosen": -1.913551688194275, |
|
"logps/chosen_both": -1.8978935480117798, |
|
"logps/chosen_prompt": -0.8339295387268066, |
|
"logps/rejected": -5.9061384201049805, |
|
"logps/rejected_both": -5.837408542633057, |
|
"logps/rejected_prompt": -0.9619489908218384, |
|
"loss": 2.0995, |
|
"nll_loss": 1.8977426290512085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7654207348823547, |
|
"rewards/margins": 1.5970344543457031, |
|
"rewards/rejected": -2.362455129623413, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.984, |
|
"grad_norm": 9.292901066306287, |
|
"learning_rate": 3.786098834453766e-05, |
|
"log_odds_chosen": 3.505579710006714, |
|
"log_odds_ratio": -0.15101362764835358, |
|
"logits/chosen": -2.910395622253418, |
|
"logits/chosen_prompt": -2.8129782676696777, |
|
"logits/rejected": -2.574031352996826, |
|
"logits/rejected_prompt": -2.782696008682251, |
|
"logps/chosen": -2.1372461318969727, |
|
"logps/chosen_both": -2.112764835357666, |
|
"logps/chosen_prompt": -0.8219666481018066, |
|
"logps/rejected": -5.543887138366699, |
|
"logps/rejected_both": -5.4572343826293945, |
|
"logps/rejected_prompt": -0.9813167452812195, |
|
"loss": 2.0645, |
|
"nll_loss": 2.111912488937378, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8548984527587891, |
|
"rewards/margins": 1.3626563549041748, |
|
"rewards/rejected": -2.217555046081543, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"grad_norm": 1.6964209385464728, |
|
"learning_rate": 3.7680939168254733e-05, |
|
"log_odds_chosen": 3.888018846511841, |
|
"log_odds_ratio": -0.1449870765209198, |
|
"logits/chosen": -2.9042837619781494, |
|
"logits/chosen_prompt": -2.823965549468994, |
|
"logits/rejected": -2.4834845066070557, |
|
"logits/rejected_prompt": -2.7938156127929688, |
|
"logps/chosen": -2.0088438987731934, |
|
"logps/chosen_both": -1.9936256408691406, |
|
"logps/chosen_prompt": -0.7543269395828247, |
|
"logps/rejected": -5.783638000488281, |
|
"logps/rejected_both": -5.7258687019348145, |
|
"logps/rejected_prompt": -0.9668887257575989, |
|
"loss": 2.038, |
|
"nll_loss": 1.992997169494629, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8035375475883484, |
|
"rewards/margins": 1.5099177360534668, |
|
"rewards/rejected": -2.31345534324646, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.478887419876043, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"log_odds_chosen": 5.275210380554199, |
|
"log_odds_ratio": -0.006285688374191523, |
|
"logits/chosen": -2.9461379051208496, |
|
"logits/chosen_prompt": -2.7684402465820312, |
|
"logits/rejected": -2.312152147293091, |
|
"logits/rejected_prompt": -2.7450311183929443, |
|
"logps/chosen": -1.8539674282073975, |
|
"logps/chosen_both": -1.839685082435608, |
|
"logps/chosen_prompt": -0.8559527397155762, |
|
"logps/rejected": -6.958900451660156, |
|
"logps/rejected_both": -6.868790626525879, |
|
"logps/rejected_prompt": -1.0536139011383057, |
|
"loss": 2.2404, |
|
"nll_loss": 1.8390467166900635, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7415870428085327, |
|
"rewards/margins": 2.04197359085083, |
|
"rewards/rejected": -2.783560276031494, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.008, |
|
"grad_norm": 0.17878604739151382, |
|
"learning_rate": 3.731818353870729e-05, |
|
"log_odds_chosen": 4.191466331481934, |
|
"log_odds_ratio": -0.09246650338172913, |
|
"logits/chosen": -2.957552433013916, |
|
"logits/chosen_prompt": -2.771613359451294, |
|
"logits/rejected": -2.3375356197357178, |
|
"logits/rejected_prompt": -2.7522428035736084, |
|
"logps/chosen": -1.989243745803833, |
|
"logps/chosen_both": -1.9734690189361572, |
|
"logps/chosen_prompt": -0.8279644250869751, |
|
"logps/rejected": -6.043200969696045, |
|
"logps/rejected_both": -5.973423480987549, |
|
"logps/rejected_prompt": -1.0317699909210205, |
|
"loss": 2.0389, |
|
"nll_loss": 1.9726651906967163, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7956975102424622, |
|
"rewards/margins": 1.6215832233428955, |
|
"rewards/rejected": -2.417280673980713, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.016, |
|
"grad_norm": 23.252626998417625, |
|
"learning_rate": 3.713550254488185e-05, |
|
"log_odds_chosen": 3.7449231147766113, |
|
"log_odds_ratio": -0.16642269492149353, |
|
"logits/chosen": -2.8947479724884033, |
|
"logits/chosen_prompt": -2.7788119316101074, |
|
"logits/rejected": -2.3416316509246826, |
|
"logits/rejected_prompt": -2.760896921157837, |
|
"logps/chosen": -2.020059585571289, |
|
"logps/chosen_both": -2.0054023265838623, |
|
"logps/chosen_prompt": -0.8935413360595703, |
|
"logps/rejected": -5.6518659591674805, |
|
"logps/rejected_both": -5.590303897857666, |
|
"logps/rejected_prompt": -1.0056589841842651, |
|
"loss": 2.0643, |
|
"nll_loss": 2.0046825408935547, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8080238103866577, |
|
"rewards/margins": 1.452722430229187, |
|
"rewards/rejected": -2.2607462406158447, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.024, |
|
"grad_norm": 0.1852421213956056, |
|
"learning_rate": 3.695196983970481e-05, |
|
"log_odds_chosen": 5.502694129943848, |
|
"log_odds_ratio": -0.07146742194890976, |
|
"logits/chosen": -2.9081971645355225, |
|
"logits/chosen_prompt": -2.745790719985962, |
|
"logits/rejected": -2.0626957416534424, |
|
"logits/rejected_prompt": -2.7173855304718018, |
|
"logps/chosen": -1.7873703241348267, |
|
"logps/chosen_both": -1.7739589214324951, |
|
"logps/chosen_prompt": -0.8900352716445923, |
|
"logps/rejected": -7.1119537353515625, |
|
"logps/rejected_both": -7.017317295074463, |
|
"logps/rejected_prompt": -1.0950191020965576, |
|
"loss": 2.0059, |
|
"nll_loss": 1.7733700275421143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7149480581283569, |
|
"rewards/margins": 2.129833698272705, |
|
"rewards/rejected": -2.8447818756103516, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.032, |
|
"grad_norm": 0.1901267311863244, |
|
"learning_rate": 3.6767598304133324e-05, |
|
"log_odds_chosen": 4.644869804382324, |
|
"log_odds_ratio": -0.14166082441806793, |
|
"logits/chosen": -2.9974873065948486, |
|
"logits/chosen_prompt": -2.7224061489105225, |
|
"logits/rejected": -2.2138378620147705, |
|
"logits/rejected_prompt": -2.6832873821258545, |
|
"logps/chosen": -1.9028959274291992, |
|
"logps/chosen_both": -1.8842157125473022, |
|
"logps/chosen_prompt": -0.8141298294067383, |
|
"logps/rejected": -6.421015739440918, |
|
"logps/rejected_both": -6.323419094085693, |
|
"logps/rejected_prompt": -0.979651153087616, |
|
"loss": 1.9806, |
|
"nll_loss": 1.8838021755218506, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7611583471298218, |
|
"rewards/margins": 1.8072481155395508, |
|
"rewards/rejected": -2.568406581878662, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.1720032056568101, |
|
"learning_rate": 3.6582400877996546e-05, |
|
"log_odds_chosen": 5.198369026184082, |
|
"log_odds_ratio": -0.07235782593488693, |
|
"logits/chosen": -2.8921890258789062, |
|
"logits/chosen_prompt": -2.7482800483703613, |
|
"logits/rejected": -1.9527368545532227, |
|
"logits/rejected_prompt": -2.7276439666748047, |
|
"logps/chosen": -2.0934653282165527, |
|
"logps/chosen_both": -2.076221227645874, |
|
"logps/chosen_prompt": -0.8200351595878601, |
|
"logps/rejected": -7.170855522155762, |
|
"logps/rejected_both": -7.079026699066162, |
|
"logps/rejected_prompt": -0.9832828640937805, |
|
"loss": 2.0527, |
|
"nll_loss": 2.075456380844116, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8373861312866211, |
|
"rewards/margins": 2.0309560298919678, |
|
"rewards/rejected": -2.868342161178589, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.048, |
|
"grad_norm": 0.18338089227039325, |
|
"learning_rate": 3.639639055908751e-05, |
|
"log_odds_chosen": 5.48695707321167, |
|
"log_odds_ratio": -0.07169006019830704, |
|
"logits/chosen": -2.874192953109741, |
|
"logits/chosen_prompt": -2.733611583709717, |
|
"logits/rejected": -1.8326069116592407, |
|
"logits/rejected_prompt": -2.6982951164245605, |
|
"logps/chosen": -2.0102884769439697, |
|
"logps/chosen_both": -1.9914735555648804, |
|
"logps/chosen_prompt": -0.8337292671203613, |
|
"logps/rejected": -7.363123416900635, |
|
"logps/rejected_both": -7.263747215270996, |
|
"logps/rejected_prompt": -0.9874321818351746, |
|
"loss": 1.9824, |
|
"nll_loss": 1.9909473657608032, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8041152954101562, |
|
"rewards/margins": 2.141134023666382, |
|
"rewards/rejected": -2.945249319076538, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.056, |
|
"grad_norm": 0.1837356662895363, |
|
"learning_rate": 3.6209580402250815e-05, |
|
"log_odds_chosen": 5.6873369216918945, |
|
"log_odds_ratio": -0.07120365649461746, |
|
"logits/chosen": -2.9526381492614746, |
|
"logits/chosen_prompt": -2.7081189155578613, |
|
"logits/rejected": -1.8793054819107056, |
|
"logits/rejected_prompt": -2.6829447746276855, |
|
"logps/chosen": -1.9104582071304321, |
|
"logps/chosen_both": -1.8940789699554443, |
|
"logps/chosen_prompt": -0.8755657076835632, |
|
"logps/rejected": -7.447749137878418, |
|
"logps/rejected_both": -7.334907531738281, |
|
"logps/rejected_prompt": -1.0553802251815796, |
|
"loss": 2.1442, |
|
"nll_loss": 1.8928571939468384, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7641832828521729, |
|
"rewards/margins": 2.214916706085205, |
|
"rewards/rejected": -2.979099750518799, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.064, |
|
"grad_norm": 0.3951461995742214, |
|
"learning_rate": 3.602198351846647e-05, |
|
"log_odds_chosen": 4.024718761444092, |
|
"log_odds_ratio": -0.5831412672996521, |
|
"logits/chosen": -2.981672525405884, |
|
"logits/chosen_prompt": -2.7551183700561523, |
|
"logits/rejected": -2.1212754249572754, |
|
"logits/rejected_prompt": -2.7351596355438232, |
|
"logps/chosen": -2.4395077228546143, |
|
"logps/chosen_both": -2.417250871658325, |
|
"logps/chosen_prompt": -0.8564618825912476, |
|
"logps/rejected": -6.365363597869873, |
|
"logps/rejected_both": -6.2751054763793945, |
|
"logps/rejected_prompt": -1.031884789466858, |
|
"loss": 2.2375, |
|
"nll_loss": 2.4155256748199463, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9758030772209167, |
|
"rewards/margins": 1.5703424215316772, |
|
"rewards/rejected": -2.5461456775665283, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.072, |
|
"grad_norm": 0.18983825409437058, |
|
"learning_rate": 3.5833613073929684e-05, |
|
"log_odds_chosen": 4.155622482299805, |
|
"log_odds_ratio": -0.14320290088653564, |
|
"logits/chosen": -3.005096673965454, |
|
"logits/chosen_prompt": -2.8319993019104004, |
|
"logits/rejected": -2.3421382904052734, |
|
"logits/rejected_prompt": -2.8086118698120117, |
|
"logps/chosen": -1.9423980712890625, |
|
"logps/chosen_both": -1.9247316122055054, |
|
"logps/chosen_prompt": -0.7214570045471191, |
|
"logps/rejected": -5.971634864807129, |
|
"logps/rejected_both": -5.893637657165527, |
|
"logps/rejected_prompt": -0.9021228551864624, |
|
"loss": 2.092, |
|
"nll_loss": 1.9240925312042236, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7769593000411987, |
|
"rewards/margins": 1.6116949319839478, |
|
"rewards/rejected": -2.3886542320251465, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.6083000414245734, |
|
"learning_rate": 3.564448228912682e-05, |
|
"log_odds_chosen": 4.163081169128418, |
|
"log_odds_ratio": -0.10094372928142548, |
|
"logits/chosen": -2.963536262512207, |
|
"logits/chosen_prompt": -2.846693515777588, |
|
"logits/rejected": -2.542693614959717, |
|
"logits/rejected_prompt": -2.819491386413574, |
|
"logps/chosen": -2.337949275970459, |
|
"logps/chosen_both": -2.3158886432647705, |
|
"logps/chosen_prompt": -0.845288872718811, |
|
"logps/rejected": -6.368934154510498, |
|
"logps/rejected_both": -6.2805986404418945, |
|
"logps/rejected_prompt": -1.0301436185836792, |
|
"loss": 2.0382, |
|
"nll_loss": 2.3151814937591553, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9351798295974731, |
|
"rewards/margins": 1.6123939752578735, |
|
"rewards/rejected": -2.5475735664367676, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.088, |
|
"grad_norm": 0.2154750785789702, |
|
"learning_rate": 3.545460443790753e-05, |
|
"log_odds_chosen": 5.453991889953613, |
|
"log_odds_ratio": -0.004712260328233242, |
|
"logits/chosen": -2.908536434173584, |
|
"logits/chosen_prompt": -2.868119716644287, |
|
"logits/rejected": -2.40228533744812, |
|
"logits/rejected_prompt": -2.843205451965332, |
|
"logps/chosen": -2.089245319366455, |
|
"logps/chosen_both": -2.072594165802002, |
|
"logps/chosen_prompt": -0.8769745826721191, |
|
"logps/rejected": -7.407778739929199, |
|
"logps/rejected_both": -7.316309928894043, |
|
"logps/rejected_prompt": -0.9720737338066101, |
|
"loss": 2.0088, |
|
"nll_loss": 2.071500778198242, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8356983065605164, |
|
"rewards/margins": 2.127413749694824, |
|
"rewards/rejected": -2.9631123542785645, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.096, |
|
"grad_norm": 0.1857265942387655, |
|
"learning_rate": 3.52639928465532e-05, |
|
"log_odds_chosen": 4.4336113929748535, |
|
"log_odds_ratio": -0.14170871675014496, |
|
"logits/chosen": -3.0002169609069824, |
|
"logits/chosen_prompt": -2.8658928871154785, |
|
"logits/rejected": -2.558640956878662, |
|
"logits/rejected_prompt": -2.843383550643921, |
|
"logps/chosen": -1.8998088836669922, |
|
"logps/chosen_both": -1.8837999105453491, |
|
"logps/chosen_prompt": -0.8331824541091919, |
|
"logps/rejected": -6.191910266876221, |
|
"logps/rejected_both": -6.1067986488342285, |
|
"logps/rejected_prompt": -0.9438120126724243, |
|
"loss": 2.0577, |
|
"nll_loss": 1.883371353149414, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7599235773086548, |
|
"rewards/margins": 1.7168405055999756, |
|
"rewards/rejected": -2.47676420211792, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.104, |
|
"grad_norm": 0.22400309241356314, |
|
"learning_rate": 3.507266089284157e-05, |
|
"log_odds_chosen": 5.497137069702148, |
|
"log_odds_ratio": -0.004467605613172054, |
|
"logits/chosen": -2.9908201694488525, |
|
"logits/chosen_prompt": -2.821722984313965, |
|
"logits/rejected": -2.416836977005005, |
|
"logits/rejected_prompt": -2.796220541000366, |
|
"logps/chosen": -1.8564481735229492, |
|
"logps/chosen_both": -1.8395103216171265, |
|
"logps/chosen_prompt": -0.8639839291572571, |
|
"logps/rejected": -7.180043697357178, |
|
"logps/rejected_both": -7.0766448974609375, |
|
"logps/rejected_prompt": -0.9959409832954407, |
|
"loss": 2.0609, |
|
"nll_loss": 1.8391234874725342, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7425792813301086, |
|
"rewards/margins": 2.1294379234313965, |
|
"rewards/rejected": -2.8720173835754395, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.112, |
|
"grad_norm": 0.19384408681406856, |
|
"learning_rate": 3.488062200510791e-05, |
|
"log_odds_chosen": 5.338822841644287, |
|
"log_odds_ratio": -0.00644069816917181, |
|
"logits/chosen": -2.959766387939453, |
|
"logits/chosen_prompt": -2.7905402183532715, |
|
"logits/rejected": -2.3757593631744385, |
|
"logits/rejected_prompt": -2.763526678085327, |
|
"logps/chosen": -1.9314730167388916, |
|
"logps/chosen_both": -1.9157222509384155, |
|
"logps/chosen_prompt": -0.8981779217720032, |
|
"logps/rejected": -7.111077785491943, |
|
"logps/rejected_both": -7.018582344055176, |
|
"logps/rejected_prompt": -0.9950772523880005, |
|
"loss": 1.9482, |
|
"nll_loss": 1.9154551029205322, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7725892066955566, |
|
"rewards/margins": 2.0718419551849365, |
|
"rewards/rejected": -2.8444314002990723, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 60.752749653266065, |
|
"learning_rate": 3.4687889661302576e-05, |
|
"log_odds_chosen": 4.680363655090332, |
|
"log_odds_ratio": -0.03717372566461563, |
|
"logits/chosen": -2.920323610305786, |
|
"logits/chosen_prompt": -2.8357200622558594, |
|
"logits/rejected": -2.4031760692596436, |
|
"logits/rejected_prompt": -2.802396535873413, |
|
"logps/chosen": -2.005197286605835, |
|
"logps/chosen_both": -1.9863475561141968, |
|
"logps/chosen_prompt": -0.7522888779640198, |
|
"logps/rejected": -6.545997619628906, |
|
"logps/rejected_both": -6.455955505371094, |
|
"logps/rejected_prompt": -0.965649425983429, |
|
"loss": 2.0466, |
|
"nll_loss": 1.985174536705017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.802078902721405, |
|
"rewards/margins": 1.8163198232650757, |
|
"rewards/rejected": -2.618398904800415, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.1280000000000001, |
|
"grad_norm": 0.5002021593337239, |
|
"learning_rate": 3.4494477388045035e-05, |
|
"log_odds_chosen": 4.483678340911865, |
|
"log_odds_ratio": -0.028768246993422508, |
|
"logits/chosen": -2.92014741897583, |
|
"logits/chosen_prompt": -2.8309707641601562, |
|
"logits/rejected": -2.486912250518799, |
|
"logits/rejected_prompt": -2.804452419281006, |
|
"logps/chosen": -2.067333459854126, |
|
"logps/chosen_both": -2.0484328269958496, |
|
"logps/chosen_prompt": -0.7646309733390808, |
|
"logps/rejected": -6.416478157043457, |
|
"logps/rejected_both": -6.335555076599121, |
|
"logps/rejected_prompt": -0.9275982975959778, |
|
"loss": 2.062, |
|
"nll_loss": 2.047743558883667, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8269332647323608, |
|
"rewards/margins": 1.7396576404571533, |
|
"rewards/rejected": -2.5665910243988037, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.1360000000000001, |
|
"grad_norm": 0.16513157762808564, |
|
"learning_rate": 3.430039875967454e-05, |
|
"log_odds_chosen": 4.668246746063232, |
|
"log_odds_ratio": -0.07646802067756653, |
|
"logits/chosen": -2.9350738525390625, |
|
"logits/chosen_prompt": -2.8208534717559814, |
|
"logits/rejected": -2.421509265899658, |
|
"logits/rejected_prompt": -2.783437490463257, |
|
"logps/chosen": -2.0800347328186035, |
|
"logps/chosen_both": -2.0644993782043457, |
|
"logps/chosen_prompt": -0.8468448519706726, |
|
"logps/rejected": -6.625657558441162, |
|
"logps/rejected_both": -6.545504570007324, |
|
"logps/rejected_prompt": -1.04305100440979, |
|
"loss": 2.0206, |
|
"nll_loss": 2.0629351139068604, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8320137858390808, |
|
"rewards/margins": 1.8182493448257446, |
|
"rewards/rejected": -2.6502633094787598, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.144, |
|
"grad_norm": 2.2295314469384206, |
|
"learning_rate": 3.410566739729746e-05, |
|
"log_odds_chosen": 5.851050853729248, |
|
"log_odds_ratio": -0.004526123404502869, |
|
"logits/chosen": -2.940370798110962, |
|
"logits/chosen_prompt": -2.7820496559143066, |
|
"logits/rejected": -2.2556514739990234, |
|
"logits/rejected_prompt": -2.7672178745269775, |
|
"logps/chosen": -1.8526496887207031, |
|
"logps/chosen_both": -1.8396713733673096, |
|
"logps/chosen_prompt": -0.8455888628959656, |
|
"logps/rejected": -7.520164489746094, |
|
"logps/rejected_both": -7.432145595550537, |
|
"logps/rejected_prompt": -1.002396821975708, |
|
"loss": 2.1827, |
|
"nll_loss": 1.8387296199798584, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7410598993301392, |
|
"rewards/margins": 2.267005681991577, |
|
"rewards/rejected": -3.008065700531006, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.152, |
|
"grad_norm": 4.408515203042964, |
|
"learning_rate": 3.3910296967831266e-05, |
|
"log_odds_chosen": 4.456727027893066, |
|
"log_odds_ratio": -0.14154654741287231, |
|
"logits/chosen": -2.9346349239349365, |
|
"logits/chosen_prompt": -2.7853639125823975, |
|
"logits/rejected": -2.2783145904541016, |
|
"logits/rejected_prompt": -2.7635715007781982, |
|
"logps/chosen": -1.9494521617889404, |
|
"logps/chosen_both": -1.9318408966064453, |
|
"logps/chosen_prompt": -0.9306742548942566, |
|
"logps/rejected": -6.29015588760376, |
|
"logps/rejected_both": -6.198000907897949, |
|
"logps/rejected_prompt": -1.0760185718536377, |
|
"loss": 2.1572, |
|
"nll_loss": 1.931610107421875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7797808647155762, |
|
"rewards/margins": 1.7362816333770752, |
|
"rewards/rejected": -2.5160624980926514, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.3551432285571037, |
|
"learning_rate": 3.3714301183045385e-05, |
|
"log_odds_chosen": 5.155561447143555, |
|
"log_odds_ratio": -0.07224146276712418, |
|
"logits/chosen": -2.9873647689819336, |
|
"logits/chosen_prompt": -2.7700507640838623, |
|
"logits/rejected": -2.2287240028381348, |
|
"logits/rejected_prompt": -2.7513465881347656, |
|
"logps/chosen": -1.9037456512451172, |
|
"logps/chosen_both": -1.8827041387557983, |
|
"logps/chosen_prompt": -0.8036454319953918, |
|
"logps/rejected": -6.904747009277344, |
|
"logps/rejected_both": -6.79779052734375, |
|
"logps/rejected_prompt": -0.9606531858444214, |
|
"loss": 2.0135, |
|
"nll_loss": 1.8827041387557983, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7614982724189758, |
|
"rewards/margins": 2.0004005432128906, |
|
"rewards/rejected": -2.7618985176086426, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.168, |
|
"grad_norm": 0.23892058786604192, |
|
"learning_rate": 3.35176937985988e-05, |
|
"log_odds_chosen": 4.485732078552246, |
|
"log_odds_ratio": -0.14207962155342102, |
|
"logits/chosen": -2.945270538330078, |
|
"logits/chosen_prompt": -2.786912441253662, |
|
"logits/rejected": -2.270350217819214, |
|
"logits/rejected_prompt": -2.752725124359131, |
|
"logps/chosen": -2.024524211883545, |
|
"logps/chosen_both": -2.0046331882476807, |
|
"logps/chosen_prompt": -0.774206817150116, |
|
"logps/rejected": -6.382667064666748, |
|
"logps/rejected_both": -6.294032096862793, |
|
"logps/rejected_prompt": -0.9491628408432007, |
|
"loss": 2.0727, |
|
"nll_loss": 2.003938674926758, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.809809684753418, |
|
"rewards/margins": 1.7432572841644287, |
|
"rewards/rejected": -2.5530669689178467, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.176, |
|
"grad_norm": 0.2032800647611215, |
|
"learning_rate": 3.332048861307467e-05, |
|
"log_odds_chosen": 4.051968097686768, |
|
"log_odds_ratio": -0.14674244821071625, |
|
"logits/chosen": -2.99367094039917, |
|
"logits/chosen_prompt": -2.802661657333374, |
|
"logits/rejected": -2.338299512863159, |
|
"logits/rejected_prompt": -2.7645983695983887, |
|
"logps/chosen": -1.9771573543548584, |
|
"logps/chosen_both": -1.9634653329849243, |
|
"logps/chosen_prompt": -0.8673089742660522, |
|
"logps/rejected": -5.909640789031982, |
|
"logps/rejected_both": -5.843233585357666, |
|
"logps/rejected_prompt": -0.918237030506134, |
|
"loss": 2.0442, |
|
"nll_loss": 1.9626314640045166, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.790863037109375, |
|
"rewards/margins": 1.572993516921997, |
|
"rewards/rejected": -2.363856554031372, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.184, |
|
"grad_norm": 0.6791006786877852, |
|
"learning_rate": 3.312269946701191e-05, |
|
"log_odds_chosen": 5.11738395690918, |
|
"log_odds_ratio": -0.08993680030107498, |
|
"logits/chosen": -2.986436605453491, |
|
"logits/chosen_prompt": -2.733582019805908, |
|
"logits/rejected": -2.186984062194824, |
|
"logits/rejected_prompt": -2.714433193206787, |
|
"logps/chosen": -1.95094895362854, |
|
"logps/chosen_both": -1.9355719089508057, |
|
"logps/chosen_prompt": -0.9025853276252747, |
|
"logps/rejected": -6.931356906890869, |
|
"logps/rejected_both": -6.842989444732666, |
|
"logps/rejected_prompt": -0.9505090713500977, |
|
"loss": 2.0225, |
|
"nll_loss": 1.935101866722107, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.780379593372345, |
|
"rewards/margins": 1.992163062095642, |
|
"rewards/rejected": -2.7725424766540527, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.192, |
|
"grad_norm": 12.7002740206941, |
|
"learning_rate": 3.29243402419338e-05, |
|
"log_odds_chosen": 4.771432399749756, |
|
"log_odds_ratio": -0.2453218698501587, |
|
"logits/chosen": -2.9012749195098877, |
|
"logits/chosen_prompt": -2.791215419769287, |
|
"logits/rejected": -2.076328754425049, |
|
"logits/rejected_prompt": -2.7599706649780273, |
|
"logps/chosen": -2.869783878326416, |
|
"logps/chosen_both": -2.8310511112213135, |
|
"logps/chosen_prompt": -0.8819573521614075, |
|
"logps/rejected": -7.530523777008057, |
|
"logps/rejected_both": -7.408067226409912, |
|
"logps/rejected_prompt": -1.0235049724578857, |
|
"loss": 2.0981, |
|
"nll_loss": 2.8310508728027344, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.1479135751724243, |
|
"rewards/margins": 1.8642956018447876, |
|
"rewards/rejected": -3.012209415435791, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.19981467699086264, |
|
"learning_rate": 3.272542485937369e-05, |
|
"log_odds_chosen": 5.507603645324707, |
|
"log_odds_ratio": -0.020156098529696465, |
|
"logits/chosen": -2.9788875579833984, |
|
"logits/chosen_prompt": -2.7711877822875977, |
|
"logits/rejected": -2.0624115467071533, |
|
"logits/rejected_prompt": -2.744807720184326, |
|
"logps/chosen": -2.279694080352783, |
|
"logps/chosen_both": -2.2537825107574463, |
|
"logps/chosen_prompt": -0.8054102659225464, |
|
"logps/rejected": -7.658332824707031, |
|
"logps/rejected_both": -7.547041416168213, |
|
"logps/rejected_prompt": -1.0083348751068115, |
|
"loss": 2.1891, |
|
"nll_loss": 2.2532057762145996, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9118776321411133, |
|
"rewards/margins": 2.1514554023742676, |
|
"rewards/rejected": -3.063333034515381, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.208, |
|
"grad_norm": 0.2071781414340563, |
|
"learning_rate": 3.2525967279898015e-05, |
|
"log_odds_chosen": 3.779675006866455, |
|
"log_odds_ratio": -0.2771868109703064, |
|
"logits/chosen": -2.9284844398498535, |
|
"logits/chosen_prompt": -2.73115873336792, |
|
"logits/rejected": -2.319711446762085, |
|
"logits/rejected_prompt": -2.7305550575256348, |
|
"logps/chosen": -2.069701910018921, |
|
"logps/chosen_both": -2.0511586666107178, |
|
"logps/chosen_prompt": -0.8410334587097168, |
|
"logps/rejected": -5.765010356903076, |
|
"logps/rejected_both": -5.696343421936035, |
|
"logps/rejected_prompt": -1.032503366470337, |
|
"loss": 2.0199, |
|
"nll_loss": 2.050447940826416, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8278807401657104, |
|
"rewards/margins": 1.4781235456466675, |
|
"rewards/rejected": -2.306004762649536, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.216, |
|
"grad_norm": 0.18615530258539528, |
|
"learning_rate": 3.2325981502126433e-05, |
|
"log_odds_chosen": 4.861352443695068, |
|
"log_odds_ratio": -0.14049410820007324, |
|
"logits/chosen": -2.913702964782715, |
|
"logits/chosen_prompt": -2.647313117980957, |
|
"logits/rejected": -2.130164623260498, |
|
"logits/rejected_prompt": -2.638327121734619, |
|
"logps/chosen": -1.9652678966522217, |
|
"logps/chosen_both": -1.948897361755371, |
|
"logps/chosen_prompt": -0.8634968996047974, |
|
"logps/rejected": -6.705462455749512, |
|
"logps/rejected_both": -6.624319553375244, |
|
"logps/rejected_prompt": -1.035592794418335, |
|
"loss": 2.042, |
|
"nll_loss": 1.948264718055725, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7861071825027466, |
|
"rewards/margins": 1.896078109741211, |
|
"rewards/rejected": -2.682184934616089, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.224, |
|
"grad_norm": 0.3295494652465448, |
|
"learning_rate": 3.21254815617494e-05, |
|
"log_odds_chosen": 5.780041694641113, |
|
"log_odds_ratio": -0.004303447902202606, |
|
"logits/chosen": -2.9733996391296387, |
|
"logits/chosen_prompt": -2.7753407955169678, |
|
"logits/rejected": -2.149972438812256, |
|
"logits/rejected_prompt": -2.7639622688293457, |
|
"logps/chosen": -1.992742896080017, |
|
"logps/chosen_both": -1.975515604019165, |
|
"logps/chosen_prompt": -0.8223434686660767, |
|
"logps/rejected": -7.623780727386475, |
|
"logps/rejected_both": -7.520285606384277, |
|
"logps/rejected_prompt": -0.9390355348587036, |
|
"loss": 2.0442, |
|
"nll_loss": 1.974700689315796, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7970971465110779, |
|
"rewards/margins": 2.252415180206299, |
|
"rewards/rejected": -3.0495123863220215, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.232, |
|
"grad_norm": 0.19966280929549698, |
|
"learning_rate": 3.192448153054306e-05, |
|
"log_odds_chosen": 3.838728427886963, |
|
"log_odds_ratio": -0.14647504687309265, |
|
"logits/chosen": -2.9667465686798096, |
|
"logits/chosen_prompt": -2.8230855464935303, |
|
"logits/rejected": -2.5847840309143066, |
|
"logits/rejected_prompt": -2.822601795196533, |
|
"logps/chosen": -2.122664451599121, |
|
"logps/chosen_both": -2.0995185375213623, |
|
"logps/chosen_prompt": -0.9422351717948914, |
|
"logps/rejected": -5.859042644500732, |
|
"logps/rejected_both": -5.767674446105957, |
|
"logps/rejected_prompt": -1.1390842199325562, |
|
"loss": 2.095, |
|
"nll_loss": 2.0988729000091553, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8490656614303589, |
|
"rewards/margins": 1.494551420211792, |
|
"rewards/rejected": -2.3436172008514404, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.21556247694383007, |
|
"learning_rate": 3.172299551538164e-05, |
|
"log_odds_chosen": 4.561056137084961, |
|
"log_odds_ratio": -0.07612424343824387, |
|
"logits/chosen": -2.8919880390167236, |
|
"logits/chosen_prompt": -2.808797836303711, |
|
"logits/rejected": -2.5644102096557617, |
|
"logits/rejected_prompt": -2.802969455718994, |
|
"logps/chosen": -1.9356054067611694, |
|
"logps/chosen_both": -1.9162133932113647, |
|
"logps/chosen_prompt": -0.7942633032798767, |
|
"logps/rejected": -6.3366522789001465, |
|
"logps/rejected_both": -6.251999855041504, |
|
"logps/rejected_prompt": -0.9459937810897827, |
|
"loss": 2.1408, |
|
"nll_loss": 1.915776252746582, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7742422223091125, |
|
"rewards/margins": 1.7604186534881592, |
|
"rewards/rejected": -2.534660816192627, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.248, |
|
"grad_norm": 0.19312969446080464, |
|
"learning_rate": 3.152103765724743e-05, |
|
"log_odds_chosen": 3.9786903858184814, |
|
"log_odds_ratio": -0.10893861204385757, |
|
"logits/chosen": -3.0307998657226562, |
|
"logits/chosen_prompt": -2.7775232791900635, |
|
"logits/rejected": -2.6540513038635254, |
|
"logits/rejected_prompt": -2.7630362510681152, |
|
"logps/chosen": -1.9151197671890259, |
|
"logps/chosen_both": -1.8977829217910767, |
|
"logps/chosen_prompt": -0.8471347689628601, |
|
"logps/rejected": -5.757778644561768, |
|
"logps/rejected_both": -5.6885457038879395, |
|
"logps/rejected_prompt": -1.02475106716156, |
|
"loss": 1.9805, |
|
"nll_loss": 1.8967196941375732, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7660478949546814, |
|
"rewards/margins": 1.537063717842102, |
|
"rewards/rejected": -2.3031115531921387, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.256, |
|
"grad_norm": 0.19646035725215968, |
|
"learning_rate": 3.1318622130238236e-05, |
|
"log_odds_chosen": 4.679540157318115, |
|
"log_odds_ratio": -0.07853083312511444, |
|
"logits/chosen": -2.9802026748657227, |
|
"logits/chosen_prompt": -2.761209011077881, |
|
"logits/rejected": -2.5600242614746094, |
|
"logits/rejected_prompt": -2.7424654960632324, |
|
"logps/chosen": -1.7784169912338257, |
|
"logps/chosen_both": -1.7646305561065674, |
|
"logps/chosen_prompt": -0.7139529585838318, |
|
"logps/rejected": -6.263562202453613, |
|
"logps/rejected_both": -6.190931797027588, |
|
"logps/rejected_prompt": -0.9958028793334961, |
|
"loss": 1.9559, |
|
"nll_loss": 1.76325261592865, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7113668322563171, |
|
"rewards/margins": 1.794058084487915, |
|
"rewards/rejected": -2.505424976348877, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.264, |
|
"grad_norm": 0.3866885307142738, |
|
"learning_rate": 3.111576314057268e-05, |
|
"log_odds_chosen": 3.801389694213867, |
|
"log_odds_ratio": -0.20994290709495544, |
|
"logits/chosen": -2.9368879795074463, |
|
"logits/chosen_prompt": -2.7586987018585205, |
|
"logits/rejected": -2.599658966064453, |
|
"logits/rejected_prompt": -2.743234157562256, |
|
"logps/chosen": -1.9905316829681396, |
|
"logps/chosen_both": -1.9739116430282593, |
|
"logps/chosen_prompt": -0.779675304889679, |
|
"logps/rejected": -5.689120292663574, |
|
"logps/rejected_both": -5.620154857635498, |
|
"logps/rejected_prompt": -1.0595715045928955, |
|
"loss": 2.0955, |
|
"nll_loss": 1.9729188680648804, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7962126731872559, |
|
"rewards/margins": 1.4794353246688843, |
|
"rewards/rejected": -2.275648355484009, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.272, |
|
"grad_norm": 0.2325643788869979, |
|
"learning_rate": 3.091247492559312e-05, |
|
"log_odds_chosen": 4.095303058624268, |
|
"log_odds_ratio": -0.1479816436767578, |
|
"logits/chosen": -2.9735686779022217, |
|
"logits/chosen_prompt": -2.8000283241271973, |
|
"logits/rejected": -2.558763027191162, |
|
"logits/rejected_prompt": -2.7583069801330566, |
|
"logps/chosen": -1.8590002059936523, |
|
"logps/chosen_both": -1.8417994976043701, |
|
"logps/chosen_prompt": -0.7681006193161011, |
|
"logps/rejected": -5.769632816314697, |
|
"logps/rejected_both": -5.685044288635254, |
|
"logps/rejected_prompt": -0.97776859998703, |
|
"loss": 2.1087, |
|
"nll_loss": 1.8410179615020752, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.743600070476532, |
|
"rewards/margins": 1.5642529726028442, |
|
"rewards/rejected": -2.3078532218933105, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.19381282768297478, |
|
"learning_rate": 3.0708771752766394e-05, |
|
"log_odds_chosen": 4.324513912200928, |
|
"log_odds_ratio": -0.0676613599061966, |
|
"logits/chosen": -2.9503540992736816, |
|
"logits/chosen_prompt": -2.7982351779937744, |
|
"logits/rejected": -2.5562634468078613, |
|
"logits/rejected_prompt": -2.7812817096710205, |
|
"logps/chosen": -1.9172391891479492, |
|
"logps/chosen_both": -1.9022390842437744, |
|
"logps/chosen_prompt": -0.7488449811935425, |
|
"logps/rejected": -6.095970630645752, |
|
"logps/rejected_both": -6.031794548034668, |
|
"logps/rejected_prompt": -0.9277693033218384, |
|
"loss": 1.9931, |
|
"nll_loss": 1.9018001556396484, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7668957114219666, |
|
"rewards/margins": 1.671492338180542, |
|
"rewards/rejected": -2.4383881092071533, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.288, |
|
"grad_norm": 0.23915750516620793, |
|
"learning_rate": 3.050466791868254e-05, |
|
"log_odds_chosen": 5.146353721618652, |
|
"log_odds_ratio": -0.07113925367593765, |
|
"logits/chosen": -3.0021820068359375, |
|
"logits/chosen_prompt": -2.7346436977386475, |
|
"logits/rejected": -2.400503635406494, |
|
"logits/rejected_prompt": -2.715362071990967, |
|
"logps/chosen": -1.8657314777374268, |
|
"logps/chosen_both": -1.847728967666626, |
|
"logps/chosen_prompt": -0.8974820375442505, |
|
"logps/rejected": -6.850257873535156, |
|
"logps/rejected_both": -6.73916482925415, |
|
"logps/rejected_prompt": -0.9878479838371277, |
|
"loss": 2.0166, |
|
"nll_loss": 1.8474719524383545, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7462925910949707, |
|
"rewards/margins": 1.9938108921051025, |
|
"rewards/rejected": -2.7401034832000732, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.296, |
|
"grad_norm": 0.25793388819398966, |
|
"learning_rate": 3.0300177748051373e-05, |
|
"log_odds_chosen": 5.57846212387085, |
|
"log_odds_ratio": -0.0040098619647324085, |
|
"logits/chosen": -2.921875476837158, |
|
"logits/chosen_prompt": -2.7485337257385254, |
|
"logits/rejected": -2.2575137615203857, |
|
"logits/rejected_prompt": -2.729705333709717, |
|
"logps/chosen": -2.0379016399383545, |
|
"logps/chosen_both": -2.023336410522461, |
|
"logps/chosen_prompt": -0.8523913621902466, |
|
"logps/rejected": -7.4703474044799805, |
|
"logps/rejected_both": -7.384527683258057, |
|
"logps/rejected_prompt": -1.0912959575653076, |
|
"loss": 2.1288, |
|
"nll_loss": 2.021984100341797, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8151607513427734, |
|
"rewards/margins": 2.172978639602661, |
|
"rewards/rejected": -2.9881393909454346, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.304, |
|
"grad_norm": 0.22406539118846014, |
|
"learning_rate": 3.0095315592697126e-05, |
|
"log_odds_chosen": 4.797575950622559, |
|
"log_odds_ratio": -0.07414670288562775, |
|
"logits/chosen": -2.9373860359191895, |
|
"logits/chosen_prompt": -2.7567806243896484, |
|
"logits/rejected": -2.339370012283325, |
|
"logits/rejected_prompt": -2.738049030303955, |
|
"logps/chosen": -2.040771961212158, |
|
"logps/chosen_both": -2.022752523422241, |
|
"logps/chosen_prompt": -0.8437407612800598, |
|
"logps/rejected": -6.715930938720703, |
|
"logps/rejected_both": -6.622492790222168, |
|
"logps/rejected_prompt": -1.1104066371917725, |
|
"loss": 2.0022, |
|
"nll_loss": 2.021770715713501, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8163086771965027, |
|
"rewards/margins": 1.8700635433197021, |
|
"rewards/rejected": -2.6863722801208496, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.312, |
|
"grad_norm": 0.19146540891141792, |
|
"learning_rate": 2.9890095830551207e-05, |
|
"log_odds_chosen": 5.205162525177002, |
|
"log_odds_ratio": -0.015068802051246166, |
|
"logits/chosen": -2.9850218296051025, |
|
"logits/chosen_prompt": -2.7482991218566895, |
|
"logits/rejected": -2.2866098880767822, |
|
"logits/rejected_prompt": -2.7363736629486084, |
|
"logps/chosen": -1.9450336694717407, |
|
"logps/chosen_both": -1.9250189065933228, |
|
"logps/chosen_prompt": -0.8316828012466431, |
|
"logps/rejected": -6.989903450012207, |
|
"logps/rejected_both": -6.88253927230835, |
|
"logps/rejected_prompt": -0.9859585762023926, |
|
"loss": 2.088, |
|
"nll_loss": 1.924430251121521, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7780135273933411, |
|
"rewards/margins": 2.0179476737976074, |
|
"rewards/rejected": -2.7959611415863037, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.22495066893496063, |
|
"learning_rate": 2.9684532864643122e-05, |
|
"log_odds_chosen": 5.308048725128174, |
|
"log_odds_ratio": -0.00845087319612503, |
|
"logits/chosen": -2.9742932319641113, |
|
"logits/chosen_prompt": -2.7849392890930176, |
|
"logits/rejected": -2.2982254028320312, |
|
"logits/rejected_prompt": -2.7615458965301514, |
|
"logps/chosen": -1.9874608516693115, |
|
"logps/chosen_both": -1.9658311605453491, |
|
"logps/chosen_prompt": -0.7408405542373657, |
|
"logps/rejected": -7.14414119720459, |
|
"logps/rejected_both": -7.038477897644043, |
|
"logps/rejected_prompt": -1.045243501663208, |
|
"loss": 2.0386, |
|
"nll_loss": 1.9651544094085693, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7949844002723694, |
|
"rewards/margins": 2.0626721382141113, |
|
"rewards/rejected": -2.857656478881836, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.328, |
|
"grad_norm": 0.2286734318135687, |
|
"learning_rate": 2.9478641122089562e-05, |
|
"log_odds_chosen": 4.840089797973633, |
|
"log_odds_ratio": -0.07564349472522736, |
|
"logits/chosen": -3.008890151977539, |
|
"logits/chosen_prompt": -2.8013384342193604, |
|
"logits/rejected": -2.394143581390381, |
|
"logits/rejected_prompt": -2.77929425239563, |
|
"logps/chosen": -1.9756405353546143, |
|
"logps/chosen_both": -1.9581083059310913, |
|
"logps/chosen_prompt": -0.7473115921020508, |
|
"logps/rejected": -6.674158573150635, |
|
"logps/rejected_both": -6.598573207855225, |
|
"logps/rejected_prompt": -0.996438205242157, |
|
"loss": 2.0632, |
|
"nll_loss": 1.9576594829559326, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.790256142616272, |
|
"rewards/margins": 1.8794071674346924, |
|
"rewards/rejected": -2.669663429260254, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.336, |
|
"grad_norm": 1.6039791025981895, |
|
"learning_rate": 2.9272435053081922e-05, |
|
"log_odds_chosen": 4.911754131317139, |
|
"log_odds_ratio": -0.08321253210306168, |
|
"logits/chosen": -2.912379741668701, |
|
"logits/chosen_prompt": -2.7961792945861816, |
|
"logits/rejected": -2.264275312423706, |
|
"logits/rejected_prompt": -2.7643306255340576, |
|
"logps/chosen": -1.951281189918518, |
|
"logps/chosen_both": -1.9351087808609009, |
|
"logps/chosen_prompt": -0.7827764749526978, |
|
"logps/rejected": -6.725755214691162, |
|
"logps/rejected_both": -6.646947383880615, |
|
"logps/rejected_prompt": -1.0157705545425415, |
|
"loss": 2.1063, |
|
"nll_loss": 1.9346641302108765, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7805125713348389, |
|
"rewards/margins": 1.9097894430160522, |
|
"rewards/rejected": -2.6903018951416016, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.3439999999999999, |
|
"grad_norm": 0.3698076131375805, |
|
"learning_rate": 2.9065929129872094e-05, |
|
"log_odds_chosen": 4.74294376373291, |
|
"log_odds_ratio": -0.08516435325145721, |
|
"logits/chosen": -2.9431169033050537, |
|
"logits/chosen_prompt": -2.7804017066955566, |
|
"logits/rejected": -2.2715518474578857, |
|
"logits/rejected_prompt": -2.7543439865112305, |
|
"logps/chosen": -2.047203779220581, |
|
"logps/chosen_both": -2.028724193572998, |
|
"logps/chosen_prompt": -0.8540178537368774, |
|
"logps/rejected": -6.660338401794434, |
|
"logps/rejected_both": -6.572705268859863, |
|
"logps/rejected_prompt": -1.0315988063812256, |
|
"loss": 2.1122, |
|
"nll_loss": 2.0279080867767334, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.818881630897522, |
|
"rewards/margins": 1.8452539443969727, |
|
"rewards/rejected": -2.664135456085205, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.3519999999999999, |
|
"grad_norm": 14.756635490233291, |
|
"learning_rate": 2.8859137845756784e-05, |
|
"log_odds_chosen": 5.338567733764648, |
|
"log_odds_ratio": -0.07245531678199768, |
|
"logits/chosen": -3.0019686222076416, |
|
"logits/chosen_prompt": -2.7564592361450195, |
|
"logits/rejected": -2.10023832321167, |
|
"logits/rejected_prompt": -2.75854754447937, |
|
"logps/chosen": -1.801944375038147, |
|
"logps/chosen_both": -1.7874317169189453, |
|
"logps/chosen_prompt": -0.7828146815299988, |
|
"logps/rejected": -6.980807304382324, |
|
"logps/rejected_both": -6.885933876037598, |
|
"logps/rejected_prompt": -1.0353758335113525, |
|
"loss": 1.9922, |
|
"nll_loss": 1.7853384017944336, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.720777690410614, |
|
"rewards/margins": 2.071545124053955, |
|
"rewards/rejected": -2.792322874069214, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.3599999999999999, |
|
"grad_norm": 1.0205003901521117, |
|
"learning_rate": 2.8652075714060295e-05, |
|
"log_odds_chosen": 4.316029071807861, |
|
"log_odds_ratio": -0.18554985523223877, |
|
"logits/chosen": -2.9789249897003174, |
|
"logits/chosen_prompt": -2.7761483192443848, |
|
"logits/rejected": -2.230045795440674, |
|
"logits/rejected_prompt": -2.7322373390197754, |
|
"logps/chosen": -1.9758758544921875, |
|
"logps/chosen_both": -1.958141565322876, |
|
"logps/chosen_prompt": -0.839580237865448, |
|
"logps/rejected": -6.175426006317139, |
|
"logps/rejected_both": -6.096805572509766, |
|
"logps/rejected_prompt": -1.002239465713501, |
|
"loss": 2.0489, |
|
"nll_loss": 1.957658052444458, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7903503179550171, |
|
"rewards/margins": 1.6798200607299805, |
|
"rewards/rejected": -2.470170497894287, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.3679999999999999, |
|
"grad_norm": 0.5093034024599485, |
|
"learning_rate": 2.844475726711595e-05, |
|
"log_odds_chosen": 5.062729835510254, |
|
"log_odds_ratio": -0.05383139103651047, |
|
"logits/chosen": -2.9323840141296387, |
|
"logits/chosen_prompt": -2.757789134979248, |
|
"logits/rejected": -2.114853620529175, |
|
"logits/rejected_prompt": -2.740206003189087, |
|
"logps/chosen": -1.9980299472808838, |
|
"logps/chosen_both": -1.9810377359390259, |
|
"logps/chosen_prompt": -0.8025790452957153, |
|
"logps/rejected": -6.92165994644165, |
|
"logps/rejected_both": -6.837998867034912, |
|
"logps/rejected_prompt": -1.0708694458007812, |
|
"loss": 2.02, |
|
"nll_loss": 1.980063796043396, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7992119789123535, |
|
"rewards/margins": 1.9694522619247437, |
|
"rewards/rejected": -2.7686638832092285, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.376, |
|
"grad_norm": 0.1922091365417996, |
|
"learning_rate": 2.8237197055246172e-05, |
|
"log_odds_chosen": 5.407708644866943, |
|
"log_odds_ratio": -0.07208568602800369, |
|
"logits/chosen": -2.930446147918701, |
|
"logits/chosen_prompt": -2.7493677139282227, |
|
"logits/rejected": -1.8252556324005127, |
|
"logits/rejected_prompt": -2.716831684112549, |
|
"logps/chosen": -1.99956476688385, |
|
"logps/chosen_both": -1.9826438426971436, |
|
"logps/chosen_prompt": -0.8026520609855652, |
|
"logps/rejected": -7.266847133636475, |
|
"logps/rejected_both": -7.16598653793335, |
|
"logps/rejected_prompt": -0.9821138381958008, |
|
"loss": 2.0055, |
|
"nll_loss": 1.9819648265838623, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.79982590675354, |
|
"rewards/margins": 2.1069130897521973, |
|
"rewards/rejected": -2.9067392349243164, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.384, |
|
"grad_norm": 0.19884693939871143, |
|
"learning_rate": 2.8029409645741267e-05, |
|
"log_odds_chosen": 5.655479907989502, |
|
"log_odds_ratio": -0.07094166427850723, |
|
"logits/chosen": -2.9133386611938477, |
|
"logits/chosen_prompt": -2.7181575298309326, |
|
"logits/rejected": -1.8967100381851196, |
|
"logits/rejected_prompt": -2.7026288509368896, |
|
"logps/chosen": -2.0701959133148193, |
|
"logps/chosen_both": -2.0524401664733887, |
|
"logps/chosen_prompt": -0.8565284609794617, |
|
"logps/rejected": -7.606234550476074, |
|
"logps/rejected_both": -7.5077009201049805, |
|
"logps/rejected_prompt": -1.0423924922943115, |
|
"loss": 2.1485, |
|
"nll_loss": 2.0521743297576904, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8280783891677856, |
|
"rewards/margins": 2.2144155502319336, |
|
"rewards/rejected": -3.042494058609009, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.392, |
|
"grad_norm": 0.22986043369921255, |
|
"learning_rate": 2.782140962183704e-05, |
|
"log_odds_chosen": 6.107487678527832, |
|
"log_odds_ratio": -0.0026633774396032095, |
|
"logits/chosen": -2.98026442527771, |
|
"logits/chosen_prompt": -2.780827522277832, |
|
"logits/rejected": -1.9798576831817627, |
|
"logits/rejected_prompt": -2.7703700065612793, |
|
"logps/chosen": -1.9474899768829346, |
|
"logps/chosen_both": -1.9275726079940796, |
|
"logps/chosen_prompt": -0.7816404700279236, |
|
"logps/rejected": -7.895272731781006, |
|
"logps/rejected_both": -7.769126892089844, |
|
"logps/rejected_prompt": -0.9758648872375488, |
|
"loss": 1.9516, |
|
"nll_loss": 1.925616979598999, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.778995931148529, |
|
"rewards/margins": 2.3791134357452393, |
|
"rewards/rejected": -3.158109188079834, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 1.3967778423182213, |
|
"learning_rate": 2.761321158169134e-05, |
|
"log_odds_chosen": 5.588977336883545, |
|
"log_odds_ratio": -0.07164627313613892, |
|
"logits/chosen": -2.942800998687744, |
|
"logits/chosen_prompt": -2.765923023223877, |
|
"logits/rejected": -2.1541590690612793, |
|
"logits/rejected_prompt": -2.7391622066497803, |
|
"logps/chosen": -1.8856910467147827, |
|
"logps/chosen_both": -1.8705289363861084, |
|
"logps/chosen_prompt": -0.7254279851913452, |
|
"logps/rejected": -7.315940856933594, |
|
"logps/rejected_both": -7.2315239906311035, |
|
"logps/rejected_prompt": -0.9249277114868164, |
|
"loss": 2.037, |
|
"nll_loss": 1.8701813220977783, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7542763948440552, |
|
"rewards/margins": 2.172100305557251, |
|
"rewards/rejected": -2.9263763427734375, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.408, |
|
"grad_norm": 0.19174060756423858, |
|
"learning_rate": 2.7404830137359444e-05, |
|
"log_odds_chosen": 5.684497356414795, |
|
"log_odds_ratio": -0.03275999799370766, |
|
"logits/chosen": -2.958325147628784, |
|
"logits/chosen_prompt": -2.728274345397949, |
|
"logits/rejected": -2.046318531036377, |
|
"logits/rejected_prompt": -2.6898844242095947, |
|
"logps/chosen": -2.253990411758423, |
|
"logps/chosen_both": -2.2328062057495117, |
|
"logps/chosen_prompt": -0.8659110069274902, |
|
"logps/rejected": -7.784188747406006, |
|
"logps/rejected_both": -7.674757480621338, |
|
"logps/rejected_prompt": -1.1274776458740234, |
|
"loss": 2.1275, |
|
"nll_loss": 2.2321293354034424, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.901596188545227, |
|
"rewards/margins": 2.212078809738159, |
|
"rewards/rejected": -3.1136748790740967, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.416, |
|
"grad_norm": 0.1908777514352998, |
|
"learning_rate": 2.7196279913768584e-05, |
|
"log_odds_chosen": 5.167336940765381, |
|
"log_odds_ratio": -0.07482357323169708, |
|
"logits/chosen": -2.9330124855041504, |
|
"logits/chosen_prompt": -2.7444446086883545, |
|
"logits/rejected": -2.105210065841675, |
|
"logits/rejected_prompt": -2.721642255783081, |
|
"logps/chosen": -2.0776610374450684, |
|
"logps/chosen_both": -2.0597071647644043, |
|
"logps/chosen_prompt": -0.8555063009262085, |
|
"logps/rejected": -7.124932765960693, |
|
"logps/rejected_both": -7.030417442321777, |
|
"logps/rejected_prompt": -1.0413535833358765, |
|
"loss": 1.9978, |
|
"nll_loss": 2.058987617492676, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8310644030570984, |
|
"rewards/margins": 2.018908739089966, |
|
"rewards/rejected": -2.84997296333313, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.424, |
|
"grad_norm": 0.17855815500184188, |
|
"learning_rate": 2.6987575547691497e-05, |
|
"log_odds_chosen": 4.549686908721924, |
|
"log_odds_ratio": -0.20390887558460236, |
|
"logits/chosen": -2.9623754024505615, |
|
"logits/chosen_prompt": -2.74225115776062, |
|
"logits/rejected": -2.1663219928741455, |
|
"logits/rejected_prompt": -2.7345423698425293, |
|
"logps/chosen": -1.9926074743270874, |
|
"logps/chosen_both": -1.9742103815078735, |
|
"logps/chosen_prompt": -0.7784561514854431, |
|
"logps/rejected": -6.431072235107422, |
|
"logps/rejected_both": -6.3410797119140625, |
|
"logps/rejected_prompt": -0.9243408441543579, |
|
"loss": 2.0508, |
|
"nll_loss": 1.973905324935913, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7970430850982666, |
|
"rewards/margins": 1.775386095046997, |
|
"rewards/rejected": -2.5724291801452637, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.432, |
|
"grad_norm": 0.41995614329947717, |
|
"learning_rate": 2.6778731686719178e-05, |
|
"log_odds_chosen": 6.473885536193848, |
|
"log_odds_ratio": -0.0018433562945574522, |
|
"logits/chosen": -2.952514410018921, |
|
"logits/chosen_prompt": -2.7027528285980225, |
|
"logits/rejected": -1.8595733642578125, |
|
"logits/rejected_prompt": -2.6798789501190186, |
|
"logps/chosen": -1.9392732381820679, |
|
"logps/chosen_both": -1.9248685836791992, |
|
"logps/chosen_prompt": -0.931847095489502, |
|
"logps/rejected": -8.25381088256836, |
|
"logps/rejected_both": -8.140459060668945, |
|
"logps/rejected_prompt": -1.0698789358139038, |
|
"loss": 1.9874, |
|
"nll_loss": 1.923288106918335, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7757093906402588, |
|
"rewards/margins": 2.5258147716522217, |
|
"rewards/rejected": -3.3015239238739014, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.7745820877648287, |
|
"learning_rate": 2.656976298823284e-05, |
|
"log_odds_chosen": 3.4408886432647705, |
|
"log_odds_ratio": -0.27857550978660583, |
|
"logits/chosen": -2.878281831741333, |
|
"logits/chosen_prompt": -2.734473705291748, |
|
"logits/rejected": -2.3365187644958496, |
|
"logits/rejected_prompt": -2.7160048484802246, |
|
"logps/chosen": -2.0569214820861816, |
|
"logps/chosen_both": -2.0396482944488525, |
|
"logps/chosen_prompt": -0.6810625791549683, |
|
"logps/rejected": -5.414828300476074, |
|
"logps/rejected_both": -5.35118293762207, |
|
"logps/rejected_prompt": -0.8160842061042786, |
|
"loss": 2.0419, |
|
"nll_loss": 2.038651943206787, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.82276850938797, |
|
"rewards/margins": 1.3431627750396729, |
|
"rewards/rejected": -2.165931224822998, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.448, |
|
"grad_norm": 0.19675956388988333, |
|
"learning_rate": 2.636068411837523e-05, |
|
"log_odds_chosen": 3.9148197174072266, |
|
"log_odds_ratio": -0.23557178676128387, |
|
"logits/chosen": -3.045487642288208, |
|
"logits/chosen_prompt": -2.759061574935913, |
|
"logits/rejected": -2.4077014923095703, |
|
"logits/rejected_prompt": -2.7576231956481934, |
|
"logps/chosen": -1.8861596584320068, |
|
"logps/chosen_both": -1.8683302402496338, |
|
"logps/chosen_prompt": -0.9071288108825684, |
|
"logps/rejected": -5.683122158050537, |
|
"logps/rejected_both": -5.601851940155029, |
|
"logps/rejected_prompt": -1.0434454679489136, |
|
"loss": 2.0263, |
|
"nll_loss": 1.8671506643295288, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7544639110565186, |
|
"rewards/margins": 1.5187851190567017, |
|
"rewards/rejected": -2.2732491493225098, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.456, |
|
"grad_norm": 6.189918533614061, |
|
"learning_rate": 2.615150975102131e-05, |
|
"log_odds_chosen": 6.713578701019287, |
|
"log_odds_ratio": -0.0015258995117619634, |
|
"logits/chosen": -3.0059263706207275, |
|
"logits/chosen_prompt": -2.7889323234558105, |
|
"logits/rejected": -1.839082956314087, |
|
"logits/rejected_prompt": -2.7647995948791504, |
|
"logps/chosen": -2.004807233810425, |
|
"logps/chosen_both": -1.98598313331604, |
|
"logps/chosen_prompt": -0.7677423357963562, |
|
"logps/rejected": -8.555073738098145, |
|
"logps/rejected_both": -8.430871963500977, |
|
"logps/rejected_prompt": -1.011725664138794, |
|
"loss": 2.0302, |
|
"nll_loss": 1.9841728210449219, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8019229769706726, |
|
"rewards/margins": 2.6201066970825195, |
|
"rewards/rejected": -3.422029495239258, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.464, |
|
"grad_norm": 0.21797873657619965, |
|
"learning_rate": 2.594225456674837e-05, |
|
"log_odds_chosen": 5.328610420227051, |
|
"log_odds_ratio": -0.0812341570854187, |
|
"logits/chosen": -2.979506731033325, |
|
"logits/chosen_prompt": -2.792584180831909, |
|
"logits/rejected": -2.08947491645813, |
|
"logits/rejected_prompt": -2.781327962875366, |
|
"logps/chosen": -1.9279931783676147, |
|
"logps/chosen_both": -1.9127006530761719, |
|
"logps/chosen_prompt": -0.7814801335334778, |
|
"logps/rejected": -7.120486259460449, |
|
"logps/rejected_both": -7.026519775390625, |
|
"logps/rejected_prompt": -0.9352282285690308, |
|
"loss": 2.0587, |
|
"nll_loss": 1.9114625453948975, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7711972594261169, |
|
"rewards/margins": 2.0769975185394287, |
|
"rewards/rejected": -2.8481948375701904, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.472, |
|
"grad_norm": 0.1871801141599041, |
|
"learning_rate": 2.5732933251805713e-05, |
|
"log_odds_chosen": 5.583043575286865, |
|
"log_odds_ratio": -0.13880962133407593, |
|
"logits/chosen": -2.9580206871032715, |
|
"logits/chosen_prompt": -2.7731950283050537, |
|
"logits/rejected": -2.012089490890503, |
|
"logits/rejected_prompt": -2.75722336769104, |
|
"logps/chosen": -1.855268120765686, |
|
"logps/chosen_both": -1.8423293828964233, |
|
"logps/chosen_prompt": -0.8601115942001343, |
|
"logps/rejected": -7.305128574371338, |
|
"logps/rejected_both": -7.2211809158325195, |
|
"logps/rejected_prompt": -1.0132110118865967, |
|
"loss": 1.9359, |
|
"nll_loss": 1.8416475057601929, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7421072125434875, |
|
"rewards/margins": 2.1799445152282715, |
|
"rewards/rejected": -2.922051191329956, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.22592416955066014, |
|
"learning_rate": 2.5523560497083926e-05, |
|
"log_odds_chosen": 5.949292182922363, |
|
"log_odds_ratio": -0.07134632766246796, |
|
"logits/chosen": -2.956613779067993, |
|
"logits/chosen_prompt": -2.722937822341919, |
|
"logits/rejected": -1.9237785339355469, |
|
"logits/rejected_prompt": -2.704369068145752, |
|
"logps/chosen": -1.9562047719955444, |
|
"logps/chosen_both": -1.9380409717559814, |
|
"logps/chosen_prompt": -0.7973084449768066, |
|
"logps/rejected": -7.771543979644775, |
|
"logps/rejected_both": -7.661837577819824, |
|
"logps/rejected_prompt": -0.9722532033920288, |
|
"loss": 1.9892, |
|
"nll_loss": 1.9374074935913086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7824817895889282, |
|
"rewards/margins": 2.3261356353759766, |
|
"rewards/rejected": -3.1086175441741943, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.488, |
|
"grad_norm": 0.19952883102568983, |
|
"learning_rate": 2.531415099708382e-05, |
|
"log_odds_chosen": 5.468968868255615, |
|
"log_odds_ratio": -0.13928017020225525, |
|
"logits/chosen": -2.901470184326172, |
|
"logits/chosen_prompt": -2.7253496646881104, |
|
"logits/rejected": -1.9635553359985352, |
|
"logits/rejected_prompt": -2.721364736557007, |
|
"logps/chosen": -2.024766683578491, |
|
"logps/chosen_both": -2.0091967582702637, |
|
"logps/chosen_prompt": -0.8794494867324829, |
|
"logps/rejected": -7.388121604919434, |
|
"logps/rejected_both": -7.304760932922363, |
|
"logps/rejected_prompt": -1.0697910785675049, |
|
"loss": 2.1409, |
|
"nll_loss": 2.0086288452148438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8099067807197571, |
|
"rewards/margins": 2.1453423500061035, |
|
"rewards/rejected": -2.955249071121216, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.496, |
|
"grad_norm": 0.20218369179299622, |
|
"learning_rate": 2.51047194488851e-05, |
|
"log_odds_chosen": 5.442208766937256, |
|
"log_odds_ratio": -0.14097937941551208, |
|
"logits/chosen": -2.9763107299804688, |
|
"logits/chosen_prompt": -2.7768394947052, |
|
"logits/rejected": -2.108531951904297, |
|
"logits/rejected_prompt": -2.7451493740081787, |
|
"logps/chosen": -1.79744553565979, |
|
"logps/chosen_both": -1.7835102081298828, |
|
"logps/chosen_prompt": -0.7872709631919861, |
|
"logps/rejected": -7.031289577484131, |
|
"logps/rejected_both": -6.934246063232422, |
|
"logps/rejected_prompt": -0.9216675758361816, |
|
"loss": 2.1195, |
|
"nll_loss": 1.7827249765396118, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.718978226184845, |
|
"rewards/margins": 2.0935378074645996, |
|
"rewards/rejected": -2.8125159740448, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.504, |
|
"grad_norm": 0.9652790170177806, |
|
"learning_rate": 2.4895280551114907e-05, |
|
"log_odds_chosen": 5.730778694152832, |
|
"log_odds_ratio": -0.07072736322879791, |
|
"logits/chosen": -2.950146198272705, |
|
"logits/chosen_prompt": -2.7803640365600586, |
|
"logits/rejected": -1.9521598815917969, |
|
"logits/rejected_prompt": -2.764260768890381, |
|
"logps/chosen": -2.0558481216430664, |
|
"logps/chosen_both": -2.0352180004119873, |
|
"logps/chosen_prompt": -0.8978110551834106, |
|
"logps/rejected": -7.663902282714844, |
|
"logps/rejected_both": -7.545947074890137, |
|
"logps/rejected_prompt": -1.037939429283142, |
|
"loss": 2.049, |
|
"nll_loss": 2.0345263481140137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8223392367362976, |
|
"rewards/margins": 2.2432212829589844, |
|
"rewards/rejected": -3.0655605792999268, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.512, |
|
"grad_norm": 0.1890875725333666, |
|
"learning_rate": 2.4685849002916183e-05, |
|
"log_odds_chosen": 6.257909297943115, |
|
"log_odds_ratio": -0.00222708098590374, |
|
"logits/chosen": -2.9233384132385254, |
|
"logits/chosen_prompt": -2.7774055004119873, |
|
"logits/rejected": -1.9378130435943604, |
|
"logits/rejected_prompt": -2.751840114593506, |
|
"logps/chosen": -1.9843826293945312, |
|
"logps/chosen_both": -1.9667317867279053, |
|
"logps/chosen_prompt": -0.6825822591781616, |
|
"logps/rejected": -8.092279434204102, |
|
"logps/rejected_both": -7.992387294769287, |
|
"logps/rejected_prompt": -0.9652584195137024, |
|
"loss": 1.9485, |
|
"nll_loss": 1.965959906578064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7937530279159546, |
|
"rewards/margins": 2.4431586265563965, |
|
"rewards/rejected": -3.2369117736816406, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.2373809038859539, |
|
"learning_rate": 2.447643950291608e-05, |
|
"log_odds_chosen": 6.489705562591553, |
|
"log_odds_ratio": -0.0016050601843744516, |
|
"logits/chosen": -2.9970052242279053, |
|
"logits/chosen_prompt": -2.755345106124878, |
|
"logits/rejected": -1.9105993509292603, |
|
"logits/rejected_prompt": -2.7229576110839844, |
|
"logps/chosen": -1.8970317840576172, |
|
"logps/chosen_both": -1.8811533451080322, |
|
"logps/chosen_prompt": -0.7929924726486206, |
|
"logps/rejected": -8.21942138671875, |
|
"logps/rejected_both": -8.108181953430176, |
|
"logps/rejected_prompt": -0.9921186566352844, |
|
"loss": 1.954, |
|
"nll_loss": 1.8801666498184204, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7588127851486206, |
|
"rewards/margins": 2.5289556980133057, |
|
"rewards/rejected": -3.287768602371216, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.528, |
|
"grad_norm": 0.1741002343821723, |
|
"learning_rate": 2.4267066748194296e-05, |
|
"log_odds_chosen": 5.774570941925049, |
|
"log_odds_ratio": -0.07103729248046875, |
|
"logits/chosen": -2.886838436126709, |
|
"logits/chosen_prompt": -2.7209315299987793, |
|
"logits/rejected": -2.010939836502075, |
|
"logits/rejected_prompt": -2.7094690799713135, |
|
"logps/chosen": -2.068047523498535, |
|
"logps/chosen_both": -2.051417350769043, |
|
"logps/chosen_prompt": -0.7632136940956116, |
|
"logps/rejected": -7.714223384857178, |
|
"logps/rejected_both": -7.628198146820068, |
|
"logps/rejected_prompt": -0.9632788896560669, |
|
"loss": 2.0981, |
|
"nll_loss": 2.051051139831543, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8272191286087036, |
|
"rewards/margins": 2.258470296859741, |
|
"rewards/rejected": -3.085689067840576, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.536, |
|
"grad_norm": 0.18057749289339498, |
|
"learning_rate": 2.4057745433251635e-05, |
|
"log_odds_chosen": 6.403738498687744, |
|
"log_odds_ratio": -0.0018427784088999033, |
|
"logits/chosen": -2.9575610160827637, |
|
"logits/chosen_prompt": -2.7303547859191895, |
|
"logits/rejected": -1.862630844116211, |
|
"logits/rejected_prompt": -2.71962833404541, |
|
"logps/chosen": -2.0046885013580322, |
|
"logps/chosen_both": -1.9884449243545532, |
|
"logps/chosen_prompt": -0.763080894947052, |
|
"logps/rejected": -8.254236221313477, |
|
"logps/rejected_both": -8.159029960632324, |
|
"logps/rejected_prompt": -1.045041799545288, |
|
"loss": 2.0516, |
|
"nll_loss": 1.9879404306411743, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8018752932548523, |
|
"rewards/margins": 2.49981951713562, |
|
"rewards/rejected": -3.301694869995117, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.544, |
|
"grad_norm": 0.20142735097076295, |
|
"learning_rate": 2.384849024897869e-05, |
|
"log_odds_chosen": 5.733250617980957, |
|
"log_odds_ratio": -0.004482199437916279, |
|
"logits/chosen": -2.9741549491882324, |
|
"logits/chosen_prompt": -2.7055163383483887, |
|
"logits/rejected": -2.124002456665039, |
|
"logits/rejected_prompt": -2.688239812850952, |
|
"logps/chosen": -1.9430478811264038, |
|
"logps/chosen_both": -1.926995038986206, |
|
"logps/chosen_prompt": -0.7834355235099792, |
|
"logps/rejected": -7.518483638763428, |
|
"logps/rejected_both": -7.4232635498046875, |
|
"logps/rejected_prompt": -1.0878071784973145, |
|
"loss": 2.1323, |
|
"nll_loss": 1.9260002374649048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7772191166877747, |
|
"rewards/margins": 2.2301743030548096, |
|
"rewards/rejected": -3.0073933601379395, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.552, |
|
"grad_norm": 40.90864961224279, |
|
"learning_rate": 2.3639315881624777e-05, |
|
"log_odds_chosen": 5.306234836578369, |
|
"log_odds_ratio": -0.00918310321867466, |
|
"logits/chosen": -2.9237542152404785, |
|
"logits/chosen_prompt": -2.7105278968811035, |
|
"logits/rejected": -2.2239270210266113, |
|
"logits/rejected_prompt": -2.686476469039917, |
|
"logps/chosen": -1.9409538507461548, |
|
"logps/chosen_both": -1.9275703430175781, |
|
"logps/chosen_prompt": -0.8563373684883118, |
|
"logps/rejected": -7.0894670486450195, |
|
"logps/rejected_both": -7.007052421569824, |
|
"logps/rejected_prompt": -0.9907125234603882, |
|
"loss": 1.9112, |
|
"nll_loss": 1.926429033279419, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7763815522193909, |
|
"rewards/margins": 2.059405565261841, |
|
"rewards/rejected": -2.835787296295166, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.21885482692879285, |
|
"learning_rate": 2.3430237011767167e-05, |
|
"log_odds_chosen": 5.6596198081970215, |
|
"log_odds_ratio": -0.023314189165830612, |
|
"logits/chosen": -2.9358747005462646, |
|
"logits/chosen_prompt": -2.727999687194824, |
|
"logits/rejected": -2.0308213233947754, |
|
"logits/rejected_prompt": -2.686753749847412, |
|
"logps/chosen": -1.9377899169921875, |
|
"logps/chosen_both": -1.922545075416565, |
|
"logps/chosen_prompt": -0.8713130950927734, |
|
"logps/rejected": -7.442534446716309, |
|
"logps/rejected_both": -7.339343070983887, |
|
"logps/rejected_prompt": -1.057796835899353, |
|
"loss": 2.0015, |
|
"nll_loss": 1.9221293926239014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7751160264015198, |
|
"rewards/margins": 2.2018978595733643, |
|
"rewards/rejected": -2.9770140647888184, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.568, |
|
"grad_norm": 0.5403488938261225, |
|
"learning_rate": 2.3221268313280838e-05, |
|
"log_odds_chosen": 5.778319358825684, |
|
"log_odds_ratio": -0.07066681236028671, |
|
"logits/chosen": -2.954177141189575, |
|
"logits/chosen_prompt": -2.678536891937256, |
|
"logits/rejected": -1.9524621963500977, |
|
"logits/rejected_prompt": -2.6848878860473633, |
|
"logps/chosen": -1.9211227893829346, |
|
"logps/chosen_both": -1.902917504310608, |
|
"logps/chosen_prompt": -0.8868004083633423, |
|
"logps/rejected": -7.527622222900391, |
|
"logps/rejected_both": -7.4302239418029785, |
|
"logps/rejected_prompt": -1.1353758573532104, |
|
"loss": 2.0128, |
|
"nll_loss": 1.9021003246307373, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.768449068069458, |
|
"rewards/margins": 2.2425997257232666, |
|
"rewards/rejected": -3.0110487937927246, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.576, |
|
"grad_norm": 6.334469044302015, |
|
"learning_rate": 2.301242445230851e-05, |
|
"log_odds_chosen": 4.549070358276367, |
|
"log_odds_ratio": -0.10954795777797699, |
|
"logits/chosen": -2.9302010536193848, |
|
"logits/chosen_prompt": -2.6880440711975098, |
|
"logits/rejected": -2.190250873565674, |
|
"logits/rejected_prompt": -2.6803054809570312, |
|
"logps/chosen": -2.2468152046203613, |
|
"logps/chosen_both": -2.227410316467285, |
|
"logps/chosen_prompt": -0.7418851852416992, |
|
"logps/rejected": -6.677786827087402, |
|
"logps/rejected_both": -6.601284980773926, |
|
"logps/rejected_prompt": -0.9388518333435059, |
|
"loss": 2.1059, |
|
"nll_loss": 2.226693630218506, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8987261056900024, |
|
"rewards/margins": 1.7723888158798218, |
|
"rewards/rejected": -2.671114683151245, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.584, |
|
"grad_norm": 0.21099709481066398, |
|
"learning_rate": 2.280372008623142e-05, |
|
"log_odds_chosen": 4.277853488922119, |
|
"log_odds_ratio": -0.18287745118141174, |
|
"logits/chosen": -2.989633321762085, |
|
"logits/chosen_prompt": -2.6874613761901855, |
|
"logits/rejected": -2.2610902786254883, |
|
"logits/rejected_prompt": -2.664952516555786, |
|
"logps/chosen": -1.912766695022583, |
|
"logps/chosen_both": -1.8961530923843384, |
|
"logps/chosen_prompt": -0.7984111905097961, |
|
"logps/rejected": -6.0515875816345215, |
|
"logps/rejected_both": -5.97214412689209, |
|
"logps/rejected_prompt": -1.0341233015060425, |
|
"loss": 2.0542, |
|
"nll_loss": 1.894964575767517, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7651066780090332, |
|
"rewards/margins": 1.6555284261703491, |
|
"rewards/rejected": -2.4206349849700928, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.592, |
|
"grad_norm": 0.23272826174313574, |
|
"learning_rate": 2.2595169862640568e-05, |
|
"log_odds_chosen": 6.768258094787598, |
|
"log_odds_ratio": -0.001374961924739182, |
|
"logits/chosen": -2.973562240600586, |
|
"logits/chosen_prompt": -2.686769962310791, |
|
"logits/rejected": -1.666338562965393, |
|
"logits/rejected_prompt": -2.683814764022827, |
|
"logps/chosen": -1.9322917461395264, |
|
"logps/chosen_both": -1.9166603088378906, |
|
"logps/chosen_prompt": -0.8024829626083374, |
|
"logps/rejected": -8.528871536254883, |
|
"logps/rejected_both": -8.413396835327148, |
|
"logps/rejected_prompt": -1.0380266904830933, |
|
"loss": 2.0648, |
|
"nll_loss": 1.9158977270126343, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7729167342185974, |
|
"rewards/margins": 2.63863205909729, |
|
"rewards/rejected": -3.4115490913391113, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.20086785213912312, |
|
"learning_rate": 2.238678841830867e-05, |
|
"log_odds_chosen": 6.327115058898926, |
|
"log_odds_ratio": -0.004725167062133551, |
|
"logits/chosen": -2.966679573059082, |
|
"logits/chosen_prompt": -2.6999356746673584, |
|
"logits/rejected": -1.8506364822387695, |
|
"logits/rejected_prompt": -2.6866955757141113, |
|
"logps/chosen": -1.8783817291259766, |
|
"logps/chosen_both": -1.8609817028045654, |
|
"logps/chosen_prompt": -0.7905829548835754, |
|
"logps/rejected": -8.029566764831543, |
|
"logps/rejected_both": -7.908673286437988, |
|
"logps/rejected_prompt": -1.0723146200180054, |
|
"loss": 1.9398, |
|
"nll_loss": 1.860142469406128, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7513527274131775, |
|
"rewards/margins": 2.4604744911193848, |
|
"rewards/rejected": -3.211826801300049, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.608, |
|
"grad_norm": 3.6061661350197456, |
|
"learning_rate": 2.217859037816296e-05, |
|
"log_odds_chosen": 4.772618770599365, |
|
"log_odds_ratio": -0.14787371456623077, |
|
"logits/chosen": -2.9939560890197754, |
|
"logits/chosen_prompt": -2.712306499481201, |
|
"logits/rejected": -2.120854139328003, |
|
"logits/rejected_prompt": -2.699389934539795, |
|
"logps/chosen": -2.0005993843078613, |
|
"logps/chosen_both": -1.9795938730239868, |
|
"logps/chosen_prompt": -0.7556421160697937, |
|
"logps/rejected": -6.654515743255615, |
|
"logps/rejected_both": -6.551595211029053, |
|
"logps/rejected_prompt": -0.9516459703445435, |
|
"loss": 1.9737, |
|
"nll_loss": 1.9790796041488647, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8002398610115051, |
|
"rewards/margins": 1.8615667819976807, |
|
"rewards/rejected": -2.661806344985962, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.616, |
|
"grad_norm": 0.3283349921571691, |
|
"learning_rate": 2.1970590354258745e-05, |
|
"log_odds_chosen": 6.253961086273193, |
|
"log_odds_ratio": -0.07067908346652985, |
|
"logits/chosen": -2.9472057819366455, |
|
"logits/chosen_prompt": -2.6802945137023926, |
|
"logits/rejected": -1.744763731956482, |
|
"logits/rejected_prompt": -2.6687159538269043, |
|
"logps/chosen": -1.955038070678711, |
|
"logps/chosen_both": -1.9407745599746704, |
|
"logps/chosen_prompt": -1.00258469581604, |
|
"logps/rejected": -8.05742073059082, |
|
"logps/rejected_both": -7.938286781311035, |
|
"logps/rejected_prompt": -1.1584670543670654, |
|
"loss": 2.0349, |
|
"nll_loss": 1.940118432044983, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7820152044296265, |
|
"rewards/margins": 2.440953254699707, |
|
"rewards/rejected": -3.222968339920044, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.624, |
|
"grad_norm": 1.8747875530540283, |
|
"learning_rate": 2.176280294475383e-05, |
|
"log_odds_chosen": 6.281460762023926, |
|
"log_odds_ratio": -0.03783145174384117, |
|
"logits/chosen": -3.011366844177246, |
|
"logits/chosen_prompt": -2.6553094387054443, |
|
"logits/rejected": -1.8144845962524414, |
|
"logits/rejected_prompt": -2.649622678756714, |
|
"logps/chosen": -1.9069626331329346, |
|
"logps/chosen_both": -1.8888943195343018, |
|
"logps/chosen_prompt": -0.7433997988700867, |
|
"logps/rejected": -8.008193969726562, |
|
"logps/rejected_both": -7.898676872253418, |
|
"logps/rejected_prompt": -0.9908720254898071, |
|
"loss": 1.9971, |
|
"nll_loss": 1.8877136707305908, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.762785017490387, |
|
"rewards/margins": 2.440492630004883, |
|
"rewards/rejected": -3.203277587890625, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.6320000000000001, |
|
"grad_norm": 2.9635896306517915, |
|
"learning_rate": 2.155524273288405e-05, |
|
"log_odds_chosen": 4.7696404457092285, |
|
"log_odds_ratio": -0.2104126662015915, |
|
"logits/chosen": -2.9527573585510254, |
|
"logits/chosen_prompt": -2.6921048164367676, |
|
"logits/rejected": -2.0738635063171387, |
|
"logits/rejected_prompt": -2.67110538482666, |
|
"logps/chosen": -1.996506690979004, |
|
"logps/chosen_both": -1.9748737812042236, |
|
"logps/chosen_prompt": -0.7325566411018372, |
|
"logps/rejected": -6.6651411056518555, |
|
"logps/rejected_both": -6.573362827301025, |
|
"logps/rejected_prompt": -0.9392368197441101, |
|
"loss": 1.9348, |
|
"nll_loss": 1.9730939865112305, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7986027002334595, |
|
"rewards/margins": 1.8674538135528564, |
|
"rewards/rejected": -2.6660561561584473, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.6400000000000001, |
|
"grad_norm": 0.19964912068774665, |
|
"learning_rate": 2.1347924285939714e-05, |
|
"log_odds_chosen": 6.8775224685668945, |
|
"log_odds_ratio": -0.008257986977696419, |
|
"logits/chosen": -2.917914867401123, |
|
"logits/chosen_prompt": -2.6802151203155518, |
|
"logits/rejected": -1.6495475769042969, |
|
"logits/rejected_prompt": -2.661830186843872, |
|
"logps/chosen": -2.0301578044891357, |
|
"logps/chosen_both": -2.007798910140991, |
|
"logps/chosen_prompt": -0.8403179049491882, |
|
"logps/rejected": -8.763223648071289, |
|
"logps/rejected_both": -8.611532211303711, |
|
"logps/rejected_prompt": -1.09980046749115, |
|
"loss": 2.1549, |
|
"nll_loss": 2.006844997406006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8120630979537964, |
|
"rewards/margins": 2.6932263374328613, |
|
"rewards/rejected": -3.5052895545959473, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.6480000000000001, |
|
"grad_norm": 0.17460562158440138, |
|
"learning_rate": 2.114086215424322e-05, |
|
"log_odds_chosen": 6.110722064971924, |
|
"log_odds_ratio": -0.023483365774154663, |
|
"logits/chosen": -2.909790515899658, |
|
"logits/chosen_prompt": -2.6986935138702393, |
|
"logits/rejected": -1.758716344833374, |
|
"logits/rejected_prompt": -2.6658692359924316, |
|
"logps/chosen": -2.3056933879852295, |
|
"logps/chosen_both": -2.285371780395508, |
|
"logps/chosen_prompt": -0.833857536315918, |
|
"logps/rejected": -8.272215843200684, |
|
"logps/rejected_both": -8.151971817016602, |
|
"logps/rejected_prompt": -1.0774855613708496, |
|
"loss": 2.0557, |
|
"nll_loss": 2.284456968307495, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9222772717475891, |
|
"rewards/margins": 2.3866093158721924, |
|
"rewards/rejected": -3.308886766433716, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.6560000000000001, |
|
"grad_norm": 0.19823340696579927, |
|
"learning_rate": 2.0934070870127912e-05, |
|
"log_odds_chosen": 5.7514495849609375, |
|
"log_odds_ratio": -0.13919630646705627, |
|
"logits/chosen": -2.9313971996307373, |
|
"logits/chosen_prompt": -2.690089225769043, |
|
"logits/rejected": -1.7628095149993896, |
|
"logits/rejected_prompt": -2.6867289543151855, |
|
"logps/chosen": -2.0054726600646973, |
|
"logps/chosen_both": -1.9867470264434814, |
|
"logps/chosen_prompt": -0.730907678604126, |
|
"logps/rejected": -7.626162528991699, |
|
"logps/rejected_both": -7.516133785247803, |
|
"logps/rejected_prompt": -0.9458767771720886, |
|
"loss": 2.0384, |
|
"nll_loss": 1.9859052896499634, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8021891713142395, |
|
"rewards/margins": 2.2482759952545166, |
|
"rewards/rejected": -3.0504648685455322, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.6640000000000001, |
|
"grad_norm": 0.19392027541652682, |
|
"learning_rate": 2.0727564946918087e-05, |
|
"log_odds_chosen": 7.237205505371094, |
|
"log_odds_ratio": -0.001250033383257687, |
|
"logits/chosen": -2.934305429458618, |
|
"logits/chosen_prompt": -2.7029290199279785, |
|
"logits/rejected": -1.5330889225006104, |
|
"logits/rejected_prompt": -2.6817727088928223, |
|
"logps/chosen": -2.0364651679992676, |
|
"logps/chosen_both": -2.015903949737549, |
|
"logps/chosen_prompt": -0.8590591549873352, |
|
"logps/rejected": -9.122060775756836, |
|
"logps/rejected_both": -8.987492561340332, |
|
"logps/rejected_prompt": -1.0628540515899658, |
|
"loss": 1.9994, |
|
"nll_loss": 2.0151782035827637, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8145861625671387, |
|
"rewards/margins": 2.834237575531006, |
|
"rewards/rejected": -3.6488234996795654, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.6720000000000002, |
|
"grad_norm": 0.2855392155807927, |
|
"learning_rate": 2.0521358877910444e-05, |
|
"log_odds_chosen": 6.342986583709717, |
|
"log_odds_ratio": -0.07219625264406204, |
|
"logits/chosen": -2.9752235412597656, |
|
"logits/chosen_prompt": -2.7005088329315186, |
|
"logits/rejected": -1.7442362308502197, |
|
"logits/rejected_prompt": -2.693645477294922, |
|
"logps/chosen": -1.990447759628296, |
|
"logps/chosen_both": -1.970177412033081, |
|
"logps/chosen_prompt": -0.7856583595275879, |
|
"logps/rejected": -8.199989318847656, |
|
"logps/rejected_both": -8.072303771972656, |
|
"logps/rejected_prompt": -0.9411813020706177, |
|
"loss": 2.021, |
|
"nll_loss": 1.9698638916015625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7961790561676025, |
|
"rewards/margins": 2.483816623687744, |
|
"rewards/rejected": -3.2799954414367676, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.6800000000000002, |
|
"grad_norm": 0.36170871833517027, |
|
"learning_rate": 2.031546713535688e-05, |
|
"log_odds_chosen": 5.634890079498291, |
|
"log_odds_ratio": -0.1395900696516037, |
|
"logits/chosen": -2.93391752243042, |
|
"logits/chosen_prompt": -2.718055248260498, |
|
"logits/rejected": -1.7808215618133545, |
|
"logits/rejected_prompt": -2.6867878437042236, |
|
"logps/chosen": -2.3721437454223633, |
|
"logps/chosen_both": -2.3435354232788086, |
|
"logps/chosen_prompt": -0.7950377464294434, |
|
"logps/rejected": -7.916224479675293, |
|
"logps/rejected_both": -7.782776832580566, |
|
"logps/rejected_prompt": -0.9661157727241516, |
|
"loss": 2.1271, |
|
"nll_loss": 2.341766595840454, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9488574862480164, |
|
"rewards/margins": 2.21763277053833, |
|
"rewards/rejected": -3.166490077972412, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.688, |
|
"grad_norm": 0.3052641697772741, |
|
"learning_rate": 2.01099041694488e-05, |
|
"log_odds_chosen": 5.173205375671387, |
|
"log_odds_ratio": -0.2093629539012909, |
|
"logits/chosen": -2.913505792617798, |
|
"logits/chosen_prompt": -2.695497512817383, |
|
"logits/rejected": -1.9728949069976807, |
|
"logits/rejected_prompt": -2.681952476501465, |
|
"logps/chosen": -1.9676679372787476, |
|
"logps/chosen_both": -1.9531806707382202, |
|
"logps/chosen_prompt": -0.8127241134643555, |
|
"logps/rejected": -7.031458377838135, |
|
"logps/rejected_both": -6.950935363769531, |
|
"logps/rejected_prompt": -0.9248498678207397, |
|
"loss": 2.0659, |
|
"nll_loss": 1.9526466131210327, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.787067174911499, |
|
"rewards/margins": 2.0255160331726074, |
|
"rewards/rejected": -2.8125832080841064, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.696, |
|
"grad_norm": 0.1918548604852694, |
|
"learning_rate": 1.9904684407302883e-05, |
|
"log_odds_chosen": 7.995016574859619, |
|
"log_odds_ratio": -0.00040107182576321065, |
|
"logits/chosen": -3.0051703453063965, |
|
"logits/chosen_prompt": -2.7128148078918457, |
|
"logits/rejected": -1.3667514324188232, |
|
"logits/rejected_prompt": -2.695828676223755, |
|
"logps/chosen": -1.9211137294769287, |
|
"logps/chosen_both": -1.9036529064178467, |
|
"logps/chosen_prompt": -0.8414414525032043, |
|
"logps/rejected": -9.738038063049316, |
|
"logps/rejected_both": -9.58409309387207, |
|
"logps/rejected_prompt": -0.957872748374939, |
|
"loss": 1.9882, |
|
"nll_loss": 1.9027389287948608, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7684455513954163, |
|
"rewards/margins": 3.126769781112671, |
|
"rewards/rejected": -3.8952155113220215, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.704, |
|
"grad_norm": 25.341642829209718, |
|
"learning_rate": 1.969982225194864e-05, |
|
"log_odds_chosen": 6.443746089935303, |
|
"log_odds_ratio": -0.13866354525089264, |
|
"logits/chosen": -2.8991589546203613, |
|
"logits/chosen_prompt": -2.704436779022217, |
|
"logits/rejected": -1.6840307712554932, |
|
"logits/rejected_prompt": -2.696018695831299, |
|
"logps/chosen": -1.971212387084961, |
|
"logps/chosen_both": -1.9563363790512085, |
|
"logps/chosen_prompt": -0.7664562463760376, |
|
"logps/rejected": -8.291219711303711, |
|
"logps/rejected_both": -8.195323944091797, |
|
"logps/rejected_prompt": -0.8870849609375, |
|
"loss": 2.0512, |
|
"nll_loss": 1.955370545387268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7884851098060608, |
|
"rewards/margins": 2.5280027389526367, |
|
"rewards/rejected": -3.3164875507354736, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.712, |
|
"grad_norm": 0.20382071740750204, |
|
"learning_rate": 1.9495332081317464e-05, |
|
"log_odds_chosen": 6.890301704406738, |
|
"log_odds_ratio": -0.009469824843108654, |
|
"logits/chosen": -2.8794448375701904, |
|
"logits/chosen_prompt": -2.694141387939453, |
|
"logits/rejected": -1.638772964477539, |
|
"logits/rejected_prompt": -2.6982343196868896, |
|
"logps/chosen": -2.006687641143799, |
|
"logps/chosen_both": -1.9925482273101807, |
|
"logps/chosen_prompt": -0.8075912594795227, |
|
"logps/rejected": -8.752016067504883, |
|
"logps/rejected_both": -8.659661293029785, |
|
"logps/rejected_prompt": -1.0454128980636597, |
|
"loss": 1.9488, |
|
"nll_loss": 1.9920895099639893, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8026750683784485, |
|
"rewards/margins": 2.698131561279297, |
|
"rewards/rejected": -3.5008063316345215, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 3.232652124328266, |
|
"learning_rate": 1.9291228247233605e-05, |
|
"log_odds_chosen": 6.535033226013184, |
|
"log_odds_ratio": -0.0724484771490097, |
|
"logits/chosen": -2.8941891193389893, |
|
"logits/chosen_prompt": -2.70381498336792, |
|
"logits/rejected": -1.799768090248108, |
|
"logits/rejected_prompt": -2.6814205646514893, |
|
"logps/chosen": -1.9803783893585205, |
|
"logps/chosen_both": -1.9626888036727905, |
|
"logps/chosen_prompt": -0.8645817041397095, |
|
"logps/rejected": -8.38414192199707, |
|
"logps/rejected_both": -8.267631530761719, |
|
"logps/rejected_prompt": -0.9822869300842285, |
|
"loss": 1.9512, |
|
"nll_loss": 1.9625753164291382, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7921513319015503, |
|
"rewards/margins": 2.5615053176879883, |
|
"rewards/rejected": -3.353656768798828, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.728, |
|
"grad_norm": 0.5121046736628673, |
|
"learning_rate": 1.908752507440689e-05, |
|
"log_odds_chosen": 6.229867458343506, |
|
"log_odds_ratio": -0.0752544105052948, |
|
"logits/chosen": -2.935990571975708, |
|
"logits/chosen_prompt": -2.68332576751709, |
|
"logits/rejected": -1.7542794942855835, |
|
"logits/rejected_prompt": -2.6715810298919678, |
|
"logps/chosen": -2.238250732421875, |
|
"logps/chosen_both": -2.217163562774658, |
|
"logps/chosen_prompt": -0.7275692820549011, |
|
"logps/rejected": -8.351387023925781, |
|
"logps/rejected_both": -8.241617202758789, |
|
"logps/rejected_prompt": -0.9444383382797241, |
|
"loss": 2.1639, |
|
"nll_loss": 2.2166025638580322, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8953002691268921, |
|
"rewards/margins": 2.445254325866699, |
|
"rewards/rejected": -3.340554714202881, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.736, |
|
"grad_norm": 0.18380447787382737, |
|
"learning_rate": 1.888423685942732e-05, |
|
"log_odds_chosen": 7.403123378753662, |
|
"log_odds_ratio": -0.0035772870760411024, |
|
"logits/chosen": -2.9258389472961426, |
|
"logits/chosen_prompt": -2.7035067081451416, |
|
"logits/rejected": -1.6778090000152588, |
|
"logits/rejected_prompt": -2.682382106781006, |
|
"logps/chosen": -1.8578765392303467, |
|
"logps/chosen_both": -1.8427069187164307, |
|
"logps/chosen_prompt": -0.832676887512207, |
|
"logps/rejected": -9.08339786529541, |
|
"logps/rejected_both": -8.959403038024902, |
|
"logps/rejected_prompt": -1.1029479503631592, |
|
"loss": 1.9654, |
|
"nll_loss": 1.8422781229019165, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7431506514549255, |
|
"rewards/margins": 2.8902084827423096, |
|
"rewards/rejected": -3.63335919380188, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.744, |
|
"grad_norm": 15.42646908452697, |
|
"learning_rate": 1.868137786976177e-05, |
|
"log_odds_chosen": 6.83737325668335, |
|
"log_odds_ratio": -0.09123753756284714, |
|
"logits/chosen": -2.9604616165161133, |
|
"logits/chosen_prompt": -2.6771702766418457, |
|
"logits/rejected": -1.7559928894042969, |
|
"logits/rejected_prompt": -2.6906254291534424, |
|
"logps/chosen": -1.9559208154678345, |
|
"logps/chosen_both": -1.9405914545059204, |
|
"logps/chosen_prompt": -0.7949713468551636, |
|
"logps/rejected": -8.641664505004883, |
|
"logps/rejected_both": -8.521966934204102, |
|
"logps/rejected_prompt": -0.9677802324295044, |
|
"loss": 2.0939, |
|
"nll_loss": 1.938951849937439, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7823683619499207, |
|
"rewards/margins": 2.6742970943450928, |
|
"rewards/rejected": -3.4566657543182373, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.752, |
|
"grad_norm": 12.062069037613009, |
|
"learning_rate": 1.8478962342752583e-05, |
|
"log_odds_chosen": 6.820882320404053, |
|
"log_odds_ratio": -0.07564956694841385, |
|
"logits/chosen": -2.904177665710449, |
|
"logits/chosen_prompt": -2.666506052017212, |
|
"logits/rejected": -1.7927961349487305, |
|
"logits/rejected_prompt": -2.67189884185791, |
|
"logps/chosen": -2.0425262451171875, |
|
"logps/chosen_both": -2.0270590782165527, |
|
"logps/chosen_prompt": -0.8014975786209106, |
|
"logps/rejected": -8.73670768737793, |
|
"logps/rejected_both": -8.633912086486816, |
|
"logps/rejected_prompt": -1.0191423892974854, |
|
"loss": 2.0463, |
|
"nll_loss": 2.0263657569885254, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8170105218887329, |
|
"rewards/margins": 2.6776726245880127, |
|
"rewards/rejected": -3.4946835041046143, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 1.1920813557914467, |
|
"learning_rate": 1.827700448461836e-05, |
|
"log_odds_chosen": 7.279504299163818, |
|
"log_odds_ratio": -0.13858437538146973, |
|
"logits/chosen": -3.018719434738159, |
|
"logits/chosen_prompt": -2.687682628631592, |
|
"logits/rejected": -1.6826099157333374, |
|
"logits/rejected_prompt": -2.678703784942627, |
|
"logps/chosen": -1.860093355178833, |
|
"logps/chosen_both": -1.8447208404541016, |
|
"logps/chosen_prompt": -0.8991209268569946, |
|
"logps/rejected": -9.011571884155273, |
|
"logps/rejected_both": -8.870678901672363, |
|
"logps/rejected_prompt": -1.096939206123352, |
|
"loss": 2.012, |
|
"nll_loss": 1.84355890750885, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7440372705459595, |
|
"rewards/margins": 2.8605916500091553, |
|
"rewards/rejected": -3.6046290397644043, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.768, |
|
"grad_norm": 0.19530589950798477, |
|
"learning_rate": 1.807551846945694e-05, |
|
"log_odds_chosen": 8.2916898727417, |
|
"log_odds_ratio": -0.06947987526655197, |
|
"logits/chosen": -2.939237117767334, |
|
"logits/chosen_prompt": -2.6988303661346436, |
|
"logits/rejected": -1.6200687885284424, |
|
"logits/rejected_prompt": -2.68789005279541, |
|
"logps/chosen": -1.9331436157226562, |
|
"logps/chosen_both": -1.916733741760254, |
|
"logps/chosen_prompt": -0.7277871370315552, |
|
"logps/rejected": -10.084833145141602, |
|
"logps/rejected_both": -9.953168869018555, |
|
"logps/rejected_prompt": -1.032865285873413, |
|
"loss": 1.9735, |
|
"nll_loss": 1.916029691696167, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7732575535774231, |
|
"rewards/margins": 3.260676145553589, |
|
"rewards/rejected": -4.033933639526367, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.776, |
|
"grad_norm": 15.17903488212651, |
|
"learning_rate": 1.7874518438250597e-05, |
|
"log_odds_chosen": 9.437470436096191, |
|
"log_odds_ratio": -0.00649250065907836, |
|
"logits/chosen": -2.9586923122406006, |
|
"logits/chosen_prompt": -2.700380802154541, |
|
"logits/rejected": -1.6204473972320557, |
|
"logits/rejected_prompt": -2.668332576751709, |
|
"logps/chosen": -2.0388143062591553, |
|
"logps/chosen_both": -2.017122268676758, |
|
"logps/chosen_prompt": -0.7435789108276367, |
|
"logps/rejected": -11.334449768066406, |
|
"logps/rejected_both": -11.154394149780273, |
|
"logps/rejected_prompt": -0.9411799311637878, |
|
"loss": 2.1772, |
|
"nll_loss": 2.0165975093841553, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8155257105827332, |
|
"rewards/margins": 3.718254566192627, |
|
"rewards/rejected": -4.533780097961426, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.784, |
|
"grad_norm": 0.28398933589113434, |
|
"learning_rate": 1.767401849787357e-05, |
|
"log_odds_chosen": 6.384799957275391, |
|
"log_odds_ratio": -0.07637131214141846, |
|
"logits/chosen": -2.9650635719299316, |
|
"logits/chosen_prompt": -2.6936004161834717, |
|
"logits/rejected": -1.797628402709961, |
|
"logits/rejected_prompt": -2.690913438796997, |
|
"logps/chosen": -1.8709478378295898, |
|
"logps/chosen_both": -1.856300950050354, |
|
"logps/chosen_prompt": -0.8806565403938293, |
|
"logps/rejected": -8.102632522583008, |
|
"logps/rejected_both": -7.991517543792725, |
|
"logps/rejected_prompt": -1.0237706899642944, |
|
"loss": 2.1791, |
|
"nll_loss": 1.8553836345672607, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.748379111289978, |
|
"rewards/margins": 2.492673635482788, |
|
"rewards/rejected": -3.2410526275634766, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.792, |
|
"grad_norm": 7.344829678329039, |
|
"learning_rate": 1.747403272010199e-05, |
|
"log_odds_chosen": 4.503691673278809, |
|
"log_odds_ratio": -0.44330325722694397, |
|
"logits/chosen": -2.9304556846618652, |
|
"logits/chosen_prompt": -2.7112066745758057, |
|
"logits/rejected": -2.020601749420166, |
|
"logits/rejected_prompt": -2.6991848945617676, |
|
"logps/chosen": -2.2137069702148438, |
|
"logps/chosen_both": -2.192910671234131, |
|
"logps/chosen_prompt": -0.7757335305213928, |
|
"logps/rejected": -6.606595039367676, |
|
"logps/rejected_both": -6.522683143615723, |
|
"logps/rejected_prompt": -1.0225099325180054, |
|
"loss": 2.0432, |
|
"nll_loss": 2.1926403045654297, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.885482668876648, |
|
"rewards/margins": 1.7571556568145752, |
|
"rewards/rejected": -2.6426382064819336, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.2830736721750178, |
|
"learning_rate": 1.7274575140626318e-05, |
|
"log_odds_chosen": 6.729086399078369, |
|
"log_odds_ratio": -0.002848730655387044, |
|
"logits/chosen": -2.9603111743927, |
|
"logits/chosen_prompt": -2.712522268295288, |
|
"logits/rejected": -1.471806287765503, |
|
"logits/rejected_prompt": -2.711698055267334, |
|
"logps/chosen": -1.9502222537994385, |
|
"logps/chosen_both": -1.932050347328186, |
|
"logps/chosen_prompt": -0.7791944146156311, |
|
"logps/rejected": -8.519399642944336, |
|
"logps/rejected_both": -8.40225887298584, |
|
"logps/rejected_prompt": -0.9917134046554565, |
|
"loss": 2.004, |
|
"nll_loss": 1.9314903020858765, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7800888419151306, |
|
"rewards/margins": 2.627671241760254, |
|
"rewards/rejected": -3.4077601432800293, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.808, |
|
"grad_norm": 1.3701495350067383, |
|
"learning_rate": 1.7075659758066208e-05, |
|
"log_odds_chosen": 4.735475063323975, |
|
"log_odds_ratio": -0.14837773144245148, |
|
"logits/chosen": -2.9037442207336426, |
|
"logits/chosen_prompt": -2.6904830932617188, |
|
"logits/rejected": -1.9993311166763306, |
|
"logits/rejected_prompt": -2.672048807144165, |
|
"logps/chosen": -2.0128910541534424, |
|
"logps/chosen_both": -1.9941341876983643, |
|
"logps/chosen_prompt": -0.7718429565429688, |
|
"logps/rejected": -6.631512641906738, |
|
"logps/rejected_both": -6.536102294921875, |
|
"logps/rejected_prompt": -0.9579516649246216, |
|
"loss": 2.0311, |
|
"nll_loss": 1.9931504726409912, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.805156409740448, |
|
"rewards/margins": 1.8474489450454712, |
|
"rewards/rejected": -2.6526052951812744, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.8159999999999998, |
|
"grad_norm": 21.366222606488684, |
|
"learning_rate": 1.6877300532988094e-05, |
|
"log_odds_chosen": 7.610182762145996, |
|
"log_odds_ratio": -0.0006168467225506902, |
|
"logits/chosen": -2.9680118560791016, |
|
"logits/chosen_prompt": -2.664792776107788, |
|
"logits/rejected": -1.2061169147491455, |
|
"logits/rejected_prompt": -2.642937183380127, |
|
"logps/chosen": -2.109647512435913, |
|
"logps/chosen_both": -2.0934646129608154, |
|
"logps/chosen_prompt": -0.9366092681884766, |
|
"logps/rejected": -9.573705673217773, |
|
"logps/rejected_both": -9.448970794677734, |
|
"logps/rejected_prompt": -1.088648796081543, |
|
"loss": 2.0712, |
|
"nll_loss": 2.0929782390594482, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.843859076499939, |
|
"rewards/margins": 2.9856228828430176, |
|
"rewards/rejected": -3.829482316970825, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.8239999999999998, |
|
"grad_norm": 16.630798038144235, |
|
"learning_rate": 1.6679511386925337e-05, |
|
"log_odds_chosen": 7.555551052093506, |
|
"log_odds_ratio": -0.0009092552354559302, |
|
"logits/chosen": -2.9446640014648438, |
|
"logits/chosen_prompt": -2.703678607940674, |
|
"logits/rejected": -1.251961588859558, |
|
"logits/rejected_prompt": -2.686135768890381, |
|
"logps/chosen": -1.9308589696884155, |
|
"logps/chosen_both": -1.9166322946548462, |
|
"logps/chosen_prompt": -0.7264224290847778, |
|
"logps/rejected": -9.319347381591797, |
|
"logps/rejected_both": -9.213998794555664, |
|
"logps/rejected_prompt": -0.9491874575614929, |
|
"loss": 1.9893, |
|
"nll_loss": 1.915757179260254, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7723435759544373, |
|
"rewards/margins": 2.955395221710205, |
|
"rewards/rejected": -3.727738857269287, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.8319999999999999, |
|
"grad_norm": 12.241247239412013, |
|
"learning_rate": 1.648230620140121e-05, |
|
"log_odds_chosen": 5.702427864074707, |
|
"log_odds_ratio": -0.08441531658172607, |
|
"logits/chosen": -2.9145102500915527, |
|
"logits/chosen_prompt": -2.7137434482574463, |
|
"logits/rejected": -1.6203930377960205, |
|
"logits/rejected_prompt": -2.7078521251678467, |
|
"logps/chosen": -2.2361724376678467, |
|
"logps/chosen_both": -2.2188587188720703, |
|
"logps/chosen_prompt": -0.8718380928039551, |
|
"logps/rejected": -7.825617790222168, |
|
"logps/rejected_both": -7.731575012207031, |
|
"logps/rejected_prompt": -0.9629694223403931, |
|
"loss": 2.0784, |
|
"nll_loss": 2.2178969383239746, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8944689631462097, |
|
"rewards/margins": 2.2357778549194336, |
|
"rewards/rejected": -3.130246877670288, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.8399999999999999, |
|
"grad_norm": 0.2611980916177983, |
|
"learning_rate": 1.6285698816954624e-05, |
|
"log_odds_chosen": 5.886144638061523, |
|
"log_odds_ratio": -0.14016158878803253, |
|
"logits/chosen": -2.961277484893799, |
|
"logits/chosen_prompt": -2.7103641033172607, |
|
"logits/rejected": -1.6664111614227295, |
|
"logits/rejected_prompt": -2.7068681716918945, |
|
"logps/chosen": -1.891758918762207, |
|
"logps/chosen_both": -1.877873182296753, |
|
"logps/chosen_prompt": -0.8406246304512024, |
|
"logps/rejected": -7.652543067932129, |
|
"logps/rejected_both": -7.56333065032959, |
|
"logps/rejected_prompt": -0.9318545460700989, |
|
"loss": 1.9727, |
|
"nll_loss": 1.877637267112732, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7567036151885986, |
|
"rewards/margins": 2.3043136596679688, |
|
"rewards/rejected": -3.0610175132751465, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.8479999999999999, |
|
"grad_norm": 0.17316872141044676, |
|
"learning_rate": 1.6089703032168733e-05, |
|
"log_odds_chosen": 6.335439205169678, |
|
"log_odds_ratio": -0.007680490612983704, |
|
"logits/chosen": -2.9618372917175293, |
|
"logits/chosen_prompt": -2.6908061504364014, |
|
"logits/rejected": -1.7726625204086304, |
|
"logits/rejected_prompt": -2.684845447540283, |
|
"logps/chosen": -2.021721839904785, |
|
"logps/chosen_both": -2.0070888996124268, |
|
"logps/chosen_prompt": -0.8626869916915894, |
|
"logps/rejected": -8.212113380432129, |
|
"logps/rejected_both": -8.121031761169434, |
|
"logps/rejected_prompt": -1.1338939666748047, |
|
"loss": 2.086, |
|
"nll_loss": 2.0067009925842285, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.808688759803772, |
|
"rewards/margins": 2.4761569499969482, |
|
"rewards/rejected": -3.2848453521728516, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.8559999999999999, |
|
"grad_norm": 0.21046741293754637, |
|
"learning_rate": 1.5894332602702545e-05, |
|
"log_odds_chosen": 5.3062238693237305, |
|
"log_odds_ratio": -0.09238220006227493, |
|
"logits/chosen": -2.863762378692627, |
|
"logits/chosen_prompt": -2.698549747467041, |
|
"logits/rejected": -1.7465136051177979, |
|
"logits/rejected_prompt": -2.68521785736084, |
|
"logps/chosen": -2.2504518032073975, |
|
"logps/chosen_both": -2.2304165363311768, |
|
"logps/chosen_prompt": -0.8663703203201294, |
|
"logps/rejected": -7.458860874176025, |
|
"logps/rejected_both": -7.366589546203613, |
|
"logps/rejected_prompt": -1.0120290517807007, |
|
"loss": 2.0757, |
|
"nll_loss": 2.2292349338531494, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9001806974411011, |
|
"rewards/margins": 2.0833640098571777, |
|
"rewards/rejected": -2.9835448265075684, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.8639999999999999, |
|
"grad_norm": 0.2325223892090008, |
|
"learning_rate": 1.5699601240325474e-05, |
|
"log_odds_chosen": 5.675802230834961, |
|
"log_odds_ratio": -0.14025500416755676, |
|
"logits/chosen": -2.9541871547698975, |
|
"logits/chosen_prompt": -2.739253520965576, |
|
"logits/rejected": -1.7137792110443115, |
|
"logits/rejected_prompt": -2.7213757038116455, |
|
"logps/chosen": -2.011998176574707, |
|
"logps/chosen_both": -1.9911746978759766, |
|
"logps/chosen_prompt": -0.7685104012489319, |
|
"logps/rejected": -7.561570167541504, |
|
"logps/rejected_both": -7.440642356872559, |
|
"logps/rejected_prompt": -0.9734441041946411, |
|
"loss": 2.1005, |
|
"nll_loss": 1.9904701709747314, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8047992587089539, |
|
"rewards/margins": 2.2198290824890137, |
|
"rewards/rejected": -3.024627923965454, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.8719999999999999, |
|
"grad_norm": 0.22047561828057208, |
|
"learning_rate": 1.5505522611954975e-05, |
|
"log_odds_chosen": 5.360434532165527, |
|
"log_odds_ratio": -0.015295952558517456, |
|
"logits/chosen": -2.899050235748291, |
|
"logits/chosen_prompt": -2.718276262283325, |
|
"logits/rejected": -2.08345365524292, |
|
"logits/rejected_prompt": -2.6998016834259033, |
|
"logps/chosen": -1.8844950199127197, |
|
"logps/chosen_both": -1.8703863620758057, |
|
"logps/chosen_prompt": -0.851974606513977, |
|
"logps/rejected": -7.060413360595703, |
|
"logps/rejected_both": -6.973315238952637, |
|
"logps/rejected_prompt": -1.0805187225341797, |
|
"loss": 2.1013, |
|
"nll_loss": 1.8690898418426514, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7537980675697327, |
|
"rewards/margins": 2.0703673362731934, |
|
"rewards/rejected": -2.8241655826568604, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.1925590095899927, |
|
"learning_rate": 1.5312110338697426e-05, |
|
"log_odds_chosen": 4.792149066925049, |
|
"log_odds_ratio": -0.1287117898464203, |
|
"logits/chosen": -2.9038636684417725, |
|
"logits/chosen_prompt": -2.692437171936035, |
|
"logits/rejected": -1.9894816875457764, |
|
"logits/rejected_prompt": -2.6797823905944824, |
|
"logps/chosen": -1.9725837707519531, |
|
"logps/chosen_both": -1.9566154479980469, |
|
"logps/chosen_prompt": -0.7425985932350159, |
|
"logps/rejected": -6.638279914855957, |
|
"logps/rejected_both": -6.564992427825928, |
|
"logps/rejected_prompt": -0.9972286224365234, |
|
"loss": 1.9786, |
|
"nll_loss": 1.9555227756500244, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7890334725379944, |
|
"rewards/margins": 1.8662786483764648, |
|
"rewards/rejected": -2.6553120613098145, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.888, |
|
"grad_norm": 0.2631097802741203, |
|
"learning_rate": 1.5119377994892094e-05, |
|
"log_odds_chosen": 7.000193119049072, |
|
"log_odds_ratio": -0.0028563719242811203, |
|
"logits/chosen": -3.0186381340026855, |
|
"logits/chosen_prompt": -2.723498821258545, |
|
"logits/rejected": -1.5227829217910767, |
|
"logits/rejected_prompt": -2.7204127311706543, |
|
"logps/chosen": -1.8698396682739258, |
|
"logps/chosen_both": -1.850454330444336, |
|
"logps/chosen_prompt": -0.7684019207954407, |
|
"logps/rejected": -8.695045471191406, |
|
"logps/rejected_both": -8.554825782775879, |
|
"logps/rejected_prompt": -1.0279042720794678, |
|
"loss": 2.0014, |
|
"nll_loss": 1.8499305248260498, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7479358315467834, |
|
"rewards/margins": 2.7300820350646973, |
|
"rewards/rejected": -3.478017807006836, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.896, |
|
"grad_norm": 0.20426857310467877, |
|
"learning_rate": 1.4927339107158437e-05, |
|
"log_odds_chosen": 8.02978515625, |
|
"log_odds_ratio": -0.0003904960467480123, |
|
"logits/chosen": -2.951490879058838, |
|
"logits/chosen_prompt": -2.708991289138794, |
|
"logits/rejected": -1.2117061614990234, |
|
"logits/rejected_prompt": -2.6999001502990723, |
|
"logps/chosen": -1.9645278453826904, |
|
"logps/chosen_both": -1.9457321166992188, |
|
"logps/chosen_prompt": -0.761443018913269, |
|
"logps/rejected": -9.840217590332031, |
|
"logps/rejected_both": -9.700372695922852, |
|
"logps/rejected_prompt": -0.9850748181343079, |
|
"loss": 1.9906, |
|
"nll_loss": 1.9449169635772705, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7858111262321472, |
|
"rewards/margins": 3.1502761840820312, |
|
"rewards/rejected": -3.936087131500244, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.904, |
|
"grad_norm": 8.58950626485984, |
|
"learning_rate": 1.4736007153446801e-05, |
|
"log_odds_chosen": 8.620465278625488, |
|
"log_odds_ratio": -0.00021180181647650898, |
|
"logits/chosen": -2.903035879135132, |
|
"logits/chosen_prompt": -2.735071897506714, |
|
"logits/rejected": -1.012452483177185, |
|
"logits/rejected_prompt": -2.7112841606140137, |
|
"logps/chosen": -2.025474786758423, |
|
"logps/chosen_both": -2.007967472076416, |
|
"logps/chosen_prompt": -0.8391423225402832, |
|
"logps/rejected": -10.502188682556152, |
|
"logps/rejected_both": -10.356060028076172, |
|
"logps/rejected_prompt": -0.9537385106086731, |
|
"loss": 2.3368, |
|
"nll_loss": 2.0072412490844727, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8101899027824402, |
|
"rewards/margins": 3.39068603515625, |
|
"rewards/rejected": -4.200875282287598, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.912, |
|
"grad_norm": 0.19583726689690906, |
|
"learning_rate": 1.4545395562092468e-05, |
|
"log_odds_chosen": 6.079274654388428, |
|
"log_odds_ratio": -0.4031279981136322, |
|
"logits/chosen": -2.844682455062866, |
|
"logits/chosen_prompt": -2.8039345741271973, |
|
"logits/rejected": -1.3123562335968018, |
|
"logits/rejected_prompt": -2.7909157276153564, |
|
"logps/chosen": -3.1939139366149902, |
|
"logps/chosen_both": -3.164135217666626, |
|
"logps/chosen_prompt": -0.8311759233474731, |
|
"logps/rejected": -9.186834335327148, |
|
"logps/rejected_both": -9.063508033752441, |
|
"logps/rejected_prompt": -1.087949275970459, |
|
"loss": 2.1303, |
|
"nll_loss": 3.163341999053955, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.277565598487854, |
|
"rewards/margins": 2.3971686363220215, |
|
"rewards/rejected": -3.674734592437744, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 4.277812021967688, |
|
"learning_rate": 1.4355517710873184e-05, |
|
"log_odds_chosen": 6.059283256530762, |
|
"log_odds_ratio": -0.09234263747930527, |
|
"logits/chosen": -3.0424129962921143, |
|
"logits/chosen_prompt": -2.83634614944458, |
|
"logits/rejected": -1.6069847345352173, |
|
"logits/rejected_prompt": -2.818171739578247, |
|
"logps/chosen": -1.861696481704712, |
|
"logps/chosen_both": -1.8472903966903687, |
|
"logps/chosen_prompt": -0.783744752407074, |
|
"logps/rejected": -7.765946865081787, |
|
"logps/rejected_both": -7.661751747131348, |
|
"logps/rejected_prompt": -1.0380439758300781, |
|
"loss": 2.0007, |
|
"nll_loss": 1.846143126487732, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7446784973144531, |
|
"rewards/margins": 2.3617005348205566, |
|
"rewards/rejected": -3.1063787937164307, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.928, |
|
"grad_norm": 0.1934446201158471, |
|
"learning_rate": 1.4166386926070322e-05, |
|
"log_odds_chosen": 7.342792510986328, |
|
"log_odds_ratio": -0.005115572828799486, |
|
"logits/chosen": -2.9572060108184814, |
|
"logits/chosen_prompt": -2.7633354663848877, |
|
"logits/rejected": -1.3063112497329712, |
|
"logits/rejected_prompt": -2.7578670978546143, |
|
"logps/chosen": -1.9242970943450928, |
|
"logps/chosen_both": -1.9093306064605713, |
|
"logps/chosen_prompt": -0.8123539686203003, |
|
"logps/rejected": -9.096908569335938, |
|
"logps/rejected_both": -8.975044250488281, |
|
"logps/rejected_prompt": -1.0593974590301514, |
|
"loss": 1.9902, |
|
"nll_loss": 1.9083023071289062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7697189450263977, |
|
"rewards/margins": 2.869044780731201, |
|
"rewards/rejected": -3.638763904571533, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.936, |
|
"grad_norm": 0.216837041093156, |
|
"learning_rate": 1.397801648153354e-05, |
|
"log_odds_chosen": 6.378230094909668, |
|
"log_odds_ratio": -0.07421709597110748, |
|
"logits/chosen": -3.0056633949279785, |
|
"logits/chosen_prompt": -2.768573045730591, |
|
"logits/rejected": -1.5620958805084229, |
|
"logits/rejected_prompt": -2.7487571239471436, |
|
"logps/chosen": -1.9807904958724976, |
|
"logps/chosen_both": -1.9623302221298218, |
|
"logps/chosen_prompt": -0.8482378125190735, |
|
"logps/rejected": -8.202530860900879, |
|
"logps/rejected_both": -8.077143669128418, |
|
"logps/rejected_prompt": -1.0352851152420044, |
|
"loss": 1.9778, |
|
"nll_loss": 1.9611247777938843, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7923161387443542, |
|
"rewards/margins": 2.488696575164795, |
|
"rewards/rejected": -3.2810122966766357, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.944, |
|
"grad_norm": 0.21162368892876318, |
|
"learning_rate": 1.3790419597749199e-05, |
|
"log_odds_chosen": 5.369621753692627, |
|
"log_odds_ratio": -0.20802097022533417, |
|
"logits/chosen": -2.925058126449585, |
|
"logits/chosen_prompt": -2.727915048599243, |
|
"logits/rejected": -1.7108662128448486, |
|
"logits/rejected_prompt": -2.729671001434326, |
|
"logps/chosen": -2.030609607696533, |
|
"logps/chosen_both": -2.013143301010132, |
|
"logps/chosen_prompt": -0.7951982021331787, |
|
"logps/rejected": -7.307798862457275, |
|
"logps/rejected_both": -7.217469692230225, |
|
"logps/rejected_prompt": -0.9677292108535767, |
|
"loss": 2.0275, |
|
"nll_loss": 2.0122172832489014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8122437596321106, |
|
"rewards/margins": 2.1108758449554443, |
|
"rewards/rejected": -2.92311954498291, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.952, |
|
"grad_norm": 0.1882777054319625, |
|
"learning_rate": 1.3603609440912507e-05, |
|
"log_odds_chosen": 7.206502437591553, |
|
"log_odds_ratio": -0.06993956863880157, |
|
"logits/chosen": -2.9723217487335205, |
|
"logits/chosen_prompt": -2.7605624198913574, |
|
"logits/rejected": -1.3072056770324707, |
|
"logits/rejected_prompt": -2.7452828884124756, |
|
"logps/chosen": -2.0292842388153076, |
|
"logps/chosen_both": -2.0142998695373535, |
|
"logps/chosen_prompt": -0.8006251454353333, |
|
"logps/rejected": -9.103940963745117, |
|
"logps/rejected_both": -8.999374389648438, |
|
"logps/rejected_prompt": -0.9301830530166626, |
|
"loss": 1.9849, |
|
"nll_loss": 2.0136048793792725, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8117138147354126, |
|
"rewards/margins": 2.829862356185913, |
|
"rewards/rejected": -3.6415767669677734, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 6.854544334281628, |
|
"learning_rate": 1.3417599122003464e-05, |
|
"log_odds_chosen": 5.873773574829102, |
|
"log_odds_ratio": -0.09982452541589737, |
|
"logits/chosen": -2.8911209106445312, |
|
"logits/chosen_prompt": -2.751624584197998, |
|
"logits/rejected": -1.530667781829834, |
|
"logits/rejected_prompt": -2.731210947036743, |
|
"logps/chosen": -2.39859938621521, |
|
"logps/chosen_both": -2.3739638328552246, |
|
"logps/chosen_prompt": -0.818207859992981, |
|
"logps/rejected": -8.159021377563477, |
|
"logps/rejected_both": -8.031126976013184, |
|
"logps/rejected_prompt": -0.9556495547294617, |
|
"loss": 2.0669, |
|
"nll_loss": 2.3730950355529785, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9594398736953735, |
|
"rewards/margins": 2.304168701171875, |
|
"rewards/rejected": -3.263608455657959, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.968, |
|
"grad_norm": 0.20527915967987695, |
|
"learning_rate": 1.3232401695866687e-05, |
|
"log_odds_chosen": 6.752752780914307, |
|
"log_odds_ratio": -0.093865767121315, |
|
"logits/chosen": -3.0047717094421387, |
|
"logits/chosen_prompt": -2.7637996673583984, |
|
"logits/rejected": -1.454332947731018, |
|
"logits/rejected_prompt": -2.7430145740509033, |
|
"logps/chosen": -1.9008424282073975, |
|
"logps/chosen_both": -1.8828001022338867, |
|
"logps/chosen_prompt": -0.877086341381073, |
|
"logps/rejected": -8.502935409545898, |
|
"logps/rejected_both": -8.374523162841797, |
|
"logps/rejected_prompt": -1.0814439058303833, |
|
"loss": 2.0633, |
|
"nll_loss": 1.8823230266571045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7603369951248169, |
|
"rewards/margins": 2.6408374309539795, |
|
"rewards/rejected": -3.401175022125244, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.976, |
|
"grad_norm": 6.470506820654642, |
|
"learning_rate": 1.3048030160295196e-05, |
|
"log_odds_chosen": 6.849400520324707, |
|
"log_odds_ratio": -0.07237619161605835, |
|
"logits/chosen": -2.963409662246704, |
|
"logits/chosen_prompt": -2.758953094482422, |
|
"logits/rejected": -1.3645999431610107, |
|
"logits/rejected_prompt": -2.7408089637756348, |
|
"logps/chosen": -2.0132029056549072, |
|
"logps/chosen_both": -1.9941928386688232, |
|
"logps/chosen_prompt": -0.7748836874961853, |
|
"logps/rejected": -8.71554183959961, |
|
"logps/rejected_both": -8.596506118774414, |
|
"logps/rejected_prompt": -0.936238169670105, |
|
"loss": 1.9731, |
|
"nll_loss": 1.9934498071670532, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8052810430526733, |
|
"rewards/margins": 2.6809353828430176, |
|
"rewards/rejected": -3.4862167835235596, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.984, |
|
"grad_norm": 0.18875404411296617, |
|
"learning_rate": 1.2864497455118152e-05, |
|
"log_odds_chosen": 5.949180603027344, |
|
"log_odds_ratio": -0.20756885409355164, |
|
"logits/chosen": -2.90920352935791, |
|
"logits/chosen_prompt": -2.731333017349243, |
|
"logits/rejected": -1.5196672677993774, |
|
"logits/rejected_prompt": -2.7116055488586426, |
|
"logps/chosen": -2.0656113624572754, |
|
"logps/chosen_both": -2.0481104850769043, |
|
"logps/chosen_prompt": -0.7715897560119629, |
|
"logps/rejected": -7.913069725036621, |
|
"logps/rejected_both": -7.810868263244629, |
|
"logps/rejected_prompt": -1.0343679189682007, |
|
"loss": 2.0494, |
|
"nll_loss": 2.0465188026428223, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8262445330619812, |
|
"rewards/margins": 2.3389835357666016, |
|
"rewards/rejected": -3.1652283668518066, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.992, |
|
"grad_norm": 0.19965333207670072, |
|
"learning_rate": 1.2681816461292715e-05, |
|
"log_odds_chosen": 6.9041619300842285, |
|
"log_odds_ratio": -0.07076757401227951, |
|
"logits/chosen": -2.9241271018981934, |
|
"logits/chosen_prompt": -2.7164487838745117, |
|
"logits/rejected": -1.2974779605865479, |
|
"logits/rejected_prompt": -2.7119083404541016, |
|
"logps/chosen": -2.164299249649048, |
|
"logps/chosen_both": -2.1463229656219482, |
|
"logps/chosen_prompt": -0.8179939389228821, |
|
"logps/rejected": -8.948786735534668, |
|
"logps/rejected_both": -8.831026077270508, |
|
"logps/rejected_prompt": -1.014527678489685, |
|
"loss": 2.069, |
|
"nll_loss": 2.1452174186706543, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8657197952270508, |
|
"rewards/margins": 2.713794469833374, |
|
"rewards/rejected": -3.579514265060425, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 7.401684464890164, |
|
"learning_rate": 1.2500000000000006e-05, |
|
"log_odds_chosen": 7.955414772033691, |
|
"log_odds_ratio": -0.004814439453184605, |
|
"logits/chosen": -2.977412700653076, |
|
"logits/chosen_prompt": -2.712825298309326, |
|
"logits/rejected": -1.1496913433074951, |
|
"logits/rejected_prompt": -2.6892926692962646, |
|
"logps/chosen": -1.8996845483779907, |
|
"logps/chosen_both": -1.8827598094940186, |
|
"logps/chosen_prompt": -0.8927472233772278, |
|
"logps/rejected": -9.687314987182617, |
|
"logps/rejected_both": -9.540821075439453, |
|
"logps/rejected_prompt": -1.00954270362854, |
|
"loss": 2.0789, |
|
"nll_loss": 1.882759690284729, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7598739266395569, |
|
"rewards/margins": 3.1150519847869873, |
|
"rewards/rejected": -3.8749260902404785, |
|
"step": 2500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3750, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 3, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|