|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 2000, |
|
"global_step": 4168, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002399232245681382, |
|
"grad_norm": 17.795262033524956, |
|
"learning_rate": 1.199040767386091e-09, |
|
"logits/chosen": -0.8715742826461792, |
|
"logits/rejected": -0.8603207468986511, |
|
"logps/chosen": -159.96241760253906, |
|
"logps/rejected": -161.35955810546875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0023992322456813818, |
|
"grad_norm": 19.169242875132866, |
|
"learning_rate": 1.199040767386091e-08, |
|
"logits/chosen": -0.814410924911499, |
|
"logits/rejected": -0.9245802760124207, |
|
"logps/chosen": -385.7136535644531, |
|
"logps/rejected": -314.6463317871094, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": 0.00037840951699763536, |
|
"rewards/margins": 0.00037865538615733385, |
|
"rewards/rejected": -2.458723997733614e-07, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0047984644913627635, |
|
"grad_norm": 20.179782123714144, |
|
"learning_rate": 2.398081534772182e-08, |
|
"logits/chosen": -0.8099907636642456, |
|
"logits/rejected": -0.8517779111862183, |
|
"logps/chosen": -261.3640441894531, |
|
"logps/rejected": -229.53433227539062, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.00015402937424369156, |
|
"rewards/margins": 0.0002674986608326435, |
|
"rewards/rejected": -0.00042152791866101325, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007197696737044146, |
|
"grad_norm": 17.380603143891506, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -0.9012816548347473, |
|
"logits/rejected": -0.958112359046936, |
|
"logps/chosen": -270.1044006347656, |
|
"logps/rejected": -280.1490783691406, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0001717156992526725, |
|
"rewards/margins": -0.00020027835853397846, |
|
"rewards/rejected": 2.856254650396295e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009596928982725527, |
|
"grad_norm": 17.690501066822726, |
|
"learning_rate": 4.796163069544364e-08, |
|
"logits/chosen": -0.9090603590011597, |
|
"logits/rejected": -0.9678953289985657, |
|
"logps/chosen": -285.2132263183594, |
|
"logps/rejected": -269.2960205078125, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0008744841325096786, |
|
"rewards/margins": 4.5643791963811964e-05, |
|
"rewards/rejected": 0.0008288401877507567, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01199616122840691, |
|
"grad_norm": 20.444591908979433, |
|
"learning_rate": 5.995203836930455e-08, |
|
"logits/chosen": -0.8581441640853882, |
|
"logits/rejected": -0.8862231373786926, |
|
"logps/chosen": -291.10552978515625, |
|
"logps/rejected": -254.93826293945312, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00029464391991496086, |
|
"rewards/margins": -5.370187500375323e-05, |
|
"rewards/rejected": -0.00024094199761748314, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014395393474088292, |
|
"grad_norm": 19.943653269622523, |
|
"learning_rate": 7.194244604316546e-08, |
|
"logits/chosen": -0.9401885867118835, |
|
"logits/rejected": -0.8925131559371948, |
|
"logps/chosen": -298.0589599609375, |
|
"logps/rejected": -280.24847412109375, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.00024081622541416436, |
|
"rewards/margins": 9.3202615971677e-05, |
|
"rewards/rejected": 0.00014761353668291122, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.016794625719769675, |
|
"grad_norm": 17.639853930149055, |
|
"learning_rate": 8.393285371702638e-08, |
|
"logits/chosen": -0.7496763467788696, |
|
"logits/rejected": -0.7735220193862915, |
|
"logps/chosen": -301.75421142578125, |
|
"logps/rejected": -286.83038330078125, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.000381526886485517, |
|
"rewards/margins": 0.001184981781989336, |
|
"rewards/rejected": -0.0015665087848901749, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.019193857965451054, |
|
"grad_norm": 19.54840265620775, |
|
"learning_rate": 9.592326139088728e-08, |
|
"logits/chosen": -0.9379879236221313, |
|
"logits/rejected": -0.7719442844390869, |
|
"logps/chosen": -218.69320678710938, |
|
"logps/rejected": -270.59100341796875, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0003624963865149766, |
|
"rewards/margins": 0.0022385469637811184, |
|
"rewards/rejected": -0.002601043786853552, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.021593090211132437, |
|
"grad_norm": 19.506540618964472, |
|
"learning_rate": 1.0791366906474819e-07, |
|
"logits/chosen": -0.9408594965934753, |
|
"logits/rejected": -0.9835623502731323, |
|
"logps/chosen": -353.5335388183594, |
|
"logps/rejected": -307.4195861816406, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.000557263265363872, |
|
"rewards/margins": -0.00037886807695031166, |
|
"rewards/rejected": -0.00017839523206930608, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02399232245681382, |
|
"grad_norm": 19.967663842239915, |
|
"learning_rate": 1.199040767386091e-07, |
|
"logits/chosen": -0.7954875826835632, |
|
"logits/rejected": -0.7265406847000122, |
|
"logps/chosen": -281.5005798339844, |
|
"logps/rejected": -298.16107177734375, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0004761295276694, |
|
"rewards/margins": 0.0013876723824068904, |
|
"rewards/rejected": -0.0018638020846992731, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.026391554702495202, |
|
"grad_norm": 16.433432383471715, |
|
"learning_rate": 1.3189448441247004e-07, |
|
"logits/chosen": -0.8957487344741821, |
|
"logits/rejected": -0.9176713824272156, |
|
"logps/chosen": -249.4070281982422, |
|
"logps/rejected": -247.0310821533203, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.00018012721557170153, |
|
"rewards/margins": 0.0018838949035853148, |
|
"rewards/rejected": -0.002064022235572338, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.028790786948176585, |
|
"grad_norm": 18.071192120461006, |
|
"learning_rate": 1.4388489208633092e-07, |
|
"logits/chosen": -0.8508350253105164, |
|
"logits/rejected": -0.918735146522522, |
|
"logps/chosen": -310.43096923828125, |
|
"logps/rejected": -292.1014099121094, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.0003216363547835499, |
|
"rewards/margins": 0.004491996020078659, |
|
"rewards/rejected": -0.004170359577983618, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.031190019193857964, |
|
"grad_norm": 17.238946788912294, |
|
"learning_rate": 1.5587529976019183e-07, |
|
"logits/chosen": -0.9169099926948547, |
|
"logits/rejected": -0.8832875490188599, |
|
"logps/chosen": -233.2968292236328, |
|
"logps/rejected": -325.32159423828125, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0014909366145730019, |
|
"rewards/margins": 0.005786728113889694, |
|
"rewards/rejected": -0.007277664728462696, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03358925143953935, |
|
"grad_norm": 18.778590512016653, |
|
"learning_rate": 1.6786570743405277e-07, |
|
"logits/chosen": -0.7318301200866699, |
|
"logits/rejected": -0.7551624178886414, |
|
"logps/chosen": -309.093505859375, |
|
"logps/rejected": -301.21978759765625, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.002536451444029808, |
|
"rewards/margins": 0.008022397756576538, |
|
"rewards/rejected": -0.010558849200606346, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03598848368522073, |
|
"grad_norm": 19.50806830601253, |
|
"learning_rate": 1.7985611510791365e-07, |
|
"logits/chosen": -0.8769510984420776, |
|
"logits/rejected": -0.8700349926948547, |
|
"logps/chosen": -244.7518310546875, |
|
"logps/rejected": -240.1066131591797, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.0022977327462285757, |
|
"rewards/margins": 0.008738956414163113, |
|
"rewards/rejected": -0.011036688461899757, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03838771593090211, |
|
"grad_norm": 19.994113924215736, |
|
"learning_rate": 1.9184652278177456e-07, |
|
"logits/chosen": -0.7424201965332031, |
|
"logits/rejected": -0.794622540473938, |
|
"logps/chosen": -324.0123596191406, |
|
"logps/rejected": -252.9361572265625, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.007680603303015232, |
|
"rewards/margins": 0.010698428377509117, |
|
"rewards/rejected": -0.018379030749201775, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.040786948176583494, |
|
"grad_norm": 17.688740673471237, |
|
"learning_rate": 2.038369304556355e-07, |
|
"logits/chosen": -0.7349440455436707, |
|
"logits/rejected": -0.7516080141067505, |
|
"logps/chosen": -355.27398681640625, |
|
"logps/rejected": -348.34478759765625, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.005323306657373905, |
|
"rewards/margins": 0.013897763565182686, |
|
"rewards/rejected": -0.019221071153879166, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04318618042226487, |
|
"grad_norm": 19.71232099109114, |
|
"learning_rate": 2.1582733812949638e-07, |
|
"logits/chosen": -0.9080783724784851, |
|
"logits/rejected": -0.9029335975646973, |
|
"logps/chosen": -261.2442321777344, |
|
"logps/rejected": -253.060791015625, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.015925409272313118, |
|
"rewards/margins": 0.01347384788095951, |
|
"rewards/rejected": -0.02939925529062748, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04558541266794626, |
|
"grad_norm": 22.415101210716212, |
|
"learning_rate": 2.278177458033573e-07, |
|
"logits/chosen": -0.7793295979499817, |
|
"logits/rejected": -0.8237630724906921, |
|
"logps/chosen": -333.27923583984375, |
|
"logps/rejected": -276.1312561035156, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.007626083679497242, |
|
"rewards/margins": 0.00626841327175498, |
|
"rewards/rejected": -0.01389449555426836, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04798464491362764, |
|
"grad_norm": 18.36644658663811, |
|
"learning_rate": 2.398081534772182e-07, |
|
"logits/chosen": -0.8222543597221375, |
|
"logits/rejected": -0.7763038873672485, |
|
"logps/chosen": -334.2631530761719, |
|
"logps/rejected": -319.40789794921875, |
|
"loss": 0.6812, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.009842587634921074, |
|
"rewards/margins": 0.0359802320599556, |
|
"rewards/rejected": -0.04582281410694122, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05038387715930902, |
|
"grad_norm": 19.14100584170777, |
|
"learning_rate": 2.517985611510791e-07, |
|
"logits/chosen": -0.8009635210037231, |
|
"logits/rejected": -0.8082984685897827, |
|
"logps/chosen": -263.1017761230469, |
|
"logps/rejected": -286.7780456542969, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.02310958318412304, |
|
"rewards/margins": 0.029321227222681046, |
|
"rewards/rejected": -0.05243081599473953, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.052783109404990404, |
|
"grad_norm": 18.099432903182958, |
|
"learning_rate": 2.637889688249401e-07, |
|
"logits/chosen": -0.7926379442214966, |
|
"logits/rejected": -0.8239801526069641, |
|
"logps/chosen": -336.43572998046875, |
|
"logps/rejected": -336.51702880859375, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02941535785794258, |
|
"rewards/margins": 0.027248481288552284, |
|
"rewards/rejected": -0.05666383355855942, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05518234165067178, |
|
"grad_norm": 21.5645581626947, |
|
"learning_rate": 2.7577937649880093e-07, |
|
"logits/chosen": -0.7559607028961182, |
|
"logits/rejected": -0.6915712356567383, |
|
"logps/chosen": -257.9927062988281, |
|
"logps/rejected": -293.657958984375, |
|
"loss": 0.6742, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.03933503106236458, |
|
"rewards/margins": 0.020843397825956345, |
|
"rewards/rejected": -0.06017842888832092, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05758157389635317, |
|
"grad_norm": 22.07996822383328, |
|
"learning_rate": 2.8776978417266184e-07, |
|
"logits/chosen": -0.8138487935066223, |
|
"logits/rejected": -0.8245016932487488, |
|
"logps/chosen": -318.49066162109375, |
|
"logps/rejected": -269.34393310546875, |
|
"loss": 0.6709, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.030368736013770103, |
|
"rewards/margins": 0.0394073985517025, |
|
"rewards/rejected": -0.06977613270282745, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05998080614203455, |
|
"grad_norm": 20.050752891084144, |
|
"learning_rate": 2.997601918465228e-07, |
|
"logits/chosen": -0.7305746674537659, |
|
"logits/rejected": -0.7579324245452881, |
|
"logps/chosen": -254.9734649658203, |
|
"logps/rejected": -248.8967742919922, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.05234571546316147, |
|
"rewards/margins": 0.051434457302093506, |
|
"rewards/rejected": -0.10378017276525497, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06238003838771593, |
|
"grad_norm": 20.27921725168791, |
|
"learning_rate": 3.1175059952038366e-07, |
|
"logits/chosen": -0.8236411809921265, |
|
"logits/rejected": -0.7567560076713562, |
|
"logps/chosen": -294.4985656738281, |
|
"logps/rejected": -302.1468505859375, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.07084973156452179, |
|
"rewards/margins": 0.10025066137313843, |
|
"rewards/rejected": -0.1711004078388214, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0647792706333973, |
|
"grad_norm": 21.187035013002586, |
|
"learning_rate": 3.2374100719424457e-07, |
|
"logits/chosen": -0.7243493795394897, |
|
"logits/rejected": -0.8352082371711731, |
|
"logps/chosen": -307.9176330566406, |
|
"logps/rejected": -253.34619140625, |
|
"loss": 0.6562, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08669537305831909, |
|
"rewards/margins": 0.024461787194013596, |
|
"rewards/rejected": -0.11115716397762299, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0671785028790787, |
|
"grad_norm": 21.941014543157813, |
|
"learning_rate": 3.3573141486810554e-07, |
|
"logits/chosen": -0.7913055419921875, |
|
"logits/rejected": -0.767580509185791, |
|
"logps/chosen": -324.0011291503906, |
|
"logps/rejected": -317.0337829589844, |
|
"loss": 0.6401, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1112295612692833, |
|
"rewards/margins": 0.10771697759628296, |
|
"rewards/rejected": -0.21894654631614685, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06957773512476008, |
|
"grad_norm": 21.27338383622298, |
|
"learning_rate": 3.477218225419664e-07, |
|
"logits/chosen": -0.6899883151054382, |
|
"logits/rejected": -0.6340277194976807, |
|
"logps/chosen": -318.7247619628906, |
|
"logps/rejected": -298.4729919433594, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1419946849346161, |
|
"rewards/margins": 0.092744842171669, |
|
"rewards/rejected": -0.2347395420074463, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07197696737044146, |
|
"grad_norm": 22.689768100464402, |
|
"learning_rate": 3.597122302158273e-07, |
|
"logits/chosen": -0.7942506074905396, |
|
"logits/rejected": -0.8030725717544556, |
|
"logps/chosen": -305.0411071777344, |
|
"logps/rejected": -331.61187744140625, |
|
"loss": 0.6406, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2406449317932129, |
|
"rewards/margins": 0.11460767686367035, |
|
"rewards/rejected": -0.35525262355804443, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07437619961612284, |
|
"grad_norm": 23.00978265992737, |
|
"learning_rate": 3.7170263788968827e-07, |
|
"logits/chosen": -0.6863288283348083, |
|
"logits/rejected": -0.7276574373245239, |
|
"logps/chosen": -312.44342041015625, |
|
"logps/rejected": -292.2610168457031, |
|
"loss": 0.6535, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.23600120842456818, |
|
"rewards/margins": 0.14900444447994232, |
|
"rewards/rejected": -0.3850056529045105, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07677543186180422, |
|
"grad_norm": 23.999840275343665, |
|
"learning_rate": 3.836930455635491e-07, |
|
"logits/chosen": -0.7339412569999695, |
|
"logits/rejected": -0.7518739700317383, |
|
"logps/chosen": -313.2015380859375, |
|
"logps/rejected": -292.16790771484375, |
|
"loss": 0.641, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.22780270874500275, |
|
"rewards/margins": 0.14297997951507568, |
|
"rewards/rejected": -0.37078267335891724, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07917466410748561, |
|
"grad_norm": 20.7910805187295, |
|
"learning_rate": 3.9568345323741003e-07, |
|
"logits/chosen": -0.6403811573982239, |
|
"logits/rejected": -0.5926216244697571, |
|
"logps/chosen": -308.43511962890625, |
|
"logps/rejected": -362.1648864746094, |
|
"loss": 0.6256, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.38278818130493164, |
|
"rewards/margins": 0.20073220133781433, |
|
"rewards/rejected": -0.5835203528404236, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08157389635316699, |
|
"grad_norm": 21.70459813427768, |
|
"learning_rate": 4.07673860911271e-07, |
|
"logits/chosen": -0.6347512602806091, |
|
"logits/rejected": -0.6552717089653015, |
|
"logps/chosen": -283.39971923828125, |
|
"logps/rejected": -328.188232421875, |
|
"loss": 0.6321, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2871987521648407, |
|
"rewards/margins": 0.24844729900360107, |
|
"rewards/rejected": -0.5356460213661194, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08397312859884837, |
|
"grad_norm": 26.43494876007901, |
|
"learning_rate": 4.1966426858513185e-07, |
|
"logits/chosen": -0.7904044389724731, |
|
"logits/rejected": -0.7824649810791016, |
|
"logps/chosen": -365.49053955078125, |
|
"logps/rejected": -372.4767761230469, |
|
"loss": 0.6314, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4950784146785736, |
|
"rewards/margins": 0.16402028501033783, |
|
"rewards/rejected": -0.659098744392395, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08637236084452975, |
|
"grad_norm": 25.332276261499395, |
|
"learning_rate": 4.3165467625899276e-07, |
|
"logits/chosen": -0.6737385988235474, |
|
"logits/rejected": -0.7595090866088867, |
|
"logps/chosen": -322.38262939453125, |
|
"logps/rejected": -289.7132568359375, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4882277846336365, |
|
"rewards/margins": 0.1080218106508255, |
|
"rewards/rejected": -0.5962495803833008, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08877159309021113, |
|
"grad_norm": 26.410332730228134, |
|
"learning_rate": 4.436450839328537e-07, |
|
"logits/chosen": -0.675466001033783, |
|
"logits/rejected": -0.6468461751937866, |
|
"logps/chosen": -313.9189453125, |
|
"logps/rejected": -358.2328186035156, |
|
"loss": 0.6105, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5075892210006714, |
|
"rewards/margins": 0.34930387139320374, |
|
"rewards/rejected": -0.8568930625915527, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09117082533589252, |
|
"grad_norm": 22.73260511246945, |
|
"learning_rate": 4.556354916067146e-07, |
|
"logits/chosen": -0.6696101427078247, |
|
"logits/rejected": -0.6224175691604614, |
|
"logps/chosen": -297.02288818359375, |
|
"logps/rejected": -331.91241455078125, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4413999915122986, |
|
"rewards/margins": 0.33556288480758667, |
|
"rewards/rejected": -0.7769628763198853, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.0935700575815739, |
|
"grad_norm": 29.514769661274965, |
|
"learning_rate": 4.676258992805755e-07, |
|
"logits/chosen": -0.627165675163269, |
|
"logits/rejected": -0.6330434679985046, |
|
"logps/chosen": -349.56072998046875, |
|
"logps/rejected": -338.3889465332031, |
|
"loss": 0.6058, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6146546602249146, |
|
"rewards/margins": 0.2036333531141281, |
|
"rewards/rejected": -0.8182880282402039, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09596928982725528, |
|
"grad_norm": 24.938162121837912, |
|
"learning_rate": 4.796163069544364e-07, |
|
"logits/chosen": -0.668857216835022, |
|
"logits/rejected": -0.7078772783279419, |
|
"logps/chosen": -337.70526123046875, |
|
"logps/rejected": -354.5548400878906, |
|
"loss": 0.612, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6592297554016113, |
|
"rewards/margins": 0.34175071120262146, |
|
"rewards/rejected": -1.0009806156158447, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09836852207293666, |
|
"grad_norm": 28.728593622364386, |
|
"learning_rate": 4.916067146282974e-07, |
|
"logits/chosen": -0.7140421271324158, |
|
"logits/rejected": -0.6968683004379272, |
|
"logps/chosen": -339.4532775878906, |
|
"logps/rejected": -395.1648254394531, |
|
"loss": 0.5951, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6659790873527527, |
|
"rewards/margins": 0.3416122794151306, |
|
"rewards/rejected": -1.0075912475585938, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10076775431861804, |
|
"grad_norm": 30.638988495099387, |
|
"learning_rate": 4.999992108529978e-07, |
|
"logits/chosen": -0.6645852327346802, |
|
"logits/rejected": -0.666391134262085, |
|
"logps/chosen": -426.98675537109375, |
|
"logps/rejected": -428.32135009765625, |
|
"loss": 0.5885, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8368836641311646, |
|
"rewards/margins": 0.4071938991546631, |
|
"rewards/rejected": -1.244077444076538, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10316698656429943, |
|
"grad_norm": 31.537854952773728, |
|
"learning_rate": 4.999851817115532e-07, |
|
"logits/chosen": -0.7861429452896118, |
|
"logits/rejected": -0.7303667664527893, |
|
"logps/chosen": -336.2944641113281, |
|
"logps/rejected": -388.2402648925781, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7111964225769043, |
|
"rewards/margins": 0.5776985883712769, |
|
"rewards/rejected": -1.2888951301574707, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.10556621880998081, |
|
"grad_norm": 30.162592491304828, |
|
"learning_rate": 4.999536171027889e-07, |
|
"logits/chosen": -0.6594127416610718, |
|
"logits/rejected": -0.6956216096878052, |
|
"logps/chosen": -371.6212158203125, |
|
"logps/rejected": -386.111328125, |
|
"loss": 0.5941, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7979390025138855, |
|
"rewards/margins": 0.2378624975681305, |
|
"rewards/rejected": -1.035801649093628, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.10796545105566219, |
|
"grad_norm": 31.69329510628287, |
|
"learning_rate": 4.999045192408369e-07, |
|
"logits/chosen": -0.6090772151947021, |
|
"logits/rejected": -0.6116262674331665, |
|
"logps/chosen": -334.34307861328125, |
|
"logps/rejected": -349.0302429199219, |
|
"loss": 0.6046, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8086203336715698, |
|
"rewards/margins": 0.2887929677963257, |
|
"rewards/rejected": -1.0974133014678955, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11036468330134357, |
|
"grad_norm": 30.304116211050854, |
|
"learning_rate": 4.998378915697171e-07, |
|
"logits/chosen": -0.6026021242141724, |
|
"logits/rejected": -0.6114420890808105, |
|
"logps/chosen": -351.45770263671875, |
|
"logps/rejected": -392.4924621582031, |
|
"loss": 0.5592, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6711496114730835, |
|
"rewards/margins": 0.46558037400245667, |
|
"rewards/rejected": -1.1367299556732178, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11276391554702495, |
|
"grad_norm": 30.12585566872316, |
|
"learning_rate": 4.997537387630958e-07, |
|
"logits/chosen": -0.5692498683929443, |
|
"logits/rejected": -0.5843815207481384, |
|
"logps/chosen": -293.83648681640625, |
|
"logps/rejected": -339.39727783203125, |
|
"loss": 0.5559, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6546895503997803, |
|
"rewards/margins": 0.4268415868282318, |
|
"rewards/rejected": -1.0815311670303345, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11516314779270634, |
|
"grad_norm": 32.007901316477586, |
|
"learning_rate": 4.996520667239582e-07, |
|
"logits/chosen": -0.7471034526824951, |
|
"logits/rejected": -0.7076970338821411, |
|
"logps/chosen": -326.08172607421875, |
|
"logps/rejected": -419.4745178222656, |
|
"loss": 0.5631, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7582639455795288, |
|
"rewards/margins": 0.43702253699302673, |
|
"rewards/rejected": -1.195286512374878, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11756238003838772, |
|
"grad_norm": 34.085599220707635, |
|
"learning_rate": 4.995328825841939e-07, |
|
"logits/chosen": -0.5471186637878418, |
|
"logits/rejected": -0.5438566207885742, |
|
"logps/chosen": -302.33807373046875, |
|
"logps/rejected": -376.98809814453125, |
|
"loss": 0.5605, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6895132064819336, |
|
"rewards/margins": 0.7165959477424622, |
|
"rewards/rejected": -1.406109094619751, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1199616122840691, |
|
"grad_norm": 36.40869354914484, |
|
"learning_rate": 4.993961947040967e-07, |
|
"logits/chosen": -0.5377882719039917, |
|
"logits/rejected": -0.5630967617034912, |
|
"logps/chosen": -394.183837890625, |
|
"logps/rejected": -395.992919921875, |
|
"loss": 0.5899, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9748881459236145, |
|
"rewards/margins": 0.34379926323890686, |
|
"rewards/rejected": -1.3186874389648438, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12236084452975048, |
|
"grad_norm": 38.66198604293044, |
|
"learning_rate": 4.992420126717784e-07, |
|
"logits/chosen": -0.5370495915412903, |
|
"logits/rejected": -0.5186640620231628, |
|
"logps/chosen": -330.3926086425781, |
|
"logps/rejected": -413.3533630371094, |
|
"loss": 0.5601, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6817417144775391, |
|
"rewards/margins": 0.8517974615097046, |
|
"rewards/rejected": -1.5335391759872437, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12476007677543186, |
|
"grad_norm": 44.82571580433164, |
|
"learning_rate": 4.990703473024958e-07, |
|
"logits/chosen": -0.4602839946746826, |
|
"logits/rejected": -0.4909666180610657, |
|
"logps/chosen": -386.23114013671875, |
|
"logps/rejected": -434.2461853027344, |
|
"loss": 0.5907, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9170669317245483, |
|
"rewards/margins": 0.5261942744255066, |
|
"rewards/rejected": -1.4432612657546997, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12715930902111325, |
|
"grad_norm": 36.97544534321359, |
|
"learning_rate": 4.98881210637893e-07, |
|
"logits/chosen": -0.5554064512252808, |
|
"logits/rejected": -0.521535336971283, |
|
"logps/chosen": -303.0024108886719, |
|
"logps/rejected": -400.53570556640625, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7325838804244995, |
|
"rewards/margins": 0.615298867225647, |
|
"rewards/rejected": -1.3478827476501465, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1295585412667946, |
|
"grad_norm": 27.205651803603914, |
|
"learning_rate": 4.986746159451553e-07, |
|
"logits/chosen": -0.47285860776901245, |
|
"logits/rejected": -0.4613783359527588, |
|
"logps/chosen": -339.0109558105469, |
|
"logps/rejected": -386.68646240234375, |
|
"loss": 0.5865, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6566378474235535, |
|
"rewards/margins": 0.5466252565383911, |
|
"rewards/rejected": -1.2032630443572998, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.131957773512476, |
|
"grad_norm": 26.575355603242237, |
|
"learning_rate": 4.984505777160795e-07, |
|
"logits/chosen": -0.4012818932533264, |
|
"logits/rejected": -0.4090479016304016, |
|
"logps/chosen": -409.76904296875, |
|
"logps/rejected": -460.48443603515625, |
|
"loss": 0.597, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8697434663772583, |
|
"rewards/margins": 0.5134061574935913, |
|
"rewards/rejected": -1.38314950466156, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1343570057581574, |
|
"grad_norm": 34.33879094064971, |
|
"learning_rate": 4.982091116660574e-07, |
|
"logits/chosen": -0.46804818511009216, |
|
"logits/rejected": -0.5018707513809204, |
|
"logps/chosen": -303.33465576171875, |
|
"logps/rejected": -297.3377380371094, |
|
"loss": 0.6039, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8622892498970032, |
|
"rewards/margins": 0.18945768475532532, |
|
"rewards/rejected": -1.0517469644546509, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.13675623800383876, |
|
"grad_norm": 46.949418213734766, |
|
"learning_rate": 4.979502347329732e-07, |
|
"logits/chosen": -0.32136762142181396, |
|
"logits/rejected": -0.3220544457435608, |
|
"logps/chosen": -413.630126953125, |
|
"logps/rejected": -485.76239013671875, |
|
"loss": 0.5834, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0072054862976074, |
|
"rewards/margins": 0.48566046357154846, |
|
"rewards/rejected": -1.4928659200668335, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.13915547024952016, |
|
"grad_norm": 49.422956000364415, |
|
"learning_rate": 4.976739650760151e-07, |
|
"logits/chosen": -0.37429919838905334, |
|
"logits/rejected": -0.366951048374176, |
|
"logps/chosen": -355.41912841796875, |
|
"logps/rejected": -382.027099609375, |
|
"loss": 0.5618, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7793852090835571, |
|
"rewards/margins": 0.4203456938266754, |
|
"rewards/rejected": -1.1997307538986206, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14155470249520152, |
|
"grad_norm": 40.30477537639971, |
|
"learning_rate": 4.97380322074402e-07, |
|
"logits/chosen": -0.3029858469963074, |
|
"logits/rejected": -0.312110960483551, |
|
"logps/chosen": -326.59417724609375, |
|
"logps/rejected": -364.6795349121094, |
|
"loss": 0.5845, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8730165362358093, |
|
"rewards/margins": 0.38284337520599365, |
|
"rewards/rejected": -1.2558599710464478, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14395393474088292, |
|
"grad_norm": 36.62993811728441, |
|
"learning_rate": 4.970693263260237e-07, |
|
"logits/chosen": -0.3053416609764099, |
|
"logits/rejected": -0.33385586738586426, |
|
"logps/chosen": -376.3214416503906, |
|
"logps/rejected": -395.13446044921875, |
|
"loss": 0.5702, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7246559858322144, |
|
"rewards/margins": 0.5450745820999146, |
|
"rewards/rejected": -1.269730567932129, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1463531669865643, |
|
"grad_norm": 33.86992000319081, |
|
"learning_rate": 4.967409996459966e-07, |
|
"logits/chosen": -0.3689904510974884, |
|
"logits/rejected": -0.37907394766807556, |
|
"logps/chosen": -377.38897705078125, |
|
"logps/rejected": -401.9908142089844, |
|
"loss": 0.555, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7758709788322449, |
|
"rewards/margins": 0.5479923486709595, |
|
"rewards/rejected": -1.3238633871078491, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.14875239923224567, |
|
"grad_norm": 38.28181058019974, |
|
"learning_rate": 4.963953650651326e-07, |
|
"logits/chosen": -0.2678986191749573, |
|
"logits/rejected": -0.2772577404975891, |
|
"logps/chosen": -461.5928649902344, |
|
"logps/rejected": -409.40496826171875, |
|
"loss": 0.5726, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9694966077804565, |
|
"rewards/margins": 0.4102768003940582, |
|
"rewards/rejected": -1.379773497581482, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15115163147792707, |
|
"grad_norm": 37.19294834268039, |
|
"learning_rate": 4.960324468283248e-07, |
|
"logits/chosen": -0.3819618821144104, |
|
"logits/rejected": -0.39741310477256775, |
|
"logps/chosen": -337.7065734863281, |
|
"logps/rejected": -384.42047119140625, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9619113802909851, |
|
"rewards/margins": 0.4606667459011078, |
|
"rewards/rejected": -1.4225783348083496, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.15355086372360843, |
|
"grad_norm": 35.87976087956379, |
|
"learning_rate": 4.956522703928451e-07, |
|
"logits/chosen": -0.3673608899116516, |
|
"logits/rejected": -0.33217328786849976, |
|
"logps/chosen": -343.62176513671875, |
|
"logps/rejected": -393.496826171875, |
|
"loss": 0.5353, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8986355662345886, |
|
"rewards/margins": 0.4927578568458557, |
|
"rewards/rejected": -1.3913934230804443, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.15595009596928983, |
|
"grad_norm": 50.48137982894001, |
|
"learning_rate": 4.952548624265606e-07, |
|
"logits/chosen": -0.31850799918174744, |
|
"logits/rejected": -0.3128277361392975, |
|
"logps/chosen": -417.2852478027344, |
|
"logps/rejected": -434.23553466796875, |
|
"loss": 0.5963, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.183450698852539, |
|
"rewards/margins": 0.39802995324134827, |
|
"rewards/rejected": -1.581480622291565, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.15834932821497122, |
|
"grad_norm": 40.23605131167377, |
|
"learning_rate": 4.948402508060607e-07, |
|
"logits/chosen": -0.4198373854160309, |
|
"logits/rejected": -0.4242327809333801, |
|
"logps/chosen": -347.42401123046875, |
|
"logps/rejected": -407.2917785644531, |
|
"loss": 0.5779, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9353069067001343, |
|
"rewards/margins": 0.6847809553146362, |
|
"rewards/rejected": -1.62008798122406, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16074856046065258, |
|
"grad_norm": 47.98235563571966, |
|
"learning_rate": 4.944084646147038e-07, |
|
"logits/chosen": -0.3835527300834656, |
|
"logits/rejected": -0.3816671073436737, |
|
"logps/chosen": -432.5252990722656, |
|
"logps/rejected": -451.444580078125, |
|
"loss": 0.597, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.916308581829071, |
|
"rewards/margins": 0.42745262384414673, |
|
"rewards/rejected": -1.3437612056732178, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16314779270633398, |
|
"grad_norm": 31.426247666521856, |
|
"learning_rate": 4.939595341405754e-07, |
|
"logits/chosen": -0.3885757327079773, |
|
"logits/rejected": -0.4052307605743408, |
|
"logps/chosen": -353.3683166503906, |
|
"logps/rejected": -387.3346252441406, |
|
"loss": 0.5532, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7533296346664429, |
|
"rewards/margins": 0.5021446347236633, |
|
"rewards/rejected": -1.2554742097854614, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.16554702495201534, |
|
"grad_norm": 43.71353225840619, |
|
"learning_rate": 4.93493490874365e-07, |
|
"logits/chosen": -0.347800612449646, |
|
"logits/rejected": -0.3473549485206604, |
|
"logps/chosen": -352.99664306640625, |
|
"logps/rejected": -402.1784973144531, |
|
"loss": 0.5198, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9006279706954956, |
|
"rewards/margins": 0.4584806561470032, |
|
"rewards/rejected": -1.3591086864471436, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.16794625719769674, |
|
"grad_norm": 41.8541704073788, |
|
"learning_rate": 4.93010367507156e-07, |
|
"logits/chosen": -0.4211471974849701, |
|
"logits/rejected": -0.40916356444358826, |
|
"logps/chosen": -313.6016845703125, |
|
"logps/rejected": -363.8100891113281, |
|
"loss": 0.5445, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8448683023452759, |
|
"rewards/margins": 0.7651897668838501, |
|
"rewards/rejected": -1.610058069229126, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17034548944337813, |
|
"grad_norm": 46.81829755540666, |
|
"learning_rate": 4.925101979281332e-07, |
|
"logits/chosen": -0.32413139939308167, |
|
"logits/rejected": -0.3692261874675751, |
|
"logps/chosen": -394.79010009765625, |
|
"logps/rejected": -425.03173828125, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7528802752494812, |
|
"rewards/margins": 0.8449785113334656, |
|
"rewards/rejected": -1.5978589057922363, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1727447216890595, |
|
"grad_norm": 36.45844848656005, |
|
"learning_rate": 4.919930172222054e-07, |
|
"logits/chosen": -0.4341300427913666, |
|
"logits/rejected": -0.47233128547668457, |
|
"logps/chosen": -374.00927734375, |
|
"logps/rejected": -434.5907287597656, |
|
"loss": 0.5148, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9868295788764954, |
|
"rewards/margins": 0.6939688324928284, |
|
"rewards/rejected": -1.6807985305786133, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.1751439539347409, |
|
"grad_norm": 53.74249656615485, |
|
"learning_rate": 4.914588616675445e-07, |
|
"logits/chosen": -0.5327308773994446, |
|
"logits/rejected": -0.5439242124557495, |
|
"logps/chosen": -327.3343505859375, |
|
"logps/rejected": -395.0537414550781, |
|
"loss": 0.5835, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8013119697570801, |
|
"rewards/margins": 0.6934621930122375, |
|
"rewards/rejected": -1.4947742223739624, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.17754318618042225, |
|
"grad_norm": 45.99938018946246, |
|
"learning_rate": 4.909077687330404e-07, |
|
"logits/chosen": -0.3698914647102356, |
|
"logits/rejected": -0.37493056058883667, |
|
"logps/chosen": -399.5855712890625, |
|
"logps/rejected": -412.091796875, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0506041049957275, |
|
"rewards/margins": 0.43465113639831543, |
|
"rewards/rejected": -1.4852551221847534, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.17994241842610365, |
|
"grad_norm": 46.495135111366004, |
|
"learning_rate": 4.903397770756729e-07, |
|
"logits/chosen": -0.33391234278678894, |
|
"logits/rejected": -0.36340492963790894, |
|
"logps/chosen": -383.1959228515625, |
|
"logps/rejected": -454.8154296875, |
|
"loss": 0.5547, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0103901624679565, |
|
"rewards/margins": 0.7534034848213196, |
|
"rewards/rejected": -1.7637935876846313, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.18234165067178504, |
|
"grad_norm": 37.671176684888835, |
|
"learning_rate": 4.897549265378004e-07, |
|
"logits/chosen": -0.33363935351371765, |
|
"logits/rejected": -0.34147655963897705, |
|
"logps/chosen": -458.62799072265625, |
|
"logps/rejected": -521.7030639648438, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2031135559082031, |
|
"rewards/margins": 0.6097708344459534, |
|
"rewards/rejected": -1.8128843307495117, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1847408829174664, |
|
"grad_norm": 42.68878919588678, |
|
"learning_rate": 4.891532581443643e-07, |
|
"logits/chosen": -0.37516340613365173, |
|
"logits/rejected": -0.38123488426208496, |
|
"logps/chosen": -420.92034912109375, |
|
"logps/rejected": -519.4351806640625, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.9645318984985352, |
|
"rewards/margins": 1.0626075267791748, |
|
"rewards/rejected": -2.02713942527771, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1871401151631478, |
|
"grad_norm": 46.50714017004812, |
|
"learning_rate": 4.885348141000122e-07, |
|
"logits/chosen": -0.3206332325935364, |
|
"logits/rejected": -0.3464817404747009, |
|
"logps/chosen": -368.8122863769531, |
|
"logps/rejected": -456.5826110839844, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9912222623825073, |
|
"rewards/margins": 0.718090832233429, |
|
"rewards/rejected": -1.709313154220581, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.18953934740882916, |
|
"grad_norm": 48.24903919861122, |
|
"learning_rate": 4.878996377861367e-07, |
|
"logits/chosen": -0.34129756689071655, |
|
"logits/rejected": -0.3763147294521332, |
|
"logps/chosen": -345.6600341796875, |
|
"logps/rejected": -410.33148193359375, |
|
"loss": 0.5269, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.100229263305664, |
|
"rewards/margins": 0.5510459542274475, |
|
"rewards/rejected": -1.6512752771377563, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.19193857965451055, |
|
"grad_norm": 45.393039501063576, |
|
"learning_rate": 4.872477737578327e-07, |
|
"logits/chosen": -0.3056468367576599, |
|
"logits/rejected": -0.27101820707321167, |
|
"logps/chosen": -405.1756286621094, |
|
"logps/rejected": -517.0303955078125, |
|
"loss": 0.5153, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1387064456939697, |
|
"rewards/margins": 1.1627776622772217, |
|
"rewards/rejected": -2.3014841079711914, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19433781190019195, |
|
"grad_norm": 72.69822778659203, |
|
"learning_rate": 4.865792677407718e-07, |
|
"logits/chosen": -0.3432585895061493, |
|
"logits/rejected": -0.3551956117153168, |
|
"logps/chosen": -399.278076171875, |
|
"logps/rejected": -417.18756103515625, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2946819067001343, |
|
"rewards/margins": 0.43043017387390137, |
|
"rewards/rejected": -1.7251121997833252, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.1967370441458733, |
|
"grad_norm": 49.59400561849621, |
|
"learning_rate": 4.858941666279955e-07, |
|
"logits/chosen": -0.33510535955429077, |
|
"logits/rejected": -0.33167213201522827, |
|
"logps/chosen": -420.84600830078125, |
|
"logps/rejected": -448.96441650390625, |
|
"loss": 0.5757, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2530834674835205, |
|
"rewards/margins": 0.4601351320743561, |
|
"rewards/rejected": -1.7132186889648438, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1991362763915547, |
|
"grad_norm": 52.913578732595134, |
|
"learning_rate": 4.851925184766247e-07, |
|
"logits/chosen": -0.15027588605880737, |
|
"logits/rejected": -0.13931182026863098, |
|
"logps/chosen": -377.5069885253906, |
|
"logps/rejected": -436.057861328125, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0396711826324463, |
|
"rewards/margins": 0.7874752283096313, |
|
"rewards/rejected": -1.827146291732788, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.20153550863723607, |
|
"grad_norm": 46.98941865398599, |
|
"learning_rate": 4.844743725044897e-07, |
|
"logits/chosen": -0.09798092395067215, |
|
"logits/rejected": -0.07184808701276779, |
|
"logps/chosen": -354.3853454589844, |
|
"logps/rejected": -389.4168395996094, |
|
"loss": 0.5352, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9079411625862122, |
|
"rewards/margins": 0.5621434450149536, |
|
"rewards/rejected": -1.470084547996521, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.20393474088291746, |
|
"grad_norm": 39.713188316559794, |
|
"learning_rate": 4.837397790866774e-07, |
|
"logits/chosen": 0.04212416708469391, |
|
"logits/rejected": 0.026333481073379517, |
|
"logps/chosen": -386.2543640136719, |
|
"logps/rejected": -472.2422790527344, |
|
"loss": 0.5566, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.7561712861061096, |
|
"rewards/margins": 1.0946016311645508, |
|
"rewards/rejected": -1.8507730960845947, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.20633397312859886, |
|
"grad_norm": 41.98304251178839, |
|
"learning_rate": 4.829887897519974e-07, |
|
"logits/chosen": 0.07442867755889893, |
|
"logits/rejected": 0.061352748423814774, |
|
"logps/chosen": -335.7804870605469, |
|
"logps/rejected": -402.56683349609375, |
|
"loss": 0.5583, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8378859758377075, |
|
"rewards/margins": 0.45741820335388184, |
|
"rewards/rejected": -1.2953040599822998, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.20873320537428022, |
|
"grad_norm": 44.81856360836949, |
|
"learning_rate": 4.82221457179368e-07, |
|
"logits/chosen": 0.06865431368350983, |
|
"logits/rejected": 0.05884439870715141, |
|
"logps/chosen": -373.0031433105469, |
|
"logps/rejected": -437.69830322265625, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.8334062695503235, |
|
"rewards/margins": 0.833687424659729, |
|
"rewards/rejected": -1.6670936346054077, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.21113243761996162, |
|
"grad_norm": 46.510744350000365, |
|
"learning_rate": 4.814378351941206e-07, |
|
"logits/chosen": -0.007160467095673084, |
|
"logits/rejected": -0.018424618989229202, |
|
"logps/chosen": -370.64276123046875, |
|
"logps/rejected": -411.3119201660156, |
|
"loss": 0.5568, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9860130548477173, |
|
"rewards/margins": 0.5094862580299377, |
|
"rewards/rejected": -1.4954993724822998, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.21353166986564298, |
|
"grad_norm": 41.88055991722605, |
|
"learning_rate": 4.806379787642241e-07, |
|
"logits/chosen": 0.07054750621318817, |
|
"logits/rejected": 0.04635707288980484, |
|
"logps/chosen": -364.1808166503906, |
|
"logps/rejected": -433.61083984375, |
|
"loss": 0.5715, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9996932148933411, |
|
"rewards/margins": 0.6235132217407227, |
|
"rewards/rejected": -1.6232064962387085, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.21593090211132437, |
|
"grad_norm": 55.42997601239575, |
|
"learning_rate": 4.798219439964293e-07, |
|
"logits/chosen": 0.024583891034126282, |
|
"logits/rejected": -0.019349288195371628, |
|
"logps/chosen": -369.00787353515625, |
|
"logps/rejected": -418.3794860839844, |
|
"loss": 0.5104, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.060436725616455, |
|
"rewards/margins": 0.3801059126853943, |
|
"rewards/rejected": -1.440542459487915, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.21833013435700577, |
|
"grad_norm": 38.171737131023725, |
|
"learning_rate": 4.78989788132333e-07, |
|
"logits/chosen": -0.07863293588161469, |
|
"logits/rejected": -0.07448319345712662, |
|
"logps/chosen": -313.26434326171875, |
|
"logps/rejected": -413.4766540527344, |
|
"loss": 0.4859, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7598863840103149, |
|
"rewards/margins": 0.9288042783737183, |
|
"rewards/rejected": -1.6886907815933228, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.22072936660268713, |
|
"grad_norm": 35.40962863232318, |
|
"learning_rate": 4.781415695443631e-07, |
|
"logits/chosen": 0.05154382437467575, |
|
"logits/rejected": 0.09211069345474243, |
|
"logps/chosen": -438.11834716796875, |
|
"logps/rejected": -475.9991760253906, |
|
"loss": 0.565, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2968177795410156, |
|
"rewards/margins": 0.310837984085083, |
|
"rewards/rejected": -1.6076555252075195, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.22312859884836853, |
|
"grad_norm": 44.52858778321019, |
|
"learning_rate": 4.772773477316836e-07, |
|
"logits/chosen": 0.02580409124493599, |
|
"logits/rejected": 0.014431046321988106, |
|
"logps/chosen": -409.44293212890625, |
|
"logps/rejected": -475.50909423828125, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0939719676971436, |
|
"rewards/margins": 0.6862825155258179, |
|
"rewards/rejected": -1.7802543640136719, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2255278310940499, |
|
"grad_norm": 58.01427104657535, |
|
"learning_rate": 4.7639718331602117e-07, |
|
"logits/chosen": 0.020050814375281334, |
|
"logits/rejected": 0.0072703612968325615, |
|
"logps/chosen": -396.80255126953125, |
|
"logps/rejected": -487.52056884765625, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0798569917678833, |
|
"rewards/margins": 1.0289558172225952, |
|
"rewards/rejected": -2.1088128089904785, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.22792706333973128, |
|
"grad_norm": 58.62131948625022, |
|
"learning_rate": 4.7550113803741275e-07, |
|
"logits/chosen": 0.09693816304206848, |
|
"logits/rejected": 0.11189769208431244, |
|
"logps/chosen": -440.23779296875, |
|
"logps/rejected": -418.70294189453125, |
|
"loss": 0.542, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.374324083328247, |
|
"rewards/margins": 0.44979920983314514, |
|
"rewards/rejected": -1.8241230249404907, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.23032629558541268, |
|
"grad_norm": 50.107935284133745, |
|
"learning_rate": 4.7458927474987454e-07, |
|
"logits/chosen": 0.1475858986377716, |
|
"logits/rejected": 0.1856994330883026, |
|
"logps/chosen": -456.3863830566406, |
|
"logps/rejected": -446.6289978027344, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1958545446395874, |
|
"rewards/margins": 0.5085662603378296, |
|
"rewards/rejected": -1.704420804977417, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.23272552783109404, |
|
"grad_norm": 57.15407754861982, |
|
"learning_rate": 4.7366165741699347e-07, |
|
"logits/chosen": 0.0016381435561925173, |
|
"logits/rejected": -0.0012822365388274193, |
|
"logps/chosen": -465.39556884765625, |
|
"logps/rejected": -497.4779357910156, |
|
"loss": 0.5196, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2583961486816406, |
|
"rewards/margins": 0.595262885093689, |
|
"rewards/rejected": -1.8536590337753296, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.23512476007677544, |
|
"grad_norm": 46.50066693151769, |
|
"learning_rate": 4.727183511074401e-07, |
|
"logits/chosen": 0.0598614327609539, |
|
"logits/rejected": 0.07429414987564087, |
|
"logps/chosen": -419.4317932128906, |
|
"logps/rejected": -445.7073669433594, |
|
"loss": 0.5448, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2420616149902344, |
|
"rewards/margins": 0.34711843729019165, |
|
"rewards/rejected": -1.5891802310943604, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2375239923224568, |
|
"grad_norm": 52.54444410566085, |
|
"learning_rate": 4.717594219904043e-07, |
|
"logits/chosen": 0.0846981406211853, |
|
"logits/rejected": 0.1653452217578888, |
|
"logps/chosen": -422.04840087890625, |
|
"logps/rejected": -423.120849609375, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3373234272003174, |
|
"rewards/margins": 0.4356490671634674, |
|
"rewards/rejected": -1.7729724645614624, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.2399232245681382, |
|
"grad_norm": 60.25325154823285, |
|
"learning_rate": 4.7078493733095393e-07, |
|
"logits/chosen": 0.07058246433734894, |
|
"logits/rejected": 0.050206851214170456, |
|
"logps/chosen": -396.4923400878906, |
|
"logps/rejected": -487.3773498535156, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2637110948562622, |
|
"rewards/margins": 0.8276708722114563, |
|
"rewards/rejected": -2.091381788253784, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2423224568138196, |
|
"grad_norm": 59.84736645737947, |
|
"learning_rate": 4.6979496548531614e-07, |
|
"logits/chosen": 0.297431081533432, |
|
"logits/rejected": 0.2413591593503952, |
|
"logps/chosen": -433.361328125, |
|
"logps/rejected": -550.1572875976562, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5706839561462402, |
|
"rewards/margins": 0.6886904835700989, |
|
"rewards/rejected": -2.2593743801116943, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.24472168905950095, |
|
"grad_norm": 53.900916939077874, |
|
"learning_rate": 4.6878957589608293e-07, |
|
"logits/chosen": 0.0987381860613823, |
|
"logits/rejected": 0.053351711481809616, |
|
"logps/chosen": -424.6581115722656, |
|
"logps/rejected": -550.7093505859375, |
|
"loss": 0.5649, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.411415696144104, |
|
"rewards/margins": 0.8357686996459961, |
|
"rewards/rejected": -2.2471842765808105, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.24712092130518235, |
|
"grad_norm": 51.661128978707815, |
|
"learning_rate": 4.6776883908733956e-07, |
|
"logits/chosen": 0.21172530949115753, |
|
"logits/rejected": 0.2920139729976654, |
|
"logps/chosen": -427.90069580078125, |
|
"logps/rejected": -449.43743896484375, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2006590366363525, |
|
"rewards/margins": 0.8521521687507629, |
|
"rewards/rejected": -2.0528111457824707, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2495201535508637, |
|
"grad_norm": 57.342935834506974, |
|
"learning_rate": 4.667328266597178e-07, |
|
"logits/chosen": 0.12620897591114044, |
|
"logits/rejected": 0.16032932698726654, |
|
"logps/chosen": -405.9414367675781, |
|
"logps/rejected": -472.34100341796875, |
|
"loss": 0.4957, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1865851879119873, |
|
"rewards/margins": 0.7863299250602722, |
|
"rewards/rejected": -1.9729150533676147, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2519193857965451, |
|
"grad_norm": 48.99156051497743, |
|
"learning_rate": 4.6568161128537354e-07, |
|
"logits/chosen": -0.0001941099762916565, |
|
"logits/rejected": 0.11766161769628525, |
|
"logps/chosen": -403.42352294921875, |
|
"logps/rejected": -414.8743591308594, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.31856369972229, |
|
"rewards/margins": 0.5916188955307007, |
|
"rewards/rejected": -1.9101825952529907, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2543186180422265, |
|
"grad_norm": 53.53404049780426, |
|
"learning_rate": 4.6461526670288877e-07, |
|
"logits/chosen": 0.16110765933990479, |
|
"logits/rejected": 0.1884194314479828, |
|
"logps/chosen": -425.4978942871094, |
|
"logps/rejected": -475.82598876953125, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2426551580429077, |
|
"rewards/margins": 0.8187869787216187, |
|
"rewards/rejected": -2.0614418983459473, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2567178502879079, |
|
"grad_norm": 44.92640678176699, |
|
"learning_rate": 4.635338677120994e-07, |
|
"logits/chosen": 0.25712910294532776, |
|
"logits/rejected": 0.22067277133464813, |
|
"logps/chosen": -388.7443542480469, |
|
"logps/rejected": -499.996826171875, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.110144019126892, |
|
"rewards/margins": 0.9244272112846375, |
|
"rewards/rejected": -2.034571409225464, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2591170825335892, |
|
"grad_norm": 47.537953837190535, |
|
"learning_rate": 4.6243749016884835e-07, |
|
"logits/chosen": 0.32076993584632874, |
|
"logits/rejected": 0.26425430178642273, |
|
"logps/chosen": -439.27978515625, |
|
"logps/rejected": -618.1819458007812, |
|
"loss": 0.5308, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.488040804862976, |
|
"rewards/margins": 1.2890340089797974, |
|
"rewards/rejected": -2.7770748138427734, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2615163147792706, |
|
"grad_norm": 83.71701457860443, |
|
"learning_rate": 4.613262109796645e-07, |
|
"logits/chosen": 0.1991526186466217, |
|
"logits/rejected": 0.15013298392295837, |
|
"logps/chosen": -431.1382751464844, |
|
"logps/rejected": -562.5550537109375, |
|
"loss": 0.5159, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4723626375198364, |
|
"rewards/margins": 0.9577463865280151, |
|
"rewards/rejected": -2.4301087856292725, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.263915547024952, |
|
"grad_norm": 51.96278616895983, |
|
"learning_rate": 4.602001080963678e-07, |
|
"logits/chosen": 0.23320236802101135, |
|
"logits/rejected": 0.25320833921432495, |
|
"logps/chosen": -462.2037658691406, |
|
"logps/rejected": -513.6661376953125, |
|
"loss": 0.5346, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6028022766113281, |
|
"rewards/margins": 0.7869012355804443, |
|
"rewards/rejected": -2.3897035121917725, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2663147792706334, |
|
"grad_norm": 58.47105770666699, |
|
"learning_rate": 4.590592605106017e-07, |
|
"logits/chosen": 0.11673329025506973, |
|
"logits/rejected": 0.11646854877471924, |
|
"logps/chosen": -450.5316467285156, |
|
"logps/rejected": -493.61456298828125, |
|
"loss": 0.537, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2824386358261108, |
|
"rewards/margins": 0.7327200174331665, |
|
"rewards/rejected": -2.0151586532592773, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2687140115163148, |
|
"grad_norm": 50.18626495548194, |
|
"learning_rate": 4.5790374824829165e-07, |
|
"logits/chosen": 0.29557594656944275, |
|
"logits/rejected": 0.2590964436531067, |
|
"logps/chosen": -325.115966796875, |
|
"logps/rejected": -413.65069580078125, |
|
"loss": 0.5433, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1677870750427246, |
|
"rewards/margins": 0.7445520758628845, |
|
"rewards/rejected": -1.912339210510254, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.27111324376199614, |
|
"grad_norm": 59.74535290520495, |
|
"learning_rate": 4.5673365236403216e-07, |
|
"logits/chosen": 0.2289455235004425, |
|
"logits/rejected": 0.2346872091293335, |
|
"logps/chosen": -335.0433349609375, |
|
"logps/rejected": -454.72283935546875, |
|
"loss": 0.5063, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1991482973098755, |
|
"rewards/margins": 0.9213361740112305, |
|
"rewards/rejected": -2.1204843521118164, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.27351247600767753, |
|
"grad_norm": 47.67885660456801, |
|
"learning_rate": 4.5554905493540075e-07, |
|
"logits/chosen": 0.36972707509994507, |
|
"logits/rejected": 0.36796706914901733, |
|
"logps/chosen": -362.0436096191406, |
|
"logps/rejected": -504.9833984375, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2587521076202393, |
|
"rewards/margins": 1.354644536972046, |
|
"rewards/rejected": -2.613396406173706, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2759117082533589, |
|
"grad_norm": 57.447680326348596, |
|
"learning_rate": 4.5435003905720074e-07, |
|
"logits/chosen": 0.3414398729801178, |
|
"logits/rejected": 0.38713935017585754, |
|
"logps/chosen": -470.5823669433594, |
|
"logps/rejected": -540.7964477539062, |
|
"loss": 0.5173, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6748573780059814, |
|
"rewards/margins": 0.9955212473869324, |
|
"rewards/rejected": -2.6703786849975586, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2783109404990403, |
|
"grad_norm": 94.83896002985985, |
|
"learning_rate": 4.531366888356324e-07, |
|
"logits/chosen": 0.3320922553539276, |
|
"logits/rejected": 0.27273011207580566, |
|
"logps/chosen": -354.8629455566406, |
|
"logps/rejected": -515.5682983398438, |
|
"loss": 0.4939, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4984259605407715, |
|
"rewards/margins": 1.194115161895752, |
|
"rewards/rejected": -2.6925411224365234, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2807101727447217, |
|
"grad_norm": 82.64775454398065, |
|
"learning_rate": 4.519090893823931e-07, |
|
"logits/chosen": 0.5136270523071289, |
|
"logits/rejected": 0.541195273399353, |
|
"logps/chosen": -415.32470703125, |
|
"logps/rejected": -473.9029235839844, |
|
"loss": 0.4949, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5172972679138184, |
|
"rewards/margins": 0.6822672486305237, |
|
"rewards/rejected": -2.1995644569396973, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.28310940499040305, |
|
"grad_norm": 47.797778973391935, |
|
"learning_rate": 4.5066732680870734e-07, |
|
"logits/chosen": 0.5480640530586243, |
|
"logits/rejected": 0.5535899996757507, |
|
"logps/chosen": -402.5614929199219, |
|
"logps/rejected": -467.32659912109375, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2578551769256592, |
|
"rewards/margins": 1.102461338043213, |
|
"rewards/rejected": -2.360316514968872, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.28550863723608444, |
|
"grad_norm": 71.76813869344353, |
|
"learning_rate": 4.494114882192862e-07, |
|
"logits/chosen": 0.29584652185440063, |
|
"logits/rejected": 0.2909153699874878, |
|
"logps/chosen": -420.04278564453125, |
|
"logps/rejected": -494.29034423828125, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3689825534820557, |
|
"rewards/margins": 1.0625414848327637, |
|
"rewards/rejected": -2.4315237998962402, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.28790786948176583, |
|
"grad_norm": 57.518793713468824, |
|
"learning_rate": 4.4814166170621735e-07, |
|
"logits/chosen": 0.40785670280456543, |
|
"logits/rejected": 0.3927389681339264, |
|
"logps/chosen": -441.46820068359375, |
|
"logps/rejected": -522.16064453125, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.770795464515686, |
|
"rewards/margins": 1.0380085706710815, |
|
"rewards/rejected": -2.8088042736053467, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2903071017274472, |
|
"grad_norm": 70.92065974917496, |
|
"learning_rate": 4.468579363427858e-07, |
|
"logits/chosen": 0.19428907334804535, |
|
"logits/rejected": 0.20077180862426758, |
|
"logps/chosen": -436.16400146484375, |
|
"logps/rejected": -509.80059814453125, |
|
"loss": 0.5084, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6207956075668335, |
|
"rewards/margins": 0.9756816625595093, |
|
"rewards/rejected": -2.5964772701263428, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2927063339731286, |
|
"grad_norm": 52.35712491429162, |
|
"learning_rate": 4.4556040217722555e-07, |
|
"logits/chosen": 0.3977021276950836, |
|
"logits/rejected": 0.30989381670951843, |
|
"logps/chosen": -390.9764404296875, |
|
"logps/rejected": -544.4352416992188, |
|
"loss": 0.4964, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2817388772964478, |
|
"rewards/margins": 1.2578352689743042, |
|
"rewards/rejected": -2.539574146270752, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.29510556621880996, |
|
"grad_norm": 66.27745730268856, |
|
"learning_rate": 4.442491502264033e-07, |
|
"logits/chosen": 0.3725019097328186, |
|
"logits/rejected": 0.34433817863464355, |
|
"logps/chosen": -377.6466979980469, |
|
"logps/rejected": -397.6703186035156, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.3736088275909424, |
|
"rewards/margins": 0.3551202118396759, |
|
"rewards/rejected": -1.728729009628296, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.29750479846449135, |
|
"grad_norm": 44.53686594068729, |
|
"learning_rate": 4.429242724694338e-07, |
|
"logits/chosen": 0.39984074234962463, |
|
"logits/rejected": 0.3491300046443939, |
|
"logps/chosen": -373.43206787109375, |
|
"logps/rejected": -474.07635498046875, |
|
"loss": 0.5099, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0870692729949951, |
|
"rewards/margins": 0.8598827123641968, |
|
"rewards/rejected": -1.946952223777771, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.29990403071017274, |
|
"grad_norm": 52.65271057491298, |
|
"learning_rate": 4.4158586184122817e-07, |
|
"logits/chosen": 0.4957643151283264, |
|
"logits/rejected": 0.5427692532539368, |
|
"logps/chosen": -445.40667724609375, |
|
"logps/rejected": -510.15911865234375, |
|
"loss": 0.4858, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3345266580581665, |
|
"rewards/margins": 1.0387364625930786, |
|
"rewards/rejected": -2.373263120651245, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.30230326295585414, |
|
"grad_norm": 44.992102122521054, |
|
"learning_rate": 4.4023401222597443e-07, |
|
"logits/chosen": 0.33429089188575745, |
|
"logits/rejected": 0.3682582676410675, |
|
"logps/chosen": -450.81292724609375, |
|
"logps/rejected": -507.6797790527344, |
|
"loss": 0.4721, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4969028234481812, |
|
"rewards/margins": 0.793559193611145, |
|
"rewards/rejected": -2.2904622554779053, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.30470249520153553, |
|
"grad_norm": 67.52687192461451, |
|
"learning_rate": 4.3886881845055235e-07, |
|
"logits/chosen": 0.4060050845146179, |
|
"logits/rejected": 0.37885960936546326, |
|
"logps/chosen": -383.94293212890625, |
|
"logps/rejected": -507.6661071777344, |
|
"loss": 0.4843, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.184340476989746, |
|
"rewards/margins": 1.3244092464447021, |
|
"rewards/rejected": -2.5087497234344482, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.30710172744721687, |
|
"grad_norm": 51.16670610792298, |
|
"learning_rate": 4.374903762778814e-07, |
|
"logits/chosen": 0.3683363199234009, |
|
"logits/rejected": 0.34374839067459106, |
|
"logps/chosen": -438.6788024902344, |
|
"logps/rejected": -515.6959838867188, |
|
"loss": 0.4917, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6028823852539062, |
|
"rewards/margins": 1.0263550281524658, |
|
"rewards/rejected": -2.629237174987793, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.30950095969289826, |
|
"grad_norm": 61.81574184004172, |
|
"learning_rate": 4.3609878240020356e-07, |
|
"logits/chosen": 0.2071353942155838, |
|
"logits/rejected": 0.25202590227127075, |
|
"logps/chosen": -483.5445861816406, |
|
"logps/rejected": -521.457275390625, |
|
"loss": 0.4974, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.707818627357483, |
|
"rewards/margins": 0.9598783254623413, |
|
"rewards/rejected": -2.6676971912384033, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.31190019193857965, |
|
"grad_norm": 65.97747644814727, |
|
"learning_rate": 4.346941344323005e-07, |
|
"logits/chosen": 0.322220116853714, |
|
"logits/rejected": 0.38676518201828003, |
|
"logps/chosen": -426.3819885253906, |
|
"logps/rejected": -448.42108154296875, |
|
"loss": 0.5225, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.646468162536621, |
|
"rewards/margins": 0.7071776986122131, |
|
"rewards/rejected": -2.3536458015441895, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.31429942418426104, |
|
"grad_norm": 65.57125314371665, |
|
"learning_rate": 4.332765309046467e-07, |
|
"logits/chosen": 0.5356893539428711, |
|
"logits/rejected": 0.5479296445846558, |
|
"logps/chosen": -436.96063232421875, |
|
"logps/rejected": -473.86016845703125, |
|
"loss": 0.5373, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4618302583694458, |
|
"rewards/margins": 0.7506003379821777, |
|
"rewards/rejected": -2.212430477142334, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.31669865642994244, |
|
"grad_norm": 60.14812204881907, |
|
"learning_rate": 4.3184607125649754e-07, |
|
"logits/chosen": 0.3621949851512909, |
|
"logits/rejected": 0.34622901678085327, |
|
"logps/chosen": -394.7340393066406, |
|
"logps/rejected": -511.791748046875, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9880274534225464, |
|
"rewards/margins": 1.0686829090118408, |
|
"rewards/rejected": -2.0567104816436768, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3190978886756238, |
|
"grad_norm": 47.39591731694274, |
|
"learning_rate": 4.304028558289141e-07, |
|
"logits/chosen": 0.294962078332901, |
|
"logits/rejected": 0.27979040145874023, |
|
"logps/chosen": -406.9973449707031, |
|
"logps/rejected": -473.32708740234375, |
|
"loss": 0.4774, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0594736337661743, |
|
"rewards/margins": 0.9111591577529907, |
|
"rewards/rejected": -1.970632791519165, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.32149712092130517, |
|
"grad_norm": 41.2209834851983, |
|
"learning_rate": 4.28946985857725e-07, |
|
"logits/chosen": 0.3952016234397888, |
|
"logits/rejected": 0.34816282987594604, |
|
"logps/chosen": -445.1995544433594, |
|
"logps/rejected": -567.7208862304688, |
|
"loss": 0.4621, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4958828687667847, |
|
"rewards/margins": 1.3815982341766357, |
|
"rewards/rejected": -2.877480983734131, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.32389635316698656, |
|
"grad_norm": 57.703162217123705, |
|
"learning_rate": 4.2747856346642445e-07, |
|
"logits/chosen": 0.23623302578926086, |
|
"logits/rejected": 0.25286155939102173, |
|
"logps/chosen": -407.5404357910156, |
|
"logps/rejected": -509.9039001464844, |
|
"loss": 0.4763, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5428358316421509, |
|
"rewards/margins": 1.1545684337615967, |
|
"rewards/rejected": -2.697404146194458, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.32629558541266795, |
|
"grad_norm": 74.26702203654544, |
|
"learning_rate": 4.2599769165900933e-07, |
|
"logits/chosen": 0.27603861689567566, |
|
"logits/rejected": 0.2551714777946472, |
|
"logps/chosen": -497.12652587890625, |
|
"logps/rejected": -532.0398559570312, |
|
"loss": 0.548, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.231362819671631, |
|
"rewards/margins": 0.5803264379501343, |
|
"rewards/rejected": -2.8116891384124756, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.32869481765834935, |
|
"grad_norm": 51.86157508843387, |
|
"learning_rate": 4.245044743127535e-07, |
|
"logits/chosen": 0.3319122791290283, |
|
"logits/rejected": 0.25652652978897095, |
|
"logps/chosen": -438.5191955566406, |
|
"logps/rejected": -514.2816772460938, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6855862140655518, |
|
"rewards/margins": 0.7145141363143921, |
|
"rewards/rejected": -2.4001002311706543, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3310940499040307, |
|
"grad_norm": 58.70463366289085, |
|
"learning_rate": 4.229990161709214e-07, |
|
"logits/chosen": 0.3090011179447174, |
|
"logits/rejected": 0.24356667697429657, |
|
"logps/chosen": -366.631591796875, |
|
"logps/rejected": -503.88323974609375, |
|
"loss": 0.5328, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1779043674468994, |
|
"rewards/margins": 1.183097004890442, |
|
"rewards/rejected": -2.361001491546631, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3334932821497121, |
|
"grad_norm": 47.53804566518479, |
|
"learning_rate": 4.214814228354204e-07, |
|
"logits/chosen": 0.20172250270843506, |
|
"logits/rejected": 0.22906656563282013, |
|
"logps/chosen": -428.5935974121094, |
|
"logps/rejected": -538.1102294921875, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3850289583206177, |
|
"rewards/margins": 1.2768621444702148, |
|
"rewards/rejected": -2.661891222000122, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.33589251439539347, |
|
"grad_norm": 77.15585515791938, |
|
"learning_rate": 4.1995180075939375e-07, |
|
"logits/chosen": 0.26516109704971313, |
|
"logits/rejected": 0.21058981120586395, |
|
"logps/chosen": -455.91839599609375, |
|
"logps/rejected": -562.4434814453125, |
|
"loss": 0.455, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5181043148040771, |
|
"rewards/margins": 1.2698519229888916, |
|
"rewards/rejected": -2.7879559993743896, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.33829174664107486, |
|
"grad_norm": 65.47808323749652, |
|
"learning_rate": 4.1841025723975297e-07, |
|
"logits/chosen": 0.20047049224376678, |
|
"logits/rejected": 0.16870595514774323, |
|
"logps/chosen": -434.60626220703125, |
|
"logps/rejected": -548.126708984375, |
|
"loss": 0.4665, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2808418273925781, |
|
"rewards/margins": 1.3285714387893677, |
|
"rewards/rejected": -2.609412670135498, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.34069097888675626, |
|
"grad_norm": 89.44547447863275, |
|
"learning_rate": 4.168569004096516e-07, |
|
"logits/chosen": 0.26870986819267273, |
|
"logits/rejected": 0.15002629160881042, |
|
"logps/chosen": -419.7383728027344, |
|
"logps/rejected": -573.8462524414062, |
|
"loss": 0.4641, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6306660175323486, |
|
"rewards/margins": 1.3894847631454468, |
|
"rewards/rejected": -3.020150661468506, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.3430902111324376, |
|
"grad_norm": 60.10882154238606, |
|
"learning_rate": 4.152918392308997e-07, |
|
"logits/chosen": 0.1910504400730133, |
|
"logits/rejected": 0.1575092375278473, |
|
"logps/chosen": -445.48846435546875, |
|
"logps/rejected": -533.4600830078125, |
|
"loss": 0.4681, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8650315999984741, |
|
"rewards/margins": 0.9867684245109558, |
|
"rewards/rejected": -2.8518002033233643, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.345489443378119, |
|
"grad_norm": 74.71713120999203, |
|
"learning_rate": 4.137151834863213e-07, |
|
"logits/chosen": 0.22800974547863007, |
|
"logits/rejected": 0.12040402740240097, |
|
"logps/chosen": -455.5386657714844, |
|
"logps/rejected": -639.4290771484375, |
|
"loss": 0.5367, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0237579345703125, |
|
"rewards/margins": 1.4940173625946045, |
|
"rewards/rejected": -3.517775297164917, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3478886756238004, |
|
"grad_norm": 66.18615172930821, |
|
"learning_rate": 4.121270437720526e-07, |
|
"logits/chosen": 0.18059277534484863, |
|
"logits/rejected": 0.12810157239437103, |
|
"logps/chosen": -404.3804626464844, |
|
"logps/rejected": -554.2039794921875, |
|
"loss": 0.4822, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8225759267807007, |
|
"rewards/margins": 1.03814697265625, |
|
"rewards/rejected": -2.8607230186462402, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3502879078694818, |
|
"grad_norm": 51.123178351808555, |
|
"learning_rate": 4.105275314897852e-07, |
|
"logits/chosen": 0.32888683676719666, |
|
"logits/rejected": 0.214427188038826, |
|
"logps/chosen": -418.49969482421875, |
|
"logps/rejected": -606.8433837890625, |
|
"loss": 0.5115, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.695514440536499, |
|
"rewards/margins": 1.5961391925811768, |
|
"rewards/rejected": -3.291653871536255, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.35268714011516317, |
|
"grad_norm": 52.06892246929386, |
|
"learning_rate": 4.089167588389508e-07, |
|
"logits/chosen": 0.23663802444934845, |
|
"logits/rejected": 0.31276068091392517, |
|
"logps/chosen": -512.780029296875, |
|
"logps/rejected": -592.1165771484375, |
|
"loss": 0.5051, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.565666675567627, |
|
"rewards/margins": 1.2843080759048462, |
|
"rewards/rejected": -2.8499746322631836, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.3550863723608445, |
|
"grad_norm": 77.01967173368558, |
|
"learning_rate": 4.072948388088515e-07, |
|
"logits/chosen": 0.4146420359611511, |
|
"logits/rejected": 0.4239901006221771, |
|
"logps/chosen": -437.6181640625, |
|
"logps/rejected": -541.0182495117188, |
|
"loss": 0.5267, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.48988938331604, |
|
"rewards/margins": 0.9824197888374329, |
|
"rewards/rejected": -2.472309112548828, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3574856046065259, |
|
"grad_norm": 67.98986065857378, |
|
"learning_rate": 4.056618851707334e-07, |
|
"logits/chosen": 0.3985747992992401, |
|
"logits/rejected": 0.3693119287490845, |
|
"logps/chosen": -403.2997741699219, |
|
"logps/rejected": -509.6571350097656, |
|
"loss": 0.4802, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.159311056137085, |
|
"rewards/margins": 1.0505834817886353, |
|
"rewards/rejected": -2.2098946571350098, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3598848368522073, |
|
"grad_norm": 61.05605298699306, |
|
"learning_rate": 4.0401801246980675e-07, |
|
"logits/chosen": 0.2373121976852417, |
|
"logits/rejected": 0.25002145767211914, |
|
"logps/chosen": -375.0496826171875, |
|
"logps/rejected": -443.29376220703125, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.434430480003357, |
|
"rewards/margins": 0.8452070355415344, |
|
"rewards/rejected": -2.279637336730957, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3622840690978887, |
|
"grad_norm": 49.776619421207165, |
|
"learning_rate": 4.0236333601721043e-07, |
|
"logits/chosen": 0.38883355259895325, |
|
"logits/rejected": 0.27539998292922974, |
|
"logps/chosen": -470.8067321777344, |
|
"logps/rejected": -535.9411010742188, |
|
"loss": 0.5049, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5812249183654785, |
|
"rewards/margins": 0.5776987075805664, |
|
"rewards/rejected": -2.158923625946045, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.3646833013435701, |
|
"grad_norm": 63.21818186735171, |
|
"learning_rate": 4.0069797188192364e-07, |
|
"logits/chosen": 0.27616140246391296, |
|
"logits/rejected": 0.2776263654232025, |
|
"logps/chosen": -453.8504943847656, |
|
"logps/rejected": -543.6710205078125, |
|
"loss": 0.4863, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5051019191741943, |
|
"rewards/margins": 1.16765558719635, |
|
"rewards/rejected": -2.672757625579834, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3670825335892514, |
|
"grad_norm": 46.2566400088145, |
|
"learning_rate": 3.9902203688262417e-07, |
|
"logits/chosen": 0.2944423258304596, |
|
"logits/rejected": 0.2835266590118408, |
|
"logps/chosen": -417.1025390625, |
|
"logps/rejected": -504.23028564453125, |
|
"loss": 0.463, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.323207139968872, |
|
"rewards/margins": 1.00657320022583, |
|
"rewards/rejected": -2.329780340194702, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3694817658349328, |
|
"grad_norm": 71.68643270839574, |
|
"learning_rate": 3.9733564857949365e-07, |
|
"logits/chosen": 0.4159972667694092, |
|
"logits/rejected": 0.449301540851593, |
|
"logps/chosen": -508.6168518066406, |
|
"logps/rejected": -591.8961791992188, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7826929092407227, |
|
"rewards/margins": 1.2551745176315308, |
|
"rewards/rejected": -3.037867307662964, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3718809980806142, |
|
"grad_norm": 85.59383176151694, |
|
"learning_rate": 3.9563892526597177e-07, |
|
"logits/chosen": 0.43766123056411743, |
|
"logits/rejected": 0.40199995040893555, |
|
"logps/chosen": -369.1076965332031, |
|
"logps/rejected": -485.8929748535156, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2781074047088623, |
|
"rewards/margins": 0.709191620349884, |
|
"rewards/rejected": -1.9872992038726807, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3742802303262956, |
|
"grad_norm": 54.51746169170813, |
|
"learning_rate": 3.9393198596045795e-07, |
|
"logits/chosen": 0.34857094287872314, |
|
"logits/rejected": 0.2321186363697052, |
|
"logps/chosen": -409.5308532714844, |
|
"logps/rejected": -532.0660400390625, |
|
"loss": 0.5203, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.615835189819336, |
|
"rewards/margins": 1.0217307806015015, |
|
"rewards/rejected": -2.637566089630127, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.376679462571977, |
|
"grad_norm": 54.30413769189543, |
|
"learning_rate": 3.922149503979628e-07, |
|
"logits/chosen": 0.3620226979255676, |
|
"logits/rejected": 0.2604697644710541, |
|
"logps/chosen": -483.5345153808594, |
|
"logps/rejected": -649.7924194335938, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.860421895980835, |
|
"rewards/margins": 1.6947648525238037, |
|
"rewards/rejected": -3.5551865100860596, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3790786948176583, |
|
"grad_norm": 60.90116328784711, |
|
"learning_rate": 3.904879390217095e-07, |
|
"logits/chosen": 0.20668205618858337, |
|
"logits/rejected": 0.22057127952575684, |
|
"logps/chosen": -427.727783203125, |
|
"logps/rejected": -503.6275329589844, |
|
"loss": 0.4752, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5892452001571655, |
|
"rewards/margins": 0.9098671674728394, |
|
"rewards/rejected": -2.499112367630005, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3814779270633397, |
|
"grad_norm": 90.10879186955651, |
|
"learning_rate": 3.8875107297468463e-07, |
|
"logits/chosen": 0.26061517000198364, |
|
"logits/rejected": 0.11571246385574341, |
|
"logps/chosen": -393.7334899902344, |
|
"logps/rejected": -607.5116577148438, |
|
"loss": 0.4793, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2890024185180664, |
|
"rewards/margins": 1.7361152172088623, |
|
"rewards/rejected": -3.0251173973083496, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3838771593090211, |
|
"grad_norm": 62.381058065720985, |
|
"learning_rate": 3.87004474091141e-07, |
|
"logits/chosen": 0.38810300827026367, |
|
"logits/rejected": 0.34201544523239136, |
|
"logps/chosen": -407.7247009277344, |
|
"logps/rejected": -509.07342529296875, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5952517986297607, |
|
"rewards/margins": 0.8810189962387085, |
|
"rewards/rejected": -2.476270914077759, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3862763915547025, |
|
"grad_norm": 48.8749598177452, |
|
"learning_rate": 3.8524826488805114e-07, |
|
"logits/chosen": 0.3524308502674103, |
|
"logits/rejected": 0.3700847923755646, |
|
"logps/chosen": -459.9222717285156, |
|
"logps/rejected": -511.69573974609375, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5697377920150757, |
|
"rewards/margins": 0.9556680917739868, |
|
"rewards/rejected": -2.5254054069519043, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3886756238003839, |
|
"grad_norm": 59.09500967211413, |
|
"learning_rate": 3.834825685565133e-07, |
|
"logits/chosen": 0.41831350326538086, |
|
"logits/rejected": 0.4462617337703705, |
|
"logps/chosen": -360.1072082519531, |
|
"logps/rejected": -383.2783203125, |
|
"loss": 0.4822, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0175033807754517, |
|
"rewards/margins": 0.5931234359741211, |
|
"rewards/rejected": -1.6106268167495728, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.39107485604606523, |
|
"grad_norm": 50.53141384984837, |
|
"learning_rate": 3.8170750895311007e-07, |
|
"logits/chosen": 0.2824346423149109, |
|
"logits/rejected": 0.2604730427265167, |
|
"logps/chosen": -414.1935119628906, |
|
"logps/rejected": -472.2347106933594, |
|
"loss": 0.4604, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0589439868927002, |
|
"rewards/margins": 0.8909751772880554, |
|
"rewards/rejected": -1.9499191045761108, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.3934740882917466, |
|
"grad_norm": 109.19108386409965, |
|
"learning_rate": 3.7992321059122045e-07, |
|
"logits/chosen": 0.45445793867111206, |
|
"logits/rejected": 0.48068398237228394, |
|
"logps/chosen": -431.97479248046875, |
|
"logps/rejected": -510.19305419921875, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7654443979263306, |
|
"rewards/margins": 0.9210473895072937, |
|
"rewards/rejected": -2.6864917278289795, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.395873320537428, |
|
"grad_norm": 51.50408126252172, |
|
"learning_rate": 3.7812979863228576e-07, |
|
"logits/chosen": 0.38441458344459534, |
|
"logits/rejected": 0.31780314445495605, |
|
"logps/chosen": -422.47723388671875, |
|
"logps/rejected": -546.2657470703125, |
|
"loss": 0.4692, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9557384252548218, |
|
"rewards/margins": 1.1367993354797363, |
|
"rewards/rejected": -3.0925374031066895, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3982725527831094, |
|
"grad_norm": 79.3332449273427, |
|
"learning_rate": 3.763273988770296e-07, |
|
"logits/chosen": 0.5290141105651855, |
|
"logits/rejected": 0.52525794506073, |
|
"logps/chosen": -452.8272399902344, |
|
"logps/rejected": -564.7791748046875, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8193256855010986, |
|
"rewards/margins": 1.1284821033477783, |
|
"rewards/rejected": -2.947808027267456, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4006717850287908, |
|
"grad_norm": 60.45763828808063, |
|
"learning_rate": 3.7451613775663405e-07, |
|
"logits/chosen": 0.2585281729698181, |
|
"logits/rejected": 0.18836592137813568, |
|
"logps/chosen": -416.86773681640625, |
|
"logps/rejected": -573.0087280273438, |
|
"loss": 0.49, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.55123770236969, |
|
"rewards/margins": 1.5055726766586304, |
|
"rewards/rejected": -3.0568103790283203, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.40307101727447214, |
|
"grad_norm": 71.47172479396384, |
|
"learning_rate": 3.726961423238706e-07, |
|
"logits/chosen": 0.31233787536621094, |
|
"logits/rejected": 0.2178495228290558, |
|
"logps/chosen": -385.94854736328125, |
|
"logps/rejected": -564.6609497070312, |
|
"loss": 0.4912, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.392052412033081, |
|
"rewards/margins": 1.5781797170639038, |
|
"rewards/rejected": -2.9702320098876953, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.40547024952015354, |
|
"grad_norm": 68.3186341677682, |
|
"learning_rate": 3.708675402441882e-07, |
|
"logits/chosen": 0.2971805930137634, |
|
"logits/rejected": 0.3825104534626007, |
|
"logps/chosen": -464.8531799316406, |
|
"logps/rejected": -538.8662109375, |
|
"loss": 0.5145, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5982029438018799, |
|
"rewards/margins": 1.087725043296814, |
|
"rewards/rejected": -2.6859278678894043, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.40786948176583493, |
|
"grad_norm": 67.83128767182318, |
|
"learning_rate": 3.6903045978675775e-07, |
|
"logits/chosen": 0.334720641374588, |
|
"logits/rejected": 0.3014802932739258, |
|
"logps/chosen": -392.6343688964844, |
|
"logps/rejected": -533.2940673828125, |
|
"loss": 0.4798, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.396249532699585, |
|
"rewards/margins": 1.5643364191055298, |
|
"rewards/rejected": -2.960585832595825, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4102687140115163, |
|
"grad_norm": 51.076496075173246, |
|
"learning_rate": 3.6718502981547474e-07, |
|
"logits/chosen": 0.4284774661064148, |
|
"logits/rejected": 0.3270293176174164, |
|
"logps/chosen": -437.37481689453125, |
|
"logps/rejected": -574.0969848632812, |
|
"loss": 0.4762, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5580734014511108, |
|
"rewards/margins": 0.9295462369918823, |
|
"rewards/rejected": -2.487619638442993, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.4126679462571977, |
|
"grad_norm": 64.38738329478983, |
|
"learning_rate": 3.6533137977991986e-07, |
|
"logits/chosen": 0.277169406414032, |
|
"logits/rejected": 0.3074883818626404, |
|
"logps/chosen": -455.8011779785156, |
|
"logps/rejected": -555.8069458007812, |
|
"loss": 0.5148, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6205753087997437, |
|
"rewards/margins": 0.76356440782547, |
|
"rewards/rejected": -2.3841395378112793, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.41506717850287905, |
|
"grad_norm": 56.801307373384674, |
|
"learning_rate": 3.6346963970627865e-07, |
|
"logits/chosen": 0.4305633008480072, |
|
"logits/rejected": 0.3523896634578705, |
|
"logps/chosen": -412.545166015625, |
|
"logps/rejected": -555.5245971679688, |
|
"loss": 0.4413, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4458911418914795, |
|
"rewards/margins": 1.2502870559692383, |
|
"rewards/rejected": -2.6961779594421387, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.41746641074856045, |
|
"grad_norm": 94.25599799986875, |
|
"learning_rate": 3.615999401882207e-07, |
|
"logits/chosen": 0.5256787538528442, |
|
"logits/rejected": 0.4999951720237732, |
|
"logps/chosen": -431.0813903808594, |
|
"logps/rejected": -630.9769287109375, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9486348628997803, |
|
"rewards/margins": 1.8412199020385742, |
|
"rewards/rejected": -3.7898545265197754, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.41986564299424184, |
|
"grad_norm": 57.442749877402164, |
|
"learning_rate": 3.597224123777389e-07, |
|
"logits/chosen": 0.3364327549934387, |
|
"logits/rejected": 0.2900117337703705, |
|
"logps/chosen": -449.7611389160156, |
|
"logps/rejected": -620.5862426757812, |
|
"loss": 0.4521, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7570139169692993, |
|
"rewards/margins": 1.577225923538208, |
|
"rewards/rejected": -3.334239959716797, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.42226487523992323, |
|
"grad_norm": 124.40645841185747, |
|
"learning_rate": 3.5783718797595e-07, |
|
"logits/chosen": 0.30810096859931946, |
|
"logits/rejected": 0.3993563652038574, |
|
"logps/chosen": -497.1150817871094, |
|
"logps/rejected": -563.6721801757812, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8938947916030884, |
|
"rewards/margins": 1.0604908466339111, |
|
"rewards/rejected": -2.95438551902771, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4246641074856046, |
|
"grad_norm": 63.3314681049724, |
|
"learning_rate": 3.559443992238558e-07, |
|
"logits/chosen": 0.40472593903541565, |
|
"logits/rejected": 0.34432557225227356, |
|
"logps/chosen": -387.82659912109375, |
|
"logps/rejected": -560.4013671875, |
|
"loss": 0.4894, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.2423477172851562, |
|
"rewards/margins": 1.4612280130386353, |
|
"rewards/rejected": -2.703575611114502, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.42706333973128596, |
|
"grad_norm": 53.149590563645376, |
|
"learning_rate": 3.540441788930673e-07, |
|
"logits/chosen": 0.44260650873184204, |
|
"logits/rejected": 0.3687344193458557, |
|
"logps/chosen": -449.75323486328125, |
|
"logps/rejected": -574.5797729492188, |
|
"loss": 0.4588, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3485386371612549, |
|
"rewards/margins": 1.6103626489639282, |
|
"rewards/rejected": -2.9589014053344727, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.42946257197696736, |
|
"grad_norm": 53.773028540934796, |
|
"learning_rate": 3.5213666027649123e-07, |
|
"logits/chosen": 0.39142706990242004, |
|
"logits/rejected": 0.46402230858802795, |
|
"logps/chosen": -475.583740234375, |
|
"logps/rejected": -509.60650634765625, |
|
"loss": 0.5141, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8812768459320068, |
|
"rewards/margins": 0.7570799589157104, |
|
"rewards/rejected": -2.638356924057007, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.43186180422264875, |
|
"grad_norm": 81.18780773764912, |
|
"learning_rate": 3.5022197717898017e-07, |
|
"logits/chosen": 0.229768306016922, |
|
"logits/rejected": 0.27546173334121704, |
|
"logps/chosen": -404.23333740234375, |
|
"logps/rejected": -500.17022705078125, |
|
"loss": 0.4345, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6131216287612915, |
|
"rewards/margins": 1.327030062675476, |
|
"rewards/rejected": -2.9401516914367676, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.43426103646833014, |
|
"grad_norm": 59.98522942106916, |
|
"learning_rate": 3.4830026390794633e-07, |
|
"logits/chosen": 0.2163640260696411, |
|
"logits/rejected": 0.19707325100898743, |
|
"logps/chosen": -519.4266357421875, |
|
"logps/rejected": -638.4168701171875, |
|
"loss": 0.4305, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0921690464019775, |
|
"rewards/margins": 1.685040831565857, |
|
"rewards/rejected": -3.777210235595703, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.43666026871401153, |
|
"grad_norm": 45.34708265791396, |
|
"learning_rate": 3.4637165526394104e-07, |
|
"logits/chosen": 0.29851406812667847, |
|
"logits/rejected": 0.2644515633583069, |
|
"logps/chosen": -400.7931213378906, |
|
"logps/rejected": -526.3135986328125, |
|
"loss": 0.4957, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5323349237442017, |
|
"rewards/margins": 1.1316112279891968, |
|
"rewards/rejected": -2.6639461517333984, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.43905950095969287, |
|
"grad_norm": 53.54880439348179, |
|
"learning_rate": 3.4443628653119814e-07, |
|
"logits/chosen": 0.32904523611068726, |
|
"logits/rejected": 0.3071838617324829, |
|
"logps/chosen": -461.36944580078125, |
|
"logps/rejected": -689.0936279296875, |
|
"loss": 0.5006, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6705764532089233, |
|
"rewards/margins": 1.8669970035552979, |
|
"rewards/rejected": -3.537574052810669, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.44145873320537427, |
|
"grad_norm": 60.602422168164615, |
|
"learning_rate": 3.424942934681453e-07, |
|
"logits/chosen": 0.37119847536087036, |
|
"logits/rejected": 0.4386017322540283, |
|
"logps/chosen": -391.25189208984375, |
|
"logps/rejected": -542.0565795898438, |
|
"loss": 0.4713, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2834140062332153, |
|
"rewards/margins": 1.6211163997650146, |
|
"rewards/rejected": -2.9045305252075195, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.44385796545105566, |
|
"grad_norm": 108.09779424898507, |
|
"learning_rate": 3.405458122978804e-07, |
|
"logits/chosen": 0.3594167232513428, |
|
"logits/rejected": 0.31133952736854553, |
|
"logps/chosen": -452.1357421875, |
|
"logps/rejected": -523.7760620117188, |
|
"loss": 0.4876, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4595952033996582, |
|
"rewards/margins": 1.0168980360031128, |
|
"rewards/rejected": -2.4764935970306396, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.44625719769673705, |
|
"grad_norm": 64.82605469755758, |
|
"learning_rate": 3.3859097969861633e-07, |
|
"logits/chosen": 0.33413031697273254, |
|
"logits/rejected": 0.3172528147697449, |
|
"logps/chosen": -453.7744140625, |
|
"logps/rejected": -542.4033203125, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6000181436538696, |
|
"rewards/margins": 1.1864218711853027, |
|
"rewards/rejected": -2.786440134048462, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.44865642994241844, |
|
"grad_norm": 59.53605471920353, |
|
"learning_rate": 3.366299327940936e-07, |
|
"logits/chosen": 0.3178195357322693, |
|
"logits/rejected": 0.19241534173488617, |
|
"logps/chosen": -471.8601989746094, |
|
"logps/rejected": -639.7420654296875, |
|
"loss": 0.4588, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6394401788711548, |
|
"rewards/margins": 1.4527866840362549, |
|
"rewards/rejected": -3.09222674369812, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.4510556621880998, |
|
"grad_norm": 67.88498874409545, |
|
"learning_rate": 3.3466280914396117e-07, |
|
"logits/chosen": 0.26190441846847534, |
|
"logits/rejected": 0.21061572432518005, |
|
"logps/chosen": -437.1505432128906, |
|
"logps/rejected": -596.0320434570312, |
|
"loss": 0.486, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8086521625518799, |
|
"rewards/margins": 1.4108121395111084, |
|
"rewards/rejected": -3.2194645404815674, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4534548944337812, |
|
"grad_norm": 67.74011501244706, |
|
"learning_rate": 3.326897467341281e-07, |
|
"logits/chosen": 0.23020358383655548, |
|
"logits/rejected": 0.2323944866657257, |
|
"logps/chosen": -399.55999755859375, |
|
"logps/rejected": -545.030517578125, |
|
"loss": 0.4709, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.767966866493225, |
|
"rewards/margins": 1.3081542253494263, |
|
"rewards/rejected": -3.0761210918426514, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.45585412667946257, |
|
"grad_norm": 68.54439234853297, |
|
"learning_rate": 3.3071088396708335e-07, |
|
"logits/chosen": 0.28997892141342163, |
|
"logits/rejected": 0.22331051528453827, |
|
"logps/chosen": -352.03021240234375, |
|
"logps/rejected": -523.7657470703125, |
|
"loss": 0.485, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3509986400604248, |
|
"rewards/margins": 1.5307499170303345, |
|
"rewards/rejected": -2.8817484378814697, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.45825335892514396, |
|
"grad_norm": 44.784810331400315, |
|
"learning_rate": 3.2872635965218824e-07, |
|
"logits/chosen": 0.46165722608566284, |
|
"logits/rejected": 0.4086335301399231, |
|
"logps/chosen": -441.5128479003906, |
|
"logps/rejected": -585.4395751953125, |
|
"loss": 0.4922, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6371694803237915, |
|
"rewards/margins": 1.2548922300338745, |
|
"rewards/rejected": -2.892062187194824, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.46065259117082535, |
|
"grad_norm": 53.29430321195333, |
|
"learning_rate": 3.2673631299593905e-07, |
|
"logits/chosen": 0.29801997542381287, |
|
"logits/rejected": 0.32238397002220154, |
|
"logps/chosen": -451.3075256347656, |
|
"logps/rejected": -565.6251831054688, |
|
"loss": 0.5007, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6485878229141235, |
|
"rewards/margins": 1.187098741531372, |
|
"rewards/rejected": -2.835686445236206, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4630518234165067, |
|
"grad_norm": 89.87877992487157, |
|
"learning_rate": 3.247408835922024e-07, |
|
"logits/chosen": 0.3851594924926758, |
|
"logits/rejected": 0.3086535334587097, |
|
"logps/chosen": -514.3781127929688, |
|
"logps/rejected": -665.1397705078125, |
|
"loss": 0.4802, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9155423641204834, |
|
"rewards/margins": 1.4537831544876099, |
|
"rewards/rejected": -3.3693249225616455, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4654510556621881, |
|
"grad_norm": 103.74174820519212, |
|
"learning_rate": 3.2274021141242306e-07, |
|
"logits/chosen": 0.42312413454055786, |
|
"logits/rejected": 0.41015100479125977, |
|
"logps/chosen": -451.295166015625, |
|
"logps/rejected": -596.2919311523438, |
|
"loss": 0.4779, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.744811773300171, |
|
"rewards/margins": 1.395531177520752, |
|
"rewards/rejected": -3.140343189239502, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4678502879078695, |
|
"grad_norm": 143.1488018368024, |
|
"learning_rate": 3.2073443679580613e-07, |
|
"logits/chosen": 0.21912448108196259, |
|
"logits/rejected": 0.23281314969062805, |
|
"logps/chosen": -479.14434814453125, |
|
"logps/rejected": -584.7716674804688, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8765337467193604, |
|
"rewards/margins": 1.0321691036224365, |
|
"rewards/rejected": -2.9087026119232178, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.47024952015355087, |
|
"grad_norm": 48.268846399947925, |
|
"learning_rate": 3.1872370043947194e-07, |
|
"logits/chosen": 0.42225226759910583, |
|
"logits/rejected": 0.3880574405193329, |
|
"logps/chosen": -434.68487548828125, |
|
"logps/rejected": -623.84765625, |
|
"loss": 0.4348, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4603776931762695, |
|
"rewards/margins": 1.975516676902771, |
|
"rewards/rejected": -3.435894012451172, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.47264875239923226, |
|
"grad_norm": 80.41996960251628, |
|
"learning_rate": 3.167081433885874e-07, |
|
"logits/chosen": 0.4970664381980896, |
|
"logits/rejected": 0.4426855146884918, |
|
"logps/chosen": -543.0753173828125, |
|
"logps/rejected": -725.0374755859375, |
|
"loss": 0.4309, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0638086795806885, |
|
"rewards/margins": 1.44111967086792, |
|
"rewards/rejected": -3.5049285888671875, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.4750479846449136, |
|
"grad_norm": 60.15037639054526, |
|
"learning_rate": 3.14687907026472e-07, |
|
"logits/chosen": 0.4160444140434265, |
|
"logits/rejected": 0.41970038414001465, |
|
"logps/chosen": -418.5816955566406, |
|
"logps/rejected": -571.8365478515625, |
|
"loss": 0.4559, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.7121940851211548, |
|
"rewards/margins": 1.3768190145492554, |
|
"rewards/rejected": -3.089012861251831, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.477447216890595, |
|
"grad_norm": 60.533720162859076, |
|
"learning_rate": 3.126631330646801e-07, |
|
"logits/chosen": 0.36265549063682556, |
|
"logits/rejected": 0.3030100166797638, |
|
"logps/chosen": -545.2647094726562, |
|
"logps/rejected": -659.9442138671875, |
|
"loss": 0.4963, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.23677659034729, |
|
"rewards/margins": 1.1260842084884644, |
|
"rewards/rejected": -3.362860918045044, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"grad_norm": 56.011372557382224, |
|
"learning_rate": 3.1063396353306097e-07, |
|
"logits/chosen": 0.5000380277633667, |
|
"logits/rejected": 0.580724835395813, |
|
"logps/chosen": -429.5301208496094, |
|
"logps/rejected": -500.614013671875, |
|
"loss": 0.4605, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4057103395462036, |
|
"rewards/margins": 1.1594346761703491, |
|
"rewards/rejected": -2.5651450157165527, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"eval_logits/chosen": 0.665359377861023, |
|
"eval_logits/rejected": 0.6184743642807007, |
|
"eval_logps/chosen": -447.8167724609375, |
|
"eval_logps/rejected": -608.9370727539062, |
|
"eval_loss": 0.4675012230873108, |
|
"eval_rewards/accuracies": 0.8107143044471741, |
|
"eval_rewards/chosen": -1.750851035118103, |
|
"eval_rewards/margins": 1.5577605962753296, |
|
"eval_rewards/rejected": -3.3086116313934326, |
|
"eval_runtime": 220.1548, |
|
"eval_samples_per_second": 20.263, |
|
"eval_steps_per_second": 0.318, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4822456813819578, |
|
"grad_norm": 55.47887519504596, |
|
"learning_rate": 3.0860054076979535e-07, |
|
"logits/chosen": 0.5363108515739441, |
|
"logits/rejected": 0.49973684549331665, |
|
"logps/chosen": -484.5751953125, |
|
"logps/rejected": -618.3671264648438, |
|
"loss": 0.4407, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9863439798355103, |
|
"rewards/margins": 1.5874888896942139, |
|
"rewards/rejected": -3.5738327503204346, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.4846449136276392, |
|
"grad_norm": 61.042132194456485, |
|
"learning_rate": 3.065630074114115e-07, |
|
"logits/chosen": 0.5197394490242004, |
|
"logits/rejected": 0.5401654243469238, |
|
"logps/chosen": -477.82470703125, |
|
"logps/rejected": -624.3440551757812, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7207244634628296, |
|
"rewards/margins": 1.8844687938690186, |
|
"rewards/rejected": -3.6051933765411377, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4870441458733205, |
|
"grad_norm": 60.90349183916883, |
|
"learning_rate": 3.0452150638277947e-07, |
|
"logits/chosen": 0.533577024936676, |
|
"logits/rejected": 0.4944360852241516, |
|
"logps/chosen": -405.94500732421875, |
|
"logps/rejected": -521.0950927734375, |
|
"loss": 0.4899, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5832645893096924, |
|
"rewards/margins": 1.0749413967132568, |
|
"rewards/rejected": -2.65820574760437, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.4894433781190019, |
|
"grad_norm": 54.290635694074915, |
|
"learning_rate": 3.024761808870856e-07, |
|
"logits/chosen": 0.6663334369659424, |
|
"logits/rejected": 0.5439913868904114, |
|
"logps/chosen": -389.1966857910156, |
|
"logps/rejected": -644.8016967773438, |
|
"loss": 0.4297, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3199899196624756, |
|
"rewards/margins": 2.6214773654937744, |
|
"rewards/rejected": -3.94146728515625, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.4918426103646833, |
|
"grad_norm": 106.8177031286795, |
|
"learning_rate": 3.004271743957875e-07, |
|
"logits/chosen": 0.31965428590774536, |
|
"logits/rejected": 0.2487453669309616, |
|
"logps/chosen": -508.271240234375, |
|
"logps/rejected": -639.26171875, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2463579177856445, |
|
"rewards/margins": 1.125352144241333, |
|
"rewards/rejected": -3.3717103004455566, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4942418426103647, |
|
"grad_norm": 56.07763345800946, |
|
"learning_rate": 2.983746306385499e-07, |
|
"logits/chosen": 0.5042864084243774, |
|
"logits/rejected": 0.4615905284881592, |
|
"logps/chosen": -433.1471252441406, |
|
"logps/rejected": -642.4073486328125, |
|
"loss": 0.43, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7094367742538452, |
|
"rewards/margins": 1.9255794286727905, |
|
"rewards/rejected": -3.635016679763794, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4966410748560461, |
|
"grad_norm": 77.68472784557788, |
|
"learning_rate": 2.963186935931628e-07, |
|
"logits/chosen": 0.512526273727417, |
|
"logits/rejected": 0.46043792366981506, |
|
"logps/chosen": -451.4317321777344, |
|
"logps/rejected": -576.4466552734375, |
|
"loss": 0.4661, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.599999189376831, |
|
"rewards/margins": 1.2795385122299194, |
|
"rewards/rejected": -2.879538059234619, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.4990403071017274, |
|
"grad_norm": 50.861840076587534, |
|
"learning_rate": 2.9425950747544176e-07, |
|
"logits/chosen": 0.42851418256759644, |
|
"logits/rejected": 0.38174164295196533, |
|
"logps/chosen": -501.5247497558594, |
|
"logps/rejected": -689.77783203125, |
|
"loss": 0.4627, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7646675109863281, |
|
"rewards/margins": 2.1865761280059814, |
|
"rewards/rejected": -3.9512436389923096, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5014395393474088, |
|
"grad_norm": 79.20219373170347, |
|
"learning_rate": 2.921972167291119e-07, |
|
"logits/chosen": 0.3517116606235504, |
|
"logits/rejected": 0.32479557394981384, |
|
"logps/chosen": -451.6710510253906, |
|
"logps/rejected": -580.46923828125, |
|
"loss": 0.4806, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5235283374786377, |
|
"rewards/margins": 1.1337333917617798, |
|
"rewards/rejected": -2.657261610031128, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5038387715930902, |
|
"grad_norm": 68.21456448986119, |
|
"learning_rate": 2.9013196601567567e-07, |
|
"logits/chosen": 0.32835307717323303, |
|
"logits/rejected": 0.28805023431777954, |
|
"logps/chosen": -394.0077819824219, |
|
"logps/rejected": -523.2872924804688, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2557611465454102, |
|
"rewards/margins": 1.17495596408844, |
|
"rewards/rejected": -2.4307172298431396, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5062380038387716, |
|
"grad_norm": 66.4532400437519, |
|
"learning_rate": 2.8806390020426555e-07, |
|
"logits/chosen": 0.38638368248939514, |
|
"logits/rejected": 0.34177035093307495, |
|
"logps/chosen": -416.5057067871094, |
|
"logps/rejected": -548.0081787109375, |
|
"loss": 0.4494, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3065229654312134, |
|
"rewards/margins": 1.3287594318389893, |
|
"rewards/rejected": -2.635282278060913, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.508637236084453, |
|
"grad_norm": 99.72234181869767, |
|
"learning_rate": 2.8599316436148187e-07, |
|
"logits/chosen": 0.5496645569801331, |
|
"logits/rejected": 0.5133931040763855, |
|
"logps/chosen": -455.8819274902344, |
|
"logps/rejected": -555.8260498046875, |
|
"loss": 0.4633, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8602015972137451, |
|
"rewards/margins": 1.0084072351455688, |
|
"rewards/rejected": -2.8686089515686035, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5110364683301344, |
|
"grad_norm": 64.81093117332321, |
|
"learning_rate": 2.8391990374121723e-07, |
|
"logits/chosen": 0.4317900240421295, |
|
"logits/rejected": 0.3587071895599365, |
|
"logps/chosen": -445.65557861328125, |
|
"logps/rejected": -656.5147094726562, |
|
"loss": 0.4683, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8837440013885498, |
|
"rewards/margins": 1.883643388748169, |
|
"rewards/rejected": -3.767387866973877, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5134357005758158, |
|
"grad_norm": 56.023753996333426, |
|
"learning_rate": 2.818442637744669e-07, |
|
"logits/chosen": 0.46527260541915894, |
|
"logits/rejected": 0.3798334300518036, |
|
"logps/chosen": -457.7913513183594, |
|
"logps/rejected": -615.3175048828125, |
|
"loss": 0.4751, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9232269525527954, |
|
"rewards/margins": 1.4897549152374268, |
|
"rewards/rejected": -3.4129815101623535, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5158349328214972, |
|
"grad_norm": 75.46239298086752, |
|
"learning_rate": 2.797663900591284e-07, |
|
"logits/chosen": 0.39150822162628174, |
|
"logits/rejected": 0.42525988817214966, |
|
"logps/chosen": -469.97607421875, |
|
"logps/rejected": -557.24462890625, |
|
"loss": 0.4456, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9090726375579834, |
|
"rewards/margins": 1.1672375202178955, |
|
"rewards/rejected": -3.076310157775879, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5182341650671785, |
|
"grad_norm": 52.88465482979518, |
|
"learning_rate": 2.776864283497874e-07, |
|
"logits/chosen": 0.529629111289978, |
|
"logits/rejected": 0.5416779518127441, |
|
"logps/chosen": -450.4493103027344, |
|
"logps/rejected": -655.2824096679688, |
|
"loss": 0.4792, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9700136184692383, |
|
"rewards/margins": 2.0623691082000732, |
|
"rewards/rejected": -4.032382488250732, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5206333973128598, |
|
"grad_norm": 59.76272055547423, |
|
"learning_rate": 2.756045245474943e-07, |
|
"logits/chosen": 0.3737568259239197, |
|
"logits/rejected": 0.3171232044696808, |
|
"logps/chosen": -468.14959716796875, |
|
"logps/rejected": -590.6390991210938, |
|
"loss": 0.4622, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7388217449188232, |
|
"rewards/margins": 1.0507452487945557, |
|
"rewards/rejected": -2.7895667552948, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5230326295585412, |
|
"grad_norm": 51.77010886508484, |
|
"learning_rate": 2.7352082468952977e-07, |
|
"logits/chosen": 0.48223429918289185, |
|
"logits/rejected": 0.41144832968711853, |
|
"logps/chosen": -465.4364318847656, |
|
"logps/rejected": -683.9286499023438, |
|
"loss": 0.4761, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0610320568084717, |
|
"rewards/margins": 1.9983199834823608, |
|
"rewards/rejected": -4.059351921081543, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5254318618042226, |
|
"grad_norm": 84.85615169878321, |
|
"learning_rate": 2.7143547493916e-07, |
|
"logits/chosen": 0.516851007938385, |
|
"logits/rejected": 0.4201034903526306, |
|
"logps/chosen": -429.69696044921875, |
|
"logps/rejected": -678.9994506835938, |
|
"loss": 0.4604, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.584281325340271, |
|
"rewards/margins": 2.506948471069336, |
|
"rewards/rejected": -4.0912299156188965, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.527831094049904, |
|
"grad_norm": 66.30525189839804, |
|
"learning_rate": 2.693486215753853e-07, |
|
"logits/chosen": 0.5131409764289856, |
|
"logits/rejected": 0.45062461495399475, |
|
"logps/chosen": -455.73284912109375, |
|
"logps/rejected": -620.1326904296875, |
|
"loss": 0.4901, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.9256746768951416, |
|
"rewards/margins": 1.8529987335205078, |
|
"rewards/rejected": -3.7786736488342285, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5302303262955854, |
|
"grad_norm": 75.42210636252842, |
|
"learning_rate": 2.6726041098267805e-07, |
|
"logits/chosen": 0.3453708291053772, |
|
"logits/rejected": 0.3795732259750366, |
|
"logps/chosen": -489.0921325683594, |
|
"logps/rejected": -522.8128662109375, |
|
"loss": 0.5099, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8708622455596924, |
|
"rewards/margins": 0.6701450347900391, |
|
"rewards/rejected": -2.5410072803497314, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5326295585412668, |
|
"grad_norm": 110.89398921309369, |
|
"learning_rate": 2.6517098964071507e-07, |
|
"logits/chosen": 0.7290188670158386, |
|
"logits/rejected": 0.7169550657272339, |
|
"logps/chosen": -423.59490966796875, |
|
"logps/rejected": -516.1988525390625, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4784079790115356, |
|
"rewards/margins": 0.7747719883918762, |
|
"rewards/rejected": -2.2531797885894775, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5350287907869482, |
|
"grad_norm": 67.81108925992177, |
|
"learning_rate": 2.630805041141023e-07, |
|
"logits/chosen": 0.7932499647140503, |
|
"logits/rejected": 0.749028205871582, |
|
"logps/chosen": -397.97808837890625, |
|
"logps/rejected": -597.330810546875, |
|
"loss": 0.4709, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.516108512878418, |
|
"rewards/margins": 1.8177108764648438, |
|
"rewards/rejected": -3.333819627761841, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5374280230326296, |
|
"grad_norm": 86.83596589278552, |
|
"learning_rate": 2.609891010420941e-07, |
|
"logits/chosen": 0.6531923413276672, |
|
"logits/rejected": 0.660178005695343, |
|
"logps/chosen": -440.75640869140625, |
|
"logps/rejected": -620.4841918945312, |
|
"loss": 0.4363, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6386387348175049, |
|
"rewards/margins": 1.8078346252441406, |
|
"rewards/rejected": -3.4464733600616455, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.539827255278311, |
|
"grad_norm": 70.7879323784096, |
|
"learning_rate": 2.5889692712830674e-07, |
|
"logits/chosen": 0.4513081908226013, |
|
"logits/rejected": 0.45296710729599, |
|
"logps/chosen": -417.0743713378906, |
|
"logps/rejected": -515.775390625, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7094676494598389, |
|
"rewards/margins": 1.040052056312561, |
|
"rewards/rejected": -2.7495195865631104, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5422264875239923, |
|
"grad_norm": 64.42188723525166, |
|
"learning_rate": 2.5680412913042843e-07, |
|
"logits/chosen": 0.7294105291366577, |
|
"logits/rejected": 0.668805718421936, |
|
"logps/chosen": -425.08197021484375, |
|
"logps/rejected": -610.4728393554688, |
|
"loss": 0.4477, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6010410785675049, |
|
"rewards/margins": 1.835683822631836, |
|
"rewards/rejected": -3.436724901199341, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5446257197696737, |
|
"grad_norm": 72.58471133446139, |
|
"learning_rate": 2.5471085384992404e-07, |
|
"logits/chosen": 0.6620523929595947, |
|
"logits/rejected": 0.5573450326919556, |
|
"logps/chosen": -413.2271423339844, |
|
"logps/rejected": -729.1690673828125, |
|
"loss": 0.4762, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5765022039413452, |
|
"rewards/margins": 3.0226941108703613, |
|
"rewards/rejected": -4.599195957183838, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5470249520153551, |
|
"grad_norm": 60.85224884040184, |
|
"learning_rate": 2.526172481217381e-07, |
|
"logits/chosen": 0.7272987365722656, |
|
"logits/rejected": 0.5997458100318909, |
|
"logps/chosen": -455.57611083984375, |
|
"logps/rejected": -617.6127319335938, |
|
"loss": 0.4899, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.2517588138580322, |
|
"rewards/margins": 1.464371681213379, |
|
"rewards/rejected": -3.716130495071411, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5494241842610365, |
|
"grad_norm": 68.59652285945823, |
|
"learning_rate": 2.5052345880399456e-07, |
|
"logits/chosen": 0.8072897791862488, |
|
"logits/rejected": 0.8118192553520203, |
|
"logps/chosen": -432.44061279296875, |
|
"logps/rejected": -561.617431640625, |
|
"loss": 0.4315, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.858302354812622, |
|
"rewards/margins": 1.2698358297348022, |
|
"rewards/rejected": -3.1281380653381348, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5518234165067178, |
|
"grad_norm": 74.15278473295638, |
|
"learning_rate": 2.4842963276769555e-07, |
|
"logits/chosen": 0.8605824708938599, |
|
"logits/rejected": 0.7876245379447937, |
|
"logps/chosen": -466.9039611816406, |
|
"logps/rejected": -686.98486328125, |
|
"loss": 0.4549, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.2978317737579346, |
|
"rewards/margins": 1.7995271682739258, |
|
"rewards/rejected": -4.097358703613281, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5542226487523992, |
|
"grad_norm": 74.96857776511332, |
|
"learning_rate": 2.463359168864189e-07, |
|
"logits/chosen": 0.5963218212127686, |
|
"logits/rejected": 0.6567476391792297, |
|
"logps/chosen": -516.2249755859375, |
|
"logps/rejected": -657.1112670898438, |
|
"loss": 0.4825, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9260857105255127, |
|
"rewards/margins": 1.7939532995224, |
|
"rewards/rejected": -3.7200393676757812, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5566218809980806, |
|
"grad_norm": 89.28555181500718, |
|
"learning_rate": 2.4424245802601555e-07, |
|
"logits/chosen": 0.6519026756286621, |
|
"logits/rejected": 0.5650339126586914, |
|
"logps/chosen": -439.49560546875, |
|
"logps/rejected": -622.7562255859375, |
|
"loss": 0.4216, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8823158740997314, |
|
"rewards/margins": 1.2919491529464722, |
|
"rewards/rejected": -3.174265146255493, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.559021113243762, |
|
"grad_norm": 81.49489775108675, |
|
"learning_rate": 2.421494030343072e-07, |
|
"logits/chosen": 0.774019181728363, |
|
"logits/rejected": 0.8780634999275208, |
|
"logps/chosen": -484.63702392578125, |
|
"logps/rejected": -557.9149780273438, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0355923175811768, |
|
"rewards/margins": 1.2235227823257446, |
|
"rewards/rejected": -3.259115219116211, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5614203454894434, |
|
"grad_norm": 81.4080605880572, |
|
"learning_rate": 2.400568987307861e-07, |
|
"logits/chosen": 0.829224705696106, |
|
"logits/rejected": 0.873275637626648, |
|
"logps/chosen": -452.7657165527344, |
|
"logps/rejected": -531.43798828125, |
|
"loss": 0.4177, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9732475280761719, |
|
"rewards/margins": 0.9660792350769043, |
|
"rewards/rejected": -2.9393270015716553, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5638195777351248, |
|
"grad_norm": 80.48296878035309, |
|
"learning_rate": 2.379650918963156e-07, |
|
"logits/chosen": 0.6965009570121765, |
|
"logits/rejected": 0.6310064196586609, |
|
"logps/chosen": -447.6282653808594, |
|
"logps/rejected": -665.9441528320312, |
|
"loss": 0.4379, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2503786087036133, |
|
"rewards/margins": 2.079531192779541, |
|
"rewards/rejected": -4.329909324645996, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5662188099808061, |
|
"grad_norm": 161.23383348494247, |
|
"learning_rate": 2.3587412926283438e-07, |
|
"logits/chosen": 0.6263203024864197, |
|
"logits/rejected": 0.5573066473007202, |
|
"logps/chosen": -523.3045043945312, |
|
"logps/rejected": -685.4592895507812, |
|
"loss": 0.4975, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0652005672454834, |
|
"rewards/margins": 1.9840151071548462, |
|
"rewards/rejected": -4.049216270446777, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5686180422264875, |
|
"grad_norm": 47.79710337777276, |
|
"learning_rate": 2.337841575030642e-07, |
|
"logits/chosen": 0.6272764205932617, |
|
"logits/rejected": 0.5796754360198975, |
|
"logps/chosen": -512.209228515625, |
|
"logps/rejected": -641.87158203125, |
|
"loss": 0.4695, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.072915554046631, |
|
"rewards/margins": 1.2129814624786377, |
|
"rewards/rejected": -3.2858967781066895, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5710172744721689, |
|
"grad_norm": 61.84173283054112, |
|
"learning_rate": 2.316953232202206e-07, |
|
"logits/chosen": 0.8959159851074219, |
|
"logits/rejected": 1.0488951206207275, |
|
"logps/chosen": -428.900390625, |
|
"logps/rejected": -450.09356689453125, |
|
"loss": 0.4487, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7133146524429321, |
|
"rewards/margins": 0.8512625694274902, |
|
"rewards/rejected": -2.564577341079712, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5734165067178503, |
|
"grad_norm": 55.820576437794486, |
|
"learning_rate": 2.2960777293772958e-07, |
|
"logits/chosen": 0.9364362955093384, |
|
"logits/rejected": 0.972970187664032, |
|
"logps/chosen": -382.2677917480469, |
|
"logps/rejected": -556.21240234375, |
|
"loss": 0.444, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4817780256271362, |
|
"rewards/margins": 1.8884556293487549, |
|
"rewards/rejected": -3.3702335357666016, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5758157389635317, |
|
"grad_norm": 45.45881446143656, |
|
"learning_rate": 2.2752165308894974e-07, |
|
"logits/chosen": 0.8092014193534851, |
|
"logits/rejected": 0.8170158267021179, |
|
"logps/chosen": -385.9479064941406, |
|
"logps/rejected": -530.9920654296875, |
|
"loss": 0.4327, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7555181980133057, |
|
"rewards/margins": 1.5362156629562378, |
|
"rewards/rejected": -3.291733503341675, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5782149712092131, |
|
"grad_norm": 66.11899005889326, |
|
"learning_rate": 2.254371100069005e-07, |
|
"logits/chosen": 0.7756548523902893, |
|
"logits/rejected": 0.6337395906448364, |
|
"logps/chosen": -429.2940368652344, |
|
"logps/rejected": -577.1849365234375, |
|
"loss": 0.4456, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5882879495620728, |
|
"rewards/margins": 1.2956851720809937, |
|
"rewards/rejected": -2.8839731216430664, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5806142034548945, |
|
"grad_norm": 81.31268443644781, |
|
"learning_rate": 2.2335428991399725e-07, |
|
"logits/chosen": 0.8727922439575195, |
|
"logits/rejected": 0.794070839881897, |
|
"logps/chosen": -420.30413818359375, |
|
"logps/rejected": -778.9227294921875, |
|
"loss": 0.4504, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.078183650970459, |
|
"rewards/margins": 3.450129270553589, |
|
"rewards/rejected": -5.528312683105469, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5830134357005758, |
|
"grad_norm": 75.31101359568349, |
|
"learning_rate": 2.2127333891179458e-07, |
|
"logits/chosen": 0.865625262260437, |
|
"logits/rejected": 0.770646333694458, |
|
"logps/chosen": -430.75604248046875, |
|
"logps/rejected": -653.3302001953125, |
|
"loss": 0.475, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0380074977874756, |
|
"rewards/margins": 1.9751768112182617, |
|
"rewards/rejected": -4.013184547424316, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5854126679462572, |
|
"grad_norm": 105.31971544417459, |
|
"learning_rate": 2.1919440297073782e-07, |
|
"logits/chosen": 0.7327530980110168, |
|
"logits/rejected": 0.7255542874336243, |
|
"logps/chosen": -463.42034912109375, |
|
"logps/rejected": -659.7835693359375, |
|
"loss": 0.4746, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3859894275665283, |
|
"rewards/margins": 1.8729331493377686, |
|
"rewards/rejected": -4.258922100067139, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5878119001919386, |
|
"grad_norm": 78.36856362858526, |
|
"learning_rate": 2.1711762791992368e-07, |
|
"logits/chosen": 0.8592809438705444, |
|
"logits/rejected": 0.9008442759513855, |
|
"logps/chosen": -531.7381591796875, |
|
"logps/rejected": -642.2724609375, |
|
"loss": 0.5113, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.252894401550293, |
|
"rewards/margins": 1.4084926843643188, |
|
"rewards/rejected": -3.6613869667053223, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5902111324376199, |
|
"grad_norm": 62.58305506876927, |
|
"learning_rate": 2.1504315943687114e-07, |
|
"logits/chosen": 0.5537108778953552, |
|
"logits/rejected": 0.4298032224178314, |
|
"logps/chosen": -441.60064697265625, |
|
"logps/rejected": -669.8635864257812, |
|
"loss": 0.4497, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.944635033607483, |
|
"rewards/margins": 1.8467686176300049, |
|
"rewards/rejected": -3.7914035320281982, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.5926103646833013, |
|
"grad_norm": 73.50235555471477, |
|
"learning_rate": 2.1297114303730248e-07, |
|
"logits/chosen": 0.8199491500854492, |
|
"logits/rejected": 0.6450022459030151, |
|
"logps/chosen": -439.25982666015625, |
|
"logps/rejected": -657.178955078125, |
|
"loss": 0.4824, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.933325171470642, |
|
"rewards/margins": 1.7279558181762695, |
|
"rewards/rejected": -3.661280870437622, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5950095969289827, |
|
"grad_norm": 66.37939903381928, |
|
"learning_rate": 2.1090172406493616e-07, |
|
"logits/chosen": 0.7638829946517944, |
|
"logits/rejected": 0.5886873602867126, |
|
"logps/chosen": -397.14837646484375, |
|
"logps/rejected": -560.5330810546875, |
|
"loss": 0.403, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.4525659084320068, |
|
"rewards/margins": 1.4809906482696533, |
|
"rewards/rejected": -2.9335567951202393, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5974088291746641, |
|
"grad_norm": 118.8555498440257, |
|
"learning_rate": 2.0883504768129146e-07, |
|
"logits/chosen": 0.7545451521873474, |
|
"logits/rejected": 0.6964809894561768, |
|
"logps/chosen": -518.4376831054688, |
|
"logps/rejected": -660.7520751953125, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2820425033569336, |
|
"rewards/margins": 1.5144457817077637, |
|
"rewards/rejected": -3.7964882850646973, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5998080614203455, |
|
"grad_norm": 105.23886399309227, |
|
"learning_rate": 2.0677125885550571e-07, |
|
"logits/chosen": 0.9224992990493774, |
|
"logits/rejected": 1.0375258922576904, |
|
"logps/chosen": -487.0956115722656, |
|
"logps/rejected": -584.8184814453125, |
|
"loss": 0.4507, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.3171744346618652, |
|
"rewards/margins": 1.376301884651184, |
|
"rewards/rejected": -3.6934762001037598, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6022072936660269, |
|
"grad_norm": 134.72957888784933, |
|
"learning_rate": 2.0471050235416587e-07, |
|
"logits/chosen": 0.5583715438842773, |
|
"logits/rejected": 0.6421685814857483, |
|
"logps/chosen": -477.279052734375, |
|
"logps/rejected": -624.9611206054688, |
|
"loss": 0.4088, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0147852897644043, |
|
"rewards/margins": 1.9398906230926514, |
|
"rewards/rejected": -3.9546761512756348, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6046065259117083, |
|
"grad_norm": 70.7473732277954, |
|
"learning_rate": 2.026529227311532e-07, |
|
"logits/chosen": 0.8045209646224976, |
|
"logits/rejected": 0.8030557632446289, |
|
"logps/chosen": -478.9481506347656, |
|
"logps/rejected": -648.4367065429688, |
|
"loss": 0.4779, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.477684497833252, |
|
"rewards/margins": 1.5615503787994385, |
|
"rewards/rejected": -4.039234638214111, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6070057581573897, |
|
"grad_norm": 78.84872275712426, |
|
"learning_rate": 2.005986643175036e-07, |
|
"logits/chosen": 0.7728430032730103, |
|
"logits/rejected": 0.6502765417098999, |
|
"logps/chosen": -469.3561096191406, |
|
"logps/rejected": -708.9876708984375, |
|
"loss": 0.4057, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7923500537872314, |
|
"rewards/margins": 2.4673500061035156, |
|
"rewards/rejected": -4.259699821472168, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6094049904030711, |
|
"grad_norm": 79.82777536478129, |
|
"learning_rate": 1.9854787121128328e-07, |
|
"logits/chosen": 0.7944070100784302, |
|
"logits/rejected": 0.867503821849823, |
|
"logps/chosen": -431.4970703125, |
|
"logps/rejected": -461.4778747558594, |
|
"loss": 0.4893, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9094560146331787, |
|
"rewards/margins": 0.7783313989639282, |
|
"rewards/rejected": -2.6877872943878174, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6118042226487524, |
|
"grad_norm": 70.07061827528065, |
|
"learning_rate": 1.9650068726748106e-07, |
|
"logits/chosen": 0.8186457753181458, |
|
"logits/rejected": 0.8393930196762085, |
|
"logps/chosen": -471.9232482910156, |
|
"logps/rejected": -660.90087890625, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9725860357284546, |
|
"rewards/margins": 1.8898284435272217, |
|
"rewards/rejected": -3.862414598464966, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6142034548944337, |
|
"grad_norm": 62.934694167956906, |
|
"learning_rate": 1.9445725608791718e-07, |
|
"logits/chosen": 0.7928425669670105, |
|
"logits/rejected": 0.7183429002761841, |
|
"logps/chosen": -475.56689453125, |
|
"logps/rejected": -825.50537109375, |
|
"loss": 0.4448, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9966167211532593, |
|
"rewards/margins": 3.472975254058838, |
|
"rewards/rejected": -5.469592094421387, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6166026871401151, |
|
"grad_norm": 84.23102644315742, |
|
"learning_rate": 1.924177210111705e-07, |
|
"logits/chosen": 0.7230349779129028, |
|
"logits/rejected": 0.7304130792617798, |
|
"logps/chosen": -427.76641845703125, |
|
"logps/rejected": -645.4945678710938, |
|
"loss": 0.5047, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.894801378250122, |
|
"rewards/margins": 2.067033290863037, |
|
"rewards/rejected": -3.961834669113159, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6190019193857965, |
|
"grad_norm": 68.56738854248482, |
|
"learning_rate": 1.9038222510252364e-07, |
|
"logits/chosen": 0.6868112683296204, |
|
"logits/rejected": 0.7166644334793091, |
|
"logps/chosen": -430.49652099609375, |
|
"logps/rejected": -552.3370361328125, |
|
"loss": 0.4525, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.616554856300354, |
|
"rewards/margins": 1.3565380573272705, |
|
"rewards/rejected": -2.973092794418335, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6214011516314779, |
|
"grad_norm": 120.89142448374642, |
|
"learning_rate": 1.883509111439277e-07, |
|
"logits/chosen": 0.8483439683914185, |
|
"logits/rejected": 0.7423468828201294, |
|
"logps/chosen": -437.2955627441406, |
|
"logps/rejected": -718.8609619140625, |
|
"loss": 0.4676, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.918787956237793, |
|
"rewards/margins": 2.17030668258667, |
|
"rewards/rejected": -4.089094638824463, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6238003838771593, |
|
"grad_norm": 49.311671556452566, |
|
"learning_rate": 1.8632392162398665e-07, |
|
"logits/chosen": 0.607376754283905, |
|
"logits/rejected": 0.5648632049560547, |
|
"logps/chosen": -473.47552490234375, |
|
"logps/rejected": -683.9610595703125, |
|
"loss": 0.4503, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.676776647567749, |
|
"rewards/margins": 2.1353073120117188, |
|
"rewards/rejected": -3.8120837211608887, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6261996161228407, |
|
"grad_norm": 75.59408868219587, |
|
"learning_rate": 1.84301398727962e-07, |
|
"logits/chosen": 0.9677057266235352, |
|
"logits/rejected": 0.8434633016586304, |
|
"logps/chosen": -365.4425048828125, |
|
"logps/rejected": -690.6658935546875, |
|
"loss": 0.4309, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6164665222167969, |
|
"rewards/margins": 2.9124317169189453, |
|
"rewards/rejected": -4.528898239135742, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6285988483685221, |
|
"grad_norm": 115.88050705926936, |
|
"learning_rate": 1.8228348432779966e-07, |
|
"logits/chosen": 0.687825620174408, |
|
"logits/rejected": 0.6615604162216187, |
|
"logps/chosen": -461.5873107910156, |
|
"logps/rejected": -637.3787231445312, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.125917911529541, |
|
"rewards/margins": 1.9036601781845093, |
|
"rewards/rejected": -4.02957820892334, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6309980806142035, |
|
"grad_norm": 80.29417566534713, |
|
"learning_rate": 1.8027031997217773e-07, |
|
"logits/chosen": 0.906875729560852, |
|
"logits/rejected": 0.8116368055343628, |
|
"logps/chosen": -463.63848876953125, |
|
"logps/rejected": -738.002685546875, |
|
"loss": 0.4117, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.3727240562438965, |
|
"rewards/margins": 2.5814578533172607, |
|
"rewards/rejected": -4.954182147979736, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6333973128598849, |
|
"grad_norm": 72.9727279905641, |
|
"learning_rate": 1.7826204687657758e-07, |
|
"logits/chosen": 0.704582929611206, |
|
"logits/rejected": 0.7599879503250122, |
|
"logps/chosen": -508.8060607910156, |
|
"logps/rejected": -597.4583740234375, |
|
"loss": 0.4124, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.142221212387085, |
|
"rewards/margins": 1.3050801753997803, |
|
"rewards/rejected": -3.4473013877868652, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6357965451055663, |
|
"grad_norm": 77.40867432941111, |
|
"learning_rate": 1.762588059133781e-07, |
|
"logits/chosen": 0.8028141856193542, |
|
"logits/rejected": 0.9309502840042114, |
|
"logps/chosen": -522.3397827148438, |
|
"logps/rejected": -666.6038208007812, |
|
"loss": 0.4569, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.124357223510742, |
|
"rewards/margins": 1.7856041193008423, |
|
"rewards/rejected": -3.909961223602295, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6381957773512476, |
|
"grad_norm": 86.93862120475609, |
|
"learning_rate": 1.7426073760197406e-07, |
|
"logits/chosen": 0.5282206535339355, |
|
"logits/rejected": 0.45355233550071716, |
|
"logps/chosen": -479.072998046875, |
|
"logps/rejected": -800.297607421875, |
|
"loss": 0.4739, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.195789337158203, |
|
"rewards/margins": 2.915109395980835, |
|
"rewards/rejected": -5.110898494720459, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6405950095969289, |
|
"grad_norm": 53.39111099357644, |
|
"learning_rate": 1.7226798209891935e-07, |
|
"logits/chosen": 0.5834323167800903, |
|
"logits/rejected": 0.7496181726455688, |
|
"logps/chosen": -529.416015625, |
|
"logps/rejected": -672.9735107421875, |
|
"loss": 0.4209, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.5758395195007324, |
|
"rewards/margins": 2.0032310485839844, |
|
"rewards/rejected": -4.579070568084717, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6429942418426103, |
|
"grad_norm": 87.11146904026005, |
|
"learning_rate": 1.7028067918809535e-07, |
|
"logits/chosen": 0.6734325289726257, |
|
"logits/rejected": 0.6454343795776367, |
|
"logps/chosen": -424.33050537109375, |
|
"logps/rejected": -746.0318603515625, |
|
"loss": 0.4469, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8989346027374268, |
|
"rewards/margins": 2.7513890266418457, |
|
"rewards/rejected": -4.650323867797852, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6453934740882917, |
|
"grad_norm": 64.04687329478043, |
|
"learning_rate": 1.6829896827090584e-07, |
|
"logits/chosen": 0.6589122414588928, |
|
"logits/rejected": 0.6906758546829224, |
|
"logps/chosen": -446.0787658691406, |
|
"logps/rejected": -517.2769775390625, |
|
"loss": 0.4801, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.819963812828064, |
|
"rewards/margins": 0.9917726516723633, |
|
"rewards/rejected": -2.811736583709717, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6477927063339731, |
|
"grad_norm": 68.55077275701636, |
|
"learning_rate": 1.6632298835649844e-07, |
|
"logits/chosen": 0.7469342350959778, |
|
"logits/rejected": 0.6074378490447998, |
|
"logps/chosen": -486.0980529785156, |
|
"logps/rejected": -701.6823120117188, |
|
"loss": 0.4292, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9820486307144165, |
|
"rewards/margins": 1.8143211603164673, |
|
"rewards/rejected": -3.796370029449463, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6501919385796545, |
|
"grad_norm": 164.01818736003895, |
|
"learning_rate": 1.6435287805201364e-07, |
|
"logits/chosen": 1.0282725095748901, |
|
"logits/rejected": 0.9421369433403015, |
|
"logps/chosen": -484.1258850097656, |
|
"logps/rejected": -591.5389404296875, |
|
"loss": 0.4774, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.078385829925537, |
|
"rewards/margins": 1.168988823890686, |
|
"rewards/rejected": -3.2473747730255127, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6525911708253359, |
|
"grad_norm": 108.80166731257252, |
|
"learning_rate": 1.6238877555286207e-07, |
|
"logits/chosen": 0.9468935132026672, |
|
"logits/rejected": 0.8932015299797058, |
|
"logps/chosen": -482.67333984375, |
|
"logps/rejected": -685.7303466796875, |
|
"loss": 0.4015, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8609832525253296, |
|
"rewards/margins": 1.9226045608520508, |
|
"rewards/rejected": -3.78358793258667, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6549904030710173, |
|
"grad_norm": 65.35587867202435, |
|
"learning_rate": 1.60430818633031e-07, |
|
"logits/chosen": 0.6565853357315063, |
|
"logits/rejected": 0.6313525438308716, |
|
"logps/chosen": -458.6571350097656, |
|
"logps/rejected": -657.23974609375, |
|
"loss": 0.4316, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8307663202285767, |
|
"rewards/margins": 2.0033957958221436, |
|
"rewards/rejected": -3.8341622352600098, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6573896353166987, |
|
"grad_norm": 69.4975198634658, |
|
"learning_rate": 1.5847914463541939e-07, |
|
"logits/chosen": 0.8870307803153992, |
|
"logits/rejected": 0.8806151151657104, |
|
"logps/chosen": -413.64324951171875, |
|
"logps/rejected": -596.6288452148438, |
|
"loss": 0.4194, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9652888774871826, |
|
"rewards/margins": 1.5917816162109375, |
|
"rewards/rejected": -3.5570709705352783, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6597888675623801, |
|
"grad_norm": 58.43177670940325, |
|
"learning_rate": 1.5653389046220427e-07, |
|
"logits/chosen": 0.8050106167793274, |
|
"logits/rejected": 0.7004156708717346, |
|
"logps/chosen": -415.05877685546875, |
|
"logps/rejected": -579.9642333984375, |
|
"loss": 0.4396, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6235889196395874, |
|
"rewards/margins": 1.465893268585205, |
|
"rewards/rejected": -3.089482069015503, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6621880998080614, |
|
"grad_norm": 120.32854357940431, |
|
"learning_rate": 1.545951925652375e-07, |
|
"logits/chosen": 0.8283821940422058, |
|
"logits/rejected": 0.9186760187149048, |
|
"logps/chosen": -514.8087768554688, |
|
"logps/rejected": -676.7103881835938, |
|
"loss": 0.4478, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.914083480834961, |
|
"rewards/margins": 2.143592357635498, |
|
"rewards/rejected": -4.057675838470459, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6645873320537428, |
|
"grad_norm": 78.46670177760451, |
|
"learning_rate": 1.5266318693647423e-07, |
|
"logits/chosen": 0.8939564824104309, |
|
"logits/rejected": 0.9314833879470825, |
|
"logps/chosen": -498.5834045410156, |
|
"logps/rejected": -622.8574829101562, |
|
"loss": 0.4397, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.194986581802368, |
|
"rewards/margins": 1.3357025384902954, |
|
"rewards/rejected": -3.530688762664795, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6669865642994242, |
|
"grad_norm": 107.83148637987925, |
|
"learning_rate": 1.5073800909843353e-07, |
|
"logits/chosen": 0.7507692575454712, |
|
"logits/rejected": 0.8879140615463257, |
|
"logps/chosen": -509.0641174316406, |
|
"logps/rejected": -638.12060546875, |
|
"loss": 0.4426, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2347424030303955, |
|
"rewards/margins": 1.8413188457489014, |
|
"rewards/rejected": -4.076061248779297, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6693857965451055, |
|
"grad_norm": 83.40048340061365, |
|
"learning_rate": 1.488197940946922e-07, |
|
"logits/chosen": 0.7239698171615601, |
|
"logits/rejected": 0.7137027978897095, |
|
"logps/chosen": -483.4082946777344, |
|
"logps/rejected": -631.5069580078125, |
|
"loss": 0.4135, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8569847345352173, |
|
"rewards/margins": 2.095280170440674, |
|
"rewards/rejected": -3.9522643089294434, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.6717850287907869, |
|
"grad_norm": 111.8064969416606, |
|
"learning_rate": 1.4690867648041167e-07, |
|
"logits/chosen": 0.5845648646354675, |
|
"logits/rejected": 0.6570306420326233, |
|
"logps/chosen": -474.2705078125, |
|
"logps/rejected": -647.6243896484375, |
|
"loss": 0.4567, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0058846473693848, |
|
"rewards/margins": 1.990685224533081, |
|
"rewards/rejected": -3.996570110321045, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6741842610364683, |
|
"grad_norm": 74.2307028113121, |
|
"learning_rate": 1.4500479031289987e-07, |
|
"logits/chosen": 0.528331458568573, |
|
"logits/rejected": 0.5793333649635315, |
|
"logps/chosen": -475.62774658203125, |
|
"logps/rejected": -634.0245361328125, |
|
"loss": 0.4845, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8012163639068604, |
|
"rewards/margins": 1.7039928436279297, |
|
"rewards/rejected": -3.505209445953369, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.6765834932821497, |
|
"grad_norm": 61.271348177403006, |
|
"learning_rate": 1.4310826914220747e-07, |
|
"logits/chosen": 0.632502555847168, |
|
"logits/rejected": 0.6504623889923096, |
|
"logps/chosen": -524.9642333984375, |
|
"logps/rejected": -633.2562255859375, |
|
"loss": 0.4694, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0079779624938965, |
|
"rewards/margins": 1.271063208580017, |
|
"rewards/rejected": -3.2790417671203613, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.6789827255278311, |
|
"grad_norm": 63.25326815957608, |
|
"learning_rate": 1.412192460017597e-07, |
|
"logits/chosen": 0.7408447265625, |
|
"logits/rejected": 0.6214615702629089, |
|
"logps/chosen": -484.2117614746094, |
|
"logps/rejected": -666.2242431640625, |
|
"loss": 0.4674, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1996307373046875, |
|
"rewards/margins": 1.8269342184066772, |
|
"rewards/rejected": -4.026564598083496, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6813819577735125, |
|
"grad_norm": 53.68125150705029, |
|
"learning_rate": 1.3933785339902504e-07, |
|
"logits/chosen": 0.8675326108932495, |
|
"logits/rejected": 0.6697555780410767, |
|
"logps/chosen": -394.8028259277344, |
|
"logps/rejected": -586.7496337890625, |
|
"loss": 0.4766, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7711681127548218, |
|
"rewards/margins": 1.5016133785247803, |
|
"rewards/rejected": -3.2727808952331543, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6837811900191939, |
|
"grad_norm": 49.23879794442855, |
|
"learning_rate": 1.374642233062197e-07, |
|
"logits/chosen": 0.7506469488143921, |
|
"logits/rejected": 0.7584705352783203, |
|
"logps/chosen": -497.8182678222656, |
|
"logps/rejected": -614.2308349609375, |
|
"loss": 0.4527, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9496490955352783, |
|
"rewards/margins": 1.4845595359802246, |
|
"rewards/rejected": -3.434208393096924, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6861804222648752, |
|
"grad_norm": 55.072603866085636, |
|
"learning_rate": 1.355984871510511e-07, |
|
"logits/chosen": 0.7683936357498169, |
|
"logits/rejected": 0.6865507364273071, |
|
"logps/chosen": -510.0538635253906, |
|
"logps/rejected": -651.4440307617188, |
|
"loss": 0.4291, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.979453682899475, |
|
"rewards/margins": 1.3221153020858765, |
|
"rewards/rejected": -3.3015689849853516, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.6885796545105566, |
|
"grad_norm": 54.75451532189644, |
|
"learning_rate": 1.3374077580749783e-07, |
|
"logits/chosen": 0.8782552480697632, |
|
"logits/rejected": 0.7610424160957336, |
|
"logps/chosen": -389.5190124511719, |
|
"logps/rejected": -591.7854614257812, |
|
"loss": 0.4381, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7472121715545654, |
|
"rewards/margins": 1.835559606552124, |
|
"rewards/rejected": -3.5827713012695312, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.690978886756238, |
|
"grad_norm": 82.79367033073343, |
|
"learning_rate": 1.3189121958663024e-07, |
|
"logits/chosen": 0.7418884634971619, |
|
"logits/rejected": 0.9005324244499207, |
|
"logps/chosen": -549.728271484375, |
|
"logps/rejected": -589.9193115234375, |
|
"loss": 0.4558, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.512777328491211, |
|
"rewards/margins": 0.7187246084213257, |
|
"rewards/rejected": -3.231501817703247, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6933781190019194, |
|
"grad_norm": 70.32047052520703, |
|
"learning_rate": 1.3004994822746895e-07, |
|
"logits/chosen": 0.569731593132019, |
|
"logits/rejected": 0.5882446765899658, |
|
"logps/chosen": -464.678466796875, |
|
"logps/rejected": -611.6061401367188, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.068535327911377, |
|
"rewards/margins": 1.3669475317001343, |
|
"rewards/rejected": -3.4354827404022217, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6957773512476008, |
|
"grad_norm": 70.24838014696037, |
|
"learning_rate": 1.2821709088788434e-07, |
|
"logits/chosen": 0.8074348568916321, |
|
"logits/rejected": 0.7572726011276245, |
|
"logps/chosen": -433.70916748046875, |
|
"logps/rejected": -633.556396484375, |
|
"loss": 0.4846, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0815036296844482, |
|
"rewards/margins": 1.9804821014404297, |
|
"rewards/rejected": -4.061985969543457, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6981765834932822, |
|
"grad_norm": 66.94215020114005, |
|
"learning_rate": 1.2639277613553736e-07, |
|
"logits/chosen": 1.0553810596466064, |
|
"logits/rejected": 1.0150127410888672, |
|
"logps/chosen": -411.4329528808594, |
|
"logps/rejected": -575.4439086914062, |
|
"loss": 0.4287, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9355758428573608, |
|
"rewards/margins": 1.6092584133148193, |
|
"rewards/rejected": -3.5448341369628906, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7005758157389635, |
|
"grad_norm": 57.678932026620025, |
|
"learning_rate": 1.2457713193885975e-07, |
|
"logits/chosen": 0.7202847003936768, |
|
"logits/rejected": 0.5807607769966125, |
|
"logps/chosen": -410.8163146972656, |
|
"logps/rejected": -619.6762084960938, |
|
"loss": 0.4415, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1611483097076416, |
|
"rewards/margins": 1.7657220363616943, |
|
"rewards/rejected": -3.926870346069336, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7029750479846449, |
|
"grad_norm": 99.71535765766733, |
|
"learning_rate": 1.2277028565807838e-07, |
|
"logits/chosen": 0.8449848294258118, |
|
"logits/rejected": 0.853103518486023, |
|
"logps/chosen": -453.7933044433594, |
|
"logps/rejected": -580.7551879882812, |
|
"loss": 0.4349, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.82171630859375, |
|
"rewards/margins": 1.3380426168441772, |
|
"rewards/rejected": -3.1597588062286377, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7053742802303263, |
|
"grad_norm": 86.21729316642917, |
|
"learning_rate": 1.209723640362815e-07, |
|
"logits/chosen": 0.7170064449310303, |
|
"logits/rejected": 0.6676933765411377, |
|
"logps/chosen": -491.447021484375, |
|
"logps/rejected": -708.0086059570312, |
|
"loss": 0.5256, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.0459351539611816, |
|
"rewards/margins": 2.2767374515533447, |
|
"rewards/rejected": -4.322672367095947, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7077735124760077, |
|
"grad_norm": 52.127578026788974, |
|
"learning_rate": 1.191834931905277e-07, |
|
"logits/chosen": 0.6983531713485718, |
|
"logits/rejected": 0.6285141706466675, |
|
"logps/chosen": -547.9280395507812, |
|
"logps/rejected": -699.6654052734375, |
|
"loss": 0.4627, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.334404468536377, |
|
"rewards/margins": 1.5018515586853027, |
|
"rewards/rejected": -3.836256504058838, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.710172744721689, |
|
"grad_norm": 60.22843134197567, |
|
"learning_rate": 1.1740379860299988e-07, |
|
"logits/chosen": 0.8676375150680542, |
|
"logits/rejected": 0.7526202201843262, |
|
"logps/chosen": -486.1255798339844, |
|
"logps/rejected": -624.5291748046875, |
|
"loss": 0.4616, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9087669849395752, |
|
"rewards/margins": 1.2165353298187256, |
|
"rewards/rejected": -3.125302314758301, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7125719769673704, |
|
"grad_norm": 71.87031028247857, |
|
"learning_rate": 1.1563340511220254e-07, |
|
"logits/chosen": 0.7679969668388367, |
|
"logits/rejected": 0.8384410738945007, |
|
"logps/chosen": -526.3709716796875, |
|
"logps/rejected": -695.6194458007812, |
|
"loss": 0.4795, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.20868182182312, |
|
"rewards/margins": 1.9025743007659912, |
|
"rewards/rejected": -4.1112565994262695, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7149712092130518, |
|
"grad_norm": 81.31728475358824, |
|
"learning_rate": 1.1387243690420556e-07, |
|
"logits/chosen": 0.7713545560836792, |
|
"logits/rejected": 0.7580293416976929, |
|
"logps/chosen": -500.56634521484375, |
|
"logps/rejected": -699.48046875, |
|
"loss": 0.4506, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7967475652694702, |
|
"rewards/margins": 2.0774459838867188, |
|
"rewards/rejected": -3.8741936683654785, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7173704414587332, |
|
"grad_norm": 103.12746498881125, |
|
"learning_rate": 1.1212101750393235e-07, |
|
"logits/chosen": 0.9541581869125366, |
|
"logits/rejected": 0.9767134785652161, |
|
"logps/chosen": -465.8929138183594, |
|
"logps/rejected": -644.3170776367188, |
|
"loss": 0.4009, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.0336551666259766, |
|
"rewards/margins": 2.015535831451416, |
|
"rewards/rejected": -4.049190998077393, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7197696737044146, |
|
"grad_norm": 55.131879680095366, |
|
"learning_rate": 1.1037926976649562e-07, |
|
"logits/chosen": 0.7562888860702515, |
|
"logits/rejected": 0.7069096565246582, |
|
"logps/chosen": -517.4041137695312, |
|
"logps/rejected": -714.3680419921875, |
|
"loss": 0.488, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.484272003173828, |
|
"rewards/margins": 1.6936626434326172, |
|
"rewards/rejected": -4.177934646606445, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.722168905950096, |
|
"grad_norm": 133.6665015806597, |
|
"learning_rate": 1.0864731586857936e-07, |
|
"logits/chosen": 0.83746337890625, |
|
"logits/rejected": 0.9332591891288757, |
|
"logps/chosen": -512.98486328125, |
|
"logps/rejected": -671.4483642578125, |
|
"loss": 0.4393, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.1171326637268066, |
|
"rewards/margins": 1.9028711318969727, |
|
"rewards/rejected": -4.020003318786621, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7245681381957774, |
|
"grad_norm": 84.68915066870318, |
|
"learning_rate": 1.0692527729986839e-07, |
|
"logits/chosen": 0.6289076805114746, |
|
"logits/rejected": 0.6697713136672974, |
|
"logps/chosen": -498.74114990234375, |
|
"logps/rejected": -624.8404541015625, |
|
"loss": 0.4051, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.210237979888916, |
|
"rewards/margins": 1.42970609664917, |
|
"rewards/rejected": -3.639944553375244, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7269673704414588, |
|
"grad_norm": 102.47483262799207, |
|
"learning_rate": 1.0521327485452692e-07, |
|
"logits/chosen": 0.9160445928573608, |
|
"logits/rejected": 0.9192814826965332, |
|
"logps/chosen": -506.0091247558594, |
|
"logps/rejected": -682.7260131835938, |
|
"loss": 0.4452, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.385509967803955, |
|
"rewards/margins": 1.963060736656189, |
|
"rewards/rejected": -4.348570823669434, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7293666026871402, |
|
"grad_norm": 87.68223538330484, |
|
"learning_rate": 1.0351142862272468e-07, |
|
"logits/chosen": 0.786394476890564, |
|
"logits/rejected": 0.8277347683906555, |
|
"logps/chosen": -442.1268005371094, |
|
"logps/rejected": -777.5369873046875, |
|
"loss": 0.4823, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1126275062561035, |
|
"rewards/margins": 3.4351935386657715, |
|
"rewards/rejected": -5.547821521759033, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7317658349328215, |
|
"grad_norm": 82.12826652686493, |
|
"learning_rate": 1.0181985798221343e-07, |
|
"logits/chosen": 0.9419485926628113, |
|
"logits/rejected": 0.7954811453819275, |
|
"logps/chosen": -516.1785278320312, |
|
"logps/rejected": -727.7694702148438, |
|
"loss": 0.482, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3551204204559326, |
|
"rewards/margins": 2.0047378540039062, |
|
"rewards/rejected": -4.359858512878418, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7341650671785028, |
|
"grad_norm": 97.38576059604834, |
|
"learning_rate": 1.0013868158995329e-07, |
|
"logits/chosen": 1.0560508966445923, |
|
"logits/rejected": 1.0367125272750854, |
|
"logps/chosen": -506.546142578125, |
|
"logps/rejected": -642.7424926757812, |
|
"loss": 0.4406, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.351123809814453, |
|
"rewards/margins": 1.6191002130508423, |
|
"rewards/rejected": -3.9702236652374268, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7365642994241842, |
|
"grad_norm": 83.58804101429604, |
|
"learning_rate": 9.84680173737887e-08, |
|
"logits/chosen": 0.9423390626907349, |
|
"logits/rejected": 0.9444313049316406, |
|
"logps/chosen": -493.010009765625, |
|
"logps/rejected": -604.2747802734375, |
|
"loss": 0.4556, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1212544441223145, |
|
"rewards/margins": 1.4309076070785522, |
|
"rewards/rejected": -3.5521621704101562, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7389635316698656, |
|
"grad_norm": 75.42257580756576, |
|
"learning_rate": 9.680798252417713e-08, |
|
"logits/chosen": 0.9677990078926086, |
|
"logits/rejected": 0.952473521232605, |
|
"logps/chosen": -407.5378112792969, |
|
"logps/rejected": -601.81494140625, |
|
"loss": 0.4455, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8313926458358765, |
|
"rewards/margins": 1.5932445526123047, |
|
"rewards/rejected": -3.4246373176574707, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.741362763915547, |
|
"grad_norm": 91.46676153177002, |
|
"learning_rate": 9.515869348596808e-08, |
|
"logits/chosen": 0.703011155128479, |
|
"logits/rejected": 0.6986725926399231, |
|
"logps/chosen": -502.3717346191406, |
|
"logps/rejected": -646.5982055664062, |
|
"loss": 0.429, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9104440212249756, |
|
"rewards/margins": 1.7945911884307861, |
|
"rewards/rejected": -3.7050349712371826, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7437619961612284, |
|
"grad_norm": 76.80068261121944, |
|
"learning_rate": 9.352026595023493e-08, |
|
"logits/chosen": 0.7269415259361267, |
|
"logits/rejected": 0.7850369215011597, |
|
"logps/chosen": -503.7071838378906, |
|
"logps/rejected": -581.5618896484375, |
|
"loss": 0.4529, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9429244995117188, |
|
"rewards/margins": 1.099717378616333, |
|
"rewards/rejected": -3.0426416397094727, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7461612284069098, |
|
"grad_norm": 96.72676533223496, |
|
"learning_rate": 9.189281484616004e-08, |
|
"logits/chosen": 0.7977394461631775, |
|
"logits/rejected": 0.7315271496772766, |
|
"logps/chosen": -413.6046447753906, |
|
"logps/rejected": -628.2471313476562, |
|
"loss": 0.4847, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9980442523956299, |
|
"rewards/margins": 1.6271547079086304, |
|
"rewards/rejected": -3.62519907951355, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7485604606525912, |
|
"grad_norm": 110.81891067363568, |
|
"learning_rate": 9.027645433297249e-08, |
|
"logits/chosen": 0.6197754144668579, |
|
"logits/rejected": 0.7629609704017639, |
|
"logps/chosen": -574.749755859375, |
|
"logps/rejected": -702.5732421875, |
|
"loss": 0.5145, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.510627269744873, |
|
"rewards/margins": 1.6529786586761475, |
|
"rewards/rejected": -4.163605690002441, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7509596928982726, |
|
"grad_norm": 123.49512245144888, |
|
"learning_rate": 8.867129779194066e-08, |
|
"logits/chosen": 0.8252323269844055, |
|
"logits/rejected": 0.8725727200508118, |
|
"logps/chosen": -389.5809631347656, |
|
"logps/rejected": -616.921630859375, |
|
"loss": 0.4459, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6772960424423218, |
|
"rewards/margins": 2.261026620864868, |
|
"rewards/rejected": -3.9383227825164795, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.753358925143954, |
|
"grad_norm": 87.25886592352234, |
|
"learning_rate": 8.707745781841866e-08, |
|
"logits/chosen": 0.6948505640029907, |
|
"logits/rejected": 0.7316187620162964, |
|
"logps/chosen": -435.98095703125, |
|
"logps/rejected": -661.4353637695312, |
|
"loss": 0.4818, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.04638409614563, |
|
"rewards/margins": 2.1867334842681885, |
|
"rewards/rejected": -4.23311710357666, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7557581573896354, |
|
"grad_norm": 50.25724996654546, |
|
"learning_rate": 8.549504621394831e-08, |
|
"logits/chosen": 0.8304767608642578, |
|
"logits/rejected": 0.8217374682426453, |
|
"logps/chosen": -431.65057373046875, |
|
"logps/rejected": -658.638916015625, |
|
"loss": 0.3954, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8049224615097046, |
|
"rewards/margins": 2.259718418121338, |
|
"rewards/rejected": -4.064640998840332, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7581573896353166, |
|
"grad_norm": 91.7728219267119, |
|
"learning_rate": 8.392417397841703e-08, |
|
"logits/chosen": 0.9847210049629211, |
|
"logits/rejected": 1.0127713680267334, |
|
"logps/chosen": -442.3282165527344, |
|
"logps/rejected": -618.39453125, |
|
"loss": 0.4341, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.7866935729980469, |
|
"rewards/margins": 1.598508596420288, |
|
"rewards/rejected": -3.385201930999756, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.760556621880998, |
|
"grad_norm": 75.78201822702833, |
|
"learning_rate": 8.236495130227083e-08, |
|
"logits/chosen": 0.8351515531539917, |
|
"logits/rejected": 1.008117437362671, |
|
"logps/chosen": -517.4036254882812, |
|
"logps/rejected": -710.7828979492188, |
|
"loss": 0.4785, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.1466126441955566, |
|
"rewards/margins": 2.3097622394561768, |
|
"rewards/rejected": -4.4563751220703125, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7629558541266794, |
|
"grad_norm": 87.61061245095321, |
|
"learning_rate": 8.081748755878612e-08, |
|
"logits/chosen": 0.889313817024231, |
|
"logits/rejected": 0.971932053565979, |
|
"logps/chosen": -498.4698791503906, |
|
"logps/rejected": -590.8843994140625, |
|
"loss": 0.433, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.132840871810913, |
|
"rewards/margins": 1.4198811054229736, |
|
"rewards/rejected": -3.552722215652466, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7653550863723608, |
|
"grad_norm": 62.41342933725319, |
|
"learning_rate": 7.928189129639632e-08, |
|
"logits/chosen": 0.8822885751724243, |
|
"logits/rejected": 0.8082054257392883, |
|
"logps/chosen": -450.9471130371094, |
|
"logps/rejected": -633.83935546875, |
|
"loss": 0.433, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.9862406253814697, |
|
"rewards/margins": 1.7078149318695068, |
|
"rewards/rejected": -3.6940555572509766, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7677543186180422, |
|
"grad_norm": 129.52846935302878, |
|
"learning_rate": 7.775827023107834e-08, |
|
"logits/chosen": 0.8456379175186157, |
|
"logits/rejected": 0.8468397259712219, |
|
"logps/chosen": -483.727783203125, |
|
"logps/rejected": -631.7237548828125, |
|
"loss": 0.4891, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.2325234413146973, |
|
"rewards/margins": 1.2953944206237793, |
|
"rewards/rejected": -3.5279178619384766, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7701535508637236, |
|
"grad_norm": 72.5671230786154, |
|
"learning_rate": 7.624673123879682e-08, |
|
"logits/chosen": 0.5209338665008545, |
|
"logits/rejected": 0.6625800728797913, |
|
"logps/chosen": -458.20306396484375, |
|
"logps/rejected": -573.9229736328125, |
|
"loss": 0.4546, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0486111640930176, |
|
"rewards/margins": 1.2630393505096436, |
|
"rewards/rejected": -3.311650514602661, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.772552783109405, |
|
"grad_norm": 84.74435000434502, |
|
"learning_rate": 7.474738034800663e-08, |
|
"logits/chosen": 0.7497361898422241, |
|
"logits/rejected": 0.6569031476974487, |
|
"logps/chosen": -420.9825134277344, |
|
"logps/rejected": -643.39306640625, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.023326873779297, |
|
"rewards/margins": 2.3721542358398438, |
|
"rewards/rejected": -4.395481109619141, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7749520153550864, |
|
"grad_norm": 73.22998152255393, |
|
"learning_rate": 7.326032273221606e-08, |
|
"logits/chosen": 0.8783214688301086, |
|
"logits/rejected": 0.8447097539901733, |
|
"logps/chosen": -516.1060180664062, |
|
"logps/rejected": -667.566162109375, |
|
"loss": 0.4224, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1039013862609863, |
|
"rewards/margins": 1.7380021810531616, |
|
"rewards/rejected": -3.8419036865234375, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.7773512476007678, |
|
"grad_norm": 81.39676344639581, |
|
"learning_rate": 7.178566270260872e-08, |
|
"logits/chosen": 1.0345498323440552, |
|
"logits/rejected": 0.9467067718505859, |
|
"logps/chosen": -499.6732482910156, |
|
"logps/rejected": -672.6094970703125, |
|
"loss": 0.4895, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2702062129974365, |
|
"rewards/margins": 1.460707426071167, |
|
"rewards/rejected": -3.7309136390686035, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7797504798464492, |
|
"grad_norm": 64.76872591640944, |
|
"learning_rate": 7.032350370072709e-08, |
|
"logits/chosen": 0.7510415315628052, |
|
"logits/rejected": 0.8044120669364929, |
|
"logps/chosen": -480.25933837890625, |
|
"logps/rejected": -667.4488525390625, |
|
"loss": 0.415, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.932271957397461, |
|
"rewards/margins": 1.963918924331665, |
|
"rewards/rejected": -3.896191120147705, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7821497120921305, |
|
"grad_norm": 86.78885371479711, |
|
"learning_rate": 6.887394829121596e-08, |
|
"logits/chosen": 0.9194823503494263, |
|
"logits/rejected": 0.9296694993972778, |
|
"logps/chosen": -502.721923828125, |
|
"logps/rejected": -827.4345703125, |
|
"loss": 0.4203, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.2838196754455566, |
|
"rewards/margins": 3.3856589794158936, |
|
"rewards/rejected": -5.669478416442871, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.7845489443378119, |
|
"grad_norm": 68.39172461254192, |
|
"learning_rate": 6.743709815462833e-08, |
|
"logits/chosen": 0.7439590096473694, |
|
"logits/rejected": 0.8039398193359375, |
|
"logps/chosen": -509.512451171875, |
|
"logps/rejected": -654.8675537109375, |
|
"loss": 0.4248, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3204588890075684, |
|
"rewards/margins": 1.7843201160430908, |
|
"rewards/rejected": -4.10477876663208, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.7869481765834933, |
|
"grad_norm": 72.83433305419376, |
|
"learning_rate": 6.601305408029287e-08, |
|
"logits/chosen": 1.151757836341858, |
|
"logits/rejected": 1.2020816802978516, |
|
"logps/chosen": -508.3518981933594, |
|
"logps/rejected": -686.0626831054688, |
|
"loss": 0.43, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.4844202995300293, |
|
"rewards/margins": 1.7690684795379639, |
|
"rewards/rejected": -4.253489017486572, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.7893474088291746, |
|
"grad_norm": 88.5623409851496, |
|
"learning_rate": 6.460191595924366e-08, |
|
"logits/chosen": 0.8249871134757996, |
|
"logits/rejected": 0.8214422464370728, |
|
"logps/chosen": -514.89111328125, |
|
"logps/rejected": -685.9147338867188, |
|
"loss": 0.421, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.3745594024658203, |
|
"rewards/margins": 1.7026939392089844, |
|
"rewards/rejected": -4.0772528648376465, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.791746641074856, |
|
"grad_norm": 91.37784001131529, |
|
"learning_rate": 6.320378277721342e-08, |
|
"logits/chosen": 0.9542096257209778, |
|
"logits/rejected": 0.9161281585693359, |
|
"logps/chosen": -516.6944580078125, |
|
"logps/rejected": -628.3036499023438, |
|
"loss": 0.4607, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.4940497875213623, |
|
"rewards/margins": 1.2038791179656982, |
|
"rewards/rejected": -3.6979286670684814, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7941458733205374, |
|
"grad_norm": 84.86391664853313, |
|
"learning_rate": 6.181875260769032e-08, |
|
"logits/chosen": 0.799514889717102, |
|
"logits/rejected": 0.9613885879516602, |
|
"logps/chosen": -503.588134765625, |
|
"logps/rejected": -610.7938232421875, |
|
"loss": 0.4654, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9631938934326172, |
|
"rewards/margins": 1.7460286617279053, |
|
"rewards/rejected": -3.7092223167419434, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.7965451055662188, |
|
"grad_norm": 95.25568475484987, |
|
"learning_rate": 6.044692260503797e-08, |
|
"logits/chosen": 0.912161648273468, |
|
"logits/rejected": 0.9225689172744751, |
|
"logps/chosen": -563.4161987304688, |
|
"logps/rejected": -779.1158447265625, |
|
"loss": 0.3909, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.5288901329040527, |
|
"rewards/margins": 2.3714680671691895, |
|
"rewards/rejected": -4.9003586769104, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.7989443378119002, |
|
"grad_norm": 74.9725384221654, |
|
"learning_rate": 5.9088388997680984e-08, |
|
"logits/chosen": 0.64798903465271, |
|
"logits/rejected": 0.755608081817627, |
|
"logps/chosen": -567.8941040039062, |
|
"logps/rejected": -693.3001708984375, |
|
"loss": 0.4404, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.2403149604797363, |
|
"rewards/margins": 1.9328399896621704, |
|
"rewards/rejected": -4.173154354095459, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8013435700575816, |
|
"grad_norm": 86.99406211803554, |
|
"learning_rate": 5.774324708135439e-08, |
|
"logits/chosen": 0.9441853761672974, |
|
"logits/rejected": 1.0240905284881592, |
|
"logps/chosen": -439.358642578125, |
|
"logps/rejected": -567.1787109375, |
|
"loss": 0.4452, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.033222198486328, |
|
"rewards/margins": 1.4880897998809814, |
|
"rewards/rejected": -3.5213115215301514, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.803742802303263, |
|
"grad_norm": 56.5877804844532, |
|
"learning_rate": 5.641159121241953e-08, |
|
"logits/chosen": 0.9739856719970703, |
|
"logits/rejected": 0.8587212562561035, |
|
"logps/chosen": -434.1171875, |
|
"logps/rejected": -685.1383666992188, |
|
"loss": 0.4256, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0161197185516357, |
|
"rewards/margins": 2.2106873989105225, |
|
"rewards/rejected": -4.226807117462158, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8061420345489443, |
|
"grad_norm": 73.76672845845005, |
|
"learning_rate": 5.5093514801245106e-08, |
|
"logits/chosen": 0.8544769287109375, |
|
"logits/rejected": 0.8131589889526367, |
|
"logps/chosen": -483.707763671875, |
|
"logps/rejected": -670.8358764648438, |
|
"loss": 0.4628, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1783053874969482, |
|
"rewards/margins": 1.594050407409668, |
|
"rewards/rejected": -3.7723560333251953, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8085412667946257, |
|
"grad_norm": 57.949038325445606, |
|
"learning_rate": 5.378911030565453e-08, |
|
"logits/chosen": 0.8310180902481079, |
|
"logits/rejected": 0.8050309419631958, |
|
"logps/chosen": -550.3900756835938, |
|
"logps/rejected": -759.2706909179688, |
|
"loss": 0.4314, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4427483081817627, |
|
"rewards/margins": 1.8099620342254639, |
|
"rewards/rejected": -4.252710342407227, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8109404990403071, |
|
"grad_norm": 61.1368946654354, |
|
"learning_rate": 5.249846922444101e-08, |
|
"logits/chosen": 0.9913840293884277, |
|
"logits/rejected": 0.9467577934265137, |
|
"logps/chosen": -457.57684326171875, |
|
"logps/rejected": -776.4711303710938, |
|
"loss": 0.4067, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.27138090133667, |
|
"rewards/margins": 3.33720064163208, |
|
"rewards/rejected": -5.608581066131592, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8133397312859885, |
|
"grad_norm": 89.27490710874471, |
|
"learning_rate": 5.122168209094865e-08, |
|
"logits/chosen": 1.0495518445968628, |
|
"logits/rejected": 1.053961157798767, |
|
"logps/chosen": -454.3204040527344, |
|
"logps/rejected": -563.7561645507812, |
|
"loss": 0.4299, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2243590354919434, |
|
"rewards/margins": 1.115910530090332, |
|
"rewards/rejected": -3.3402695655822754, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8157389635316699, |
|
"grad_norm": 61.60918476637429, |
|
"learning_rate": 4.995883846672222e-08, |
|
"logits/chosen": 0.6563401818275452, |
|
"logits/rejected": 0.8694489598274231, |
|
"logps/chosen": -631.4844970703125, |
|
"logps/rejected": -667.845458984375, |
|
"loss": 0.4705, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.545196533203125, |
|
"rewards/margins": 1.1111021041870117, |
|
"rewards/rejected": -3.656298875808716, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8181381957773513, |
|
"grad_norm": 62.30470166974104, |
|
"learning_rate": 4.871002693522486e-08, |
|
"logits/chosen": 0.8611141443252563, |
|
"logits/rejected": 0.8700377345085144, |
|
"logps/chosen": -486.58642578125, |
|
"logps/rejected": -599.6117553710938, |
|
"loss": 0.4351, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.045078754425049, |
|
"rewards/margins": 1.389631986618042, |
|
"rewards/rejected": -3.43471097946167, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8205374280230326, |
|
"grad_norm": 54.037365548238334, |
|
"learning_rate": 4.7475335095623956e-08, |
|
"logits/chosen": 0.993360698223114, |
|
"logits/rejected": 0.9139231443405151, |
|
"logps/chosen": -514.1396484375, |
|
"logps/rejected": -675.6636962890625, |
|
"loss": 0.4512, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.4059479236602783, |
|
"rewards/margins": 1.7387027740478516, |
|
"rewards/rejected": -4.144650459289551, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.822936660268714, |
|
"grad_norm": 109.65150784475642, |
|
"learning_rate": 4.6254849556646714e-08, |
|
"logits/chosen": 0.719974160194397, |
|
"logits/rejected": 0.7877434492111206, |
|
"logps/chosen": -547.4036865234375, |
|
"logps/rejected": -721.8352661132812, |
|
"loss": 0.4599, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3394248485565186, |
|
"rewards/margins": 2.0369014739990234, |
|
"rewards/rejected": -4.376326084136963, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8253358925143954, |
|
"grad_norm": 70.55311490465398, |
|
"learning_rate": 4.504865593050483e-08, |
|
"logits/chosen": 0.9115017056465149, |
|
"logits/rejected": 0.9078693389892578, |
|
"logps/chosen": -534.7144165039062, |
|
"logps/rejected": -699.9698486328125, |
|
"loss": 0.4519, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.542038679122925, |
|
"rewards/margins": 1.6244325637817383, |
|
"rewards/rejected": -4.166471004486084, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8277351247600768, |
|
"grad_norm": 93.74369529385898, |
|
"learning_rate": 4.385683882688895e-08, |
|
"logits/chosen": 0.6857677698135376, |
|
"logits/rejected": 0.8255289793014526, |
|
"logps/chosen": -522.6604614257812, |
|
"logps/rejected": -575.476806640625, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.268035411834717, |
|
"rewards/margins": 1.0178310871124268, |
|
"rewards/rejected": -3.2858662605285645, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8301343570057581, |
|
"grad_norm": 79.05926791540598, |
|
"learning_rate": 4.2679481847033985e-08, |
|
"logits/chosen": 0.9840590357780457, |
|
"logits/rejected": 0.9730936288833618, |
|
"logps/chosen": -505.021484375, |
|
"logps/rejected": -708.7816162109375, |
|
"loss": 0.4834, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.289645195007324, |
|
"rewards/margins": 1.9675376415252686, |
|
"rewards/rejected": -4.257182598114014, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8325335892514395, |
|
"grad_norm": 54.85721150289827, |
|
"learning_rate": 4.151666757785435e-08, |
|
"logits/chosen": 0.9241796731948853, |
|
"logits/rejected": 0.8755942583084106, |
|
"logps/chosen": -441.4808044433594, |
|
"logps/rejected": -666.4075317382812, |
|
"loss": 0.4191, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8603441715240479, |
|
"rewards/margins": 2.213796615600586, |
|
"rewards/rejected": -4.074140548706055, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8349328214971209, |
|
"grad_norm": 69.53692701281979, |
|
"learning_rate": 4.036847758615136e-08, |
|
"logits/chosen": 0.7930043935775757, |
|
"logits/rejected": 0.8601939082145691, |
|
"logps/chosen": -546.6875610351562, |
|
"logps/rejected": -728.0465087890625, |
|
"loss": 0.4665, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.904862642288208, |
|
"rewards/margins": 1.7051185369491577, |
|
"rewards/rejected": -4.609980583190918, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8373320537428023, |
|
"grad_norm": 57.15404263804987, |
|
"learning_rate": 3.923499241289113e-08, |
|
"logits/chosen": 0.7899967432022095, |
|
"logits/rejected": 0.8825540542602539, |
|
"logps/chosen": -574.9866333007812, |
|
"logps/rejected": -653.0203857421875, |
|
"loss": 0.4866, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.5510952472686768, |
|
"rewards/margins": 1.3856892585754395, |
|
"rewards/rejected": -3.9367847442626953, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8397312859884837, |
|
"grad_norm": 69.84012863730199, |
|
"learning_rate": 3.811629156755541e-08, |
|
"logits/chosen": 0.8069669008255005, |
|
"logits/rejected": 0.7671279311180115, |
|
"logps/chosen": -506.8164978027344, |
|
"logps/rejected": -644.8781127929688, |
|
"loss": 0.4406, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.056473731994629, |
|
"rewards/margins": 1.4161741733551025, |
|
"rewards/rejected": -3.4726479053497314, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8421305182341651, |
|
"grad_norm": 52.15031521891097, |
|
"learning_rate": 3.701245352256391e-08, |
|
"logits/chosen": 0.8552712202072144, |
|
"logits/rejected": 0.9585361480712891, |
|
"logps/chosen": -508.7132873535156, |
|
"logps/rejected": -594.5696411132812, |
|
"loss": 0.4384, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.0185725688934326, |
|
"rewards/margins": 1.1297063827514648, |
|
"rewards/rejected": -3.1482787132263184, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8445297504798465, |
|
"grad_norm": 86.14563545532785, |
|
"learning_rate": 3.592355570776984e-08, |
|
"logits/chosen": 0.8184653520584106, |
|
"logits/rejected": 0.8094233274459839, |
|
"logps/chosen": -435.2810974121094, |
|
"logps/rejected": -585.3737182617188, |
|
"loss": 0.4398, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8876644372940063, |
|
"rewards/margins": 1.402349829673767, |
|
"rewards/rejected": -3.2900142669677734, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8469289827255279, |
|
"grad_norm": 59.55267119271149, |
|
"learning_rate": 3.484967450502904e-08, |
|
"logits/chosen": 0.9822513461112976, |
|
"logits/rejected": 0.9167024493217468, |
|
"logps/chosen": -434.79217529296875, |
|
"logps/rejected": -651.8714599609375, |
|
"loss": 0.4562, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.1103196144104004, |
|
"rewards/margins": 1.7408027648925781, |
|
"rewards/rejected": -3.8511223793029785, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8493282149712092, |
|
"grad_norm": 77.60746615786343, |
|
"learning_rate": 3.3790885242841296e-08, |
|
"logits/chosen": 0.7015236616134644, |
|
"logits/rejected": 0.7136515378952026, |
|
"logps/chosen": -492.58758544921875, |
|
"logps/rejected": -697.36083984375, |
|
"loss": 0.4202, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.3119843006134033, |
|
"rewards/margins": 2.0802037715911865, |
|
"rewards/rejected": -4.39218807220459, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8517274472168906, |
|
"grad_norm": 68.82304267196884, |
|
"learning_rate": 3.274726219106677e-08, |
|
"logits/chosen": 0.7015440464019775, |
|
"logits/rejected": 0.6956168413162231, |
|
"logps/chosen": -554.7918701171875, |
|
"logps/rejected": -761.311279296875, |
|
"loss": 0.4391, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4717533588409424, |
|
"rewards/margins": 2.1406965255737305, |
|
"rewards/rejected": -4.612449645996094, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8541266794625719, |
|
"grad_norm": 70.53979731887281, |
|
"learning_rate": 3.171887855571642e-08, |
|
"logits/chosen": 0.8410204648971558, |
|
"logits/rejected": 0.8310344815254211, |
|
"logps/chosen": -456.88238525390625, |
|
"logps/rejected": -562.0089111328125, |
|
"loss": 0.4167, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.152271032333374, |
|
"rewards/margins": 1.1227219104766846, |
|
"rewards/rejected": -3.274993419647217, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8565259117082533, |
|
"grad_norm": 78.81509934406596, |
|
"learning_rate": 3.070580647381643e-08, |
|
"logits/chosen": 0.8206079602241516, |
|
"logits/rejected": 0.7918351888656616, |
|
"logps/chosen": -466.89031982421875, |
|
"logps/rejected": -705.0291137695312, |
|
"loss": 0.4482, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1885204315185547, |
|
"rewards/margins": 2.3920505046844482, |
|
"rewards/rejected": -4.580571174621582, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8589251439539347, |
|
"grad_norm": 61.28028591448242, |
|
"learning_rate": 2.9708117008348576e-08, |
|
"logits/chosen": 1.0063971281051636, |
|
"logits/rejected": 1.1010572910308838, |
|
"logps/chosen": -542.7324829101562, |
|
"logps/rejected": -619.283447265625, |
|
"loss": 0.4308, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2260279655456543, |
|
"rewards/margins": 1.2442249059677124, |
|
"rewards/rejected": -3.470252513885498, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8613243761996161, |
|
"grad_norm": 60.82971525790336, |
|
"learning_rate": 2.8725880143264992e-08, |
|
"logits/chosen": 0.8331437110900879, |
|
"logits/rejected": 0.7935817241668701, |
|
"logps/chosen": -512.60009765625, |
|
"logps/rejected": -673.0828247070312, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.5210812091827393, |
|
"rewards/margins": 1.213062047958374, |
|
"rewards/rejected": -3.734143018722534, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8637236084452975, |
|
"grad_norm": 70.37974708800306, |
|
"learning_rate": 2.775916477857948e-08, |
|
"logits/chosen": 0.9399808049201965, |
|
"logits/rejected": 0.883521556854248, |
|
"logps/chosen": -465.8692321777344, |
|
"logps/rejected": -599.1998291015625, |
|
"loss": 0.4188, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.310643434524536, |
|
"rewards/margins": 1.3211534023284912, |
|
"rewards/rejected": -3.6317965984344482, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8661228406909789, |
|
"grad_norm": 88.35135454271389, |
|
"learning_rate": 2.680803872553408e-08, |
|
"logits/chosen": 0.9185535311698914, |
|
"logits/rejected": 0.8474872708320618, |
|
"logps/chosen": -479.0135803222656, |
|
"logps/rejected": -794.5410766601562, |
|
"loss": 0.4443, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.1727652549743652, |
|
"rewards/margins": 3.3179447650909424, |
|
"rewards/rejected": -5.4907097816467285, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.8685220729366603, |
|
"grad_norm": 100.9457534900751, |
|
"learning_rate": 2.5872568701842706e-08, |
|
"logits/chosen": 1.0351996421813965, |
|
"logits/rejected": 0.9647011756896973, |
|
"logps/chosen": -421.97430419921875, |
|
"logps/rejected": -609.3348388671875, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0034446716308594, |
|
"rewards/margins": 1.6635030508041382, |
|
"rewards/rejected": -3.666947841644287, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.8709213051823417, |
|
"grad_norm": 94.52425828375493, |
|
"learning_rate": 2.495282032701096e-08, |
|
"logits/chosen": 0.7409027218818665, |
|
"logits/rejected": 0.9445284605026245, |
|
"logps/chosen": -387.5218811035156, |
|
"logps/rejected": -567.7781982421875, |
|
"loss": 0.4473, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.958953857421875, |
|
"rewards/margins": 2.0247044563293457, |
|
"rewards/rejected": -3.9836583137512207, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.8733205374280231, |
|
"grad_norm": 70.30312418031491, |
|
"learning_rate": 2.4048858117733133e-08, |
|
"logits/chosen": 0.7825919389724731, |
|
"logits/rejected": 0.8838742971420288, |
|
"logps/chosen": -482.88494873046875, |
|
"logps/rejected": -703.1980590820312, |
|
"loss": 0.4115, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.104428768157959, |
|
"rewards/margins": 2.635842800140381, |
|
"rewards/rejected": -4.74027156829834, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.8757197696737045, |
|
"grad_norm": 76.85912872887178, |
|
"learning_rate": 2.3160745483366938e-08, |
|
"logits/chosen": 0.8507946729660034, |
|
"logits/rejected": 0.7550027966499329, |
|
"logps/chosen": -476.3455505371094, |
|
"logps/rejected": -667.7160034179688, |
|
"loss": 0.4358, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2749886512756348, |
|
"rewards/margins": 1.5755815505981445, |
|
"rewards/rejected": -3.8505702018737793, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8781190019193857, |
|
"grad_norm": 66.13251557960523, |
|
"learning_rate": 2.2288544721485197e-08, |
|
"logits/chosen": 0.7868701219558716, |
|
"logits/rejected": 0.6653637290000916, |
|
"logps/chosen": -425.3246154785156, |
|
"logps/rejected": -663.5589599609375, |
|
"loss": 0.4239, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.987339735031128, |
|
"rewards/margins": 2.195021152496338, |
|
"rewards/rejected": -4.182360649108887, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8805182341650671, |
|
"grad_norm": 83.96881421699949, |
|
"learning_rate": 2.1432317013506117e-08, |
|
"logits/chosen": 0.726348876953125, |
|
"logits/rejected": 0.7917483448982239, |
|
"logps/chosen": -507.626953125, |
|
"logps/rejected": -592.4185791015625, |
|
"loss": 0.4797, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.398878574371338, |
|
"rewards/margins": 1.2666693925857544, |
|
"rewards/rejected": -3.6655478477478027, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.8829174664107485, |
|
"grad_norm": 83.60326297726142, |
|
"learning_rate": 2.0592122420401704e-08, |
|
"logits/chosen": 0.8373712301254272, |
|
"logits/rejected": 0.9131274223327637, |
|
"logps/chosen": -474.8595275878906, |
|
"logps/rejected": -605.430908203125, |
|
"loss": 0.4502, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.383213520050049, |
|
"rewards/margins": 1.1936473846435547, |
|
"rewards/rejected": -3.5768609046936035, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8853166986564299, |
|
"grad_norm": 77.40826507831135, |
|
"learning_rate": 1.976801987848459e-08, |
|
"logits/chosen": 0.930553138256073, |
|
"logits/rejected": 0.8549301028251648, |
|
"logps/chosen": -526.2114868164062, |
|
"logps/rejected": -752.96826171875, |
|
"loss": 0.4567, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4706318378448486, |
|
"rewards/margins": 2.1137099266052246, |
|
"rewards/rejected": -4.584342002868652, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8877159309021113, |
|
"grad_norm": 96.96225759611778, |
|
"learning_rate": 1.8960067195273987e-08, |
|
"logits/chosen": 0.8870590925216675, |
|
"logits/rejected": 0.9308145642280579, |
|
"logps/chosen": -417.0521545410156, |
|
"logps/rejected": -612.4988403320312, |
|
"loss": 0.4493, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.9357048273086548, |
|
"rewards/margins": 1.9571855068206787, |
|
"rewards/rejected": -3.892890214920044, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8901151631477927, |
|
"grad_norm": 71.30549608395641, |
|
"learning_rate": 1.816832104544072e-08, |
|
"logits/chosen": 0.9065736532211304, |
|
"logits/rejected": 0.9175186157226562, |
|
"logps/chosen": -539.4212646484375, |
|
"logps/rejected": -658.9049682617188, |
|
"loss": 0.4474, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.5305135250091553, |
|
"rewards/margins": 1.4688078165054321, |
|
"rewards/rejected": -3.999321460723877, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8925143953934741, |
|
"grad_norm": 82.64198382992518, |
|
"learning_rate": 1.7392836966831553e-08, |
|
"logits/chosen": 0.70596843957901, |
|
"logits/rejected": 0.7268840074539185, |
|
"logps/chosen": -485.7884216308594, |
|
"logps/rejected": -671.3975830078125, |
|
"loss": 0.4019, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.097639322280884, |
|
"rewards/margins": 2.1588146686553955, |
|
"rewards/rejected": -4.256453990936279, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.8949136276391555, |
|
"grad_norm": 98.1360011353097, |
|
"learning_rate": 1.663366935657373e-08, |
|
"logits/chosen": 0.9807497262954712, |
|
"logits/rejected": 1.0856688022613525, |
|
"logps/chosen": -448.825439453125, |
|
"logps/rejected": -644.4601440429688, |
|
"loss": 0.4741, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1423897743225098, |
|
"rewards/margins": 1.8271543979644775, |
|
"rewards/rejected": -3.969543933868408, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8973128598848369, |
|
"grad_norm": 122.74086812234569, |
|
"learning_rate": 1.5890871467258898e-08, |
|
"logits/chosen": 0.6897640228271484, |
|
"logits/rejected": 0.744226336479187, |
|
"logps/chosen": -566.0299682617188, |
|
"logps/rejected": -666.23388671875, |
|
"loss": 0.4473, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.309209108352661, |
|
"rewards/margins": 1.3578829765319824, |
|
"rewards/rejected": -3.6670920848846436, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8997120921305183, |
|
"grad_norm": 58.86732920896171, |
|
"learning_rate": 1.5164495403207967e-08, |
|
"logits/chosen": 0.8211570978164673, |
|
"logits/rejected": 0.6882201433181763, |
|
"logps/chosen": -531.49462890625, |
|
"logps/rejected": -776.5237426757812, |
|
"loss": 0.4227, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.4697484970092773, |
|
"rewards/margins": 2.200464963912964, |
|
"rewards/rejected": -4.670213222503662, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.9021113243761996, |
|
"grad_norm": 57.649912721089706, |
|
"learning_rate": 1.4454592116815962e-08, |
|
"logits/chosen": 0.8841966390609741, |
|
"logits/rejected": 0.805067241191864, |
|
"logps/chosen": -482.54351806640625, |
|
"logps/rejected": -695.4102783203125, |
|
"loss": 0.4, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.0235962867736816, |
|
"rewards/margins": 1.969292402267456, |
|
"rewards/rejected": -3.992888927459717, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.904510556621881, |
|
"grad_norm": 50.7982341760688, |
|
"learning_rate": 1.3761211404977934e-08, |
|
"logits/chosen": 0.8565505146980286, |
|
"logits/rejected": 0.836571991443634, |
|
"logps/chosen": -485.120849609375, |
|
"logps/rejected": -724.4110107421875, |
|
"loss": 0.3732, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.382401704788208, |
|
"rewards/margins": 2.487177848815918, |
|
"rewards/rejected": -4.869579792022705, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9069097888675623, |
|
"grad_norm": 91.2784276411979, |
|
"learning_rate": 1.3084401905596177e-08, |
|
"logits/chosen": 0.6925005912780762, |
|
"logits/rejected": 0.7664632201194763, |
|
"logps/chosen": -545.9867553710938, |
|
"logps/rejected": -661.2352294921875, |
|
"loss": 0.4553, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.394007682800293, |
|
"rewards/margins": 1.6589243412017822, |
|
"rewards/rejected": -4.052931785583496, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9093090211132437, |
|
"grad_norm": 79.35255117623291, |
|
"learning_rate": 1.2424211094168053e-08, |
|
"logits/chosen": 0.9943884015083313, |
|
"logits/rejected": 1.073082685470581, |
|
"logps/chosen": -547.3638916015625, |
|
"logps/rejected": -710.6573486328125, |
|
"loss": 0.4329, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.1406822204589844, |
|
"rewards/margins": 1.721480131149292, |
|
"rewards/rejected": -3.8621628284454346, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9117082533589251, |
|
"grad_norm": 71.40125610894862, |
|
"learning_rate": 1.1780685280456143e-08, |
|
"logits/chosen": 0.8574091196060181, |
|
"logits/rejected": 0.7331072688102722, |
|
"logps/chosen": -591.9991455078125, |
|
"logps/rejected": -859.9972534179688, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.830016613006592, |
|
"rewards/margins": 2.612532138824463, |
|
"rewards/rejected": -5.442547798156738, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9141074856046065, |
|
"grad_norm": 113.21916295933775, |
|
"learning_rate": 1.1153869605239564e-08, |
|
"logits/chosen": 0.9613982439041138, |
|
"logits/rejected": 1.0917298793792725, |
|
"logps/chosen": -502.1275329589844, |
|
"logps/rejected": -585.1144409179688, |
|
"loss": 0.442, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1031970977783203, |
|
"rewards/margins": 1.2502291202545166, |
|
"rewards/rejected": -3.353426456451416, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9165067178502879, |
|
"grad_norm": 101.01935366187557, |
|
"learning_rate": 1.0543808037147606e-08, |
|
"logits/chosen": 0.8103985786437988, |
|
"logits/rejected": 0.7377224564552307, |
|
"logps/chosen": -482.72808837890625, |
|
"logps/rejected": -738.6390380859375, |
|
"loss": 0.4214, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.066443920135498, |
|
"rewards/margins": 2.5723373889923096, |
|
"rewards/rejected": -4.638781547546387, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9189059500959693, |
|
"grad_norm": 71.69214889770127, |
|
"learning_rate": 9.95054336957557e-09, |
|
"logits/chosen": 0.7756108045578003, |
|
"logits/rejected": 0.6678739786148071, |
|
"logps/chosen": -484.711669921875, |
|
"logps/rejected": -620.3389892578125, |
|
"loss": 0.4047, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1276774406433105, |
|
"rewards/margins": 1.3302921056747437, |
|
"rewards/rejected": -3.457970142364502, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9213051823416507, |
|
"grad_norm": 115.71799337910478, |
|
"learning_rate": 9.37411721768286e-09, |
|
"logits/chosen": 1.069846510887146, |
|
"logits/rejected": 0.9411687850952148, |
|
"logps/chosen": -542.6075439453125, |
|
"logps/rejected": -745.0030517578125, |
|
"loss": 0.4398, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.605783224105835, |
|
"rewards/margins": 1.680440902709961, |
|
"rewards/rejected": -4.286223411560059, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9237044145873321, |
|
"grad_norm": 72.76013421927613, |
|
"learning_rate": 8.81457001547392e-09, |
|
"logits/chosen": 0.8603383302688599, |
|
"logits/rejected": 0.8015602231025696, |
|
"logps/chosen": -529.1666870117188, |
|
"logps/rejected": -688.7030639648438, |
|
"loss": 0.4457, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.521758794784546, |
|
"rewards/margins": 1.4825658798217773, |
|
"rewards/rejected": -4.004324913024902, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9261036468330134, |
|
"grad_norm": 71.95359387776338, |
|
"learning_rate": 8.271941012961942e-09, |
|
"logits/chosen": 1.0213429927825928, |
|
"logits/rejected": 0.8971840739250183, |
|
"logps/chosen": -489.87799072265625, |
|
"logps/rejected": -752.3318481445312, |
|
"loss": 0.432, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.571533679962158, |
|
"rewards/margins": 2.083768844604492, |
|
"rewards/rejected": -4.65530252456665, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9285028790786948, |
|
"grad_norm": 90.2302144007233, |
|
"learning_rate": 7.746268273415568e-09, |
|
"logits/chosen": 1.0803401470184326, |
|
"logits/rejected": 0.920336127281189, |
|
"logps/chosen": -511.31396484375, |
|
"logps/rejected": -649.63330078125, |
|
"loss": 0.4543, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.257201910018921, |
|
"rewards/margins": 0.9670624732971191, |
|
"rewards/rejected": -3.224264144897461, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9309021113243762, |
|
"grad_norm": 70.88346903978004, |
|
"learning_rate": 7.237588670689076e-09, |
|
"logits/chosen": 0.619393527507782, |
|
"logits/rejected": 0.715266227722168, |
|
"logps/chosen": -490.1979064941406, |
|
"logps/rejected": -669.96337890625, |
|
"loss": 0.4144, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.2626171112060547, |
|
"rewards/margins": 2.1452434062957764, |
|
"rewards/rejected": -4.40786075592041, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9333013435700576, |
|
"grad_norm": 60.82584098483564, |
|
"learning_rate": 6.745937886635606e-09, |
|
"logits/chosen": 0.7951589822769165, |
|
"logits/rejected": 0.6863880157470703, |
|
"logps/chosen": -527.05419921875, |
|
"logps/rejected": -772.709228515625, |
|
"loss": 0.4508, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2287442684173584, |
|
"rewards/margins": 2.489655017852783, |
|
"rewards/rejected": -4.718400001525879, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.935700575815739, |
|
"grad_norm": 80.48083510506083, |
|
"learning_rate": 6.271350408604409e-09, |
|
"logits/chosen": 0.9316922426223755, |
|
"logits/rejected": 0.8792417645454407, |
|
"logps/chosen": -423.369873046875, |
|
"logps/rejected": -649.6119995117188, |
|
"loss": 0.4448, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.837327241897583, |
|
"rewards/margins": 2.0405640602111816, |
|
"rewards/rejected": -3.8778910636901855, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9380998080614203, |
|
"grad_norm": 72.24202288150467, |
|
"learning_rate": 5.813859527021487e-09, |
|
"logits/chosen": 1.0281699895858765, |
|
"logits/rejected": 1.0224875211715698, |
|
"logps/chosen": -502.59619140625, |
|
"logps/rejected": -727.0276489257812, |
|
"loss": 0.4146, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.386428117752075, |
|
"rewards/margins": 2.5029191970825195, |
|
"rewards/rejected": -4.889347076416016, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9404990403071017, |
|
"grad_norm": 66.9309047603776, |
|
"learning_rate": 5.373497333054616e-09, |
|
"logits/chosen": 0.9137152433395386, |
|
"logits/rejected": 0.9341109991073608, |
|
"logps/chosen": -529.5946044921875, |
|
"logps/rejected": -641.7630615234375, |
|
"loss": 0.4776, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.3844833374023438, |
|
"rewards/margins": 1.2012643814086914, |
|
"rewards/rejected": -3.585747480392456, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9428982725527831, |
|
"grad_norm": 72.95597186585024, |
|
"learning_rate": 4.950294716362213e-09, |
|
"logits/chosen": 0.8800632357597351, |
|
"logits/rejected": 0.9704621434211731, |
|
"logps/chosen": -570.0379638671875, |
|
"logps/rejected": -725.4605712890625, |
|
"loss": 0.4694, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.645059108734131, |
|
"rewards/margins": 1.603656530380249, |
|
"rewards/rejected": -4.248715400695801, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9452975047984645, |
|
"grad_norm": 78.91059649606636, |
|
"learning_rate": 4.544281362926422e-09, |
|
"logits/chosen": 0.7107178568840027, |
|
"logits/rejected": 0.697405219078064, |
|
"logps/chosen": -526.5075073242188, |
|
"logps/rejected": -704.602294921875, |
|
"loss": 0.4502, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0540153980255127, |
|
"rewards/margins": 1.9263098239898682, |
|
"rewards/rejected": -3.980325222015381, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9476967370441459, |
|
"grad_norm": 69.56211331439835, |
|
"learning_rate": 4.15548575297095e-09, |
|
"logits/chosen": 0.7250418066978455, |
|
"logits/rejected": 0.7543852925300598, |
|
"logps/chosen": -470.14764404296875, |
|
"logps/rejected": -671.8516235351562, |
|
"loss": 0.4194, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1768879890441895, |
|
"rewards/margins": 2.124656915664673, |
|
"rewards/rejected": -4.301545143127441, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9500959692898272, |
|
"grad_norm": 56.96632144130869, |
|
"learning_rate": 3.7839351589631366e-09, |
|
"logits/chosen": 0.7653132677078247, |
|
"logits/rejected": 0.5591055750846863, |
|
"logps/chosen": -467.858154296875, |
|
"logps/rejected": -697.0022583007812, |
|
"loss": 0.435, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.238950729370117, |
|
"rewards/margins": 1.8226745128631592, |
|
"rewards/rejected": -4.0616254806518555, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9524952015355086, |
|
"grad_norm": 69.60701337402978, |
|
"learning_rate": 3.4296556437010405e-09, |
|
"logits/chosen": 0.9205236434936523, |
|
"logits/rejected": 0.9008172154426575, |
|
"logps/chosen": -452.0506896972656, |
|
"logps/rejected": -605.8800659179688, |
|
"loss": 0.4272, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3074121475219727, |
|
"rewards/margins": 1.5420969724655151, |
|
"rewards/rejected": -3.8495094776153564, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.95489443378119, |
|
"grad_norm": 93.23037895781522, |
|
"learning_rate": 3.092672058485124e-09, |
|
"logits/chosen": 0.9459999799728394, |
|
"logits/rejected": 0.897383987903595, |
|
"logps/chosen": -495.180908203125, |
|
"logps/rejected": -789.0716552734375, |
|
"loss": 0.4745, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.483440399169922, |
|
"rewards/margins": 2.78719425201416, |
|
"rewards/rejected": -5.270634651184082, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9572936660268714, |
|
"grad_norm": 127.22498359251546, |
|
"learning_rate": 2.7730080413750356e-09, |
|
"logits/chosen": 0.911720871925354, |
|
"logits/rejected": 0.9415246844291687, |
|
"logps/chosen": -515.1292724609375, |
|
"logps/rejected": -705.1092529296875, |
|
"loss": 0.4372, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2659924030303955, |
|
"rewards/margins": 1.911820650100708, |
|
"rewards/rejected": -4.1778130531311035, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"grad_norm": 77.7528389426739, |
|
"learning_rate": 2.4706860155316033e-09, |
|
"logits/chosen": 0.7923919558525085, |
|
"logits/rejected": 0.8714240789413452, |
|
"logps/chosen": -590.3426513671875, |
|
"logps/rejected": -742.2245483398438, |
|
"loss": 0.4475, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.454576015472412, |
|
"rewards/margins": 1.4876351356506348, |
|
"rewards/rejected": -3.9422104358673096, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"eval_logits/chosen": 1.1061499118804932, |
|
"eval_logits/rejected": 1.0543816089630127, |
|
"eval_logps/chosen": -505.8360900878906, |
|
"eval_logps/rejected": -707.1604614257812, |
|
"eval_loss": 0.433965802192688, |
|
"eval_rewards/accuracies": 0.8214285969734192, |
|
"eval_rewards/chosen": -2.3310441970825195, |
|
"eval_rewards/margins": 1.9598020315170288, |
|
"eval_rewards/rejected": -4.2908453941345215, |
|
"eval_runtime": 202.4459, |
|
"eval_samples_per_second": 22.036, |
|
"eval_steps_per_second": 0.346, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9620921305182342, |
|
"grad_norm": 87.60486138509854, |
|
"learning_rate": 2.185727187643843e-09, |
|
"logits/chosen": 0.811089813709259, |
|
"logits/rejected": 0.7574669718742371, |
|
"logps/chosen": -457.95782470703125, |
|
"logps/rejected": -710.8450927734375, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.3024678230285645, |
|
"rewards/margins": 2.4505763053894043, |
|
"rewards/rejected": -4.753044128417969, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9644913627639156, |
|
"grad_norm": 83.75851805575147, |
|
"learning_rate": 1.9181515464413434e-09, |
|
"logits/chosen": 0.6803231239318848, |
|
"logits/rejected": 0.633036196231842, |
|
"logps/chosen": -612.798583984375, |
|
"logps/rejected": -848.0003662109375, |
|
"loss": 0.4194, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.3807148933410645, |
|
"rewards/margins": 2.3853487968444824, |
|
"rewards/rejected": -4.766063213348389, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.966890595009597, |
|
"grad_norm": 87.7379361467868, |
|
"learning_rate": 1.6679778612923302e-09, |
|
"logits/chosen": 0.7605918049812317, |
|
"logits/rejected": 0.8963466882705688, |
|
"logps/chosen": -566.0819091796875, |
|
"logps/rejected": -676.0416870117188, |
|
"loss": 0.4145, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.5707292556762695, |
|
"rewards/margins": 1.1119014024734497, |
|
"rewards/rejected": -3.682631015777588, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9692898272552783, |
|
"grad_norm": 84.03419540949254, |
|
"learning_rate": 1.43522368088686e-09, |
|
"logits/chosen": 0.8736980557441711, |
|
"logits/rejected": 0.8480004072189331, |
|
"logps/chosen": -551.294677734375, |
|
"logps/rejected": -828.1793212890625, |
|
"loss": 0.5091, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.7000527381896973, |
|
"rewards/margins": 2.788301467895508, |
|
"rewards/rejected": -5.488353729248047, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.9716890595009597, |
|
"grad_norm": 97.0344524282083, |
|
"learning_rate": 1.2199053320059993e-09, |
|
"logits/chosen": 0.9054147005081177, |
|
"logits/rejected": 0.7992134094238281, |
|
"logps/chosen": -495.37335205078125, |
|
"logps/rejected": -698.958740234375, |
|
"loss": 0.45, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1146128177642822, |
|
"rewards/margins": 1.9125534296035767, |
|
"rewards/rejected": -4.02716588973999, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.974088291746641, |
|
"grad_norm": 65.78201833400202, |
|
"learning_rate": 1.0220379183764338e-09, |
|
"logits/chosen": 0.7898594737052917, |
|
"logits/rejected": 0.7758785486221313, |
|
"logps/chosen": -413.4170837402344, |
|
"logps/rejected": -673.2385864257812, |
|
"loss": 0.4201, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9046857357025146, |
|
"rewards/margins": 2.505577802658081, |
|
"rewards/rejected": -4.4102630615234375, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.9764875239923224, |
|
"grad_norm": 71.73815251761103, |
|
"learning_rate": 8.416353196111503e-10, |
|
"logits/chosen": 1.1263091564178467, |
|
"logits/rejected": 1.0472917556762695, |
|
"logps/chosen": -521.9844360351562, |
|
"logps/rejected": -680.7391967773438, |
|
"loss": 0.4766, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.6944947242736816, |
|
"rewards/margins": 1.6928943395614624, |
|
"rewards/rejected": -4.387389183044434, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9788867562380038, |
|
"grad_norm": 75.95604538471342, |
|
"learning_rate": 6.787101902356873e-10, |
|
"logits/chosen": 1.0549625158309937, |
|
"logits/rejected": 0.9972447156906128, |
|
"logps/chosen": -524.8448486328125, |
|
"logps/rejected": -706.245361328125, |
|
"loss": 0.4008, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4551174640655518, |
|
"rewards/margins": 1.605100393295288, |
|
"rewards/rejected": -4.06021785736084, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9812859884836852, |
|
"grad_norm": 107.92031097958193, |
|
"learning_rate": 5.332739588005953e-10, |
|
"logits/chosen": 0.799576997756958, |
|
"logits/rejected": 0.6931667327880859, |
|
"logps/chosen": -453.119140625, |
|
"logps/rejected": -698.4215698242188, |
|
"loss": 0.453, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.308990955352783, |
|
"rewards/margins": 2.1881864070892334, |
|
"rewards/rejected": -4.497177600860596, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9836852207293666, |
|
"grad_norm": 101.19818601167513, |
|
"learning_rate": 4.053368270797164e-10, |
|
"logits/chosen": 0.9475057721138, |
|
"logits/rejected": 0.832554817199707, |
|
"logps/chosen": -514.1024169921875, |
|
"logps/rejected": -710.1411743164062, |
|
"loss": 0.4208, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.581038236618042, |
|
"rewards/margins": 1.9726848602294922, |
|
"rewards/rejected": -4.553723335266113, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.986084452975048, |
|
"grad_norm": 55.49596876258818, |
|
"learning_rate": 2.949077693545354e-10, |
|
"logits/chosen": 0.9003801345825195, |
|
"logits/rejected": 0.8903058767318726, |
|
"logps/chosen": -527.4493408203125, |
|
"logps/rejected": -702.6768188476562, |
|
"loss": 0.4758, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.275557041168213, |
|
"rewards/margins": 1.4511444568634033, |
|
"rewards/rejected": -3.7267022132873535, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9884836852207294, |
|
"grad_norm": 70.20445502352152, |
|
"learning_rate": 2.0199453178471047e-10, |
|
"logits/chosen": 0.8607804179191589, |
|
"logits/rejected": 0.9390938878059387, |
|
"logps/chosen": -549.3711547851562, |
|
"logps/rejected": -643.3042602539062, |
|
"loss": 0.4414, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.1537482738494873, |
|
"rewards/margins": 1.3532254695892334, |
|
"rewards/rejected": -3.5069739818573, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9908829174664108, |
|
"grad_norm": 80.15871452300773, |
|
"learning_rate": 1.266036318647301e-10, |
|
"logits/chosen": 0.8248364329338074, |
|
"logits/rejected": 0.776924729347229, |
|
"logps/chosen": -562.0177001953125, |
|
"logps/rejected": -780.6102294921875, |
|
"loss": 0.4253, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.316624879837036, |
|
"rewards/margins": 2.4005494117736816, |
|
"rewards/rejected": -4.7171735763549805, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9932821497120922, |
|
"grad_norm": 100.31797934426135, |
|
"learning_rate": 6.874035796672339e-11, |
|
"logits/chosen": 0.7774088978767395, |
|
"logits/rejected": 0.8137041330337524, |
|
"logps/chosen": -511.67156982421875, |
|
"logps/rejected": -785.0963134765625, |
|
"loss": 0.4319, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.072662353515625, |
|
"rewards/margins": 3.2821147441864014, |
|
"rewards/rejected": -5.3547773361206055, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9956813819577736, |
|
"grad_norm": 87.24823051550543, |
|
"learning_rate": 2.8408768969423458e-11, |
|
"logits/chosen": 0.7539950013160706, |
|
"logits/rejected": 0.6739236116409302, |
|
"logps/chosen": -517.9874267578125, |
|
"logps/rejected": -718.8265380859375, |
|
"loss": 0.4166, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.2389488220214844, |
|
"rewards/margins": 1.894044280052185, |
|
"rewards/rejected": -4.132993221282959, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9980806142034548, |
|
"grad_norm": 113.5161042209543, |
|
"learning_rate": 5.611693973617271e-12, |
|
"logits/chosen": 1.0328501462936401, |
|
"logits/rejected": 0.9850383996963501, |
|
"logps/chosen": -451.6390075683594, |
|
"logps/rejected": -613.6373291015625, |
|
"loss": 0.4945, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1225802898406982, |
|
"rewards/margins": 1.4569990634918213, |
|
"rewards/rejected": -3.5795795917510986, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 4168, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4974772959890384, |
|
"train_runtime": 14966.1301, |
|
"train_samples_per_second": 8.911, |
|
"train_steps_per_second": 0.278 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4168, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|