|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9979342973279136, |
|
"eval_steps": 400, |
|
"global_step": 468, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002132338242153662, |
|
"grad_norm": 4.209297607323075, |
|
"learning_rate": 1.0638297872340425e-08, |
|
"logits/chosen": -0.5133028626441956, |
|
"logits/rejected": -0.44742655754089355, |
|
"logps/chosen": -137.54428100585938, |
|
"logps/rejected": -153.15798950195312, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010661691210768308, |
|
"grad_norm": 3.882884920695959, |
|
"learning_rate": 5.3191489361702123e-08, |
|
"logits/chosen": -0.5493287444114685, |
|
"logits/rejected": -0.5353066325187683, |
|
"logps/chosen": -149.18487548828125, |
|
"logps/rejected": -152.49844360351562, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.3984375, |
|
"rewards/chosen": 5.994962702970952e-05, |
|
"rewards/margins": 0.00012374535435810685, |
|
"rewards/rejected": -6.379572732839733e-05, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.021323382421536616, |
|
"grad_norm": 4.187160307696768, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -0.5537915229797363, |
|
"logits/rejected": -0.49435940384864807, |
|
"logps/chosen": -148.3112030029297, |
|
"logps/rejected": -148.7774658203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.00019158302166033536, |
|
"rewards/margins": -4.862697096541524e-05, |
|
"rewards/rejected": 0.0002402100944891572, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.031985073632304926, |
|
"grad_norm": 4.989515444992066, |
|
"learning_rate": 1.5957446808510638e-07, |
|
"logits/chosen": -0.641636312007904, |
|
"logits/rejected": -0.5020254850387573, |
|
"logps/chosen": -145.27212524414062, |
|
"logps/rejected": -152.85948181152344, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -8.565824100514874e-05, |
|
"rewards/margins": 2.2089341655373573e-05, |
|
"rewards/rejected": -0.00010774763359222561, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04264676484307323, |
|
"grad_norm": 4.2620983128908385, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -0.5836353898048401, |
|
"logits/rejected": -0.513951301574707, |
|
"logps/chosen": -136.21817016601562, |
|
"logps/rejected": -140.47598266601562, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.00023734460410196334, |
|
"rewards/margins": 0.0010727389017120004, |
|
"rewards/rejected": -0.0008353941957466304, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05330845605384154, |
|
"grad_norm": 4.037816980879404, |
|
"learning_rate": 2.659574468085106e-07, |
|
"logits/chosen": -0.5967869758605957, |
|
"logits/rejected": -0.552001953125, |
|
"logps/chosen": -164.9960174560547, |
|
"logps/rejected": -159.42225646972656, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0004635151126421988, |
|
"rewards/margins": 0.00024214605218730867, |
|
"rewards/rejected": 0.0002213690459029749, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06397014726460985, |
|
"grad_norm": 3.926673567554115, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -0.6094973683357239, |
|
"logits/rejected": -0.5662705898284912, |
|
"logps/chosen": -152.1143798828125, |
|
"logps/rejected": -156.60104370117188, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.00032081748940981925, |
|
"rewards/margins": 0.001090517034754157, |
|
"rewards/rejected": -0.0014113344950601459, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07463183847537816, |
|
"grad_norm": 3.9457303398915133, |
|
"learning_rate": 3.7234042553191484e-07, |
|
"logits/chosen": -0.6680720448493958, |
|
"logits/rejected": -0.5494471788406372, |
|
"logps/chosen": -142.37496948242188, |
|
"logps/rejected": -141.9342498779297, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 7.808269583620131e-05, |
|
"rewards/margins": 0.0020259125158190727, |
|
"rewards/rejected": -0.0019478298490867019, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08529352968614647, |
|
"grad_norm": 3.955885053818346, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -0.6476858854293823, |
|
"logits/rejected": -0.5677641034126282, |
|
"logps/chosen": -144.7392578125, |
|
"logps/rejected": -150.64984130859375, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.0007050691056065261, |
|
"rewards/margins": 0.0024024732410907745, |
|
"rewards/rejected": -0.003107542172074318, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09595522089691477, |
|
"grad_norm": 4.193445815112738, |
|
"learning_rate": 4.787234042553192e-07, |
|
"logits/chosen": -0.5770394206047058, |
|
"logits/rejected": -0.496532142162323, |
|
"logps/chosen": -150.3260498046875, |
|
"logps/rejected": -146.87594604492188, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.0027928086929023266, |
|
"rewards/margins": 0.004344488959759474, |
|
"rewards/rejected": -0.0071372976526618, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10661691210768308, |
|
"grad_norm": 4.127676285889427, |
|
"learning_rate": 4.999373573764186e-07, |
|
"logits/chosen": -0.6215689182281494, |
|
"logits/rejected": -0.509456992149353, |
|
"logps/chosen": -156.27560424804688, |
|
"logps/rejected": -164.31430053710938, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.004007105715572834, |
|
"rewards/margins": 0.008976086974143982, |
|
"rewards/rejected": -0.012983192689716816, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11727860331845139, |
|
"grad_norm": 3.9160373040316245, |
|
"learning_rate": 4.995546550233241e-07, |
|
"logits/chosen": -0.6048154234886169, |
|
"logits/rejected": -0.5707536935806274, |
|
"logps/chosen": -150.3190460205078, |
|
"logps/rejected": -151.73095703125, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.008075769990682602, |
|
"rewards/margins": 0.006209281273186207, |
|
"rewards/rejected": -0.014285050332546234, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1279402945292197, |
|
"grad_norm": 4.077147191981429, |
|
"learning_rate": 4.988245838331339e-07, |
|
"logits/chosen": -0.5681597590446472, |
|
"logits/rejected": -0.49796080589294434, |
|
"logps/chosen": -156.58375549316406, |
|
"logps/rejected": -162.067138671875, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.015376018360257149, |
|
"rewards/margins": 0.01199757494032383, |
|
"rewards/rejected": -0.02737359330058098, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.138601985739988, |
|
"grad_norm": 4.03433737830531, |
|
"learning_rate": 4.977481600320545e-07, |
|
"logits/chosen": -0.6257273554801941, |
|
"logits/rejected": -0.5969006419181824, |
|
"logps/chosen": -153.68531799316406, |
|
"logps/rejected": -153.58663940429688, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.021818330511450768, |
|
"rewards/margins": 0.014782066456973553, |
|
"rewards/rejected": -0.036600399762392044, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14926367695075632, |
|
"grad_norm": 4.603352859826981, |
|
"learning_rate": 4.963268819535228e-07, |
|
"logits/chosen": -0.6294292211532593, |
|
"logits/rejected": -0.4907303750514984, |
|
"logps/chosen": -154.8817901611328, |
|
"logps/rejected": -154.6444549560547, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03498289734125137, |
|
"rewards/margins": 0.0258068535476923, |
|
"rewards/rejected": -0.06078975275158882, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1599253681615246, |
|
"grad_norm": 4.546908129493846, |
|
"learning_rate": 4.945627279525942e-07, |
|
"logits/chosen": -0.5467456579208374, |
|
"logits/rejected": -0.4726549983024597, |
|
"logps/chosen": -142.3949432373047, |
|
"logps/rejected": -150.24319458007812, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.04987934231758118, |
|
"rewards/margins": 0.025978704914450645, |
|
"rewards/rejected": -0.07585804164409637, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17058705937229293, |
|
"grad_norm": 4.609197926565239, |
|
"learning_rate": 4.924581536521611e-07, |
|
"logits/chosen": -0.6952506303787231, |
|
"logits/rejected": -0.5784528851509094, |
|
"logps/chosen": -157.6682891845703, |
|
"logps/rejected": -166.3611297607422, |
|
"loss": 0.6744, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.07680492848157883, |
|
"rewards/margins": 0.0350065752863884, |
|
"rewards/rejected": -0.11181151866912842, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18124875058306122, |
|
"grad_norm": 5.2243481961036915, |
|
"learning_rate": 4.900160885248362e-07, |
|
"logits/chosen": -0.6026470065116882, |
|
"logits/rejected": -0.5226645469665527, |
|
"logps/chosen": -157.77618408203125, |
|
"logps/rejected": -168.16580200195312, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.10381762683391571, |
|
"rewards/margins": 0.04510679468512535, |
|
"rewards/rejected": -0.14892444014549255, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19191044179382954, |
|
"grad_norm": 5.426637382074236, |
|
"learning_rate": 4.872399318152593e-07, |
|
"logits/chosen": -0.5520131587982178, |
|
"logits/rejected": -0.5407645106315613, |
|
"logps/chosen": -169.11874389648438, |
|
"logps/rejected": -171.12612915039062, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.15968191623687744, |
|
"rewards/margins": 0.05249009653925896, |
|
"rewards/rejected": -0.2121720016002655, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20257213300459787, |
|
"grad_norm": 5.259574320459566, |
|
"learning_rate": 4.841335478085015e-07, |
|
"logits/chosen": -0.602705180644989, |
|
"logits/rejected": -0.5497740507125854, |
|
"logps/chosen": -160.9965057373047, |
|
"logps/rejected": -174.87588500976562, |
|
"loss": 0.6548, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1887643039226532, |
|
"rewards/margins": 0.09653543680906296, |
|
"rewards/rejected": -0.28529977798461914, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21323382421536616, |
|
"grad_norm": 5.216332894677162, |
|
"learning_rate": 4.807012604511541e-07, |
|
"logits/chosen": -0.5497472882270813, |
|
"logits/rejected": -0.4645184576511383, |
|
"logps/chosen": -184.96392822265625, |
|
"logps/rejected": -191.80685424804688, |
|
"loss": 0.6466, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.2781105637550354, |
|
"rewards/margins": 0.07820834219455719, |
|
"rewards/rejected": -0.3563188910484314, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22389551542613448, |
|
"grad_norm": 5.528705114015297, |
|
"learning_rate": 4.769478473325907e-07, |
|
"logits/chosen": -0.5338613986968994, |
|
"logits/rejected": -0.51846843957901, |
|
"logps/chosen": -192.13998413085938, |
|
"logps/rejected": -201.9773406982422, |
|
"loss": 0.6366, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3397431969642639, |
|
"rewards/margins": 0.11168007552623749, |
|
"rewards/rejected": -0.4514232575893402, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23455720663690277, |
|
"grad_norm": 6.011226113877762, |
|
"learning_rate": 4.7287853303477696e-07, |
|
"logits/chosen": -0.5935484170913696, |
|
"logits/rejected": -0.5163929462432861, |
|
"logps/chosen": -191.00587463378906, |
|
"logps/rejected": -213.3096466064453, |
|
"loss": 0.6332, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.40704232454299927, |
|
"rewards/margins": 0.1552998423576355, |
|
"rewards/rejected": -0.5623422265052795, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2452188978476711, |
|
"grad_norm": 6.207154465186514, |
|
"learning_rate": 4.684989818598887e-07, |
|
"logits/chosen": -0.5952309370040894, |
|
"logits/rejected": -0.5275391340255737, |
|
"logps/chosen": -197.7395782470703, |
|
"logps/rejected": -218.7361602783203, |
|
"loss": 0.6101, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.46388697624206543, |
|
"rewards/margins": 0.22530913352966309, |
|
"rewards/rejected": -0.6891961097717285, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2558805890584394, |
|
"grad_norm": 6.116244737357764, |
|
"learning_rate": 4.638152899458579e-07, |
|
"logits/chosen": -0.5706356763839722, |
|
"logits/rejected": -0.5397927165031433, |
|
"logps/chosen": -216.757080078125, |
|
"logps/rejected": -236.5631866455078, |
|
"loss": 0.605, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6336530447006226, |
|
"rewards/margins": 0.19573207199573517, |
|
"rewards/rejected": -0.8293851613998413, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2665422802692077, |
|
"grad_norm": 6.020581764532851, |
|
"learning_rate": 4.588339767808238e-07, |
|
"logits/chosen": -0.6469541788101196, |
|
"logits/rejected": -0.5639356970787048, |
|
"logps/chosen": -242.92431640625, |
|
"logps/rejected": -269.52703857421875, |
|
"loss": 0.5959, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7765325307846069, |
|
"rewards/margins": 0.2900935709476471, |
|
"rewards/rejected": -1.0666261911392212, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.277203971479976, |
|
"grad_norm": 6.365407871842816, |
|
"learning_rate": 4.535619761282988e-07, |
|
"logits/chosen": -0.5723987817764282, |
|
"logits/rejected": -0.4914250373840332, |
|
"logps/chosen": -244.1280517578125, |
|
"logps/rejected": -277.622802734375, |
|
"loss": 0.5884, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8900648355484009, |
|
"rewards/margins": 0.3473728597164154, |
|
"rewards/rejected": -1.2374377250671387, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2878656626907443, |
|
"grad_norm": 6.423134008068168, |
|
"learning_rate": 4.480066263756821e-07, |
|
"logits/chosen": -0.619472086429596, |
|
"logits/rejected": -0.5424299836158752, |
|
"logps/chosen": -261.1055908203125, |
|
"logps/rejected": -299.7402648925781, |
|
"loss": 0.5941, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.0646685361862183, |
|
"rewards/margins": 0.34845179319381714, |
|
"rewards/rejected": -1.4131202697753906, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29852735390151264, |
|
"grad_norm": 7.786460978742034, |
|
"learning_rate": 4.42175660319555e-07, |
|
"logits/chosen": -0.5871402621269226, |
|
"logits/rejected": -0.5535299181938171, |
|
"logps/chosen": -250.9578094482422, |
|
"logps/rejected": -283.0528869628906, |
|
"loss": 0.5764, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.072014570236206, |
|
"rewards/margins": 0.33194950222969055, |
|
"rewards/rejected": -1.4039641618728638, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30918904511228096, |
|
"grad_norm": 7.605992199638712, |
|
"learning_rate": 4.360771944019766e-07, |
|
"logits/chosen": -0.6613143682479858, |
|
"logits/rejected": -0.5987201929092407, |
|
"logps/chosen": -281.7815246582031, |
|
"logps/rejected": -325.2270812988281, |
|
"loss": 0.5605, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3026009798049927, |
|
"rewards/margins": 0.40984100103378296, |
|
"rewards/rejected": -1.7124418020248413, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.3198507363230492, |
|
"grad_norm": 8.64455245115027, |
|
"learning_rate": 4.2971971741276185e-07, |
|
"logits/chosen": -0.5969311594963074, |
|
"logits/rejected": -0.547439694404602, |
|
"logps/chosen": -311.7250061035156, |
|
"logps/rejected": -360.2786560058594, |
|
"loss": 0.572, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.515192985534668, |
|
"rewards/margins": 0.5103713870048523, |
|
"rewards/rejected": -2.025564432144165, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33051242753381754, |
|
"grad_norm": 9.301843749121433, |
|
"learning_rate": 4.2311207867346886e-07, |
|
"logits/chosen": -0.5962031483650208, |
|
"logits/rejected": -0.5288577675819397, |
|
"logps/chosen": -320.295654296875, |
|
"logps/rejected": -374.32415771484375, |
|
"loss": 0.5477, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.6180155277252197, |
|
"rewards/margins": 0.5525304079055786, |
|
"rewards/rejected": -2.170545816421509, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.34117411874458586, |
|
"grad_norm": 10.172478276638168, |
|
"learning_rate": 4.162634757195417e-07, |
|
"logits/chosen": -0.6355341076850891, |
|
"logits/rejected": -0.5586315393447876, |
|
"logps/chosen": -334.57391357421875, |
|
"logps/rejected": -407.7463073730469, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7949445247650146, |
|
"rewards/margins": 0.6658836007118225, |
|
"rewards/rejected": -2.4608283042907715, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3518358099553542, |
|
"grad_norm": 9.778904083871394, |
|
"learning_rate": 4.0918344149775553e-07, |
|
"logits/chosen": -0.630448579788208, |
|
"logits/rejected": -0.5656327605247498, |
|
"logps/chosen": -332.3238220214844, |
|
"logps/rejected": -397.57818603515625, |
|
"loss": 0.5483, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.7869739532470703, |
|
"rewards/margins": 0.6199783086776733, |
|
"rewards/rejected": -2.406952381134033, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.36249750116612245, |
|
"grad_norm": 8.962534096060436, |
|
"learning_rate": 4.018818310967842e-07, |
|
"logits/chosen": -0.6361697912216187, |
|
"logits/rejected": -0.6125262379646301, |
|
"logps/chosen": -340.560791015625, |
|
"logps/rejected": -393.91943359375, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.8884109258651733, |
|
"rewards/margins": 0.5314295887947083, |
|
"rewards/rejected": -2.4198403358459473, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.37315919237689077, |
|
"grad_norm": 9.199829374087676, |
|
"learning_rate": 3.9436880802936067e-07, |
|
"logits/chosen": -0.5958508253097534, |
|
"logits/rejected": -0.5747475624084473, |
|
"logps/chosen": -368.4230651855469, |
|
"logps/rejected": -433.1883850097656, |
|
"loss": 0.5325, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.0544705390930176, |
|
"rewards/margins": 0.663877010345459, |
|
"rewards/rejected": -2.7183475494384766, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3838208835876591, |
|
"grad_norm": 10.70397755088912, |
|
"learning_rate": 3.8665483008512536e-07, |
|
"logits/chosen": -0.6373119950294495, |
|
"logits/rejected": -0.5987192392349243, |
|
"logps/chosen": -372.47662353515625, |
|
"logps/rejected": -454.82769775390625, |
|
"loss": 0.5195, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.168134927749634, |
|
"rewards/margins": 0.7901454567909241, |
|
"rewards/rejected": -2.958280324935913, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3944825747984274, |
|
"grad_norm": 10.679642796728752, |
|
"learning_rate": 3.787506347738538e-07, |
|
"logits/chosen": -0.6430412530899048, |
|
"logits/rejected": -0.6131948232650757, |
|
"logps/chosen": -371.15625, |
|
"logps/rejected": -464.52056884765625, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.154017925262451, |
|
"rewards/margins": 0.9343518018722534, |
|
"rewards/rejected": -3.088369607925415, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.40514426600919573, |
|
"grad_norm": 10.016875889342252, |
|
"learning_rate": 3.706672243793271e-07, |
|
"logits/chosen": -0.7310691475868225, |
|
"logits/rejected": -0.6220592856407166, |
|
"logps/chosen": -401.1075439453125, |
|
"logps/rejected": -499.98480224609375, |
|
"loss": 0.4781, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.3723862171173096, |
|
"rewards/margins": 0.9661838412284851, |
|
"rewards/rejected": -3.3385701179504395, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.415805957219964, |
|
"grad_norm": 9.705697392058743, |
|
"learning_rate": 3.624158506446484e-07, |
|
"logits/chosen": -0.6986671686172485, |
|
"logits/rejected": -0.6336062550544739, |
|
"logps/chosen": -401.28143310546875, |
|
"logps/rejected": -496.3516540527344, |
|
"loss": 0.4983, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.4336769580841064, |
|
"rewards/margins": 0.9512998461723328, |
|
"rewards/rejected": -3.384976863861084, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4264676484307323, |
|
"grad_norm": 11.1924041807584, |
|
"learning_rate": 3.540079991103235e-07, |
|
"logits/chosen": -0.6155360341072083, |
|
"logits/rejected": -0.5795532464981079, |
|
"logps/chosen": -420.99505615234375, |
|
"logps/rejected": -510.29949951171875, |
|
"loss": 0.4957, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.6494202613830566, |
|
"rewards/margins": 0.9207509756088257, |
|
"rewards/rejected": -3.570171356201172, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43712933964150064, |
|
"grad_norm": 11.402392729818219, |
|
"learning_rate": 3.4545537312690557e-07, |
|
"logits/chosen": -0.6502883434295654, |
|
"logits/rejected": -0.5817223787307739, |
|
"logps/chosen": -393.88580322265625, |
|
"logps/rejected": -484.9453125, |
|
"loss": 0.5215, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.4964230060577393, |
|
"rewards/margins": 0.8120796084403992, |
|
"rewards/rejected": -3.308502197265625, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44779103085226896, |
|
"grad_norm": 13.457423856796606, |
|
"learning_rate": 3.367698775644589e-07, |
|
"logits/chosen": -0.6476996541023254, |
|
"logits/rejected": -0.5849811434745789, |
|
"logps/chosen": -425.48779296875, |
|
"logps/rejected": -523.0567626953125, |
|
"loss": 0.5106, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.741436004638672, |
|
"rewards/margins": 0.932793915271759, |
|
"rewards/rejected": -3.674229860305786, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4584527220630373, |
|
"grad_norm": 11.205830566067114, |
|
"learning_rate": 3.279636022415158e-07, |
|
"logits/chosen": -0.6362490653991699, |
|
"logits/rejected": -0.5585761070251465, |
|
"logps/chosen": -406.4499816894531, |
|
"logps/rejected": -513.5891723632812, |
|
"loss": 0.5183, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.5447230339050293, |
|
"rewards/margins": 1.0839515924453735, |
|
"rewards/rejected": -3.628674268722534, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.46911441327380554, |
|
"grad_norm": 11.16271599609029, |
|
"learning_rate": 3.1904880509659394e-07, |
|
"logits/chosen": -0.7270904183387756, |
|
"logits/rejected": -0.6296547651290894, |
|
"logps/chosen": -425.26409912109375, |
|
"logps/rejected": -543.89013671875, |
|
"loss": 0.4671, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.7184805870056152, |
|
"rewards/margins": 1.107802152633667, |
|
"rewards/rejected": -3.8262829780578613, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.47977610448457386, |
|
"grad_norm": 12.78514973294055, |
|
"learning_rate": 3.100378951256981e-07, |
|
"logits/chosen": -0.6514928340911865, |
|
"logits/rejected": -0.617154061794281, |
|
"logps/chosen": -455.8995056152344, |
|
"logps/rejected": -549.4182739257812, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.9396634101867676, |
|
"rewards/margins": 0.8971932530403137, |
|
"rewards/rejected": -3.8368568420410156, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4904377956953422, |
|
"grad_norm": 12.228823390494735, |
|
"learning_rate": 3.0094341510955693e-07, |
|
"logits/chosen": -0.6390506029129028, |
|
"logits/rejected": -0.5878512263298035, |
|
"logps/chosen": -386.823486328125, |
|
"logps/rejected": -473.30084228515625, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.460756778717041, |
|
"rewards/margins": 0.869968593120575, |
|
"rewards/rejected": -3.3307254314422607, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5010994869061105, |
|
"grad_norm": 14.395964703992592, |
|
"learning_rate": 2.917780241546371e-07, |
|
"logits/chosen": -0.724800705909729, |
|
"logits/rejected": -0.6312491297721863, |
|
"logps/chosen": -440.82135009765625, |
|
"logps/rejected": -563.3785400390625, |
|
"loss": 0.5034, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.806361675262451, |
|
"rewards/margins": 1.23042893409729, |
|
"rewards/rejected": -4.036790370941162, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5117611781168788, |
|
"grad_norm": 11.701162608624527, |
|
"learning_rate": 2.825544800722376e-07, |
|
"logits/chosen": -0.678560197353363, |
|
"logits/rejected": -0.5992690324783325, |
|
"logps/chosen": -410.61309814453125, |
|
"logps/rejected": -517.0965576171875, |
|
"loss": 0.5189, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.588188886642456, |
|
"rewards/margins": 1.034708857536316, |
|
"rewards/rejected": -3.6228981018066406, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5224228693276471, |
|
"grad_norm": 10.666345842168104, |
|
"learning_rate": 2.7328562162019057e-07, |
|
"logits/chosen": -0.6738190650939941, |
|
"logits/rejected": -0.6309827566146851, |
|
"logps/chosen": -389.16461181640625, |
|
"logps/rejected": -480.9994201660156, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.460685968399048, |
|
"rewards/margins": 0.908993124961853, |
|
"rewards/rejected": -3.369678497314453, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5330845605384154, |
|
"grad_norm": 12.712988877236741, |
|
"learning_rate": 2.639843506318899e-07, |
|
"logits/chosen": -0.6564850807189941, |
|
"logits/rejected": -0.6236242055892944, |
|
"logps/chosen": -404.89874267578125, |
|
"logps/rejected": -493.0071716308594, |
|
"loss": 0.5048, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.589948892593384, |
|
"rewards/margins": 0.8368066549301147, |
|
"rewards/rejected": -3.426755428314209, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5437462517491837, |
|
"grad_norm": 10.503825338255721, |
|
"learning_rate": 2.546636140575191e-07, |
|
"logits/chosen": -0.7264063358306885, |
|
"logits/rejected": -0.6483162641525269, |
|
"logps/chosen": -374.58258056640625, |
|
"logps/rejected": -508.2911071777344, |
|
"loss": 0.5061, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.2691266536712646, |
|
"rewards/margins": 1.2855161428451538, |
|
"rewards/rejected": -3.554642915725708, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.554407942959952, |
|
"grad_norm": 11.560210733838508, |
|
"learning_rate": 2.453363859424809e-07, |
|
"logits/chosen": -0.6495022773742676, |
|
"logits/rejected": -0.6398875713348389, |
|
"logps/chosen": -407.90216064453125, |
|
"logps/rejected": -494.3495178222656, |
|
"loss": 0.4841, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.5238044261932373, |
|
"rewards/margins": 0.9107331037521362, |
|
"rewards/rejected": -3.434537410736084, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5650696341707203, |
|
"grad_norm": 11.927908855427777, |
|
"learning_rate": 2.3601564936811018e-07, |
|
"logits/chosen": -0.7030381560325623, |
|
"logits/rejected": -0.6639617085456848, |
|
"logps/chosen": -383.1998291015625, |
|
"logps/rejected": -495.85015869140625, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.35768985748291, |
|
"rewards/margins": 1.0254899263381958, |
|
"rewards/rejected": -3.3831799030303955, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5757313253814886, |
|
"grad_norm": 11.968314972913152, |
|
"learning_rate": 2.267143783798094e-07, |
|
"logits/chosen": -0.6847670674324036, |
|
"logits/rejected": -0.6712856292724609, |
|
"logps/chosen": -397.50421142578125, |
|
"logps/rejected": -478.1363220214844, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.4988386631011963, |
|
"rewards/margins": 0.8168338537216187, |
|
"rewards/rejected": -3.3156723976135254, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.586393016592257, |
|
"grad_norm": 14.417251821790718, |
|
"learning_rate": 2.1744551992776244e-07, |
|
"logits/chosen": -0.6841756701469421, |
|
"logits/rejected": -0.6490763425827026, |
|
"logps/chosen": -423.22052001953125, |
|
"logps/rejected": -527.6184692382812, |
|
"loss": 0.5079, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.6250014305114746, |
|
"rewards/margins": 0.9655100107192993, |
|
"rewards/rejected": -3.5905113220214844, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5970547078030253, |
|
"grad_norm": 11.936781827926813, |
|
"learning_rate": 2.0822197584536287e-07, |
|
"logits/chosen": -0.6951079368591309, |
|
"logits/rejected": -0.6226946711540222, |
|
"logps/chosen": -384.56121826171875, |
|
"logps/rejected": -499.79095458984375, |
|
"loss": 0.4781, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4033970832824707, |
|
"rewards/margins": 1.1390013694763184, |
|
"rewards/rejected": -3.542397975921631, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6077163990137936, |
|
"grad_norm": 16.620259019998883, |
|
"learning_rate": 1.9905658489044307e-07, |
|
"logits/chosen": -0.6553946137428284, |
|
"logits/rejected": -0.6309363842010498, |
|
"logps/chosen": -409.9910583496094, |
|
"logps/rejected": -513.8627319335938, |
|
"loss": 0.5193, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.5966861248016357, |
|
"rewards/margins": 0.9828068614006042, |
|
"rewards/rejected": -3.5794930458068848, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6183780902245619, |
|
"grad_norm": 12.0304164250374, |
|
"learning_rate": 1.899621048743019e-07, |
|
"logits/chosen": -0.6362113952636719, |
|
"logits/rejected": -0.6540666222572327, |
|
"logps/chosen": -382.2437438964844, |
|
"logps/rejected": -475.27130126953125, |
|
"loss": 0.5006, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.3609213829040527, |
|
"rewards/margins": 0.9484884142875671, |
|
"rewards/rejected": -3.3094096183776855, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6290397814353301, |
|
"grad_norm": 11.846669016651198, |
|
"learning_rate": 1.8095119490340615e-07, |
|
"logits/chosen": -0.6740937232971191, |
|
"logits/rejected": -0.647221565246582, |
|
"logps/chosen": -396.89208984375, |
|
"logps/rejected": -510.3208923339844, |
|
"loss": 0.4535, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.4332923889160156, |
|
"rewards/margins": 1.1633473634719849, |
|
"rewards/rejected": -3.596639633178711, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6397014726460984, |
|
"grad_norm": 10.787540635532423, |
|
"learning_rate": 1.7203639775848423e-07, |
|
"logits/chosen": -0.6580207347869873, |
|
"logits/rejected": -0.6252058744430542, |
|
"logps/chosen": -402.5466003417969, |
|
"logps/rejected": -513.3468017578125, |
|
"loss": 0.4695, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.515665292739868, |
|
"rewards/margins": 1.1106935739517212, |
|
"rewards/rejected": -3.6263587474823, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6503631638568668, |
|
"grad_norm": 13.5895433661477, |
|
"learning_rate": 1.6323012243554106e-07, |
|
"logits/chosen": -0.7108097672462463, |
|
"logits/rejected": -0.6324597597122192, |
|
"logps/chosen": -427.470458984375, |
|
"logps/rejected": -526.4566650390625, |
|
"loss": 0.4642, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.6723906993865967, |
|
"rewards/margins": 0.9718725085258484, |
|
"rewards/rejected": -3.6442630290985107, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6610248550676351, |
|
"grad_norm": 10.69149258619884, |
|
"learning_rate": 1.5454462687309444e-07, |
|
"logits/chosen": -0.7408244013786316, |
|
"logits/rejected": -0.6759353876113892, |
|
"logps/chosen": -410.1529235839844, |
|
"logps/rejected": -576.83984375, |
|
"loss": 0.4667, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.559839963912964, |
|
"rewards/margins": 1.6155951023101807, |
|
"rewards/rejected": -4.1754350662231445, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6716865462784034, |
|
"grad_norm": 13.414294575112912, |
|
"learning_rate": 1.459920008896765e-07, |
|
"logits/chosen": -0.713616669178009, |
|
"logits/rejected": -0.7023594379425049, |
|
"logps/chosen": -430.0381774902344, |
|
"logps/rejected": -536.2015380859375, |
|
"loss": 0.4657, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.690309762954712, |
|
"rewards/margins": 1.0717895030975342, |
|
"rewards/rejected": -3.762099027633667, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6823482374891717, |
|
"grad_norm": 13.49512905939788, |
|
"learning_rate": 1.3758414935535145e-07, |
|
"logits/chosen": -0.6761552691459656, |
|
"logits/rejected": -0.6508086919784546, |
|
"logps/chosen": -397.4078674316406, |
|
"logps/rejected": -503.7195739746094, |
|
"loss": 0.4694, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.4770684242248535, |
|
"rewards/margins": 1.089545488357544, |
|
"rewards/rejected": -3.5666146278381348, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69300992869994, |
|
"grad_norm": 11.06599328024594, |
|
"learning_rate": 1.2933277562067288e-07, |
|
"logits/chosen": -0.7036711573600769, |
|
"logits/rejected": -0.6898130178451538, |
|
"logps/chosen": -428.58563232421875, |
|
"logps/rejected": -540.5661010742188, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.6999242305755615, |
|
"rewards/margins": 1.1091340780258179, |
|
"rewards/rejected": -3.8090579509735107, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7036716199107084, |
|
"grad_norm": 11.287294541718003, |
|
"learning_rate": 1.212493652261462e-07, |
|
"logits/chosen": -0.6557536721229553, |
|
"logits/rejected": -0.598514974117279, |
|
"logps/chosen": -411.374755859375, |
|
"logps/rejected": -522.1329956054688, |
|
"loss": 0.4572, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.67822003364563, |
|
"rewards/margins": 1.0374858379364014, |
|
"rewards/rejected": -3.715705394744873, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7143333111214767, |
|
"grad_norm": 14.174123718877084, |
|
"learning_rate": 1.1334516991487472e-07, |
|
"logits/chosen": -0.6946598887443542, |
|
"logits/rejected": -0.6765455603599548, |
|
"logps/chosen": -420.7255859375, |
|
"logps/rejected": -525.3436889648438, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.735704183578491, |
|
"rewards/margins": 1.0136221647262573, |
|
"rewards/rejected": -3.749326229095459, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7249950023322449, |
|
"grad_norm": 11.875650925559528, |
|
"learning_rate": 1.0563119197063933e-07, |
|
"logits/chosen": -0.6363321542739868, |
|
"logits/rejected": -0.610489010810852, |
|
"logps/chosen": -430.07135009765625, |
|
"logps/rejected": -546.0662231445312, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.6708502769470215, |
|
"rewards/margins": 1.1354032754898071, |
|
"rewards/rejected": -3.806253433227539, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7356566935430132, |
|
"grad_norm": 12.158844690788914, |
|
"learning_rate": 9.811816890321578e-08, |
|
"logits/chosen": -0.6831678152084351, |
|
"logits/rejected": -0.6622704863548279, |
|
"logps/chosen": -421.2864685058594, |
|
"logps/rejected": -587.9827880859375, |
|
"loss": 0.4211, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -2.720494031906128, |
|
"rewards/margins": 1.634281873703003, |
|
"rewards/rejected": -4.354775905609131, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7463183847537815, |
|
"grad_norm": 20.30568413490793, |
|
"learning_rate": 9.081655850224449e-08, |
|
"logits/chosen": -0.669623851776123, |
|
"logits/rejected": -0.5724581480026245, |
|
"logps/chosen": -424.1244201660156, |
|
"logps/rejected": -568.143798828125, |
|
"loss": 0.4225, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.7207565307617188, |
|
"rewards/margins": 1.3297325372695923, |
|
"rewards/rejected": -4.0504889488220215, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7569800759645499, |
|
"grad_norm": 17.640008025150962, |
|
"learning_rate": 8.37365242804583e-08, |
|
"logits/chosen": -0.6978561878204346, |
|
"logits/rejected": -0.6499579548835754, |
|
"logps/chosen": -466.77337646484375, |
|
"logps/rejected": -663.1705932617188, |
|
"loss": 0.4564, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.1434326171875, |
|
"rewards/margins": 1.8815462589263916, |
|
"rewards/rejected": -5.024979114532471, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7676417671753182, |
|
"grad_norm": 51.21482859691763, |
|
"learning_rate": 7.68879213265311e-08, |
|
"logits/chosen": -0.6487331986427307, |
|
"logits/rejected": -0.5610198378562927, |
|
"logps/chosen": -426.02532958984375, |
|
"logps/rejected": -567.581787109375, |
|
"loss": 0.4593, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.7650089263916016, |
|
"rewards/margins": 1.389169454574585, |
|
"rewards/rejected": -4.154178619384766, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7783034583860865, |
|
"grad_norm": 12.312317231610301, |
|
"learning_rate": 7.028028258723817e-08, |
|
"logits/chosen": -0.6627609729766846, |
|
"logits/rejected": -0.6275384426116943, |
|
"logps/chosen": -420.10638427734375, |
|
"logps/rejected": -541.6509399414062, |
|
"loss": 0.4362, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.699690103530884, |
|
"rewards/margins": 1.2001326084136963, |
|
"rewards/rejected": -3.89982271194458, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7889651495968548, |
|
"grad_norm": 14.824950870173511, |
|
"learning_rate": 6.392280559802341e-08, |
|
"logits/chosen": -0.6717527508735657, |
|
"logits/rejected": -0.6606348752975464, |
|
"logps/chosen": -445.54412841796875, |
|
"logps/rejected": -588.8523559570312, |
|
"loss": 0.461, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.92059588432312, |
|
"rewards/margins": 1.460580825805664, |
|
"rewards/rejected": -4.381176948547363, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7996268408076231, |
|
"grad_norm": 13.6644249310921, |
|
"learning_rate": 5.782433968044495e-08, |
|
"logits/chosen": -0.6819238662719727, |
|
"logits/rejected": -0.638149082660675, |
|
"logps/chosen": -421.51031494140625, |
|
"logps/rejected": -669.2894287109375, |
|
"loss": 0.4489, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.7319085597991943, |
|
"rewards/margins": 2.4759204387664795, |
|
"rewards/rejected": -5.207829475402832, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8102885320183915, |
|
"grad_norm": 13.811424824515617, |
|
"learning_rate": 5.199337362431791e-08, |
|
"logits/chosen": -0.6884719133377075, |
|
"logits/rejected": -0.6233955025672913, |
|
"logps/chosen": -438.57293701171875, |
|
"logps/rejected": -597.1315307617188, |
|
"loss": 0.4523, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.8267288208007812, |
|
"rewards/margins": 1.5069990158081055, |
|
"rewards/rejected": -4.333728313446045, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8209502232291597, |
|
"grad_norm": 12.122140633447449, |
|
"learning_rate": 4.643802387170117e-08, |
|
"logits/chosen": -0.6704726815223694, |
|
"logits/rejected": -0.6620519161224365, |
|
"logps/chosen": -462.6454162597656, |
|
"logps/rejected": -569.0838623046875, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.02994966506958, |
|
"rewards/margins": 1.0704432725906372, |
|
"rewards/rejected": -4.100392818450928, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.831611914439928, |
|
"grad_norm": 13.510721862336407, |
|
"learning_rate": 4.116602321917617e-08, |
|
"logits/chosen": -0.6129786968231201, |
|
"logits/rejected": -0.5863287448883057, |
|
"logps/chosen": -459.5635681152344, |
|
"logps/rejected": -604.2559814453125, |
|
"loss": 0.4647, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.085102081298828, |
|
"rewards/margins": 1.438297986984253, |
|
"rewards/rejected": -4.52340030670166, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8422736056506963, |
|
"grad_norm": 17.09993809361333, |
|
"learning_rate": 3.6184710054142144e-08, |
|
"logits/chosen": -0.69035804271698, |
|
"logits/rejected": -0.6435971260070801, |
|
"logps/chosen": -479.14935302734375, |
|
"logps/rejected": -643.2648315429688, |
|
"loss": 0.4602, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.1109519004821777, |
|
"rewards/margins": 1.6468206644058228, |
|
"rewards/rejected": -4.757771968841553, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8529352968614646, |
|
"grad_norm": 14.20201406251709, |
|
"learning_rate": 3.150101814011136e-08, |
|
"logits/chosen": -0.6618126630783081, |
|
"logits/rejected": -0.6529041528701782, |
|
"logps/chosen": -463.82415771484375, |
|
"logps/rejected": -575.7443237304688, |
|
"loss": 0.5062, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.062734603881836, |
|
"rewards/margins": 1.059270977973938, |
|
"rewards/rejected": -4.122005462646484, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8529352968614646, |
|
"eval_logits/chosen": -0.6989333629608154, |
|
"eval_logits/rejected": -0.6484398245811462, |
|
"eval_logps/chosen": -448.32574462890625, |
|
"eval_logps/rejected": -597.7947998046875, |
|
"eval_loss": 0.4833716154098511, |
|
"eval_rewards/accuracies": 0.7759674191474915, |
|
"eval_rewards/chosen": -2.957282066345215, |
|
"eval_rewards/margins": 1.4963340759277344, |
|
"eval_rewards/rejected": -4.453616142272949, |
|
"eval_runtime": 356.4535, |
|
"eval_samples_per_second": 5.504, |
|
"eval_steps_per_second": 1.377, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.863596988072233, |
|
"grad_norm": 15.05986989475561, |
|
"learning_rate": 2.712146696522305e-08, |
|
"logits/chosen": -0.6149640083312988, |
|
"logits/rejected": -0.5986669659614563, |
|
"logps/chosen": -456.62615966796875, |
|
"logps/rejected": -587.21630859375, |
|
"loss": 0.4581, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.0605359077453613, |
|
"rewards/margins": 1.240896463394165, |
|
"rewards/rejected": -4.3014326095581055, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8742586792830013, |
|
"grad_norm": 17.350625249654133, |
|
"learning_rate": 2.3052152667409287e-08, |
|
"logits/chosen": -0.6955739259719849, |
|
"logits/rejected": -0.6243568658828735, |
|
"logps/chosen": -449.71319580078125, |
|
"logps/rejected": -593.1027221679688, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.006499767303467, |
|
"rewards/margins": 1.3734157085418701, |
|
"rewards/rejected": -4.379915714263916, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8849203704937696, |
|
"grad_norm": 14.462761406291644, |
|
"learning_rate": 1.929873954884581e-08, |
|
"logits/chosen": -0.6829768419265747, |
|
"logits/rejected": -0.6062845587730408, |
|
"logps/chosen": -418.482177734375, |
|
"logps/rejected": -541.1483154296875, |
|
"loss": 0.446, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.7707464694976807, |
|
"rewards/margins": 1.1584304571151733, |
|
"rewards/rejected": -3.9291768074035645, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8955820617045379, |
|
"grad_norm": 14.240832321861784, |
|
"learning_rate": 1.5866452191498486e-08, |
|
"logits/chosen": -0.6378864049911499, |
|
"logits/rejected": -0.5631311535835266, |
|
"logps/chosen": -445.82489013671875, |
|
"logps/rejected": -588.2786865234375, |
|
"loss": 0.4542, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.9222068786621094, |
|
"rewards/margins": 1.373407006263733, |
|
"rewards/rejected": -4.295614242553711, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9062437529153062, |
|
"grad_norm": 14.300875070246876, |
|
"learning_rate": 1.2760068184740597e-08, |
|
"logits/chosen": -0.6629018783569336, |
|
"logits/rejected": -0.6413969993591309, |
|
"logps/chosen": -447.8999938964844, |
|
"logps/rejected": -604.1832275390625, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.8662009239196777, |
|
"rewards/margins": 1.5696779489517212, |
|
"rewards/rejected": -4.435878753662109, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9169054441260746, |
|
"grad_norm": 15.167998348057905, |
|
"learning_rate": 9.983911475163725e-09, |
|
"logits/chosen": -0.6861739754676819, |
|
"logits/rejected": -0.6672254800796509, |
|
"logps/chosen": -467.89996337890625, |
|
"logps/rejected": -618.0469360351562, |
|
"loss": 0.462, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.1228601932525635, |
|
"rewards/margins": 1.485724687576294, |
|
"rewards/rejected": -4.608585357666016, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9275671353368428, |
|
"grad_norm": 14.071697017283158, |
|
"learning_rate": 7.541846347838915e-09, |
|
"logits/chosen": -0.6308220624923706, |
|
"logits/rejected": -0.6001772880554199, |
|
"logps/chosen": -477.44329833984375, |
|
"logps/rejected": -596.0589599609375, |
|
"loss": 0.4843, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.173348903656006, |
|
"rewards/margins": 1.121080756187439, |
|
"rewards/rejected": -4.294429779052734, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9382288265476111, |
|
"grad_norm": 16.08993041357463, |
|
"learning_rate": 5.437272047405711e-09, |
|
"logits/chosen": -0.6392040252685547, |
|
"logits/rejected": -0.6253064870834351, |
|
"logps/chosen": -415.3251953125, |
|
"logps/rejected": -527.3311767578125, |
|
"loss": 0.4948, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.714038848876953, |
|
"rewards/margins": 1.125810146331787, |
|
"rewards/rejected": -3.8398489952087402, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9488905177583794, |
|
"grad_norm": 13.509397089938123, |
|
"learning_rate": 3.673118046477158e-09, |
|
"logits/chosen": -0.6543330550193787, |
|
"logits/rejected": -0.6509405374526978, |
|
"logps/chosen": -456.007080078125, |
|
"logps/rejected": -619.2344360351562, |
|
"loss": 0.4768, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.979337453842163, |
|
"rewards/margins": 1.5961627960205078, |
|
"rewards/rejected": -4.575499534606934, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9595522089691477, |
|
"grad_norm": 14.016464196588691, |
|
"learning_rate": 2.251839967945535e-09, |
|
"logits/chosen": -0.6727645397186279, |
|
"logits/rejected": -0.5936774611473083, |
|
"logps/chosen": -428.5733337402344, |
|
"logps/rejected": -629.4482421875, |
|
"loss": 0.4454, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.7794129848480225, |
|
"rewards/margins": 2.001149892807007, |
|
"rewards/rejected": -4.7805633544921875, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.970213900179916, |
|
"grad_norm": 14.602385442499152, |
|
"learning_rate": 1.1754161668660612e-09, |
|
"logits/chosen": -0.5893079042434692, |
|
"logits/rejected": -0.5978569984436035, |
|
"logps/chosen": -428.67218017578125, |
|
"logps/rejected": -585.7625732421875, |
|
"loss": 0.5192, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.863682985305786, |
|
"rewards/margins": 1.5374996662139893, |
|
"rewards/rejected": -4.401182174682617, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9808755913906844, |
|
"grad_norm": 12.799904592616386, |
|
"learning_rate": 4.453449766758932e-10, |
|
"logits/chosen": -0.7284419536590576, |
|
"logits/rejected": -0.6639989614486694, |
|
"logps/chosen": -453.0536193847656, |
|
"logps/rejected": -603.8924560546875, |
|
"loss": 0.469, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.9768033027648926, |
|
"rewards/margins": 1.4411671161651611, |
|
"rewards/rejected": -4.417970657348633, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9915372826014527, |
|
"grad_norm": 13.505994490232299, |
|
"learning_rate": 6.264262358129935e-11, |
|
"logits/chosen": -0.7227040529251099, |
|
"logits/rejected": -0.6694614887237549, |
|
"logps/chosen": -442.71307373046875, |
|
"logps/rejected": -571.0550537109375, |
|
"loss": 0.474, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.9378533363342285, |
|
"rewards/margins": 1.2195371389389038, |
|
"rewards/rejected": -4.15739107131958, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9979342973279136, |
|
"step": 468, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5391119274064007, |
|
"train_runtime": 25974.5898, |
|
"train_samples_per_second": 2.311, |
|
"train_steps_per_second": 0.018 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 468, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|