|
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9994767137624281,
"eval_steps": 100,
"global_step": 955,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
|
{ |
|
"epoch": 0.0010465724751439038, |
|
"grad_norm": 7.312430627947873, |
|
"learning_rate": 5.208333333333333e-09, |
|
"logits/chosen": -2.6544837951660156, |
|
"logits/rejected": -2.5759358406066895, |
|
"logps/chosen": -101.20021057128906, |
|
"logps/rejected": -85.73662567138672, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010465724751439037, |
|
"grad_norm": 6.798309843145578, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -2.5073227882385254, |
|
"logits/rejected": -2.498267650604248, |
|
"logps/chosen": -76.74188995361328, |
|
"logps/rejected": -68.77124786376953, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.1597222238779068, |
|
"rewards/chosen": -0.00010638780076988041, |
|
"rewards/margins": -0.00026712569524534047, |
|
"rewards/rejected": 0.00016073790902737528, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.020931449502878074, |
|
"grad_norm": 6.328409074575995, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.5329933166503906, |
|
"logits/rejected": -2.5121402740478516, |
|
"logps/chosen": -91.55634307861328, |
|
"logps/rejected": -97.98811340332031, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.26875001192092896, |
|
"rewards/chosen": -8.86881971382536e-05, |
|
"rewards/margins": 0.0001090427249437198, |
|
"rewards/rejected": -0.00019773092935793102, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03139717425431711, |
|
"grad_norm": 5.987591394147146, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -2.6352438926696777, |
|
"logits/rejected": -2.624114513397217, |
|
"logps/chosen": -74.1455078125, |
|
"logps/rejected": -74.52520751953125, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.2562499940395355, |
|
"rewards/chosen": -0.00024412055790890008, |
|
"rewards/margins": 0.00047922172234393656, |
|
"rewards/rejected": -0.0007233422948047519, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04186289900575615, |
|
"grad_norm": 6.813717975556312, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.5477070808410645, |
|
"logits/rejected": -2.495793104171753, |
|
"logps/chosen": -90.96524810791016, |
|
"logps/rejected": -89.30764770507812, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -3.622327858465724e-05, |
|
"rewards/margins": 0.0016909090336412191, |
|
"rewards/rejected": -0.0017271323595196009, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.052328623757195186, |
|
"grad_norm": 6.382358089839322, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -2.5686728954315186, |
|
"logits/rejected": -2.5408482551574707, |
|
"logps/chosen": -76.85763549804688, |
|
"logps/rejected": -78.096923828125, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": 0.002494217362254858, |
|
"rewards/margins": 0.002928710076957941, |
|
"rewards/rejected": -0.00043449303484521806, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 6.142397539571157, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.520242214202881, |
|
"logits/rejected": -2.5160062313079834, |
|
"logps/chosen": -71.42273712158203, |
|
"logps/rejected": -71.63546752929688, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.2874999940395355, |
|
"rewards/chosen": 0.011287200264632702, |
|
"rewards/margins": 0.006170675158500671, |
|
"rewards/rejected": 0.0051165251061320305, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07326007326007326, |
|
"grad_norm": 6.834963526035445, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -2.462669849395752, |
|
"logits/rejected": -2.448659658432007, |
|
"logps/chosen": -72.4560775756836, |
|
"logps/rejected": -89.7001953125, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.019068170338869095, |
|
"rewards/margins": 0.014615567401051521, |
|
"rewards/rejected": 0.0044526029378175735, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0837257980115123, |
|
"grad_norm": 7.385617041745493, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.4439797401428223, |
|
"logits/rejected": -2.4138834476470947, |
|
"logps/chosen": -81.91383361816406, |
|
"logps/rejected": -80.78437042236328, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": 0.00990500207990408, |
|
"rewards/margins": 0.032499730587005615, |
|
"rewards/rejected": -0.02259472757577896, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09419152276295134, |
|
"grad_norm": 8.102390740927866, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -2.3960914611816406, |
|
"logits/rejected": -2.3794617652893066, |
|
"logps/chosen": -73.29556274414062, |
|
"logps/rejected": -76.41487121582031, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.040218498557806015, |
|
"rewards/margins": 0.016615843400359154, |
|
"rewards/rejected": -0.056834347546100616, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10465724751439037, |
|
"grad_norm": 6.755359569975091, |
|
"learning_rate": 4.999732492681437e-07, |
|
"logits/chosen": -2.3556602001190186, |
|
"logits/rejected": -2.3346455097198486, |
|
"logps/chosen": -78.03315734863281, |
|
"logps/rejected": -95.56941223144531, |
|
"loss": 0.6719, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.005892142653465271, |
|
"rewards/margins": 0.07231049239635468, |
|
"rewards/rejected": -0.07820263504981995, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10465724751439037, |
|
"eval_logits/chosen": -2.3486523628234863, |
|
"eval_logits/rejected": -2.3309624195098877, |
|
"eval_logps/chosen": -73.28648376464844, |
|
"eval_logps/rejected": -85.4478530883789, |
|
"eval_loss": 0.6686670184135437, |
|
"eval_rewards/accuracies": 0.3273809552192688, |
|
"eval_rewards/chosen": 0.012047496624290943, |
|
"eval_rewards/margins": 0.05524253472685814, |
|
"eval_rewards/rejected": -0.04319504275918007, |
|
"eval_runtime": 113.7223, |
|
"eval_samples_per_second": 17.587, |
|
"eval_steps_per_second": 0.554, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1151229722658294, |
|
"grad_norm": 13.391566933736375, |
|
"learning_rate": 4.996723692767926e-07, |
|
"logits/chosen": -2.406934976577759, |
|
"logits/rejected": -2.4089908599853516, |
|
"logps/chosen": -80.33895111083984, |
|
"logps/rejected": -93.52415466308594, |
|
"loss": 0.6563, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.034039516001939774, |
|
"rewards/margins": 0.08607066422700882, |
|
"rewards/rejected": -0.1201101765036583, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 9.646857443256218, |
|
"learning_rate": 4.990375746213598e-07, |
|
"logits/chosen": -2.2991995811462402, |
|
"logits/rejected": -2.265392303466797, |
|
"logps/chosen": -76.64207458496094, |
|
"logps/rejected": -94.79474639892578, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.01742006093263626, |
|
"rewards/margins": 0.11232365667819977, |
|
"rewards/rejected": -0.12974372506141663, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1360544217687075, |
|
"grad_norm": 16.35113879818621, |
|
"learning_rate": 4.980697142834314e-07, |
|
"logits/chosen": -2.244642496109009, |
|
"logits/rejected": -2.2259411811828613, |
|
"logps/chosen": -65.43486022949219, |
|
"logps/rejected": -84.06461334228516, |
|
"loss": 0.66, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.04491991177201271, |
|
"rewards/margins": 0.0634341612458229, |
|
"rewards/rejected": -0.10835406929254532, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14652014652014653, |
|
"grad_norm": 17.442951288216843, |
|
"learning_rate": 4.967700826904229e-07, |
|
"logits/chosen": -2.218097686767578, |
|
"logits/rejected": -2.2064428329467773, |
|
"logps/chosen": -109.58231353759766, |
|
"logps/rejected": -120.42280578613281, |
|
"loss": 0.6529, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.18489012122154236, |
|
"rewards/margins": 0.14332745969295502, |
|
"rewards/rejected": -0.3282175660133362, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15698587127158556, |
|
"grad_norm": 21.84882656667421, |
|
"learning_rate": 4.951404179843962e-07, |
|
"logits/chosen": -2.3305749893188477, |
|
"logits/rejected": -2.3431344032287598, |
|
"logps/chosen": -69.88795471191406, |
|
"logps/rejected": -94.39766693115234, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.2874999940395355, |
|
"rewards/chosen": -0.14505597949028015, |
|
"rewards/margins": 0.1382911652326584, |
|
"rewards/rejected": -0.28334707021713257, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1674515960230246, |
|
"grad_norm": 13.04813612493708, |
|
"learning_rate": 4.931828996974498e-07, |
|
"logits/chosen": -2.0861306190490723, |
|
"logits/rejected": -2.0900378227233887, |
|
"logps/chosen": -87.21963500976562, |
|
"logps/rejected": -114.88661193847656, |
|
"loss": 0.6528, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.13326936960220337, |
|
"rewards/margins": 0.1332504153251648, |
|
"rewards/rejected": -0.26651981472969055, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17791732077446362, |
|
"grad_norm": 14.778101043613702, |
|
"learning_rate": 4.909001458367866e-07, |
|
"logits/chosen": -1.9173187017440796, |
|
"logits/rejected": -1.8886661529541016, |
|
"logps/chosen": -81.811767578125, |
|
"logps/rejected": -101.0234375, |
|
"loss": 0.634, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.16403724253177643, |
|
"rewards/margins": 0.19587047398090363, |
|
"rewards/rejected": -0.35990768671035767, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 26.424629291070275, |
|
"learning_rate": 4.882952093833627e-07, |
|
"logits/chosen": -1.3328689336776733, |
|
"logits/rejected": -1.3415606021881104, |
|
"logps/chosen": -121.7206039428711, |
|
"logps/rejected": -163.23861694335938, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.4643549919128418, |
|
"rewards/margins": 0.2982317805290222, |
|
"rewards/rejected": -0.7625867128372192, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1988487702773417, |
|
"grad_norm": 18.286232555939137, |
|
"learning_rate": 4.853715742087946e-07, |
|
"logits/chosen": -1.299851655960083, |
|
"logits/rejected": -1.2263530492782593, |
|
"logps/chosen": -130.7896270751953, |
|
"logps/rejected": -153.83375549316406, |
|
"loss": 0.6187, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.5220120549201965, |
|
"rewards/margins": 0.28233885765075684, |
|
"rewards/rejected": -0.8043509721755981, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.20931449502878074, |
|
"grad_norm": 22.532611448739736, |
|
"learning_rate": 4.821331504159906e-07, |
|
"logits/chosen": -1.2297742366790771, |
|
"logits/rejected": -1.1969741582870483, |
|
"logps/chosen": -117.6633071899414, |
|
"logps/rejected": -130.67193603515625, |
|
"loss": 0.6488, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.3429523706436157, |
|
"rewards/margins": 0.20222480595111847, |
|
"rewards/rejected": -0.545177161693573, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.20931449502878074, |
|
"eval_logits/chosen": -1.2583706378936768, |
|
"eval_logits/rejected": -1.23958420753479, |
|
"eval_logps/chosen": -102.57432556152344, |
|
"eval_logps/rejected": -130.3724822998047, |
|
"eval_loss": 0.634810745716095, |
|
"eval_rewards/accuracies": 0.3373015820980072, |
|
"eval_rewards/chosen": -0.2808309495449066, |
|
"eval_rewards/margins": 0.21161039173603058, |
|
"eval_rewards/rejected": -0.492441326379776, |
|
"eval_runtime": 113.6607, |
|
"eval_samples_per_second": 17.596, |
|
"eval_steps_per_second": 0.554, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"grad_norm": 43.96668983794973, |
|
"learning_rate": 4.785842691097342e-07, |
|
"logits/chosen": -1.166576623916626, |
|
"logits/rejected": -1.0878881216049194, |
|
"logps/chosen": -102.22654724121094, |
|
"logps/rejected": -119.87858581542969, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.30706560611724854, |
|
"rewards/margins": 0.2078002393245697, |
|
"rewards/rejected": -0.5148658752441406, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.2302459445316588, |
|
"grad_norm": 21.60611363424848, |
|
"learning_rate": 4.7472967660421603e-07, |
|
"logits/chosen": -1.4838167428970337, |
|
"logits/rejected": -1.293268084526062, |
|
"logps/chosen": -140.9451446533203, |
|
"logps/rejected": -159.35256958007812, |
|
"loss": 0.6317, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.40442484617233276, |
|
"rewards/margins": 0.2767654359340668, |
|
"rewards/rejected": -0.6811902523040771, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24071166928309787, |
|
"grad_norm": 23.269156895827596, |
|
"learning_rate": 4.705745280752585e-07, |
|
"logits/chosen": -1.360938310623169, |
|
"logits/rejected": -1.2307772636413574, |
|
"logps/chosen": -92.72604370117188, |
|
"logps/rejected": -110.38468933105469, |
|
"loss": 0.6411, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.3325788080692291, |
|
"rewards/margins": 0.19615355134010315, |
|
"rewards/rejected": -0.5287323594093323, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 32.70987860826756, |
|
"learning_rate": 4.6612438066572555e-07, |
|
"logits/chosen": -1.1462054252624512, |
|
"logits/rejected": -0.9243119359016418, |
|
"logps/chosen": -129.86032104492188, |
|
"logps/rejected": -173.3503875732422, |
|
"loss": 0.6198, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.4205262064933777, |
|
"rewards/margins": 0.42327141761779785, |
|
"rewards/rejected": -0.8437975645065308, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2616431187859759, |
|
"grad_norm": 18.56295517177359, |
|
"learning_rate": 4.6138518605333664e-07, |
|
"logits/chosen": -0.9204837083816528, |
|
"logits/rejected": -0.8685296177864075, |
|
"logps/chosen": -83.56452178955078, |
|
"logps/rejected": -112.06929779052734, |
|
"loss": 0.6227, |
|
"rewards/accuracies": 0.24375000596046448, |
|
"rewards/chosen": -0.30188173055648804, |
|
"rewards/margins": 0.20583298802375793, |
|
"rewards/rejected": -0.5077147483825684, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 24.278208900563758, |
|
"learning_rate": 4.5636328249082514e-07, |
|
"logits/chosen": -0.5377733111381531, |
|
"logits/rejected": -0.28726479411125183, |
|
"logps/chosen": -126.16239166259766, |
|
"logps/rejected": -144.44386291503906, |
|
"loss": 0.6132, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.43910473585128784, |
|
"rewards/margins": 0.27166199684143066, |
|
"rewards/rejected": -0.7107667922973633, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.282574568288854, |
|
"grad_norm": 33.830045643602666, |
|
"learning_rate": 4.510653863290871e-07, |
|
"logits/chosen": -0.43365517258644104, |
|
"logits/rejected": -0.23527593910694122, |
|
"logps/chosen": -127.06233215332031, |
|
"logps/rejected": -150.31356811523438, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.4495798647403717, |
|
"rewards/margins": 0.3023374378681183, |
|
"rewards/rejected": -0.7519172430038452, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29304029304029305, |
|
"grad_norm": 24.405765366439173, |
|
"learning_rate": 4.4549858303465737e-07, |
|
"logits/chosen": -0.9159714579582214, |
|
"logits/rejected": -0.717955470085144, |
|
"logps/chosen": -109.26994323730469, |
|
"logps/rejected": -139.60409545898438, |
|
"loss": 0.622, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.3430403769016266, |
|
"rewards/margins": 0.24182644486427307, |
|
"rewards/rejected": -0.5848668217658997, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3035060177917321, |
|
"grad_norm": 18.88355766247808, |
|
"learning_rate": 4.396703177135261e-07, |
|
"logits/chosen": -1.3694268465042114, |
|
"logits/rejected": -1.1670420169830322, |
|
"logps/chosen": -97.29615783691406, |
|
"logps/rejected": -123.79087829589844, |
|
"loss": 0.6219, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.3448092043399811, |
|
"rewards/margins": 0.27321183681488037, |
|
"rewards/rejected": -0.6180210709571838, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 24.161262620892497, |
|
"learning_rate": 4.335883851539693e-07, |
|
"logits/chosen": -1.5254216194152832, |
|
"logits/rejected": -1.4045015573501587, |
|
"logps/chosen": -147.19302368164062, |
|
"logps/rejected": -174.0820770263672, |
|
"loss": 0.6331, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.5355431437492371, |
|
"rewards/margins": 0.2548714876174927, |
|
"rewards/rejected": -0.7904146313667297, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"eval_logits/chosen": -1.1872740983963013, |
|
"eval_logits/rejected": -1.0319762229919434, |
|
"eval_logps/chosen": -120.13069915771484, |
|
"eval_logps/rejected": -157.09765625, |
|
"eval_loss": 0.6194990277290344, |
|
"eval_rewards/accuracies": 0.3452380895614624, |
|
"eval_rewards/chosen": -0.45639467239379883, |
|
"eval_rewards/margins": 0.3032984435558319, |
|
"eval_rewards/rejected": -0.7596930265426636, |
|
"eval_runtime": 113.7203, |
|
"eval_samples_per_second": 17.587, |
|
"eval_steps_per_second": 0.554, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32443746729461015, |
|
"grad_norm": 27.882854283662915, |
|
"learning_rate": 4.272609194017105e-07, |
|
"logits/chosen": -0.8721631765365601, |
|
"logits/rejected": -0.47974568605422974, |
|
"logps/chosen": -142.952392578125, |
|
"logps/rejected": -164.13180541992188, |
|
"loss": 0.6108, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.44492608308792114, |
|
"rewards/margins": 0.36958834528923035, |
|
"rewards/rejected": -0.8145144581794739, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3349031920460492, |
|
"grad_norm": 40.79438243043005, |
|
"learning_rate": 4.2069638288135547e-07, |
|
"logits/chosen": 0.030854111537337303, |
|
"logits/rejected": 0.30916082859039307, |
|
"logps/chosen": -143.35768127441406, |
|
"logps/rejected": -215.4879608154297, |
|
"loss": 0.646, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.6657307147979736, |
|
"rewards/margins": 0.5068569779396057, |
|
"rewards/rejected": -1.1725876331329346, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.3453689167974882, |
|
"grad_norm": 28.74932802994849, |
|
"learning_rate": 4.139035550786494e-07, |
|
"logits/chosen": 0.10449258983135223, |
|
"logits/rejected": 0.19263358414173126, |
|
"logps/chosen": -125.71956634521484, |
|
"logps/rejected": -157.4947509765625, |
|
"loss": 0.6184, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.5608196258544922, |
|
"rewards/margins": 0.2601728141307831, |
|
"rewards/rejected": -0.8209924697875977, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35583464154892724, |
|
"grad_norm": 20.26835641704999, |
|
"learning_rate": 4.0689152079869306e-07, |
|
"logits/chosen": -0.6976083517074585, |
|
"logits/rejected": -0.4943923354148865, |
|
"logps/chosen": -127.49371337890625, |
|
"logps/rejected": -158.2890625, |
|
"loss": 0.6271, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.5544435977935791, |
|
"rewards/margins": 0.3053310215473175, |
|
"rewards/rejected": -0.8597745895385742, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3663003663003663, |
|
"grad_norm": 27.63474707817879, |
|
"learning_rate": 3.99669658015821e-07, |
|
"logits/chosen": -0.6875920295715332, |
|
"logits/rejected": -0.5427245497703552, |
|
"logps/chosen": -149.16458129882812, |
|
"logps/rejected": -175.6387481689453, |
|
"loss": 0.6043, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.6470784544944763, |
|
"rewards/margins": 0.2719033360481262, |
|
"rewards/rejected": -0.9189817309379578, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 30.4860863672758, |
|
"learning_rate": 3.92247625331392e-07, |
|
"logits/chosen": 0.019889334216713905, |
|
"logits/rejected": 0.449519544839859, |
|
"logps/chosen": -158.36898803710938, |
|
"logps/rejected": -190.15274047851562, |
|
"loss": 0.5979, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.6107655763626099, |
|
"rewards/margins": 0.40659332275390625, |
|
"rewards/rejected": -1.0173588991165161, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3872318158032444, |
|
"grad_norm": 28.105949004143817, |
|
"learning_rate": 3.846353490562664e-07, |
|
"logits/chosen": 0.23578917980194092, |
|
"logits/rejected": 0.578147292137146, |
|
"logps/chosen": -144.15545654296875, |
|
"logps/rejected": -183.06863403320312, |
|
"loss": 0.6068, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.577487587928772, |
|
"rewards/margins": 0.37577182054519653, |
|
"rewards/rejected": -0.9532594680786133, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3976975405546834, |
|
"grad_norm": 19.345534639045226, |
|
"learning_rate": 3.768430099352445e-07, |
|
"logits/chosen": -0.3674705922603607, |
|
"logits/rejected": 0.07584401965141296, |
|
"logps/chosen": -130.8544464111328, |
|
"logps/rejected": -168.41697692871094, |
|
"loss": 0.6092, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.49021464586257935, |
|
"rewards/margins": 0.39871546626091003, |
|
"rewards/rejected": -0.8889301419258118, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 29.65852362420373, |
|
"learning_rate": 3.6888102953122304e-07, |
|
"logits/chosen": -0.23846562206745148, |
|
"logits/rejected": -0.008897816762328148, |
|
"logps/chosen": -138.2523956298828, |
|
"logps/rejected": -194.8299102783203, |
|
"loss": 0.6241, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.5844453573226929, |
|
"rewards/margins": 0.4089936316013336, |
|
"rewards/rejected": -0.9934390187263489, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4186289900575615, |
|
"grad_norm": 42.485008339554724, |
|
"learning_rate": 3.607600562872785e-07, |
|
"logits/chosen": 0.0847388356924057, |
|
"logits/rejected": 0.3739756643772125, |
|
"logps/chosen": -133.99862670898438, |
|
"logps/rejected": -157.2317352294922, |
|
"loss": 0.6321, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.6070858836174011, |
|
"rewards/margins": 0.2605039179325104, |
|
"rewards/rejected": -0.8675897717475891, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4186289900575615, |
|
"eval_logits/chosen": 0.03354182466864586, |
|
"eval_logits/rejected": 0.37280067801475525, |
|
"eval_logps/chosen": -146.9637451171875, |
|
"eval_logps/rejected": -190.27566528320312, |
|
"eval_loss": 0.6099374294281006, |
|
"eval_rewards/accuracies": 0.363095223903656, |
|
"eval_rewards/chosen": -0.7247251272201538, |
|
"eval_rewards/margins": 0.36674803495407104, |
|
"eval_rewards/rejected": -1.0914732217788696, |
|
"eval_runtime": 113.6653, |
|
"eval_samples_per_second": 17.596, |
|
"eval_steps_per_second": 0.554, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4290947148090005, |
|
"grad_norm": 17.771390083818016, |
|
"learning_rate": 3.5249095128531856e-07, |
|
"logits/chosen": -0.3344365656375885, |
|
"logits/rejected": -0.13601410388946533, |
|
"logps/chosen": -168.97885131835938, |
|
"logps/rejected": -211.15029907226562, |
|
"loss": 0.6274, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.8895459175109863, |
|
"rewards/margins": 0.2965359091758728, |
|
"rewards/rejected": -1.186081886291504, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 29.30170184710936, |
|
"learning_rate": 3.4408477372034736e-07, |
|
"logits/chosen": -0.6253395080566406, |
|
"logits/rejected": -0.4562221169471741, |
|
"logps/chosen": -117.9905014038086, |
|
"logps/rejected": -139.88853454589844, |
|
"loss": 0.6305, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.49884548783302307, |
|
"rewards/margins": 0.21792730689048767, |
|
"rewards/rejected": -0.7167727947235107, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4500261643118786, |
|
"grad_norm": 19.52934749077977, |
|
"learning_rate": 3.3555276610977276e-07, |
|
"logits/chosen": 0.060841239988803864, |
|
"logits/rejected": 0.28291866183280945, |
|
"logps/chosen": -119.32076263427734, |
|
"logps/rejected": -161.02288818359375, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.5378307104110718, |
|
"rewards/margins": 0.3328271806240082, |
|
"rewards/rejected": -0.8706579208374023, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.4604918890633176, |
|
"grad_norm": 28.977327104522658, |
|
"learning_rate": 3.269063392575352e-07, |
|
"logits/chosen": 0.562627911567688, |
|
"logits/rejected": 0.706725001335144, |
|
"logps/chosen": -145.38662719726562, |
|
"logps/rejected": -177.89791870117188, |
|
"loss": 0.6155, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.7169743776321411, |
|
"rewards/margins": 0.19164128601551056, |
|
"rewards/rejected": -0.9086155891418457, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.47095761381475665, |
|
"grad_norm": 21.22082345177454, |
|
"learning_rate": 3.1815705699316964e-07, |
|
"logits/chosen": 0.6601327657699585, |
|
"logits/rejected": 0.9368169903755188, |
|
"logps/chosen": -159.88311767578125, |
|
"logps/rejected": -201.655517578125, |
|
"loss": 0.6175, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.6956228017807007, |
|
"rewards/margins": 0.4413565993309021, |
|
"rewards/rejected": -1.1369794607162476, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48142333856619574, |
|
"grad_norm": 19.35642436173428, |
|
"learning_rate": 3.0931662070620794e-07, |
|
"logits/chosen": 0.47756925225257874, |
|
"logits/rejected": 1.0075833797454834, |
|
"logps/chosen": -136.54437255859375, |
|
"logps/rejected": -176.9443359375, |
|
"loss": 0.6108, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.6124375462532043, |
|
"rewards/margins": 0.4467083811759949, |
|
"rewards/rejected": -1.0591459274291992, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49188906331763477, |
|
"grad_norm": 35.6743715566195, |
|
"learning_rate": 3.003968536966078e-07, |
|
"logits/chosen": 0.9841750264167786, |
|
"logits/rejected": 1.1566669940948486, |
|
"logps/chosen": -134.76565551757812, |
|
"logps/rejected": -183.22018432617188, |
|
"loss": 0.6033, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.6426252722740173, |
|
"rewards/margins": 0.34931105375289917, |
|
"rewards/rejected": -0.9919363856315613, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 31.355461154744457, |
|
"learning_rate": 2.9140968536213693e-07, |
|
"logits/chosen": 1.8672128915786743, |
|
"logits/rejected": 2.3499321937561035, |
|
"logps/chosen": -142.2679443359375, |
|
"logps/rejected": -197.79867553710938, |
|
"loss": 0.6029, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.6666483879089355, |
|
"rewards/margins": 0.5482696890830994, |
|
"rewards/rejected": -1.2149180173873901, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 39.792642390254535, |
|
"learning_rate": 2.823671352438608e-07, |
|
"logits/chosen": 2.002504825592041, |
|
"logits/rejected": 2.7407174110412598, |
|
"logps/chosen": -147.71644592285156, |
|
"logps/rejected": -184.10256958007812, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.5607318878173828, |
|
"rewards/margins": 0.49417656660079956, |
|
"rewards/rejected": -1.0549085140228271, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5232862375719518, |
|
"grad_norm": 19.501719693409513, |
|
"learning_rate": 2.73281296951072e-07, |
|
"logits/chosen": 2.3184399604797363, |
|
"logits/rejected": 2.6984035968780518, |
|
"logps/chosen": -179.01693725585938, |
|
"logps/rejected": -221.22781372070312, |
|
"loss": 0.6318, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.8433685302734375, |
|
"rewards/margins": 0.404899924993515, |
|
"rewards/rejected": -1.248268485069275, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5232862375719518, |
|
"eval_logits/chosen": 2.6547250747680664, |
|
"eval_logits/rejected": 2.9545063972473145, |
|
"eval_logps/chosen": -155.49295043945312, |
|
"eval_logps/rejected": -204.6371307373047, |
|
"eval_loss": 0.6104578375816345, |
|
"eval_rewards/accuracies": 0.3551587164402008, |
|
"eval_rewards/chosen": -0.8100170493125916, |
|
"eval_rewards/margins": 0.4250708818435669, |
|
"eval_rewards/rejected": -1.2350879907608032, |
|
"eval_runtime": 113.5938, |
|
"eval_samples_per_second": 17.607, |
|
"eval_steps_per_second": 0.555, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.533751962323391, |
|
"grad_norm": 23.676894445603228, |
|
"learning_rate": 2.641643219871597e-07, |
|
"logits/chosen": 2.713271379470825, |
|
"logits/rejected": 2.843205213546753, |
|
"logps/chosen": -130.48731994628906, |
|
"logps/rejected": -174.28225708007812, |
|
"loss": 0.6232, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.6893380284309387, |
|
"rewards/margins": 0.37240949273109436, |
|
"rewards/rejected": -1.061747431755066, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54421768707483, |
|
"grad_norm": 35.84824398777263, |
|
"learning_rate": 2.550284034980507e-07, |
|
"logits/chosen": 2.0955018997192383, |
|
"logits/rejected": 2.464780330657959, |
|
"logps/chosen": -167.99371337890625, |
|
"logps/rejected": -198.072021484375, |
|
"loss": 0.6346, |
|
"rewards/accuracies": 0.26875001192092896, |
|
"rewards/chosen": -0.9579731822013855, |
|
"rewards/margins": 0.2499997913837433, |
|
"rewards/rejected": -1.2079728841781616, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.554683411826269, |
|
"grad_norm": 19.326207651996775, |
|
"learning_rate": 2.4588575996495794e-07, |
|
"logits/chosen": 1.578254222869873, |
|
"logits/rejected": 1.7954685688018799, |
|
"logps/chosen": -124.0101547241211, |
|
"logps/rejected": -157.9552764892578, |
|
"loss": 0.6063, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.6386234760284424, |
|
"rewards/margins": 0.3164999485015869, |
|
"rewards/rejected": -0.9551234245300293, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 25.47159513000541, |
|
"learning_rate": 2.367486188632446e-07, |
|
"logits/chosen": 1.5957086086273193, |
|
"logits/rejected": 1.9952272176742554, |
|
"logps/chosen": -136.2624053955078, |
|
"logps/rejected": -160.97589111328125, |
|
"loss": 0.6202, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.6916071772575378, |
|
"rewards/margins": 0.2573884129524231, |
|
"rewards/rejected": -0.9489954710006714, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5756148613291471, |
|
"grad_norm": 27.189003326527832, |
|
"learning_rate": 2.276292003092593e-07, |
|
"logits/chosen": 0.5969494581222534, |
|
"logits/rejected": 1.1224400997161865, |
|
"logps/chosen": -165.4878387451172, |
|
"logps/rejected": -201.8157958984375, |
|
"loss": 0.6072, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.7528584599494934, |
|
"rewards/margins": 0.4292448163032532, |
|
"rewards/rejected": -1.1821032762527466, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5860805860805861, |
|
"grad_norm": 19.01493291730421, |
|
"learning_rate": 2.185397007170141e-07, |
|
"logits/chosen": 0.5469252467155457, |
|
"logits/rejected": 0.9992968440055847, |
|
"logps/chosen": -109.37480163574219, |
|
"logps/rejected": -158.11180114746094, |
|
"loss": 0.607, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.5237552523612976, |
|
"rewards/margins": 0.4429488778114319, |
|
"rewards/rejected": -0.9667040705680847, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5965463108320251, |
|
"grad_norm": 34.53884646430518, |
|
"learning_rate": 2.094922764865619e-07, |
|
"logits/chosen": 0.47024235129356384, |
|
"logits/rejected": 1.3419710397720337, |
|
"logps/chosen": -152.4022674560547, |
|
"logps/rejected": -181.06829833984375, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.6253000497817993, |
|
"rewards/margins": 0.45260563492774963, |
|
"rewards/rejected": -1.0779056549072266, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6070120355834642, |
|
"grad_norm": 23.195476547368756, |
|
"learning_rate": 2.0049902774588797e-07, |
|
"logits/chosen": 0.7025114297866821, |
|
"logits/rejected": 1.4946035146713257, |
|
"logps/chosen": -155.9736328125, |
|
"logps/rejected": -197.6754150390625, |
|
"loss": 0.6174, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.7866873741149902, |
|
"rewards/margins": 0.5116966366767883, |
|
"rewards/rejected": -1.2983839511871338, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6174777603349032, |
|
"grad_norm": 28.28948495055075, |
|
"learning_rate": 1.9157198216806238e-07, |
|
"logits/chosen": 1.2547266483306885, |
|
"logits/rejected": 1.6798299551010132, |
|
"logps/chosen": -143.81948852539062, |
|
"logps/rejected": -180.07110595703125, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.7290927171707153, |
|
"rewards/margins": 0.344825804233551, |
|
"rewards/rejected": -1.0739185810089111, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 26.475084176869974, |
|
"learning_rate": 1.8272307888529274e-07, |
|
"logits/chosen": 1.0473191738128662, |
|
"logits/rejected": 1.4664316177368164, |
|
"logps/chosen": -151.5343017578125, |
|
"logps/rejected": -203.01600646972656, |
|
"loss": 0.5978, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.8304673433303833, |
|
"rewards/margins": 0.4478435516357422, |
|
"rewards/rejected": -1.278310775756836, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"eval_logits/chosen": 0.9605558514595032, |
|
"eval_logits/rejected": 1.442029356956482, |
|
"eval_logps/chosen": -147.85601806640625, |
|
"eval_logps/rejected": -199.51206970214844, |
|
"eval_loss": 0.6014743447303772, |
|
"eval_rewards/accuracies": 0.3591269850730896, |
|
"eval_rewards/chosen": -0.733647882938385, |
|
"eval_rewards/margins": 0.4501895010471344, |
|
"eval_rewards/rejected": -1.1838374137878418, |
|
"eval_runtime": 113.6375, |
|
"eval_samples_per_second": 17.6, |
|
"eval_steps_per_second": 0.554, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6384092098377813, |
|
"grad_norm": 24.64809094353209, |
|
"learning_rate": 1.7396415252139288e-07, |
|
"logits/chosen": 0.9734107255935669, |
|
"logits/rejected": 1.4407756328582764, |
|
"logps/chosen": -136.4254150390625, |
|
"logps/rejected": -157.22183227539062, |
|
"loss": 0.6155, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.6510840654373169, |
|
"rewards/margins": 0.3478087782859802, |
|
"rewards/rejected": -0.9988927841186523, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6488749345892203, |
|
"grad_norm": 24.74151564246123, |
|
"learning_rate": 1.6530691736402316e-07, |
|
"logits/chosen": 0.4937843680381775, |
|
"logits/rejected": 0.9153006672859192, |
|
"logps/chosen": -137.00144958496094, |
|
"logps/rejected": -177.6410675048828, |
|
"loss": 0.6073, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.725879967212677, |
|
"rewards/margins": 0.3728798031806946, |
|
"rewards/rejected": -1.0987598896026611, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6593406593406593, |
|
"grad_norm": 22.938989151746902, |
|
"learning_rate": 1.5676295169786864e-07, |
|
"logits/chosen": 0.19430339336395264, |
|
"logits/rejected": 0.6654868721961975, |
|
"logps/chosen": -176.47686767578125, |
|
"logps/rejected": -213.91622924804688, |
|
"loss": 0.5789, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.7090758085250854, |
|
"rewards/margins": 0.45301565527915955, |
|
"rewards/rejected": -1.1620914936065674, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6698063840920984, |
|
"grad_norm": 43.50900642344418, |
|
"learning_rate": 1.483436823197092e-07, |
|
"logits/chosen": 0.7957710027694702, |
|
"logits/rejected": 1.4320136308670044, |
|
"logps/chosen": -182.98187255859375, |
|
"logps/rejected": -233.4198455810547, |
|
"loss": 0.5792, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.872964084148407, |
|
"rewards/margins": 0.5010480284690857, |
|
"rewards/rejected": -1.3740123510360718, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 24.400296552508813, |
|
"learning_rate": 1.4006036925609243e-07, |
|
"logits/chosen": 1.008284330368042, |
|
"logits/rejected": 1.555418848991394, |
|
"logps/chosen": -128.50022888183594, |
|
"logps/rejected": -170.05349731445312, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.5598582029342651, |
|
"rewards/margins": 0.4066081643104553, |
|
"rewards/rejected": -0.9664663076400757, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 31.773454484895552, |
|
"learning_rate": 1.319240907040458e-07, |
|
"logits/chosen": 0.5741680860519409, |
|
"logits/rejected": 1.095399022102356, |
|
"logps/chosen": -144.4388427734375, |
|
"logps/rejected": -190.87571716308594, |
|
"loss": 0.6023, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.5422704815864563, |
|
"rewards/margins": 0.48283880949020386, |
|
"rewards/rejected": -1.0251094102859497, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7012035583464155, |
|
"grad_norm": 37.13339246311252, |
|
"learning_rate": 1.239457282149695e-07, |
|
"logits/chosen": 0.8501984477043152, |
|
"logits/rejected": 1.152748942375183, |
|
"logps/chosen": -102.63143157958984, |
|
"logps/rejected": -154.28704833984375, |
|
"loss": 0.5949, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.4462759494781494, |
|
"rewards/margins": 0.41997307538986206, |
|
"rewards/rejected": -0.8662489652633667, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7116692830978545, |
|
"grad_norm": 32.56523005952964, |
|
"learning_rate": 1.1613595214152711e-07, |
|
"logits/chosen": 1.335402250289917, |
|
"logits/rejected": 1.811517357826233, |
|
"logps/chosen": -135.314453125, |
|
"logps/rejected": -198.50515747070312, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.587442934513092, |
|
"rewards/margins": 0.5670086145401001, |
|
"rewards/rejected": -1.154451608657837, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7221350078492935, |
|
"grad_norm": 23.014936124623496, |
|
"learning_rate": 1.0850520736699362e-07, |
|
"logits/chosen": 0.7806999683380127, |
|
"logits/rejected": 1.3070814609527588, |
|
"logps/chosen": -168.81216430664062, |
|
"logps/rejected": -202.5950469970703, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.7739877700805664, |
|
"rewards/margins": 0.40968823432922363, |
|
"rewards/rejected": -1.18367600440979, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"grad_norm": 26.764323919821496, |
|
"learning_rate": 1.0106369933615042e-07, |
|
"logits/chosen": 0.9588180780410767, |
|
"logits/rejected": 1.4978833198547363, |
|
"logps/chosen": -165.7656707763672, |
|
"logps/rejected": -204.122802734375, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.9200434684753418, |
|
"rewards/margins": 0.37314558029174805, |
|
"rewards/rejected": -1.2931890487670898, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"eval_logits/chosen": 1.1833491325378418, |
|
"eval_logits/rejected": 1.7187780141830444, |
|
"eval_logps/chosen": -150.68544006347656, |
|
"eval_logps/rejected": -204.91946411132812, |
|
"eval_loss": 0.5986347794532776, |
|
"eval_rewards/accuracies": 0.3650793731212616, |
|
"eval_rewards/chosen": -0.7619420289993286, |
|
"eval_rewards/margins": 0.4759688675403595, |
|
"eval_rewards/rejected": -1.2379108667373657, |
|
"eval_runtime": 113.6638, |
|
"eval_samples_per_second": 17.596, |
|
"eval_steps_per_second": 0.554, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7430664573521716, |
|
"grad_norm": 27.404707816356066, |
|
"learning_rate": 9.382138040640714e-08, |
|
"logits/chosen": 0.7452703714370728, |
|
"logits/rejected": 1.4386818408966064, |
|
"logps/chosen": -175.61923217773438, |
|
"logps/rejected": -214.57119750976562, |
|
"loss": 0.618, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.852981686592102, |
|
"rewards/margins": 0.44363918900489807, |
|
"rewards/rejected": -1.2966209650039673, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 27.03589930657382, |
|
"learning_rate": 8.678793653740632e-08, |
|
"logits/chosen": 1.3341294527053833, |
|
"logits/rejected": 1.880934476852417, |
|
"logps/chosen": -154.9960174560547, |
|
"logps/rejected": -193.92404174804688, |
|
"loss": 0.5852, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.8138143420219421, |
|
"rewards/margins": 0.4843239188194275, |
|
"rewards/rejected": -1.2981382608413696, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7639979068550498, |
|
"grad_norm": 19.503526282992237, |
|
"learning_rate": 7.997277433690983e-08, |
|
"logits/chosen": 1.2488057613372803, |
|
"logits/rejected": 1.543897271156311, |
|
"logps/chosen": -179.2456817626953, |
|
"logps/rejected": -218.6804656982422, |
|
"loss": 0.6017, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.9957185983657837, |
|
"rewards/margins": 0.3561645448207855, |
|
"rewards/rejected": -1.3518832921981812, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7744636316064888, |
|
"grad_norm": 24.561492093850955, |
|
"learning_rate": 7.338500848029602e-08, |
|
"logits/chosen": 0.7433587908744812, |
|
"logits/rejected": 1.235414981842041, |
|
"logps/chosen": -166.8399200439453, |
|
"logps/rejected": -215.26876831054688, |
|
"loss": 0.6178, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.9067522883415222, |
|
"rewards/margins": 0.41579413414001465, |
|
"rewards/rejected": -1.322546362876892, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7849293563579278, |
|
"grad_norm": 28.115986705653192, |
|
"learning_rate": 6.70334495204884e-08, |
|
"logits/chosen": 0.6296231150627136, |
|
"logits/rejected": 1.1273549795150757, |
|
"logps/chosen": -182.21527099609375, |
|
"logps/rejected": -222.38735961914062, |
|
"loss": 0.6104, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.9979060292243958, |
|
"rewards/margins": 0.37623411417007446, |
|
"rewards/rejected": -1.3741401433944702, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7953950811093669, |
|
"grad_norm": 25.524078897067774, |
|
"learning_rate": 6.092659210462231e-08, |
|
"logits/chosen": 0.6932498216629028, |
|
"logits/rejected": 1.2560994625091553, |
|
"logps/chosen": -163.45713806152344, |
|
"logps/rejected": -210.75875854492188, |
|
"loss": 0.6428, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.8338336944580078, |
|
"rewards/margins": 0.41811808943748474, |
|
"rewards/rejected": -1.2519516944885254, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8058608058608059, |
|
"grad_norm": 19.037858132036448, |
|
"learning_rate": 5.507260361320737e-08, |
|
"logits/chosen": 0.7545400261878967, |
|
"logits/rejected": 1.186693787574768, |
|
"logps/chosen": -139.1665802001953, |
|
"logps/rejected": -173.541748046875, |
|
"loss": 0.5694, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": -0.6477771401405334, |
|
"rewards/margins": 0.3577363193035126, |
|
"rewards/rejected": -1.0055135488510132, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 25.539683354526822, |
|
"learning_rate": 4.947931323697982e-08, |
|
"logits/chosen": 0.8985282778739929, |
|
"logits/rejected": 1.1507294178009033, |
|
"logps/chosen": -127.5772933959961, |
|
"logps/rejected": -150.71971130371094, |
|
"loss": 0.6274, |
|
"rewards/accuracies": 0.2562499940395355, |
|
"rewards/chosen": -0.5861515402793884, |
|
"rewards/margins": 0.2556864023208618, |
|
"rewards/rejected": -0.8418378829956055, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.826792255363684, |
|
"grad_norm": 29.296410897786252, |
|
"learning_rate": 4.415420150605398e-08, |
|
"logits/chosen": 0.5066564083099365, |
|
"logits/rejected": 1.2835947275161743, |
|
"logps/chosen": -169.21194458007812, |
|
"logps/rejected": -226.9739990234375, |
|
"loss": 0.5972, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7719463109970093, |
|
"rewards/margins": 0.5745865702629089, |
|
"rewards/rejected": -1.346532940864563, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.837257980115123, |
|
"grad_norm": 23.071826857167007, |
|
"learning_rate": 3.9104390285376374e-08, |
|
"logits/chosen": 0.2603650689125061, |
|
"logits/rejected": 0.9912735819816589, |
|
"logps/chosen": -189.1995086669922, |
|
"logps/rejected": -238.74069213867188, |
|
"loss": 0.5885, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.8696195483207703, |
|
"rewards/margins": 0.551138162612915, |
|
"rewards/rejected": -1.4207580089569092, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.837257980115123, |
|
"eval_logits/chosen": 0.5612532496452332, |
|
"eval_logits/rejected": 1.0127543210983276, |
|
"eval_logps/chosen": -141.69253540039062, |
|
"eval_logps/rejected": -192.48452758789062, |
|
"eval_loss": 0.5973930954933167, |
|
"eval_rewards/accuracies": 0.369047611951828, |
|
"eval_rewards/chosen": -0.672012984752655, |
|
"eval_rewards/margins": 0.4415486454963684, |
|
"eval_rewards/rejected": -1.113561749458313, |
|
"eval_runtime": 113.6177, |
|
"eval_samples_per_second": 17.603, |
|
"eval_steps_per_second": 0.554, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.847723704866562, |
|
"grad_norm": 17.313265289048484, |
|
"learning_rate": 3.433663324986208e-08, |
|
"logits/chosen": 0.3017066419124603, |
|
"logits/rejected": 0.7334527373313904, |
|
"logps/chosen": -151.9001007080078, |
|
"logps/rejected": -182.63177490234375, |
|
"loss": 0.613, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.6817248463630676, |
|
"rewards/margins": 0.36351272463798523, |
|
"rewards/rejected": -1.0452375411987305, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.858189429618001, |
|
"grad_norm": 32.324000689082936, |
|
"learning_rate": 2.9857306851953897e-08, |
|
"logits/chosen": 0.6850260496139526, |
|
"logits/rejected": 1.2093479633331299, |
|
"logps/chosen": -144.95767211914062, |
|
"logps/rejected": -198.352783203125, |
|
"loss": 0.6, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.6805930733680725, |
|
"rewards/margins": 0.44533902406692505, |
|
"rewards/rejected": -1.1259320974349976, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8686551543694401, |
|
"grad_norm": 27.72643919363414, |
|
"learning_rate": 2.567240179368185e-08, |
|
"logits/chosen": -0.07449465245008469, |
|
"logits/rejected": 0.24005027115345, |
|
"logps/chosen": -154.6387176513672, |
|
"logps/rejected": -227.32882690429688, |
|
"loss": 0.6169, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.7048233151435852, |
|
"rewards/margins": 0.4948086142539978, |
|
"rewards/rejected": -1.199631929397583, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 23.57031908849105, |
|
"learning_rate": 2.1787515014630357e-08, |
|
"logits/chosen": 0.2434501200914383, |
|
"logits/rejected": 0.7666997313499451, |
|
"logps/chosen": -151.32212829589844, |
|
"logps/rejected": -209.18057250976562, |
|
"loss": 0.5855, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.6667734980583191, |
|
"rewards/margins": 0.47117409110069275, |
|
"rewards/rejected": -1.1379475593566895, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8895866038723181, |
|
"grad_norm": 27.822093386580878, |
|
"learning_rate": 1.820784220652766e-08, |
|
"logits/chosen": 0.09173062443733215, |
|
"logits/rejected": 0.7935197949409485, |
|
"logps/chosen": -160.8597869873047, |
|
"logps/rejected": -215.7743377685547, |
|
"loss": 0.5858, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.6746786236763, |
|
"rewards/margins": 0.5673048496246338, |
|
"rewards/rejected": -1.2419836521148682, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9000523286237572, |
|
"grad_norm": 24.993719056225927, |
|
"learning_rate": 1.4938170864468636e-08, |
|
"logits/chosen": -0.022973239421844482, |
|
"logits/rejected": 0.728354811668396, |
|
"logps/chosen": -177.0865936279297, |
|
"logps/rejected": -222.30313110351562, |
|
"loss": 0.587, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.7254469990730286, |
|
"rewards/margins": 0.5123113989830017, |
|
"rewards/rejected": -1.2377583980560303, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9105180533751962, |
|
"grad_norm": 15.152551046729396, |
|
"learning_rate": 1.1982873884064465e-08, |
|
"logits/chosen": 0.3221861720085144, |
|
"logits/rejected": 0.7626439332962036, |
|
"logps/chosen": -138.0549774169922, |
|
"logps/rejected": -176.89402770996094, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.6149898767471313, |
|
"rewards/margins": 0.41914796829223633, |
|
"rewards/rejected": -1.0341379642486572, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9209837781266352, |
|
"grad_norm": 34.8714524353856, |
|
"learning_rate": 9.345903713082304e-09, |
|
"logits/chosen": 0.6613011360168457, |
|
"logits/rejected": 0.8276697397232056, |
|
"logps/chosen": -124.2752914428711, |
|
"logps/rejected": -161.05874633789062, |
|
"loss": 0.6179, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.602181077003479, |
|
"rewards/margins": 0.27649611234664917, |
|
"rewards/rejected": -0.8786771893501282, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9314495028780743, |
|
"grad_norm": 17.42009363611703, |
|
"learning_rate": 7.030787065396865e-09, |
|
"logits/chosen": 0.14433155953884125, |
|
"logits/rejected": 0.636074423789978, |
|
"logps/chosen": -144.4778289794922, |
|
"logps/rejected": -189.1996612548828, |
|
"loss": 0.6092, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.6060083508491516, |
|
"rewards/margins": 0.42767366766929626, |
|
"rewards/rejected": -1.033682107925415, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 25.137415279564916, |
|
"learning_rate": 5.04062020432286e-09, |
|
"logits/chosen": 0.5784530639648438, |
|
"logits/rejected": 1.0418832302093506, |
|
"logps/chosen": -118.29461669921875, |
|
"logps/rejected": -145.42251586914062, |
|
"loss": 0.595, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.4733172357082367, |
|
"rewards/margins": 0.31973880529403687, |
|
"rewards/rejected": -0.793056070804596, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"eval_logits/chosen": 0.4325558543205261, |
|
"eval_logits/rejected": 0.9106192588806152, |
|
"eval_logps/chosen": -136.28819274902344, |
|
"eval_logps/rejected": -189.55056762695312, |
|
"eval_loss": 0.5958317518234253, |
|
"eval_rewards/accuracies": 0.3710317313671112, |
|
"eval_rewards/chosen": -0.6179695725440979, |
|
"eval_rewards/margins": 0.4662524461746216, |
|
"eval_rewards/rejected": -1.0842220783233643, |
|
"eval_runtime": 113.6384, |
|
"eval_samples_per_second": 17.6, |
|
"eval_steps_per_second": 0.554, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 48.333955356090875, |
|
"learning_rate": 3.3780648016376866e-09, |
|
"logits/chosen": -0.08735128492116928, |
|
"logits/rejected": 0.43545690178871155, |
|
"logps/chosen": -137.79881286621094, |
|
"logps/rejected": -196.9468231201172, |
|
"loss": 0.593, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.7070980072021484, |
|
"rewards/margins": 0.4561308026313782, |
|
"rewards/rejected": -1.1632287502288818, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9628466771323915, |
|
"grad_norm": 28.43671889890296, |
|
"learning_rate": 2.0453443778310766e-09, |
|
"logits/chosen": -0.048371605575084686, |
|
"logits/rejected": 0.41192588210105896, |
|
"logps/chosen": -149.82281494140625, |
|
"logps/rejected": -189.8140411376953, |
|
"loss": 0.6088, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.6467713713645935, |
|
"rewards/margins": 0.3985586166381836, |
|
"rewards/rejected": -1.0453299283981323, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9733124018838305, |
|
"grad_norm": 37.55350929589785, |
|
"learning_rate": 1.0442413283435758e-09, |
|
"logits/chosen": -0.05256899446249008, |
|
"logits/rejected": 0.6733183860778809, |
|
"logps/chosen": -138.57061767578125, |
|
"logps/rejected": -181.3246307373047, |
|
"loss": 0.5951, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.5108522772789001, |
|
"rewards/margins": 0.48942360281944275, |
|
"rewards/rejected": -1.0002758502960205, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9837781266352695, |
|
"grad_norm": 23.109646236441566, |
|
"learning_rate": 3.760945397705828e-10, |
|
"logits/chosen": 0.17422077059745789, |
|
"logits/rejected": 0.798999547958374, |
|
"logps/chosen": -132.8301239013672, |
|
"logps/rejected": -187.94265747070312, |
|
"loss": 0.5961, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.599638819694519, |
|
"rewards/margins": 0.4609258770942688, |
|
"rewards/rejected": -1.060564637184143, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9942438513867086, |
|
"grad_norm": 18.08118855716985, |
|
"learning_rate": 4.17975992204056e-11, |
|
"logits/chosen": 0.5573434829711914, |
|
"logits/rejected": 0.9020156860351562, |
|
"logps/chosen": -134.12124633789062, |
|
"logps/rejected": -163.89334106445312, |
|
"loss": 0.618, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.6402407288551331, |
|
"rewards/margins": 0.31644412875175476, |
|
"rewards/rejected": -0.9566848874092102, |
|
"step": 950 |
|
}, |
|
{
"epoch": 0.9994767137624281,
"step": 955,
"total_flos": 0.0,
"train_loss": 0.0,
"train_runtime": 0.0165,
"train_samples_per_second": 369998.418,
"train_steps_per_second": 5810.543
}
|
],
"logging_steps": 10,
"max_steps": 955,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}