|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9974025974025974, |
|
"eval_steps": 36, |
|
"global_step": 360, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0027705627705627706, |
|
"grad_norm": 14.627778578717658, |
|
"kl": 0.0, |
|
"learning_rate": 1.25e-06, |
|
"logits/chosen": -36907408.0, |
|
"logits/rejected": -36792634.666666664, |
|
"logps/chosen": -312.351806640625, |
|
"logps/rejected": -283.6378580729167, |
|
"loss": 0.5, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005541125541125541, |
|
"grad_norm": 13.780174124436396, |
|
"kl": 0.0, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -34840466.666666664, |
|
"logits/rejected": -43695128.0, |
|
"logps/chosen": -182.85721842447916, |
|
"logps/rejected": -222.22654724121094, |
|
"loss": 0.5, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.008311688311688312, |
|
"grad_norm": 12.294259063686624, |
|
"kl": 4.698612689971924, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -19804460.8, |
|
"logits/rejected": -16712610.909090908, |
|
"logps/chosen": -222.3800048828125, |
|
"logps/rejected": -122.40013538707386, |
|
"loss": 0.4986, |
|
"rewards/chosen": 0.04441284537315369, |
|
"rewards/margins": 0.043410241061990915, |
|
"rewards/rejected": 0.0010026043111627753, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.011082251082251082, |
|
"grad_norm": 17.203944077808572, |
|
"kl": 8.57018756866455, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -42728661.333333336, |
|
"logits/rejected": -37368886.85714286, |
|
"logps/chosen": -206.19590928819446, |
|
"logps/rejected": -164.41081891741072, |
|
"loss": 0.4789, |
|
"rewards/chosen": -0.018145985073513456, |
|
"rewards/margins": 0.18317703027573842, |
|
"rewards/rejected": -0.20132301534925187, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.013852813852813853, |
|
"grad_norm": 10.50938907109465, |
|
"kl": 34.45105743408203, |
|
"learning_rate": 4.999902656502973e-06, |
|
"logits/chosen": -18186068.0, |
|
"logits/rejected": -24469592.0, |
|
"logps/chosen": -188.94903564453125, |
|
"logps/rejected": -102.71192932128906, |
|
"loss": 0.4732, |
|
"rewards/chosen": 0.12755107879638672, |
|
"rewards/margins": 0.456174373626709, |
|
"rewards/rejected": -0.32862329483032227, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.016623376623376623, |
|
"grad_norm": 9.308233094927798, |
|
"kl": 64.32754516601562, |
|
"learning_rate": 4.9996106335924965e-06, |
|
"logits/chosen": -58021513.14285714, |
|
"logits/rejected": -46324174.222222224, |
|
"logps/chosen": -431.99476841517856, |
|
"logps/rejected": -169.90737575954861, |
|
"loss": 0.3975, |
|
"rewards/chosen": 0.04927550894873483, |
|
"rewards/margins": 1.1982065883893815, |
|
"rewards/rejected": -1.1489310794406467, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.019393939393939394, |
|
"grad_norm": 8.26019431779866, |
|
"kl": 64.02778625488281, |
|
"learning_rate": 4.999123954009797e-06, |
|
"logits/chosen": -39251945.14285714, |
|
"logits/rejected": -45070531.55555555, |
|
"logps/chosen": -216.63478306361608, |
|
"logps/rejected": -215.42609320746527, |
|
"loss": 0.352, |
|
"rewards/chosen": -1.9811251504080636, |
|
"rewards/margins": 2.4016610705663286, |
|
"rewards/rejected": -4.382786220974392, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.022164502164502164, |
|
"grad_norm": 8.217719696009635, |
|
"kl": 48.159217834472656, |
|
"learning_rate": 4.998442655654946e-06, |
|
"logits/chosen": -34504220.8, |
|
"logits/rejected": -36456395.63636363, |
|
"logps/chosen": -324.70927734375, |
|
"logps/rejected": -180.1541082208807, |
|
"loss": 0.3637, |
|
"rewards/chosen": -1.7615163803100586, |
|
"rewards/margins": 3.1409979906949133, |
|
"rewards/rejected": -4.902514371004972, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.024935064935064935, |
|
"grad_norm": 10.654775495500138, |
|
"kl": 23.938976287841797, |
|
"learning_rate": 4.997566791583916e-06, |
|
"logits/chosen": -67123216.0, |
|
"logits/rejected": -84117072.0, |
|
"logps/chosen": -435.34124755859375, |
|
"logps/rejected": -230.71749877929688, |
|
"loss": 0.4793, |
|
"rewards/chosen": -2.759248733520508, |
|
"rewards/margins": 2.205596923828125, |
|
"rewards/rejected": -4.964845657348633, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.027705627705627706, |
|
"grad_norm": 8.39236794673388, |
|
"kl": 63.119232177734375, |
|
"learning_rate": 4.996496430004446e-06, |
|
"logits/chosen": -67407008.0, |
|
"logits/rejected": -40225140.0, |
|
"logps/chosen": -231.02957153320312, |
|
"logps/rejected": -157.6824951171875, |
|
"loss": 0.3755, |
|
"rewards/chosen": -1.482393503189087, |
|
"rewards/margins": 2.959088087081909, |
|
"rewards/rejected": -4.441481590270996, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.030476190476190476, |
|
"grad_norm": 9.000961005160892, |
|
"kl": 80.52793884277344, |
|
"learning_rate": 4.995231654270726e-06, |
|
"logits/chosen": -35507206.4, |
|
"logits/rejected": -27073269.333333332, |
|
"logps/chosen": -129.73853759765626, |
|
"logps/rejected": -198.3338419596354, |
|
"loss": 0.4853, |
|
"rewards/chosen": -3.5513961791992186, |
|
"rewards/margins": -2.7626913865407308, |
|
"rewards/rejected": -0.7887047926584879, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03324675324675325, |
|
"grad_norm": 6.379191198402483, |
|
"kl": 106.77307891845703, |
|
"learning_rate": 4.993772562876909e-06, |
|
"logits/chosen": -41411072.0, |
|
"logits/rejected": -30578227.2, |
|
"logps/chosen": -189.07457386363637, |
|
"logps/rejected": -137.128857421875, |
|
"loss": 0.3923, |
|
"rewards/chosen": 1.0351500077681108, |
|
"rewards/margins": 1.3740568681196734, |
|
"rewards/rejected": -0.3389068603515625, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03601731601731602, |
|
"grad_norm": 6.452742017638695, |
|
"kl": 122.21878814697266, |
|
"learning_rate": 4.992119269449445e-06, |
|
"logits/chosen": -43168708.571428575, |
|
"logits/rejected": -131686030.22222222, |
|
"logps/chosen": -225.06903948102678, |
|
"logps/rejected": -224.36984592013889, |
|
"loss": 0.3514, |
|
"rewards/chosen": -0.09181928634643555, |
|
"rewards/margins": 4.457288159264459, |
|
"rewards/rejected": -4.549107445610894, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03878787878787879, |
|
"grad_norm": 5.69756506879914, |
|
"kl": 121.53662872314453, |
|
"learning_rate": 4.990271902738223e-06, |
|
"logits/chosen": -59036272.0, |
|
"logits/rejected": -60129404.0, |
|
"logps/chosen": -261.13958740234375, |
|
"logps/rejected": -139.1809539794922, |
|
"loss": 0.4055, |
|
"rewards/chosen": -0.7429616451263428, |
|
"rewards/margins": 2.4130918979644775, |
|
"rewards/rejected": -3.1560535430908203, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.04155844155844156, |
|
"grad_norm": 6.128286045148822, |
|
"kl": 135.92550659179688, |
|
"learning_rate": 4.988230606606552e-06, |
|
"logits/chosen": -68020558.22222222, |
|
"logits/rejected": -159836653.7142857, |
|
"logps/chosen": -279.03700086805554, |
|
"logps/rejected": -178.57425362723214, |
|
"loss": 0.3175, |
|
"rewards/chosen": 1.3599602381388347, |
|
"rewards/margins": 3.7635091599963957, |
|
"rewards/rejected": -2.403548921857561, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04432900432900433, |
|
"grad_norm": 6.677440912213255, |
|
"kl": 200.06797790527344, |
|
"learning_rate": 4.985995540019956e-06, |
|
"logits/chosen": -64806084.0, |
|
"logits/rejected": -43605824.0, |
|
"logps/chosen": -188.4673309326172, |
|
"logps/rejected": -151.33450317382812, |
|
"loss": 0.3861, |
|
"rewards/chosen": -2.2935891151428223, |
|
"rewards/margins": 1.6028807163238525, |
|
"rewards/rejected": -3.896469831466675, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0470995670995671, |
|
"grad_norm": 5.53499338288777, |
|
"kl": 132.95904541015625, |
|
"learning_rate": 4.983566877033791e-06, |
|
"logits/chosen": -58334624.0, |
|
"logits/rejected": -47487511.27272727, |
|
"logps/chosen": -269.74716796875, |
|
"logps/rejected": -178.86970658735797, |
|
"loss": 0.3605, |
|
"rewards/chosen": -0.3949946403503418, |
|
"rewards/margins": 4.419949887015602, |
|
"rewards/rejected": -4.814944527365944, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04987012987012987, |
|
"grad_norm": 5.996796684805764, |
|
"kl": 208.39804077148438, |
|
"learning_rate": 4.980944806779698e-06, |
|
"logits/chosen": -80701216.0, |
|
"logits/rejected": -29817596.8, |
|
"logps/chosen": -366.2938639322917, |
|
"logps/rejected": -123.7053955078125, |
|
"loss": 0.388, |
|
"rewards/chosen": 0.7954994837443033, |
|
"rewards/margins": 2.894377581278483, |
|
"rewards/rejected": -2.09887809753418, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05264069264069264, |
|
"grad_norm": 5.298093207942763, |
|
"kl": 171.8631591796875, |
|
"learning_rate": 4.9781295334508664e-06, |
|
"logits/chosen": -27342618.666666668, |
|
"logits/rejected": -175808640.0, |
|
"logps/chosen": -178.55368381076389, |
|
"logps/rejected": -251.65311104910714, |
|
"loss": 0.3068, |
|
"rewards/chosen": -1.4788221783108182, |
|
"rewards/margins": 5.431802734496101, |
|
"rewards/rejected": -6.910624912806919, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05541125541125541, |
|
"grad_norm": 5.5366402223371605, |
|
"kl": 168.5543212890625, |
|
"learning_rate": 4.975121276286136e-06, |
|
"logits/chosen": -39478311.11111111, |
|
"logits/rejected": -44926532.571428575, |
|
"logps/chosen": -229.44447157118054, |
|
"logps/rejected": -176.92333984375, |
|
"loss": 0.3188, |
|
"rewards/chosen": 0.9360362158881294, |
|
"rewards/margins": 3.546793498690166, |
|
"rewards/rejected": -2.610757282802037, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05818181818181818, |
|
"grad_norm": 4.114450168170683, |
|
"kl": 213.9116973876953, |
|
"learning_rate": 4.9719202695529265e-06, |
|
"logits/chosen": -30691328.0, |
|
"logits/rejected": -33394152.727272727, |
|
"logps/chosen": -149.3848876953125, |
|
"logps/rejected": -156.1878329190341, |
|
"loss": 0.3428, |
|
"rewards/chosen": -1.1895624160766602, |
|
"rewards/margins": 1.3192689548839223, |
|
"rewards/rejected": -2.5088313709605825, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06095238095238095, |
|
"grad_norm": 4.009166305344757, |
|
"kl": 183.32760620117188, |
|
"learning_rate": 4.968526762528988e-06, |
|
"logits/chosen": -35098371.2, |
|
"logits/rejected": 25460706.90909091, |
|
"logps/chosen": -145.5945556640625, |
|
"logps/rejected": -120.78593306107955, |
|
"loss": 0.3241, |
|
"rewards/chosen": -3.308795166015625, |
|
"rewards/margins": 0.9302642822265623, |
|
"rewards/rejected": -4.2390594482421875, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06372294372294372, |
|
"grad_norm": 4.640987152057261, |
|
"kl": 153.11923217773438, |
|
"learning_rate": 4.964941019482995e-06, |
|
"logits/chosen": -33867536.0, |
|
"logits/rejected": -55536612.0, |
|
"logps/chosen": -305.594970703125, |
|
"logps/rejected": -207.89471435546875, |
|
"loss": 0.3545, |
|
"rewards/chosen": 0.747844934463501, |
|
"rewards/margins": 4.4949328899383545, |
|
"rewards/rejected": -3.7470879554748535, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0664935064935065, |
|
"grad_norm": 4.069238916717589, |
|
"kl": 125.38595581054688, |
|
"learning_rate": 4.961163319653959e-06, |
|
"logits/chosen": -47397428.0, |
|
"logits/rejected": -82234298.66666667, |
|
"logps/chosen": -178.1082763671875, |
|
"logps/rejected": -169.91890462239584, |
|
"loss": 0.3224, |
|
"rewards/chosen": 0.6711676716804504, |
|
"rewards/margins": 4.262869497140249, |
|
"rewards/rejected": -3.5917018254597983, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06926406926406926, |
|
"grad_norm": 4.859678347936459, |
|
"kl": 130.6267852783203, |
|
"learning_rate": 4.9571939572294914e-06, |
|
"logits/chosen": -38077904.0, |
|
"logits/rejected": -46673220.0, |
|
"logps/chosen": -246.6427001953125, |
|
"logps/rejected": -112.71556091308594, |
|
"loss": 0.318, |
|
"rewards/chosen": 0.7895539999008179, |
|
"rewards/margins": 3.2466784715652466, |
|
"rewards/rejected": -2.4571244716644287, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07203463203463203, |
|
"grad_norm": 4.215905495865878, |
|
"kl": 85.88780212402344, |
|
"learning_rate": 4.953033241322887e-06, |
|
"logits/chosen": -36872256.0, |
|
"logits/rejected": -27889625.6, |
|
"logps/chosen": -206.98484108664772, |
|
"logps/rejected": -222.779833984375, |
|
"loss": 0.3116, |
|
"rewards/chosen": -0.3384280638261275, |
|
"rewards/margins": 2.963332037492232, |
|
"rewards/rejected": -3.3017601013183593, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0748051948051948, |
|
"grad_norm": 5.17124531247104, |
|
"kl": 132.2495880126953, |
|
"learning_rate": 4.948681495949055e-06, |
|
"logits/chosen": -50783381.333333336, |
|
"logits/rejected": -15797090.0, |
|
"logps/chosen": -282.1715087890625, |
|
"logps/rejected": -155.00286865234375, |
|
"loss": 0.3684, |
|
"rewards/chosen": -0.16415607929229736, |
|
"rewards/margins": 3.6816858053207397, |
|
"rewards/rejected": -3.845841884613037, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07757575757575758, |
|
"grad_norm": 5.45010267483556, |
|
"kl": 99.80706787109375, |
|
"learning_rate": 4.944139059999286e-06, |
|
"logits/chosen": -48624998.4, |
|
"logits/rejected": -14226288.0, |
|
"logps/chosen": -418.8494140625, |
|
"logps/rejected": -180.1219278971354, |
|
"loss": 0.2844, |
|
"rewards/chosen": 1.4669153213500976, |
|
"rewards/margins": 8.812172126770019, |
|
"rewards/rejected": -7.345256805419922, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.08034632034632035, |
|
"grad_norm": 4.572726104274604, |
|
"kl": 121.2557601928711, |
|
"learning_rate": 4.939406287214861e-06, |
|
"logits/chosen": -75202816.0, |
|
"logits/rejected": -52673325.71428572, |
|
"logps/chosen": -256.0649685329861, |
|
"logps/rejected": -171.17110770089286, |
|
"loss": 0.3235, |
|
"rewards/chosen": -0.08450836605495876, |
|
"rewards/margins": 2.685517977154444, |
|
"rewards/rejected": -2.7700263432094028, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.08311688311688312, |
|
"grad_norm": 4.6083614006549585, |
|
"kl": 86.99708557128906, |
|
"learning_rate": 4.9344835461595016e-06, |
|
"logits/chosen": -36531722.666666664, |
|
"logits/rejected": -29011876.57142857, |
|
"logps/chosen": -172.84847005208334, |
|
"logps/rejected": -144.96559361049108, |
|
"loss": 0.3997, |
|
"rewards/chosen": -0.7234950595431857, |
|
"rewards/margins": 2.354658823164683, |
|
"rewards/rejected": -3.0781538827078685, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08588744588744589, |
|
"grad_norm": 4.872957501355809, |
|
"kl": 108.07892608642578, |
|
"learning_rate": 4.929371220190671e-06, |
|
"logits/chosen": -23903181.714285713, |
|
"logits/rejected": -30589084.444444444, |
|
"logps/chosen": -177.80123465401786, |
|
"logps/rejected": -171.34022352430554, |
|
"loss": 0.3246, |
|
"rewards/chosen": 0.4418588365827288, |
|
"rewards/margins": 2.642532257806687, |
|
"rewards/rejected": -2.2006734212239585, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08865800865800866, |
|
"grad_norm": 4.353723125264067, |
|
"kl": 189.1405029296875, |
|
"learning_rate": 4.9240697074297205e-06, |
|
"logits/chosen": -61281422.222222224, |
|
"logits/rejected": -26948859.42857143, |
|
"logps/chosen": -315.59071180555554, |
|
"logps/rejected": -145.61966378348214, |
|
"loss": 0.2904, |
|
"rewards/chosen": 1.7190555996365018, |
|
"rewards/margins": 6.484896402510385, |
|
"rewards/rejected": -4.765840802873884, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.09142857142857143, |
|
"grad_norm": 3.9621961428795482, |
|
"kl": 129.8131103515625, |
|
"learning_rate": 4.918579420730884e-06, |
|
"logits/chosen": -59562949.333333336, |
|
"logits/rejected": -38947785.6, |
|
"logps/chosen": -232.64375813802084, |
|
"logps/rejected": -109.01717529296874, |
|
"loss": 0.3206, |
|
"rewards/chosen": 0.07605441411336263, |
|
"rewards/margins": 2.483719857533773, |
|
"rewards/rejected": -2.4076654434204103, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0941991341991342, |
|
"grad_norm": 4.640088655848224, |
|
"kl": 153.42471313476562, |
|
"learning_rate": 4.912900787649124e-06, |
|
"logits/chosen": -79684328.72727273, |
|
"logits/rejected": -61416396.8, |
|
"logps/chosen": -557.2211026278409, |
|
"logps/rejected": -227.8122314453125, |
|
"loss": 0.3035, |
|
"rewards/chosen": 1.5112367109818892, |
|
"rewards/margins": 5.1786668257279835, |
|
"rewards/rejected": -3.667430114746094, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.09696969696969697, |
|
"grad_norm": 3.400632617608017, |
|
"kl": 223.9918212890625, |
|
"learning_rate": 4.907034250406846e-06, |
|
"logits/chosen": -63817045.333333336, |
|
"logits/rejected": -36983830.85714286, |
|
"logps/chosen": -226.66205512152777, |
|
"logps/rejected": -213.019775390625, |
|
"loss": 0.3779, |
|
"rewards/chosen": -0.03687206904093424, |
|
"rewards/margins": 5.821811380840483, |
|
"rewards/rejected": -5.8586834498814175, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09974025974025974, |
|
"grad_norm": 4.880517140185325, |
|
"kl": 106.79385375976562, |
|
"learning_rate": 4.900980265859449e-06, |
|
"logits/chosen": -36042773.333333336, |
|
"logits/rejected": -33042774.85714286, |
|
"logps/chosen": -207.92580837673611, |
|
"logps/rejected": -164.6212158203125, |
|
"loss": 0.2623, |
|
"rewards/chosen": -0.6909873220655653, |
|
"rewards/margins": 1.9688087115212094, |
|
"rewards/rejected": -2.6597960335867747, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09974025974025974, |
|
"eval_logits/chosen": -55713169.06666667, |
|
"eval_logits/rejected": -40055004.68965517, |
|
"eval_logps/chosen": -345.47955729166665, |
|
"eval_logps/rejected": -197.1070177801724, |
|
"eval_loss": 0.33400189876556396, |
|
"eval_rewards/chosen": 1.3846696217854817, |
|
"eval_rewards/margins": 5.02308683285768, |
|
"eval_rewards/rejected": -3.6384172110721984, |
|
"eval_runtime": 376.2441, |
|
"eval_samples_per_second": 1.241, |
|
"eval_steps_per_second": 0.157, |
|
"kl": 890.2158813476562, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.10251082251082251, |
|
"grad_norm": 4.888307645224207, |
|
"kl": 218.9296112060547, |
|
"learning_rate": 4.894739305459754e-06, |
|
"logits/chosen": -43587472.0, |
|
"logits/rejected": -41952672.0, |
|
"logps/chosen": -277.0217692057292, |
|
"logps/rejected": -136.7241455078125, |
|
"loss": 0.2681, |
|
"rewards/chosen": -0.6008732318878174, |
|
"rewards/margins": 2.065483808517456, |
|
"rewards/rejected": -2.6663570404052734, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.10528138528138528, |
|
"grad_norm": 3.9930692633221185, |
|
"kl": 173.95948791503906, |
|
"learning_rate": 4.88831185522129e-06, |
|
"logits/chosen": -42094129.777777776, |
|
"logits/rejected": -102693074.28571428, |
|
"logps/chosen": -383.2141384548611, |
|
"logps/rejected": -281.09629603794644, |
|
"loss": 0.3007, |
|
"rewards/chosen": 1.5146233240763347, |
|
"rewards/margins": 7.224462191263835, |
|
"rewards/rejected": -5.7098388671875, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.10805194805194805, |
|
"grad_norm": 4.7795619675028815, |
|
"kl": 210.2698211669922, |
|
"learning_rate": 4.881698415680442e-06, |
|
"logits/chosen": -55168146.28571428, |
|
"logits/rejected": -61552739.55555555, |
|
"logps/chosen": -279.0960693359375, |
|
"logps/rejected": -206.74319118923611, |
|
"loss": 0.3303, |
|
"rewards/chosen": 0.6114097322736468, |
|
"rewards/margins": 5.236137193346781, |
|
"rewards/rejected": -4.6247274610731335, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.11082251082251082, |
|
"grad_norm": 4.316697115631673, |
|
"kl": 160.36947631835938, |
|
"learning_rate": 4.874899501857477e-06, |
|
"logits/chosen": -48044475.428571425, |
|
"logits/rejected": -110801429.33333333, |
|
"logps/chosen": -183.30032784598214, |
|
"logps/rejected": -277.29033745659723, |
|
"loss": 0.2789, |
|
"rewards/chosen": -1.4944199153355189, |
|
"rewards/margins": 6.188857638646685, |
|
"rewards/rejected": -7.6832775539822045, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11359307359307359, |
|
"grad_norm": 3.752176207163753, |
|
"kl": 255.1221466064453, |
|
"learning_rate": 4.867915643216434e-06, |
|
"logits/chosen": -47792854.85714286, |
|
"logits/rejected": -61886236.44444445, |
|
"logps/chosen": -237.83729771205358, |
|
"logps/rejected": -266.69927300347223, |
|
"loss": 0.3505, |
|
"rewards/chosen": 0.7381483486720494, |
|
"rewards/margins": 6.307764030638195, |
|
"rewards/rejected": -5.5696156819661455, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.11636363636363636, |
|
"grad_norm": 4.00527705976467, |
|
"kl": 229.1636199951172, |
|
"learning_rate": 4.860747383623889e-06, |
|
"logits/chosen": -67951502.22222222, |
|
"logits/rejected": -32232422.85714286, |
|
"logps/chosen": -215.768310546875, |
|
"logps/rejected": -87.32527378627232, |
|
"loss": 0.3736, |
|
"rewards/chosen": 1.4013728035820856, |
|
"rewards/margins": 1.9508999377962144, |
|
"rewards/rejected": -0.5495271342141288, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.11913419913419913, |
|
"grad_norm": 4.135955907049656, |
|
"kl": 246.27914428710938, |
|
"learning_rate": 4.85339528130661e-06, |
|
"logits/chosen": -68992011.63636364, |
|
"logits/rejected": -29306707.2, |
|
"logps/chosen": -250.64783824573863, |
|
"logps/rejected": -138.613916015625, |
|
"loss": 0.3449, |
|
"rewards/chosen": 0.9984440370039507, |
|
"rewards/margins": 6.130092672868209, |
|
"rewards/rejected": -5.1316486358642575, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.1219047619047619, |
|
"grad_norm": 4.923417830289614, |
|
"kl": 211.35293579101562, |
|
"learning_rate": 4.845859908808074e-06, |
|
"logits/chosen": -46779109.333333336, |
|
"logits/rejected": -48145318.4, |
|
"logps/chosen": -305.1178792317708, |
|
"logps/rejected": -204.313037109375, |
|
"loss": 0.3315, |
|
"rewards/chosen": 1.7318267822265625, |
|
"rewards/margins": 3.607950782775879, |
|
"rewards/rejected": -1.8761240005493165, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.12467532467532468, |
|
"grad_norm": 4.2715927235424385, |
|
"kl": 186.33251953125, |
|
"learning_rate": 4.838141852943891e-06, |
|
"logits/chosen": -38434194.28571428, |
|
"logits/rejected": -33670951.11111111, |
|
"logps/chosen": -271.14303152901783, |
|
"logps/rejected": -149.29356553819446, |
|
"loss": 0.2698, |
|
"rewards/chosen": 1.1793903623308455, |
|
"rewards/margins": 2.703519382174053, |
|
"rewards/rejected": -1.5241290198432074, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.12744588744588745, |
|
"grad_norm": 4.6712246149702334, |
|
"kl": 379.38775634765625, |
|
"learning_rate": 4.830241714756099e-06, |
|
"logits/chosen": -47082810.666666664, |
|
"logits/rejected": -53669280.0, |
|
"logps/chosen": -209.59720865885416, |
|
"logps/rejected": -228.5864715576172, |
|
"loss": 0.3081, |
|
"rewards/chosen": -1.286256472269694, |
|
"rewards/margins": 4.078855832417806, |
|
"rewards/rejected": -5.3651123046875, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.13021645021645023, |
|
"grad_norm": 4.157335918962299, |
|
"kl": 252.03436279296875, |
|
"learning_rate": 4.822160109466361e-06, |
|
"logits/chosen": -71515571.2, |
|
"logits/rejected": -22340152.0, |
|
"logps/chosen": -444.233447265625, |
|
"logps/rejected": -128.06829833984375, |
|
"loss": 0.2922, |
|
"rewards/chosen": 3.7814491271972654, |
|
"rewards/margins": 7.945715713500976, |
|
"rewards/rejected": -4.164266586303711, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.132987012987013, |
|
"grad_norm": 4.614266751571361, |
|
"kl": 209.5349578857422, |
|
"learning_rate": 4.813897666428054e-06, |
|
"logits/chosen": -44444301.71428572, |
|
"logits/rejected": -104307093.33333333, |
|
"logps/chosen": -344.1742466517857, |
|
"logps/rejected": -157.80377875434027, |
|
"loss": 0.3107, |
|
"rewards/chosen": 4.24214472089495, |
|
"rewards/margins": 10.676348398602197, |
|
"rewards/rejected": -6.434203677707249, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.13575757575757577, |
|
"grad_norm": 3.963234963343962, |
|
"kl": 283.381103515625, |
|
"learning_rate": 4.805455029077255e-06, |
|
"logits/chosen": -34125922.28571428, |
|
"logits/rejected": -58880739.55555555, |
|
"logps/chosen": -242.03897530691964, |
|
"logps/rejected": -168.01375325520834, |
|
"loss": 0.3119, |
|
"rewards/chosen": 0.16667870112827846, |
|
"rewards/margins": 5.258827799842471, |
|
"rewards/rejected": -5.092149098714192, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.13852813852813853, |
|
"grad_norm": 4.655086695934908, |
|
"kl": 234.0029754638672, |
|
"learning_rate": 4.79683285488264e-06, |
|
"logits/chosen": -60300384.0, |
|
"logits/rejected": -33782124.8, |
|
"logps/chosen": -330.35732014973956, |
|
"logps/rejected": -118.8407958984375, |
|
"loss": 0.3508, |
|
"rewards/chosen": -4.03786055246989, |
|
"rewards/margins": -0.5354275385538738, |
|
"rewards/rejected": -3.502433013916016, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1412987012987013, |
|
"grad_norm": 4.404029490858146, |
|
"kl": 258.7068176269531, |
|
"learning_rate": 4.788031815294282e-06, |
|
"logits/chosen": -41171053.333333336, |
|
"logits/rejected": -47690988.8, |
|
"logps/chosen": -225.68290201822916, |
|
"logps/rejected": -165.079248046875, |
|
"loss": 0.3853, |
|
"rewards/chosen": -3.4279282887776694, |
|
"rewards/margins": -0.5250184377034506, |
|
"rewards/rejected": -2.902909851074219, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.14406926406926407, |
|
"grad_norm": 4.502899514521724, |
|
"kl": 234.19357299804688, |
|
"learning_rate": 4.779052595691355e-06, |
|
"logits/chosen": -48258922.666666664, |
|
"logits/rejected": -41363040.0, |
|
"logps/chosen": -226.1843465169271, |
|
"logps/rejected": -213.36155700683594, |
|
"loss": 0.3074, |
|
"rewards/chosen": -0.007684846719106038, |
|
"rewards/margins": 4.0709970990816755, |
|
"rewards/rejected": -4.078681945800781, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.14683982683982685, |
|
"grad_norm": 4.564834903412173, |
|
"kl": 273.16302490234375, |
|
"learning_rate": 4.76989589532877e-06, |
|
"logits/chosen": -58062768.0, |
|
"logits/rejected": -28522993.230769232, |
|
"logps/chosen": -288.6212158203125, |
|
"logps/rejected": -137.31136380709134, |
|
"loss": 0.3462, |
|
"rewards/chosen": 2.7320359547932944, |
|
"rewards/margins": 6.071770252325596, |
|
"rewards/rejected": -3.339734297532302, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.1496103896103896, |
|
"grad_norm": 4.295499026061421, |
|
"kl": 138.43191528320312, |
|
"learning_rate": 4.7605624272827125e-06, |
|
"logits/chosen": -77513848.0, |
|
"logits/rejected": -133321173.33333333, |
|
"logps/chosen": -210.3496856689453, |
|
"logps/rejected": -126.23902384440105, |
|
"loss": 0.3016, |
|
"rewards/chosen": -3.524599075317383, |
|
"rewards/margins": 2.062013626098633, |
|
"rewards/rejected": -5.586612701416016, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.1523809523809524, |
|
"grad_norm": 3.7911522230413235, |
|
"kl": 138.6194305419922, |
|
"learning_rate": 4.75105291839512e-06, |
|
"logits/chosen": -52138740.0, |
|
"logits/rejected": -23599914.0, |
|
"logps/chosen": -258.3819274902344, |
|
"logps/rejected": -172.046630859375, |
|
"loss": 0.335, |
|
"rewards/chosen": 0.0711725652217865, |
|
"rewards/margins": 5.18670067191124, |
|
"rewards/rejected": -5.115528106689453, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.15515151515151515, |
|
"grad_norm": 3.473713440188282, |
|
"kl": 212.0938720703125, |
|
"learning_rate": 4.741368109217072e-06, |
|
"logits/chosen": -55373659.428571425, |
|
"logits/rejected": -26278688.0, |
|
"logps/chosen": -308.01792689732144, |
|
"logps/rejected": -207.99937608506946, |
|
"loss": 0.2906, |
|
"rewards/chosen": 4.009753363473075, |
|
"rewards/margins": 8.175141682700506, |
|
"rewards/rejected": -4.16538831922743, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.15792207792207794, |
|
"grad_norm": 4.254963226590674, |
|
"kl": 171.899658203125, |
|
"learning_rate": 4.7315087539511225e-06, |
|
"logits/chosen": -47232265.14285714, |
|
"logits/rejected": -35438577.777777776, |
|
"logps/chosen": -169.33925083705358, |
|
"logps/rejected": -180.327880859375, |
|
"loss": 0.3628, |
|
"rewards/chosen": -2.5366314479282925, |
|
"rewards/margins": 1.3937638903421066, |
|
"rewards/rejected": -3.930395338270399, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.1606926406926407, |
|
"grad_norm": 3.9659807050855505, |
|
"kl": 254.7626953125, |
|
"learning_rate": 4.721475620392567e-06, |
|
"logits/chosen": -50043021.71428572, |
|
"logits/rejected": -62206862.222222224, |
|
"logps/chosen": -242.54567173549108, |
|
"logps/rejected": -245.458984375, |
|
"loss": 0.2694, |
|
"rewards/chosen": 1.714186668395996, |
|
"rewards/margins": 8.648983319600422, |
|
"rewards/rejected": -6.934796651204427, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.16346320346320348, |
|
"grad_norm": 4.532754524040317, |
|
"kl": 254.67286682128906, |
|
"learning_rate": 4.711269489869654e-06, |
|
"logits/chosen": -23121174.0, |
|
"logits/rejected": -25245008.0, |
|
"logps/chosen": -231.70849609375, |
|
"logps/rejected": -182.4578857421875, |
|
"loss": 0.3414, |
|
"rewards/chosen": -0.8669279217720032, |
|
"rewards/margins": 3.1747695803642273, |
|
"rewards/rejected": -4.0416975021362305, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.16623376623376623, |
|
"grad_norm": 4.231371855524758, |
|
"kl": 281.6844177246094, |
|
"learning_rate": 4.700891157182729e-06, |
|
"logits/chosen": -42404366.222222224, |
|
"logits/rejected": -37054004.571428575, |
|
"logps/chosen": -141.22482638888889, |
|
"logps/rejected": -169.881591796875, |
|
"loss": 0.4238, |
|
"rewards/chosen": -2.689548068576389, |
|
"rewards/margins": 2.9127321394663013, |
|
"rewards/rejected": -5.60228020804269, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16900432900432902, |
|
"grad_norm": 3.979048810041181, |
|
"kl": 392.581298828125, |
|
"learning_rate": 4.690341430542351e-06, |
|
"logits/chosen": -82912292.57142857, |
|
"logits/rejected": -52836330.666666664, |
|
"logps/chosen": -509.1022251674107, |
|
"logps/rejected": -204.47900390625, |
|
"loss": 0.3324, |
|
"rewards/chosen": 2.8874827793666293, |
|
"rewards/margins": 9.94957036820669, |
|
"rewards/rejected": -7.062087588840061, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.17177489177489177, |
|
"grad_norm": 4.714544408668155, |
|
"kl": 215.25192260742188, |
|
"learning_rate": 4.679621131506347e-06, |
|
"logits/chosen": -38413403.428571425, |
|
"logits/rejected": -51266595.55555555, |
|
"logps/chosen": -234.61213030133928, |
|
"logps/rejected": -182.70484754774304, |
|
"loss": 0.3589, |
|
"rewards/chosen": 0.2332275935581752, |
|
"rewards/margins": 3.614927685449994, |
|
"rewards/rejected": -3.3817000918918185, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.17454545454545456, |
|
"grad_norm": 4.623389807959627, |
|
"kl": 335.26934814453125, |
|
"learning_rate": 4.668731094915835e-06, |
|
"logits/chosen": -35904576.0, |
|
"logits/rejected": -65528533.333333336, |
|
"logps/chosen": -289.337890625, |
|
"logps/rejected": -275.28566487630206, |
|
"loss": 0.3341, |
|
"rewards/chosen": 1.6814540863037108, |
|
"rewards/margins": 10.296795272827149, |
|
"rewards/rejected": -8.615341186523438, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.17731601731601732, |
|
"grad_norm": 4.704432412085312, |
|
"kl": 219.504150390625, |
|
"learning_rate": 4.657672168830211e-06, |
|
"logits/chosen": -73965184.0, |
|
"logits/rejected": -135261056.0, |
|
"logps/chosen": -360.76513671875, |
|
"logps/rejected": -249.28709411621094, |
|
"loss": 0.3758, |
|
"rewards/chosen": 1.1860873699188232, |
|
"rewards/margins": 7.718278646469116, |
|
"rewards/rejected": -6.532191276550293, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.1800865800865801, |
|
"grad_norm": 3.4016250966473707, |
|
"kl": 293.42034912109375, |
|
"learning_rate": 4.646445214461105e-06, |
|
"logits/chosen": -68569877.33333333, |
|
"logits/rejected": -70185024.0, |
|
"logps/chosen": -208.67692057291666, |
|
"logps/rejected": -220.83232421875, |
|
"loss": 0.2864, |
|
"rewards/chosen": 1.106031099955241, |
|
"rewards/margins": 8.20595251719157, |
|
"rewards/rejected": -7.099921417236328, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.18285714285714286, |
|
"grad_norm": 4.424106473489455, |
|
"kl": 359.9471435546875, |
|
"learning_rate": 4.635051106105316e-06, |
|
"logits/chosen": -53769763.55555555, |
|
"logits/rejected": -45391094.85714286, |
|
"logps/chosen": -188.59453667534723, |
|
"logps/rejected": -173.94886997767858, |
|
"loss": 0.3533, |
|
"rewards/chosen": -2.5977306365966797, |
|
"rewards/margins": 1.459943226405552, |
|
"rewards/rejected": -4.057673863002232, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.18562770562770564, |
|
"grad_norm": 3.7471465185818924, |
|
"kl": 283.2303466796875, |
|
"learning_rate": 4.623490731076728e-06, |
|
"logits/chosen": -44121778.28571428, |
|
"logits/rejected": -35914748.44444445, |
|
"logps/chosen": -223.49496023995536, |
|
"logps/rejected": -194.11257595486111, |
|
"loss": 0.2895, |
|
"rewards/chosen": -1.5352796827043806, |
|
"rewards/margins": 1.1657637641543435, |
|
"rewards/rejected": -2.701043446858724, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.1883982683982684, |
|
"grad_norm": 4.286110449007166, |
|
"kl": 249.0130615234375, |
|
"learning_rate": 4.6117649896372055e-06, |
|
"logits/chosen": -66104793.6, |
|
"logits/rejected": -78641966.54545455, |
|
"logps/chosen": -486.58681640625, |
|
"logps/rejected": -216.8487215909091, |
|
"loss": 0.3098, |
|
"rewards/chosen": 2.7544036865234376, |
|
"rewards/margins": 8.66450722434304, |
|
"rewards/rejected": -5.9101035378196025, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.19116883116883118, |
|
"grad_norm": 4.19634882053574, |
|
"kl": 295.2521667480469, |
|
"learning_rate": 4.59987479492649e-06, |
|
"logits/chosen": -49621673.6, |
|
"logits/rejected": -103387168.0, |
|
"logps/chosen": -292.345458984375, |
|
"logps/rejected": -152.9824015299479, |
|
"loss": 0.3605, |
|
"rewards/chosen": 1.9187255859375, |
|
"rewards/margins": 6.33223876953125, |
|
"rewards/rejected": -4.41351318359375, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.19393939393939394, |
|
"grad_norm": 3.4689063655732606, |
|
"kl": 295.6634216308594, |
|
"learning_rate": 4.587821072891089e-06, |
|
"logits/chosen": -85551232.0, |
|
"logits/rejected": -44202093.333333336, |
|
"logps/chosen": -415.968359375, |
|
"logps/rejected": -180.08984375, |
|
"loss": 0.2793, |
|
"rewards/chosen": 3.5712188720703124, |
|
"rewards/margins": 6.219875717163086, |
|
"rewards/rejected": -2.6486568450927734, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.19670995670995672, |
|
"grad_norm": 4.285024920845038, |
|
"kl": 258.05487060546875, |
|
"learning_rate": 4.5756047622121665e-06, |
|
"logits/chosen": -50422928.0, |
|
"logits/rejected": -55744752.0, |
|
"logps/chosen": -167.7302703857422, |
|
"logps/rejected": -208.2799072265625, |
|
"loss": 0.3164, |
|
"rewards/chosen": -2.9284355640411377, |
|
"rewards/margins": -0.29325294494628906, |
|
"rewards/rejected": -2.6351826190948486, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.19948051948051948, |
|
"grad_norm": 3.6178248205419554, |
|
"kl": 321.04461669921875, |
|
"learning_rate": 4.563226814232444e-06, |
|
"logits/chosen": -57370528.0, |
|
"logits/rejected": -49400672.0, |
|
"logps/chosen": -243.88953993055554, |
|
"logps/rejected": -175.27256556919642, |
|
"loss": 0.3222, |
|
"rewards/chosen": 1.1517394383748372, |
|
"rewards/margins": 4.414692787897019, |
|
"rewards/rejected": -3.262953349522182, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.19948051948051948, |
|
"eval_logits/chosen": -61469499.733333334, |
|
"eval_logits/rejected": -32503238.620689657, |
|
"eval_logps/chosen": -344.1068359375, |
|
"eval_logps/rejected": -209.9999158135776, |
|
"eval_loss": 0.32733702659606934, |
|
"eval_rewards/chosen": 1.5219429016113282, |
|
"eval_rewards/margins": 6.449649389858903, |
|
"eval_rewards/rejected": -4.927706488247575, |
|
"eval_runtime": 376.0789, |
|
"eval_samples_per_second": 1.242, |
|
"eval_steps_per_second": 0.157, |
|
"kl": 1189.544677734375, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.20225108225108226, |
|
"grad_norm": 4.29914119533165, |
|
"kl": 282.59783935546875, |
|
"learning_rate": 4.550688192882115e-06, |
|
"logits/chosen": -45450176.0, |
|
"logits/rejected": -62472713.14285714, |
|
"logps/chosen": -270.63197157118054, |
|
"logps/rejected": -223.25875418526786, |
|
"loss": 0.308, |
|
"rewards/chosen": 0.047760725021362305, |
|
"rewards/margins": 5.480312517711094, |
|
"rewards/rejected": -5.432551792689732, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.20502164502164502, |
|
"grad_norm": 4.0469021106657355, |
|
"kl": 301.9462890625, |
|
"learning_rate": 4.53798987460378e-06, |
|
"logits/chosen": -61264512.0, |
|
"logits/rejected": -70442304.0, |
|
"logps/chosen": -208.86031087239584, |
|
"logps/rejected": -239.4268798828125, |
|
"loss": 0.3314, |
|
"rewards/chosen": -0.09858560562133789, |
|
"rewards/margins": 4.180919170379639, |
|
"rewards/rejected": -4.279504776000977, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.2077922077922078, |
|
"grad_norm": 3.9451262684847106, |
|
"kl": 291.61004638671875, |
|
"learning_rate": 4.525132848276405e-06, |
|
"logits/chosen": -51769896.0, |
|
"logits/rejected": -33958032.0, |
|
"logps/chosen": -296.2464599609375, |
|
"logps/rejected": -154.05738830566406, |
|
"loss": 0.2617, |
|
"rewards/chosen": 0.972441554069519, |
|
"rewards/margins": 5.835387110710144, |
|
"rewards/rejected": -4.862945556640625, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.21056277056277056, |
|
"grad_norm": 3.6314514523940984, |
|
"kl": 232.65203857421875, |
|
"learning_rate": 4.512118115138315e-06, |
|
"logits/chosen": -67941207.27272727, |
|
"logits/rejected": -48613404.8, |
|
"logps/chosen": -314.12986061789775, |
|
"logps/rejected": -230.21904296875, |
|
"loss": 0.3111, |
|
"rewards/chosen": 3.0080684315074575, |
|
"rewards/margins": 6.707305110584606, |
|
"rewards/rejected": -3.6992366790771483, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.21333333333333335, |
|
"grad_norm": 4.06139882760236, |
|
"kl": 281.50775146484375, |
|
"learning_rate": 4.498946688709216e-06, |
|
"logits/chosen": -53087360.0, |
|
"logits/rejected": -53375168.0, |
|
"logps/chosen": -192.54742431640625, |
|
"logps/rejected": -188.837060546875, |
|
"loss": 0.312, |
|
"rewards/chosen": -2.0402072270711265, |
|
"rewards/margins": 1.6422310511271156, |
|
"rewards/rejected": -3.682438278198242, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.2161038961038961, |
|
"grad_norm": 3.8100857886583226, |
|
"kl": 203.6710662841797, |
|
"learning_rate": 4.485619594711278e-06, |
|
"logits/chosen": -98739016.0, |
|
"logits/rejected": -57291765.333333336, |
|
"logps/chosen": -229.67535400390625, |
|
"logps/rejected": -275.8813069661458, |
|
"loss": 0.3199, |
|
"rewards/chosen": 2.281548261642456, |
|
"rewards/margins": 7.678904930750529, |
|
"rewards/rejected": -5.397356669108073, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.2188744588744589, |
|
"grad_norm": 4.0430201951719384, |
|
"kl": 237.18043518066406, |
|
"learning_rate": 4.4721378709892475e-06, |
|
"logits/chosen": -67218921.14285715, |
|
"logits/rejected": -48233592.88888889, |
|
"logps/chosen": -234.91176060267858, |
|
"logps/rejected": -176.94509548611111, |
|
"loss": 0.3595, |
|
"rewards/chosen": 0.625354358128139, |
|
"rewards/margins": 5.350802118816073, |
|
"rewards/rejected": -4.725447760687934, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.22164502164502164, |
|
"grad_norm": 3.6673216672550777, |
|
"kl": 213.51605224609375, |
|
"learning_rate": 4.4585025674296315e-06, |
|
"logits/chosen": -84983160.0, |
|
"logits/rejected": -81099253.33333333, |
|
"logps/chosen": -495.0953063964844, |
|
"logps/rejected": -206.34517415364584, |
|
"loss": 0.2647, |
|
"rewards/chosen": 2.066316604614258, |
|
"rewards/margins": 6.826260566711426, |
|
"rewards/rejected": -4.759943962097168, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.22441558441558443, |
|
"grad_norm": 3.6668548319794882, |
|
"kl": 149.62611389160156, |
|
"learning_rate": 4.444714745878936e-06, |
|
"logits/chosen": -53186656.0, |
|
"logits/rejected": -110574705.77777778, |
|
"logps/chosen": -203.04007393973214, |
|
"logps/rejected": -216.20652940538196, |
|
"loss": 0.2927, |
|
"rewards/chosen": -0.9502920423235212, |
|
"rewards/margins": 4.565778853401306, |
|
"rewards/rejected": -5.516070895724827, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.22718614718614719, |
|
"grad_norm": 4.419648414760186, |
|
"kl": 177.2197265625, |
|
"learning_rate": 4.430775480060973e-06, |
|
"logits/chosen": -67670118.4, |
|
"logits/rejected": -32177362.666666668, |
|
"logps/chosen": -336.2021728515625, |
|
"logps/rejected": -181.74137369791666, |
|
"loss": 0.3077, |
|
"rewards/chosen": 2.1282627105712892, |
|
"rewards/margins": 5.788774998982747, |
|
"rewards/rejected": -3.6605122884114585, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.22995670995670997, |
|
"grad_norm": 3.649359422872617, |
|
"kl": 165.82017517089844, |
|
"learning_rate": 4.416685855493246e-06, |
|
"logits/chosen": -34292243.2, |
|
"logits/rejected": -70526666.66666667, |
|
"logps/chosen": -187.4371826171875, |
|
"logps/rejected": -280.88523356119794, |
|
"loss": 0.3408, |
|
"rewards/chosen": 0.32179970741271974, |
|
"rewards/margins": 7.029301436742147, |
|
"rewards/rejected": -6.707501729329427, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.23272727272727273, |
|
"grad_norm": 4.6924596982127635, |
|
"kl": 287.5317687988281, |
|
"learning_rate": 4.4024469694024194e-06, |
|
"logits/chosen": -44100153.6, |
|
"logits/rejected": -76765579.63636364, |
|
"logps/chosen": -231.4888427734375, |
|
"logps/rejected": -254.74940074573863, |
|
"loss": 0.2934, |
|
"rewards/chosen": -4.008915710449219, |
|
"rewards/margins": 1.9597850452769885, |
|
"rewards/rejected": -5.968700755726207, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.2354978354978355, |
|
"grad_norm": 3.103062765938464, |
|
"kl": 158.9697723388672, |
|
"learning_rate": 4.388059930638865e-06, |
|
"logits/chosen": -41523650.666666664, |
|
"logits/rejected": -91830340.92307693, |
|
"logps/chosen": -135.86697387695312, |
|
"logps/rejected": -196.91357421875, |
|
"loss": 0.3093, |
|
"rewards/chosen": -4.111440022786458, |
|
"rewards/margins": -0.3413952558468547, |
|
"rewards/rejected": -3.7700447669396033, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.23826839826839827, |
|
"grad_norm": 3.792643877482403, |
|
"kl": 121.17691802978516, |
|
"learning_rate": 4.373525859590313e-06, |
|
"logits/chosen": -62676300.8, |
|
"logits/rejected": -68556032.0, |
|
"logps/chosen": -301.734521484375, |
|
"logps/rejected": -153.32867431640625, |
|
"loss": 0.2263, |
|
"rewards/chosen": 1.7410552978515625, |
|
"rewards/margins": 6.963522084554036, |
|
"rewards/rejected": -5.222466786702474, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.24103896103896105, |
|
"grad_norm": 4.249459927433365, |
|
"kl": 259.1443176269531, |
|
"learning_rate": 4.358845888094607e-06, |
|
"logits/chosen": -41253852.8, |
|
"logits/rejected": -31193773.333333332, |
|
"logps/chosen": -269.636181640625, |
|
"logps/rejected": -107.13633219401042, |
|
"loss": 0.2666, |
|
"rewards/chosen": 0.29608802795410155, |
|
"rewards/margins": 3.969274012247721, |
|
"rewards/rejected": -3.6731859842936196, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.2438095238095238, |
|
"grad_norm": 4.075168086387706, |
|
"kl": 334.02734375, |
|
"learning_rate": 4.3440211593515556e-06, |
|
"logits/chosen": -50688288.0, |
|
"logits/rejected": -126751398.4, |
|
"logps/chosen": -228.28106689453125, |
|
"logps/rejected": -291.38564453125, |
|
"loss": 0.3756, |
|
"rewards/chosen": -0.46695244312286377, |
|
"rewards/margins": 7.10467689037323, |
|
"rewards/rejected": -7.571629333496094, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.2465800865800866, |
|
"grad_norm": 4.06016767816469, |
|
"kl": 237.378662109375, |
|
"learning_rate": 4.32905282783391e-06, |
|
"logits/chosen": -64327445.333333336, |
|
"logits/rejected": 108107328.0, |
|
"logps/chosen": -239.98824055989584, |
|
"logps/rejected": -162.29722900390624, |
|
"loss": 0.3375, |
|
"rewards/chosen": 0.16190481185913086, |
|
"rewards/margins": 6.292160320281982, |
|
"rewards/rejected": -6.130255508422851, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.24935064935064935, |
|
"grad_norm": 5.14470621992649, |
|
"kl": 298.228515625, |
|
"learning_rate": 4.313942059197457e-06, |
|
"logits/chosen": -20689902.666666668, |
|
"logits/rejected": -37410044.8, |
|
"logps/chosen": -165.82305908203125, |
|
"logps/rejected": -172.006689453125, |
|
"loss": 0.3189, |
|
"rewards/chosen": -1.7497021357218425, |
|
"rewards/margins": 1.8894381841023764, |
|
"rewards/rejected": -3.639140319824219, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.25212121212121213, |
|
"grad_norm": 4.584573892248193, |
|
"kl": 270.75927734375, |
|
"learning_rate": 4.298690030190247e-06, |
|
"logits/chosen": -59297584.0, |
|
"logits/rejected": -47739494.4, |
|
"logps/chosen": -452.8969319661458, |
|
"logps/rejected": -150.6827392578125, |
|
"loss": 0.2776, |
|
"rewards/chosen": 2.8319133122762046, |
|
"rewards/margins": 7.6222434361775715, |
|
"rewards/rejected": -4.790330123901367, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.2548917748917749, |
|
"grad_norm": 4.287301568077891, |
|
"kl": 227.37271118164062, |
|
"learning_rate": 4.283297928560951e-06, |
|
"logits/chosen": -53382579.2, |
|
"logits/rejected": -54202240.0, |
|
"logps/chosen": -175.3722412109375, |
|
"logps/rejected": -285.7312825520833, |
|
"loss": 0.3621, |
|
"rewards/chosen": -0.47195091247558596, |
|
"rewards/margins": 4.055190658569336, |
|
"rewards/rejected": -4.527141571044922, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.25766233766233765, |
|
"grad_norm": 4.220829228867934, |
|
"kl": 398.3648681640625, |
|
"learning_rate": 4.267766952966369e-06, |
|
"logits/chosen": -63718946.90909091, |
|
"logits/rejected": -42242217.6, |
|
"logps/chosen": -335.96311257102275, |
|
"logps/rejected": -149.02587890625, |
|
"loss": 0.3372, |
|
"rewards/chosen": 0.7265178507024591, |
|
"rewards/margins": 3.0149881189519707, |
|
"rewards/rejected": -2.2884702682495117, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.26043290043290046, |
|
"grad_norm": 4.358702143381746, |
|
"kl": 264.9898681640625, |
|
"learning_rate": 4.252098312878083e-06, |
|
"logits/chosen": -79301941.33333333, |
|
"logits/rejected": -43912708.0, |
|
"logps/chosen": -302.7877604166667, |
|
"logps/rejected": -173.28231811523438, |
|
"loss": 0.3621, |
|
"rewards/chosen": 0.2944912115732829, |
|
"rewards/margins": 3.817267100016276, |
|
"rewards/rejected": -3.522775888442993, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.2632034632034632, |
|
"grad_norm": 4.254924656611504, |
|
"kl": 330.5307922363281, |
|
"learning_rate": 4.236293228488267e-06, |
|
"logits/chosen": -67833432.0, |
|
"logits/rejected": -78841848.0, |
|
"logps/chosen": -237.7548065185547, |
|
"logps/rejected": -250.9872589111328, |
|
"loss": 0.3272, |
|
"rewards/chosen": 0.7279956340789795, |
|
"rewards/margins": 5.013963460922241, |
|
"rewards/rejected": -4.285967826843262, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.265974025974026, |
|
"grad_norm": 4.712066217533106, |
|
"kl": 237.3966522216797, |
|
"learning_rate": 4.220352930614672e-06, |
|
"logits/chosen": -83831668.36363636, |
|
"logits/rejected": -39652540.8, |
|
"logps/chosen": -279.5663396661932, |
|
"logps/rejected": -141.55482177734376, |
|
"loss": 0.337, |
|
"rewards/chosen": 1.9667802290482954, |
|
"rewards/margins": 5.22833303971724, |
|
"rewards/rejected": -3.2615528106689453, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.26874458874458873, |
|
"grad_norm": 3.756148033952096, |
|
"kl": 363.8216247558594, |
|
"learning_rate": 4.204278660604767e-06, |
|
"logits/chosen": -42908544.0, |
|
"logits/rejected": -70756288.0, |
|
"logps/chosen": -437.5492350260417, |
|
"logps/rejected": -170.53684645432693, |
|
"loss": 0.3529, |
|
"rewards/chosen": 3.5399929682413735, |
|
"rewards/margins": 9.0660737600082, |
|
"rewards/rejected": -5.526080791766827, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.27151515151515154, |
|
"grad_norm": 4.179831214257625, |
|
"kl": 269.4178466796875, |
|
"learning_rate": 4.1880716702390764e-06, |
|
"logits/chosen": -70139897.6, |
|
"logits/rejected": -164937994.66666666, |
|
"logps/chosen": -314.740185546875, |
|
"logps/rejected": -187.75248209635416, |
|
"loss": 0.2662, |
|
"rewards/chosen": 1.2780502319335938, |
|
"rewards/margins": 2.9210824648539226, |
|
"rewards/rejected": -1.6430322329203289, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.2742857142857143, |
|
"grad_norm": 4.691897896464608, |
|
"kl": 446.2198486328125, |
|
"learning_rate": 4.171733221633695e-06, |
|
"logits/chosen": -42673925.333333336, |
|
"logits/rejected": -22087344.0, |
|
"logps/chosen": -203.5780029296875, |
|
"logps/rejected": -194.28848266601562, |
|
"loss": 0.3054, |
|
"rewards/chosen": -0.28914421796798706, |
|
"rewards/margins": 3.8305423855781555, |
|
"rewards/rejected": -4.119686603546143, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.27705627705627706, |
|
"grad_norm": 4.964944191060874, |
|
"kl": 455.47906494140625, |
|
"learning_rate": 4.155264587142002e-06, |
|
"logits/chosen": -65384675.55555555, |
|
"logits/rejected": -41647456.0, |
|
"logps/chosen": -266.32972547743054, |
|
"logps/rejected": -211.07502092633928, |
|
"loss": 0.3833, |
|
"rewards/chosen": 0.7655517260233561, |
|
"rewards/margins": 3.0700247855413525, |
|
"rewards/rejected": -2.3044730595179965, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2798268398268398, |
|
"grad_norm": 3.8792329085023534, |
|
"kl": 308.05084228515625, |
|
"learning_rate": 4.138667049255574e-06, |
|
"logits/chosen": -21252804.8, |
|
"logits/rejected": -42198394.18181818, |
|
"logps/chosen": -239.7299072265625, |
|
"logps/rejected": -214.3200017755682, |
|
"loss": 0.3334, |
|
"rewards/chosen": 2.0222194671630858, |
|
"rewards/margins": 7.540434993397105, |
|
"rewards/rejected": -5.51821552623402, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.2825974025974026, |
|
"grad_norm": 3.9057066201741293, |
|
"kl": 234.1949462890625, |
|
"learning_rate": 4.121941900504316e-06, |
|
"logits/chosen": -54799564.0, |
|
"logits/rejected": -59120932.0, |
|
"logps/chosen": -195.52490234375, |
|
"logps/rejected": -255.46969604492188, |
|
"loss": 0.323, |
|
"rewards/chosen": -2.307999610900879, |
|
"rewards/margins": 5.062156677246094, |
|
"rewards/rejected": -7.370156288146973, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.2853679653679654, |
|
"grad_norm": 3.4089811236807392, |
|
"kl": 329.79876708984375, |
|
"learning_rate": 4.105090443355801e-06, |
|
"logits/chosen": -71939761.77777778, |
|
"logits/rejected": -86891346.28571428, |
|
"logps/chosen": -248.69297960069446, |
|
"logps/rejected": -295.9298618861607, |
|
"loss": 0.3506, |
|
"rewards/chosen": -2.3155670166015625, |
|
"rewards/margins": 4.571990966796875, |
|
"rewards/rejected": -6.8875579833984375, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.28813852813852814, |
|
"grad_norm": 3.896313964551072, |
|
"kl": 173.1775665283203, |
|
"learning_rate": 4.088113990113846e-06, |
|
"logits/chosen": -39651421.333333336, |
|
"logits/rejected": -83678412.8, |
|
"logps/chosen": -153.03709920247397, |
|
"logps/rejected": -227.3532470703125, |
|
"loss": 0.3179, |
|
"rewards/chosen": 0.045263449350992836, |
|
"rewards/margins": 5.96420914332072, |
|
"rewards/rejected": -5.918945693969727, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.2909090909090909, |
|
"grad_norm": 4.60023712492313, |
|
"kl": 248.22030639648438, |
|
"learning_rate": 4.071013862816311e-06, |
|
"logits/chosen": -33611790.222222224, |
|
"logits/rejected": -75680219.42857143, |
|
"logps/chosen": -215.91140407986111, |
|
"logps/rejected": -202.12782505580358, |
|
"loss": 0.2801, |
|
"rewards/chosen": -3.120152791341146, |
|
"rewards/margins": 4.2319252377464665, |
|
"rewards/rejected": -7.352078029087612, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2936796536796537, |
|
"grad_norm": 3.278250351800608, |
|
"kl": 256.5851745605469, |
|
"learning_rate": 4.0537913931321495e-06, |
|
"logits/chosen": -61538093.71428572, |
|
"logits/rejected": -32800387.555555556, |
|
"logps/chosen": -244.89592633928572, |
|
"logps/rejected": -191.21028645833334, |
|
"loss": 0.3477, |
|
"rewards/chosen": 0.4793204239436558, |
|
"rewards/margins": 2.215007361911592, |
|
"rewards/rejected": -1.7356869379679363, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.29645021645021646, |
|
"grad_norm": 3.346665787200658, |
|
"kl": 281.5144348144531, |
|
"learning_rate": 4.036447922257699e-06, |
|
"logits/chosen": -63766696.0, |
|
"logits/rejected": -166489008.0, |
|
"logps/chosen": -335.6734924316406, |
|
"logps/rejected": -220.5155792236328, |
|
"loss": 0.291, |
|
"rewards/chosen": 3.1485161781311035, |
|
"rewards/margins": 8.604130268096924, |
|
"rewards/rejected": -5.45561408996582, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.2992207792207792, |
|
"grad_norm": 3.9845639945750104, |
|
"kl": 216.4145050048828, |
|
"learning_rate": 4.018984800812248e-06, |
|
"logits/chosen": -61535150.54545455, |
|
"logits/rejected": -26694153.6, |
|
"logps/chosen": -263.3902476917614, |
|
"logps/rejected": -140.8936767578125, |
|
"loss": 0.3798, |
|
"rewards/chosen": -2.7628853537819604, |
|
"rewards/margins": -0.11502139351584706, |
|
"rewards/rejected": -2.6478639602661134, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2992207792207792, |
|
"eval_logits/chosen": -63003302.4, |
|
"eval_logits/rejected": -31597484.137931034, |
|
"eval_logps/chosen": -343.75305989583336, |
|
"eval_logps/rejected": -217.80376144935346, |
|
"eval_loss": 0.3185268044471741, |
|
"eval_rewards/chosen": 1.5573221842447917, |
|
"eval_rewards/margins": 7.265413130836926, |
|
"eval_rewards/rejected": -5.708090946592134, |
|
"eval_runtime": 375.7756, |
|
"eval_samples_per_second": 1.243, |
|
"eval_steps_per_second": 0.157, |
|
"kl": 955.4994506835938, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.301991341991342, |
|
"grad_norm": 2.6213775523227443, |
|
"kl": 135.88255310058594, |
|
"learning_rate": 4.001403388732842e-06, |
|
"logits/chosen": -44565760.0, |
|
"logits/rejected": -44044765.09090909, |
|
"logps/chosen": -208.237841796875, |
|
"logps/rejected": -185.643310546875, |
|
"loss": 0.2488, |
|
"rewards/chosen": -2.5330881118774413, |
|
"rewards/margins": 3.4011062795465645, |
|
"rewards/rejected": -5.934194391424006, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.3047619047619048, |
|
"grad_norm": 3.884074940842948, |
|
"kl": 172.53640747070312, |
|
"learning_rate": 3.983705055168391e-06, |
|
"logits/chosen": -60829255.11111111, |
|
"logits/rejected": -39536137.14285714, |
|
"logps/chosen": -338.5453287760417, |
|
"logps/rejected": -126.38860212053571, |
|
"loss": 0.329, |
|
"rewards/chosen": 3.6447732713487415, |
|
"rewards/margins": 5.23764221251957, |
|
"rewards/rejected": -1.5928689411708288, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.30753246753246755, |
|
"grad_norm": 3.1610714574942866, |
|
"kl": 194.14366149902344, |
|
"learning_rate": 3.965891178373038e-06, |
|
"logits/chosen": -60445340.44444445, |
|
"logits/rejected": -173474523.42857143, |
|
"logps/chosen": -347.3582356770833, |
|
"logps/rejected": -189.652587890625, |
|
"loss": 0.3257, |
|
"rewards/chosen": 2.0447090996636286, |
|
"rewards/margins": 3.7555268454173256, |
|
"rewards/rejected": -1.710817745753697, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.3103030303030303, |
|
"grad_norm": 4.309649858922247, |
|
"kl": 251.73069763183594, |
|
"learning_rate": 3.947963145598833e-06, |
|
"logits/chosen": -51157600.0, |
|
"logits/rejected": -30458390.85714286, |
|
"logps/chosen": -367.60199652777777, |
|
"logps/rejected": -172.21843610491072, |
|
"loss": 0.3487, |
|
"rewards/chosen": -1.202609380086263, |
|
"rewards/margins": 2.800561723254976, |
|
"rewards/rejected": -4.003171103341239, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.31307359307359306, |
|
"grad_norm": 3.682730545477106, |
|
"kl": 298.24200439453125, |
|
"learning_rate": 3.929922352987702e-06, |
|
"logits/chosen": -75188368.0, |
|
"logits/rejected": -38451584.0, |
|
"logps/chosen": -226.4161580403646, |
|
"logps/rejected": -158.42171630859374, |
|
"loss": 0.3574, |
|
"rewards/chosen": 3.0447769165039062, |
|
"rewards/margins": 5.327561569213867, |
|
"rewards/rejected": -2.282784652709961, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.31584415584415587, |
|
"grad_norm": 3.8491174271088955, |
|
"kl": 233.29315185546875, |
|
"learning_rate": 3.911770205462717e-06, |
|
"logits/chosen": -36310968.88888889, |
|
"logits/rejected": -142471716.57142857, |
|
"logps/chosen": -199.12166341145834, |
|
"logps/rejected": -214.38889857700892, |
|
"loss": 0.3359, |
|
"rewards/chosen": -4.300041198730469, |
|
"rewards/margins": 0.7065179007393976, |
|
"rewards/rejected": -5.006559099469866, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.31861471861471863, |
|
"grad_norm": 3.301544661387389, |
|
"kl": 165.22003173828125, |
|
"learning_rate": 3.8935081166186935e-06, |
|
"logits/chosen": -64191436.8, |
|
"logits/rejected": -44381066.666666664, |
|
"logps/chosen": -166.80452880859374, |
|
"logps/rejected": -209.62703450520834, |
|
"loss": 0.3044, |
|
"rewards/chosen": -3.046977233886719, |
|
"rewards/margins": 1.0728163401285808, |
|
"rewards/rejected": -4.1197935740153, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.3213852813852814, |
|
"grad_norm": 3.8281271473973297, |
|
"kl": 377.83355712890625, |
|
"learning_rate": 3.875137508612104e-06, |
|
"logits/chosen": -35862681.6, |
|
"logits/rejected": -123204629.33333333, |
|
"logps/chosen": -174.58330078125, |
|
"logps/rejected": -278.10194905598956, |
|
"loss": 0.3275, |
|
"rewards/chosen": 0.8950153350830078, |
|
"rewards/margins": 5.497071202596028, |
|
"rewards/rejected": -4.6020558675130205, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.32415584415584414, |
|
"grad_norm": 4.141911435738275, |
|
"kl": 301.08831787109375, |
|
"learning_rate": 3.856659812050328e-06, |
|
"logits/chosen": -68912918.85714285, |
|
"logits/rejected": -45419395.55555555, |
|
"logps/chosen": -278.12374441964283, |
|
"logps/rejected": -215.42805989583334, |
|
"loss": 0.3665, |
|
"rewards/chosen": -2.4060380118233815, |
|
"rewards/margins": 1.0650618416922435, |
|
"rewards/rejected": -3.471099853515625, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.32692640692640695, |
|
"grad_norm": 3.9655628188506165, |
|
"kl": 279.1497802734375, |
|
"learning_rate": 3.838076465880248e-06, |
|
"logits/chosen": -91121265.77777778, |
|
"logits/rejected": -51099254.85714286, |
|
"logps/chosen": -402.9201388888889, |
|
"logps/rejected": -193.41775948660714, |
|
"loss": 0.3269, |
|
"rewards/chosen": 3.7855517069498696, |
|
"rewards/margins": 7.851486296880813, |
|
"rewards/rejected": -4.065934589930943, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.3296969696969697, |
|
"grad_norm": 5.104200711024875, |
|
"kl": 317.8056640625, |
|
"learning_rate": 3.819388917276186e-06, |
|
"logits/chosen": -57592173.71428572, |
|
"logits/rejected": -96817816.0, |
|
"logps/chosen": -305.020751953125, |
|
"logps/rejected": -300.74017333984375, |
|
"loss": 0.3944, |
|
"rewards/chosen": 0.34333719526018414, |
|
"rewards/margins": 7.173882620675223, |
|
"rewards/rejected": -6.830545425415039, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.33246753246753247, |
|
"grad_norm": 4.431842355084121, |
|
"kl": 320.4718017578125, |
|
"learning_rate": 3.8005986215272056e-06, |
|
"logits/chosen": -69471952.0, |
|
"logits/rejected": -88022208.0, |
|
"logps/chosen": -294.1892395019531, |
|
"logps/rejected": -163.56695556640625, |
|
"loss": 0.3068, |
|
"rewards/chosen": -1.5611099004745483, |
|
"rewards/margins": 1.6267324686050415, |
|
"rewards/rejected": -3.18784236907959, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3352380952380952, |
|
"grad_norm": 3.938866395899551, |
|
"kl": 348.1075439453125, |
|
"learning_rate": 3.7817070419237866e-06, |
|
"logits/chosen": -45101536.0, |
|
"logits/rejected": -64612682.666666664, |
|
"logps/chosen": -181.74100341796876, |
|
"logps/rejected": -277.8824055989583, |
|
"loss": 0.3559, |
|
"rewards/chosen": -3.265227508544922, |
|
"rewards/margins": 2.3906384785970047, |
|
"rewards/rejected": -5.655865987141927, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.33800865800865804, |
|
"grad_norm": 4.385482507170866, |
|
"kl": 215.3175506591797, |
|
"learning_rate": 3.7627156496438686e-06, |
|
"logits/chosen": -59783303.11111111, |
|
"logits/rejected": -68396731.42857143, |
|
"logps/chosen": -289.76850043402777, |
|
"logps/rejected": -293.2662876674107, |
|
"loss": 0.2996, |
|
"rewards/chosen": 0.33743249045477974, |
|
"rewards/margins": 7.059532884567503, |
|
"rewards/rejected": -6.722100394112723, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.3407792207792208, |
|
"grad_norm": 3.0031329772352704, |
|
"kl": 279.9674987792969, |
|
"learning_rate": 3.7436259236382797e-06, |
|
"logits/chosen": -52341881.6, |
|
"logits/rejected": -52982293.333333336, |
|
"logps/chosen": -345.33056640625, |
|
"logps/rejected": -214.87158203125, |
|
"loss": 0.3117, |
|
"rewards/chosen": 2.501374435424805, |
|
"rewards/margins": 6.737306785583496, |
|
"rewards/rejected": -4.235932350158691, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.34354978354978355, |
|
"grad_norm": 3.828989254174337, |
|
"kl": 322.56781005859375, |
|
"learning_rate": 3.7244393505155713e-06, |
|
"logits/chosen": -55003168.0, |
|
"logits/rejected": -38429700.571428575, |
|
"logps/chosen": -319.0847439236111, |
|
"logps/rejected": -220.10914829799108, |
|
"loss": 0.3261, |
|
"rewards/chosen": -4.397332509358724, |
|
"rewards/margins": 3.908439454578219, |
|
"rewards/rejected": -8.305771963936943, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.3463203463203463, |
|
"grad_norm": 3.3716001758196614, |
|
"kl": 175.434326171875, |
|
"learning_rate": 3.7051574244262412e-06, |
|
"logits/chosen": -63195132.0, |
|
"logits/rejected": -43530908.0, |
|
"logps/chosen": -322.2129821777344, |
|
"logps/rejected": -238.111572265625, |
|
"loss": 0.2878, |
|
"rewards/chosen": 3.3598997592926025, |
|
"rewards/margins": 11.616735696792603, |
|
"rewards/rejected": -8.2568359375, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3490909090909091, |
|
"grad_norm": 3.520834555125108, |
|
"kl": 336.7948303222656, |
|
"learning_rate": 3.6857816469463806e-06, |
|
"logits/chosen": -45862705.777777776, |
|
"logits/rejected": -61132077.71428572, |
|
"logps/chosen": -182.07706705729166, |
|
"logps/rejected": -235.33091517857142, |
|
"loss": 0.3024, |
|
"rewards/chosen": -0.025241321987575956, |
|
"rewards/margins": 6.8694152680654375, |
|
"rewards/rejected": -6.894656590053013, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.3518614718614719, |
|
"grad_norm": 3.6860559156292867, |
|
"kl": 245.18157958984375, |
|
"learning_rate": 3.6663135269607413e-06, |
|
"logits/chosen": -54326741.333333336, |
|
"logits/rejected": -32279739.42857143, |
|
"logps/chosen": -279.1733669704861, |
|
"logps/rejected": -167.6710205078125, |
|
"loss": 0.2751, |
|
"rewards/chosen": -0.2571737501356337, |
|
"rewards/margins": 3.168460906498016, |
|
"rewards/rejected": -3.4256346566336497, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.35463203463203463, |
|
"grad_norm": 3.557862256362831, |
|
"kl": 276.58953857421875, |
|
"learning_rate": 3.6467545805452266e-06, |
|
"logits/chosen": -58534702.54545455, |
|
"logits/rejected": -44616534.4, |
|
"logps/chosen": -288.982666015625, |
|
"logps/rejected": -202.448095703125, |
|
"loss": 0.3473, |
|
"rewards/chosen": 1.2414755387739702, |
|
"rewards/margins": 3.335013250871138, |
|
"rewards/rejected": -2.093537712097168, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.3574025974025974, |
|
"grad_norm": 3.2589691814781454, |
|
"kl": 309.430419921875, |
|
"learning_rate": 3.6271063308488298e-06, |
|
"logits/chosen": -50912728.0, |
|
"logits/rejected": -39484352.0, |
|
"logps/chosen": -172.81301879882812, |
|
"logps/rejected": -150.67156982421875, |
|
"loss": 0.2654, |
|
"rewards/chosen": -0.07838886976242065, |
|
"rewards/margins": 5.625042259693146, |
|
"rewards/rejected": -5.703431129455566, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.3601731601731602, |
|
"grad_norm": 4.37989836230213, |
|
"kl": 288.1458740234375, |
|
"learning_rate": 3.6073703079750204e-06, |
|
"logits/chosen": -86152466.28571428, |
|
"logits/rejected": -94912405.33333333, |
|
"logps/chosen": -428.32386997767856, |
|
"logps/rejected": -225.74245876736111, |
|
"loss": 0.2825, |
|
"rewards/chosen": 6.9850311279296875, |
|
"rewards/margins": 13.19218275282118, |
|
"rewards/rejected": -6.207151624891493, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36294372294372296, |
|
"grad_norm": 2.943643589164811, |
|
"kl": 219.82888793945312, |
|
"learning_rate": 3.5875480488625847e-06, |
|
"logits/chosen": -61464201.14285714, |
|
"logits/rejected": -75437624.8888889, |
|
"logps/chosen": -230.37840053013392, |
|
"logps/rejected": -146.04918077256946, |
|
"loss": 0.3034, |
|
"rewards/chosen": -2.431492396763393, |
|
"rewards/margins": 2.6535274566165987, |
|
"rewards/rejected": -5.0850198533799915, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.3657142857142857, |
|
"grad_norm": 2.5894854276111055, |
|
"kl": 137.6978759765625, |
|
"learning_rate": 3.5676410971659404e-06, |
|
"logits/chosen": -48515029.333333336, |
|
"logits/rejected": -53117555.2, |
|
"logps/chosen": -259.1162923177083, |
|
"logps/rejected": -156.87340087890624, |
|
"loss": 0.3241, |
|
"rewards/chosen": -2.674201329549154, |
|
"rewards/margins": 3.1144594828287757, |
|
"rewards/rejected": -5.7886608123779295, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.36848484848484847, |
|
"grad_norm": 3.4611805226691934, |
|
"kl": 284.0462646484375, |
|
"learning_rate": 3.547651003134921e-06, |
|
"logits/chosen": -50902294.4, |
|
"logits/rejected": -29886968.0, |
|
"logps/chosen": -316.385986328125, |
|
"logps/rejected": -143.97941080729166, |
|
"loss": 0.2921, |
|
"rewards/chosen": 1.024208927154541, |
|
"rewards/margins": 4.237102095286051, |
|
"rewards/rejected": -3.2128931681315103, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.3712554112554113, |
|
"grad_norm": 3.4173731166240695, |
|
"kl": 223.8057098388672, |
|
"learning_rate": 3.527579323494055e-06, |
|
"logits/chosen": -55733544.0, |
|
"logits/rejected": -58133440.0, |
|
"logps/chosen": -282.2908935546875, |
|
"logps/rejected": -204.7510223388672, |
|
"loss": 0.2934, |
|
"rewards/chosen": 2.8473093509674072, |
|
"rewards/margins": 9.089063882827759, |
|
"rewards/rejected": -6.241754531860352, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.37402597402597404, |
|
"grad_norm": 4.5075933419621395, |
|
"kl": 226.37472534179688, |
|
"learning_rate": 3.507427621321331e-06, |
|
"logits/chosen": -44690326.85714286, |
|
"logits/rejected": -40422179.55555555, |
|
"logps/chosen": -197.55255998883928, |
|
"logps/rejected": -173.03732638888889, |
|
"loss": 0.2774, |
|
"rewards/chosen": 0.318291323525565, |
|
"rewards/margins": 3.7029942179483077, |
|
"rewards/rejected": -3.384702894422743, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.3767965367965368, |
|
"grad_norm": 2.9825634868618396, |
|
"kl": 225.9692840576172, |
|
"learning_rate": 3.4871974659264786e-06, |
|
"logits/chosen": -103750326.85714285, |
|
"logits/rejected": -36922702.222222224, |
|
"logps/chosen": -330.4085170200893, |
|
"logps/rejected": -195.47115071614584, |
|
"loss": 0.3326, |
|
"rewards/chosen": 0.9742650985717773, |
|
"rewards/margins": 7.700783517625597, |
|
"rewards/rejected": -6.72651841905382, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.37956709956709955, |
|
"grad_norm": 3.4440138918363106, |
|
"kl": 208.99081420898438, |
|
"learning_rate": 3.466890432728754e-06, |
|
"logits/chosen": -44583936.0, |
|
"logits/rejected": -61040928.0, |
|
"logps/chosen": -263.41937255859375, |
|
"logps/rejected": -275.160888671875, |
|
"loss": 0.3081, |
|
"rewards/chosen": 0.680980920791626, |
|
"rewards/margins": 7.2455079555511475, |
|
"rewards/rejected": -6.5645270347595215, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.38233766233766237, |
|
"grad_norm": 4.447615914301488, |
|
"kl": 231.37353515625, |
|
"learning_rate": 3.446508103134259e-06, |
|
"logits/chosen": -56082509.71428572, |
|
"logits/rejected": -36391459.55555555, |
|
"logps/chosen": -301.94363839285717, |
|
"logps/rejected": -130.56571451822916, |
|
"loss": 0.3302, |
|
"rewards/chosen": 0.23018741607666016, |
|
"rewards/margins": 5.488740815056695, |
|
"rewards/rejected": -5.258553398980035, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.3851082251082251, |
|
"grad_norm": 3.227144245083403, |
|
"kl": 110.66608428955078, |
|
"learning_rate": 3.426052064412785e-06, |
|
"logits/chosen": -61858517.333333336, |
|
"logits/rejected": -84151936.0, |
|
"logps/chosen": -326.2563883463542, |
|
"logps/rejected": -106.5555419921875, |
|
"loss": 0.349, |
|
"rewards/chosen": -1.6193631490071614, |
|
"rewards/margins": 1.0518550078074138, |
|
"rewards/rejected": -2.671218156814575, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3878787878787879, |
|
"grad_norm": 2.962046268391638, |
|
"kl": 226.14247131347656, |
|
"learning_rate": 3.4055239095742067e-06, |
|
"logits/chosen": -31455502.222222224, |
|
"logits/rejected": -69333769.14285715, |
|
"logps/chosen": -165.690185546875, |
|
"logps/rejected": -210.72841099330358, |
|
"loss": 0.2861, |
|
"rewards/chosen": -4.098465389675564, |
|
"rewards/margins": 1.3641238742404518, |
|
"rewards/rejected": -5.462589263916016, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39064935064935064, |
|
"grad_norm": 3.437549771241675, |
|
"kl": 123.592529296875, |
|
"learning_rate": 3.3849252372444295e-06, |
|
"logits/chosen": -76042393.6, |
|
"logits/rejected": -40568626.666666664, |
|
"logps/chosen": -320.3619140625, |
|
"logps/rejected": -211.34452311197916, |
|
"loss": 0.3302, |
|
"rewards/chosen": 0.19550623893737792, |
|
"rewards/margins": 6.736018323898316, |
|
"rewards/rejected": -6.5405120849609375, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.39341991341991345, |
|
"grad_norm": 3.8510252801665685, |
|
"kl": 131.01315307617188, |
|
"learning_rate": 3.364257651540891e-06, |
|
"logits/chosen": -62763580.0, |
|
"logits/rejected": -93990400.0, |
|
"logps/chosen": -323.82330322265625, |
|
"logps/rejected": -222.23504638671875, |
|
"loss": 0.3731, |
|
"rewards/chosen": -1.7098395824432373, |
|
"rewards/margins": 6.021216630935669, |
|
"rewards/rejected": -7.731056213378906, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.3961904761904762, |
|
"grad_norm": 3.127952716350436, |
|
"kl": 188.2672119140625, |
|
"learning_rate": 3.343522761947646e-06, |
|
"logits/chosen": -48637932.8, |
|
"logits/rejected": -127404981.33333333, |
|
"logps/chosen": -202.50628662109375, |
|
"logps/rejected": -183.98612467447916, |
|
"loss": 0.2592, |
|
"rewards/chosen": -1.46456880569458, |
|
"rewards/margins": 6.765842723846435, |
|
"rewards/rejected": -8.230411529541016, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.39896103896103896, |
|
"grad_norm": 3.686618621599935, |
|
"kl": 201.80601501464844, |
|
"learning_rate": 3.322722183190025e-06, |
|
"logits/chosen": -69373568.0, |
|
"logits/rejected": -74651648.0, |
|
"logps/chosen": -295.803564453125, |
|
"logps/rejected": -173.57267252604166, |
|
"loss": 0.3755, |
|
"rewards/chosen": -1.9531005859375, |
|
"rewards/margins": 4.711961364746093, |
|
"rewards/rejected": -6.665061950683594, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.39896103896103896, |
|
"eval_logits/chosen": -63924428.8, |
|
"eval_logits/rejected": -27711788.137931034, |
|
"eval_logps/chosen": -350.41809895833336, |
|
"eval_logps/rejected": -229.70920325969828, |
|
"eval_loss": 0.30155444145202637, |
|
"eval_rewards/chosen": 0.8908199310302735, |
|
"eval_rewards/margins": 7.7894547429578065, |
|
"eval_rewards/rejected": -6.898634811927533, |
|
"eval_runtime": 375.8146, |
|
"eval_samples_per_second": 1.243, |
|
"eval_steps_per_second": 0.157, |
|
"kl": 705.8950805664062, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.4017316017316017, |
|
"grad_norm": 3.4461177641035965, |
|
"kl": 185.58932495117188, |
|
"learning_rate": 3.3018575351088894e-06, |
|
"logits/chosen": -48496684.8, |
|
"logits/rejected": -53352704.0, |
|
"logps/chosen": -207.49033203125, |
|
"logps/rejected": -178.48878062855113, |
|
"loss": 0.3813, |
|
"rewards/chosen": -6.067684555053711, |
|
"rewards/margins": -0.9541293404319076, |
|
"rewards/rejected": -5.113555214621804, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.40450216450216453, |
|
"grad_norm": 2.9521823101965525, |
|
"kl": 143.91859436035156, |
|
"learning_rate": 3.280930442534486e-06, |
|
"logits/chosen": -52825682.28571428, |
|
"logits/rejected": -66124664.88888889, |
|
"logps/chosen": -161.79621233258928, |
|
"logps/rejected": -206.44997829861111, |
|
"loss": 0.3756, |
|
"rewards/chosen": -3.0689896174839566, |
|
"rewards/margins": -1.2850553421747117, |
|
"rewards/rejected": -1.7839342753092449, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.4072727272727273, |
|
"grad_norm": 4.456496036133048, |
|
"kl": 124.10264587402344, |
|
"learning_rate": 3.2599425351599136e-06, |
|
"logits/chosen": -57379889.777777776, |
|
"logits/rejected": -68506422.85714285, |
|
"logps/chosen": -192.18386501736111, |
|
"logps/rejected": -231.92295619419642, |
|
"loss": 0.3121, |
|
"rewards/chosen": -1.923858854505751, |
|
"rewards/margins": 4.669862807743133, |
|
"rewards/rejected": -6.593721662248884, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.41004329004329004, |
|
"grad_norm": 3.6744852920431432, |
|
"kl": 282.42156982421875, |
|
"learning_rate": 3.238895447414211e-06, |
|
"logits/chosen": -50177578.666666664, |
|
"logits/rejected": -54207213.71428572, |
|
"logps/chosen": -243.22791883680554, |
|
"logps/rejected": -223.31515066964286, |
|
"loss": 0.3778, |
|
"rewards/chosen": 3.0299839443630643, |
|
"rewards/margins": 5.608355809771826, |
|
"rewards/rejected": -2.5783718654087613, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.4128138528138528, |
|
"grad_norm": 3.6093960850994122, |
|
"kl": 193.4736328125, |
|
"learning_rate": 3.217790818335077e-06, |
|
"logits/chosen": -41057161.6, |
|
"logits/rejected": -60332170.666666664, |
|
"logps/chosen": -195.6895263671875, |
|
"logps/rejected": -261.66713460286456, |
|
"loss": 0.3093, |
|
"rewards/chosen": -3.0542125701904297, |
|
"rewards/margins": 4.613312403361003, |
|
"rewards/rejected": -7.667524973551433, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.4155844155844156, |
|
"grad_norm": 2.9227143339919324, |
|
"kl": 328.6368103027344, |
|
"learning_rate": 3.196630291441231e-06, |
|
"logits/chosen": -46167304.0, |
|
"logits/rejected": -39402808.0, |
|
"logps/chosen": -200.86553955078125, |
|
"logps/rejected": -197.71485900878906, |
|
"loss": 0.3118, |
|
"rewards/chosen": 0.24297916889190674, |
|
"rewards/margins": 4.419718384742737, |
|
"rewards/rejected": -4.17673921585083, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.41835497835497837, |
|
"grad_norm": 3.5809278836703617, |
|
"kl": 287.126708984375, |
|
"learning_rate": 3.175415514604422e-06, |
|
"logits/chosen": -82066352.0, |
|
"logits/rejected": -65108904.0, |
|
"logps/chosen": -405.883056640625, |
|
"logps/rejected": -179.24244689941406, |
|
"loss": 0.3376, |
|
"rewards/chosen": 3.3306524753570557, |
|
"rewards/margins": 8.936949968338013, |
|
"rewards/rejected": -5.606297492980957, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.4211255411255411, |
|
"grad_norm": 4.070395809564026, |
|
"kl": 196.88804626464844, |
|
"learning_rate": 3.154148139921102e-06, |
|
"logits/chosen": -44551419.428571425, |
|
"logits/rejected": -40765962.666666664, |
|
"logps/chosen": -169.73423549107142, |
|
"logps/rejected": -266.4632161458333, |
|
"loss": 0.259, |
|
"rewards/chosen": -1.6945086887904577, |
|
"rewards/margins": 6.6088563828241265, |
|
"rewards/rejected": -8.303365071614584, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.4238961038961039, |
|
"grad_norm": 4.302061820298429, |
|
"kl": 134.61264038085938, |
|
"learning_rate": 3.132829823583771e-06, |
|
"logits/chosen": -67628433.45454545, |
|
"logits/rejected": -65108883.2, |
|
"logps/chosen": -268.54106001420456, |
|
"logps/rejected": -281.208203125, |
|
"loss": 0.3274, |
|
"rewards/chosen": -1.7147686698219993, |
|
"rewards/margins": 4.980678107521751, |
|
"rewards/rejected": -6.69544677734375, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 3.639837464165828, |
|
"kl": 190.67416381835938, |
|
"learning_rate": 3.1114622257520004e-06, |
|
"logits/chosen": -50159235.2, |
|
"logits/rejected": -213066624.0, |
|
"logps/chosen": -301.3509521484375, |
|
"logps/rejected": -239.82194010416666, |
|
"loss": 0.3034, |
|
"rewards/chosen": 0.7694726943969726, |
|
"rewards/margins": 7.645719464619954, |
|
"rewards/rejected": -6.8762467702229815, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.42943722943722945, |
|
"grad_norm": 3.7172883552273768, |
|
"kl": 285.25189208984375, |
|
"learning_rate": 3.0900470104231456e-06, |
|
"logits/chosen": -46441233.777777776, |
|
"logits/rejected": -26665755.42857143, |
|
"logps/chosen": -325.4695638020833, |
|
"logps/rejected": -101.84296526227679, |
|
"loss": 0.3404, |
|
"rewards/chosen": 0.1546611785888672, |
|
"rewards/margins": 4.439751216343471, |
|
"rewards/rejected": -4.285090037754604, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.4322077922077922, |
|
"grad_norm": 3.312339719329392, |
|
"kl": 285.44769287109375, |
|
"learning_rate": 3.0685858453027668e-06, |
|
"logits/chosen": -49904411.428571425, |
|
"logits/rejected": -44058030.222222224, |
|
"logps/chosen": -140.95223563058036, |
|
"logps/rejected": -162.76173231336804, |
|
"loss": 0.2813, |
|
"rewards/chosen": -1.3947293417794364, |
|
"rewards/margins": 4.468493416195824, |
|
"rewards/rejected": -5.863222757975261, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.43497835497835496, |
|
"grad_norm": 3.479887405954686, |
|
"kl": 270.1009521484375, |
|
"learning_rate": 3.047080401674754e-06, |
|
"logits/chosen": -51652922.18181818, |
|
"logits/rejected": -61749913.6, |
|
"logps/chosen": -276.02401455965907, |
|
"logps/rejected": -134.616015625, |
|
"loss": 0.3498, |
|
"rewards/chosen": 1.614875620061701, |
|
"rewards/margins": 8.703042429143732, |
|
"rewards/rejected": -7.088166809082031, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.4377489177489178, |
|
"grad_norm": 4.077398185150515, |
|
"kl": 330.6680908203125, |
|
"learning_rate": 3.0255323542711784e-06, |
|
"logits/chosen": -60507984.0, |
|
"logits/rejected": -11630900.0, |
|
"logps/chosen": -268.957275390625, |
|
"logps/rejected": -127.5375747680664, |
|
"loss": 0.2849, |
|
"rewards/chosen": 2.3366661071777344, |
|
"rewards/margins": 7.807009220123291, |
|
"rewards/rejected": -5.470343112945557, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.44051948051948053, |
|
"grad_norm": 3.2322970154513215, |
|
"kl": 261.8209228515625, |
|
"learning_rate": 3.00394338114187e-06, |
|
"logits/chosen": -40481952.0, |
|
"logits/rejected": -142613808.0, |
|
"logps/chosen": -157.77676391601562, |
|
"logps/rejected": -166.2552032470703, |
|
"loss": 0.333, |
|
"rewards/chosen": -0.08739203214645386, |
|
"rewards/margins": 6.05664998292923, |
|
"rewards/rejected": -6.144042015075684, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.4432900432900433, |
|
"grad_norm": 3.1517174776004455, |
|
"kl": 177.34640502929688, |
|
"learning_rate": 2.9823151635237424e-06, |
|
"logits/chosen": -57420648.0, |
|
"logits/rejected": -60590308.0, |
|
"logps/chosen": -281.8450622558594, |
|
"logps/rejected": -194.87185668945312, |
|
"loss": 0.3901, |
|
"rewards/chosen": 2.5421125888824463, |
|
"rewards/margins": 5.507090330123901, |
|
"rewards/rejected": -2.964977741241455, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44606060606060605, |
|
"grad_norm": 3.7184656464192902, |
|
"kl": 154.72317504882812, |
|
"learning_rate": 2.9606493857098657e-06, |
|
"logits/chosen": -40019485.333333336, |
|
"logits/rejected": 303537024.0, |
|
"logps/chosen": -218.24466959635416, |
|
"logps/rejected": -148.39111328125, |
|
"loss": 0.3214, |
|
"rewards/chosen": -1.7011640866597493, |
|
"rewards/margins": 5.837723890940349, |
|
"rewards/rejected": -7.538887977600098, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.44883116883116886, |
|
"grad_norm": 3.857199942049942, |
|
"kl": 182.81161499023438, |
|
"learning_rate": 2.938947734918302e-06, |
|
"logits/chosen": -47456576.0, |
|
"logits/rejected": -56964508.44444445, |
|
"logps/chosen": -213.33220563616072, |
|
"logps/rejected": -128.02083333333334, |
|
"loss": 0.2709, |
|
"rewards/chosen": -3.7205652509416853, |
|
"rewards/margins": -0.0823261321537081, |
|
"rewards/rejected": -3.6382391187879772, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.4516017316017316, |
|
"grad_norm": 3.6554439186941736, |
|
"kl": 197.32955932617188, |
|
"learning_rate": 2.9172119011607153e-06, |
|
"logits/chosen": -62697898.666666664, |
|
"logits/rejected": -30507272.0, |
|
"logps/chosen": -226.22090657552084, |
|
"logps/rejected": -94.68756866455078, |
|
"loss": 0.2987, |
|
"rewards/chosen": -2.0916875203450522, |
|
"rewards/margins": 2.4424230257670083, |
|
"rewards/rejected": -4.5341105461120605, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.45437229437229437, |
|
"grad_norm": 4.608790075602045, |
|
"kl": 213.34255981445312, |
|
"learning_rate": 2.8954435771107604e-06, |
|
"logits/chosen": -79102016.0, |
|
"logits/rejected": -62801610.666666664, |
|
"logps/chosen": -278.2896423339844, |
|
"logps/rejected": -202.40938313802084, |
|
"loss": 0.2914, |
|
"rewards/chosen": -6.47105598449707, |
|
"rewards/margins": -0.2049547831217451, |
|
"rewards/rejected": -6.266101201375325, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.45714285714285713, |
|
"grad_norm": 3.3882159493387616, |
|
"kl": 198.41207885742188, |
|
"learning_rate": 2.8736444579722665e-06, |
|
"logits/chosen": -78504037.33333333, |
|
"logits/rejected": -73070310.4, |
|
"logps/chosen": -327.8160400390625, |
|
"logps/rejected": -222.9837890625, |
|
"loss": 0.2741, |
|
"rewards/chosen": 1.1677392323811848, |
|
"rewards/margins": 8.064625676472982, |
|
"rewards/rejected": -6.8968864440917965, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.45991341991341994, |
|
"grad_norm": 3.279972807646011, |
|
"kl": 142.17318725585938, |
|
"learning_rate": 2.8518162413472266e-06, |
|
"logits/chosen": -85365942.85714285, |
|
"logits/rejected": -70893845.33333333, |
|
"logps/chosen": -279.30538504464283, |
|
"logps/rejected": -182.84242078993054, |
|
"loss": 0.3081, |
|
"rewards/chosen": -2.3745449611118863, |
|
"rewards/margins": 1.4273942614358566, |
|
"rewards/rejected": -3.801939222547743, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.4626839826839827, |
|
"grad_norm": 3.6661575094256427, |
|
"kl": 89.53360748291016, |
|
"learning_rate": 2.8299606271035913e-06, |
|
"logits/chosen": -52625970.28571428, |
|
"logits/rejected": -72505472.0, |
|
"logps/chosen": -301.35030691964283, |
|
"logps/rejected": -205.73879665798611, |
|
"loss": 0.3063, |
|
"rewards/chosen": 1.8995137895856584, |
|
"rewards/margins": 8.419966500902932, |
|
"rewards/rejected": -6.520452711317274, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.46545454545454545, |
|
"grad_norm": 4.152823525128342, |
|
"kl": 183.32223510742188, |
|
"learning_rate": 2.8080793172428965e-06, |
|
"logits/chosen": -73402512.0, |
|
"logits/rejected": -142298960.0, |
|
"logps/chosen": -396.2037353515625, |
|
"logps/rejected": -394.7413330078125, |
|
"loss": 0.3263, |
|
"rewards/chosen": 1.2748676141103108, |
|
"rewards/margins": 13.17820700009664, |
|
"rewards/rejected": -11.903339385986328, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.4682251082251082, |
|
"grad_norm": 4.915619443451149, |
|
"kl": 157.55738830566406, |
|
"learning_rate": 2.786174015767721e-06, |
|
"logits/chosen": -56088037.333333336, |
|
"logits/rejected": -43772393.6, |
|
"logps/chosen": -226.74296061197916, |
|
"logps/rejected": -226.644873046875, |
|
"loss": 0.255, |
|
"rewards/chosen": -4.559232076009114, |
|
"rewards/margins": 4.601092020670573, |
|
"rewards/rejected": -9.160324096679688, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.470995670995671, |
|
"grad_norm": 5.3345479859735345, |
|
"kl": 184.7256317138672, |
|
"learning_rate": 2.764246428548983e-06, |
|
"logits/chosen": -51022968.0, |
|
"logits/rejected": -58412576.0, |
|
"logps/chosen": -207.92623901367188, |
|
"logps/rejected": -239.64891052246094, |
|
"loss": 0.2828, |
|
"rewards/chosen": -1.1218031644821167, |
|
"rewards/margins": 8.50666630268097, |
|
"rewards/rejected": -9.628469467163086, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4737662337662338, |
|
"grad_norm": 4.13624571497418, |
|
"kl": 177.11712646484375, |
|
"learning_rate": 2.742298263193099e-06, |
|
"logits/chosen": -99049797.81818181, |
|
"logits/rejected": -67656134.4, |
|
"logps/chosen": -280.09694602272725, |
|
"logps/rejected": -271.40283203125, |
|
"loss": 0.3186, |
|
"rewards/chosen": -1.507522236217152, |
|
"rewards/margins": 6.865571941028941, |
|
"rewards/rejected": -8.373094177246093, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.47653679653679654, |
|
"grad_norm": 3.0904137276117103, |
|
"kl": 200.4022216796875, |
|
"learning_rate": 2.720331228909005e-06, |
|
"logits/chosen": -42735584.0, |
|
"logits/rejected": -45149984.0, |
|
"logps/chosen": -211.05224609375, |
|
"logps/rejected": -235.17949567522322, |
|
"loss": 0.3305, |
|
"rewards/chosen": -1.8013004726833768, |
|
"rewards/margins": 1.051658206515842, |
|
"rewards/rejected": -2.8529586791992188, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.4793073593073593, |
|
"grad_norm": 4.247838462733702, |
|
"kl": 208.93966674804688, |
|
"learning_rate": 2.6983470363750497e-06, |
|
"logits/chosen": -52870764.8, |
|
"logits/rejected": -93502741.33333333, |
|
"logps/chosen": -247.8509521484375, |
|
"logps/rejected": -176.8176472981771, |
|
"loss": 0.2907, |
|
"rewards/chosen": 0.4748857498168945, |
|
"rewards/margins": 6.685143597920735, |
|
"rewards/rejected": -6.210257848103841, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.4820779220779221, |
|
"grad_norm": 3.6922139188578385, |
|
"kl": 199.78564453125, |
|
"learning_rate": 2.6763473976057776e-06, |
|
"logits/chosen": -78229719.27272727, |
|
"logits/rejected": -64247590.4, |
|
"logps/chosen": -343.76633522727275, |
|
"logps/rejected": -289.5193359375, |
|
"loss": 0.3302, |
|
"rewards/chosen": 0.27789141915061255, |
|
"rewards/margins": 6.072880051352761, |
|
"rewards/rejected": -5.794988632202148, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.48484848484848486, |
|
"grad_norm": 2.1830098135367253, |
|
"kl": 291.7579345703125, |
|
"learning_rate": 2.6543340258186063e-06, |
|
"logits/chosen": -61894516.0, |
|
"logits/rejected": -88448416.0, |
|
"logps/chosen": -175.04672241210938, |
|
"logps/rejected": -153.3810577392578, |
|
"loss": 0.2448, |
|
"rewards/chosen": -1.9807024002075195, |
|
"rewards/margins": 2.309774398803711, |
|
"rewards/rejected": -4.2904767990112305, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.4876190476190476, |
|
"grad_norm": 4.218222845890689, |
|
"kl": 119.53733825683594, |
|
"learning_rate": 2.6323086353004077e-06, |
|
"logits/chosen": -85618194.28571428, |
|
"logits/rejected": -93672120.8888889, |
|
"logps/chosen": -396.98486328125, |
|
"logps/rejected": -197.43092176649304, |
|
"loss": 0.2549, |
|
"rewards/chosen": 0.570185661315918, |
|
"rewards/margins": 5.8234213723076715, |
|
"rewards/rejected": -5.253235710991754, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.4903896103896104, |
|
"grad_norm": 2.596784708198279, |
|
"kl": 273.5486145019531, |
|
"learning_rate": 2.610272941274012e-06, |
|
"logits/chosen": -33087848.0, |
|
"logits/rejected": -25035680.0, |
|
"logps/chosen": -114.39351654052734, |
|
"logps/rejected": -187.162353515625, |
|
"loss": 0.2989, |
|
"rewards/chosen": -1.1123628616333008, |
|
"rewards/margins": 6.887551307678223, |
|
"rewards/rejected": -7.999914169311523, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.4931601731601732, |
|
"grad_norm": 3.339051362841475, |
|
"kl": 120.95160675048828, |
|
"learning_rate": 2.588228659764632e-06, |
|
"logits/chosen": -50194481.777777776, |
|
"logits/rejected": -46198921.14285714, |
|
"logps/chosen": -192.59000651041666, |
|
"logps/rejected": -185.52493722098214, |
|
"loss": 0.3189, |
|
"rewards/chosen": -6.144053565131293, |
|
"rewards/margins": -1.0134704832046753, |
|
"rewards/rejected": -5.130583081926618, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.49593073593073594, |
|
"grad_norm": 3.4153821985604975, |
|
"kl": 170.84121704101562, |
|
"learning_rate": 2.5661775074662276e-06, |
|
"logits/chosen": -130978457.6, |
|
"logits/rejected": -44669677.333333336, |
|
"logps/chosen": -237.725830078125, |
|
"logps/rejected": -108.64566040039062, |
|
"loss": 0.3417, |
|
"rewards/chosen": -0.7145191192626953, |
|
"rewards/margins": 3.991784159342448, |
|
"rewards/rejected": -4.7063032786051435, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.4987012987012987, |
|
"grad_norm": 3.013896524570175, |
|
"kl": 204.37509155273438, |
|
"learning_rate": 2.544121201607822e-06, |
|
"logits/chosen": -84088128.0, |
|
"logits/rejected": -29717827.2, |
|
"logps/chosen": -383.296875, |
|
"logps/rejected": -157.595068359375, |
|
"loss": 0.3454, |
|
"rewards/chosen": 1.9199379285176594, |
|
"rewards/margins": 7.402603562672932, |
|
"rewards/rejected": -5.482665634155273, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4987012987012987, |
|
"eval_logits/chosen": -67193476.26666667, |
|
"eval_logits/rejected": -37107747.31034483, |
|
"eval_logps/chosen": -344.8448893229167, |
|
"eval_logps/rejected": -226.03357354525863, |
|
"eval_loss": 0.30528655648231506, |
|
"eval_rewards/chosen": 1.4481372833251953, |
|
"eval_rewards/margins": 7.979209965672986, |
|
"eval_rewards/rejected": -6.531072682347791, |
|
"eval_runtime": 375.7871, |
|
"eval_samples_per_second": 1.243, |
|
"eval_steps_per_second": 0.157, |
|
"kl": 836.632568359375, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5014718614718615, |
|
"grad_norm": 4.356060168329763, |
|
"kl": 191.42889404296875, |
|
"learning_rate": 2.5220614598197708e-06, |
|
"logits/chosen": -107889254.4, |
|
"logits/rejected": -52258368.0, |
|
"logps/chosen": -304.1372314453125, |
|
"logps/rejected": -282.9250081380208, |
|
"loss": 0.3333, |
|
"rewards/chosen": -2.5827245712280273, |
|
"rewards/margins": 6.893130302429199, |
|
"rewards/rejected": -9.475854873657227, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.5042424242424243, |
|
"grad_norm": 4.2430092265377715, |
|
"kl": 188.14871215820312, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -124263899.42857143, |
|
"logits/rejected": -47703192.88888889, |
|
"logps/chosen": -204.89937918526786, |
|
"logps/rejected": -259.95594618055554, |
|
"loss": 0.3226, |
|
"rewards/chosen": -2.21267454964774, |
|
"rewards/margins": 1.8116386353023466, |
|
"rewards/rejected": -4.024313184950087, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.507012987012987, |
|
"grad_norm": 2.722752429679691, |
|
"kl": 169.01434326171875, |
|
"learning_rate": 2.477938540180231e-06, |
|
"logits/chosen": -49152826.666666664, |
|
"logits/rejected": -71773224.0, |
|
"logps/chosen": -209.00362141927084, |
|
"logps/rejected": -120.16709899902344, |
|
"loss": 0.3903, |
|
"rewards/chosen": -1.652681032816569, |
|
"rewards/margins": 2.2883280118306475, |
|
"rewards/rejected": -3.941009044647217, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.5097835497835498, |
|
"grad_norm": 3.4971096121656378, |
|
"kl": 147.58242797851562, |
|
"learning_rate": 2.455878798392179e-06, |
|
"logits/chosen": -64680269.71428572, |
|
"logits/rejected": -103894449.77777778, |
|
"logps/chosen": -346.93739536830356, |
|
"logps/rejected": -235.97618272569446, |
|
"loss": 0.3303, |
|
"rewards/chosen": 2.4380479540143694, |
|
"rewards/margins": 12.841405565776522, |
|
"rewards/rejected": -10.403357611762154, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.5125541125541125, |
|
"grad_norm": 3.538113732240373, |
|
"kl": 341.8497314453125, |
|
"learning_rate": 2.433822492533774e-06, |
|
"logits/chosen": -39145449.14285714, |
|
"logits/rejected": -84920298.66666667, |
|
"logps/chosen": -229.3765869140625, |
|
"logps/rejected": -230.70206705729166, |
|
"loss": 0.3537, |
|
"rewards/chosen": -5.895386832101004, |
|
"rewards/margins": 1.4380838303338912, |
|
"rewards/rejected": -7.3334706624348955, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.5153246753246753, |
|
"grad_norm": 2.9086188103636714, |
|
"kl": 114.81712341308594, |
|
"learning_rate": 2.411771340235369e-06, |
|
"logits/chosen": -49792800.0, |
|
"logits/rejected": -105182803.2, |
|
"logps/chosen": -250.2506306966146, |
|
"logps/rejected": -235.0851806640625, |
|
"loss": 0.3312, |
|
"rewards/chosen": -4.144415219624837, |
|
"rewards/margins": 1.8368858973185223, |
|
"rewards/rejected": -5.981301116943359, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.518095238095238, |
|
"grad_norm": 6.561660944265687, |
|
"kl": 104.41949462890625, |
|
"learning_rate": 2.389727058725989e-06, |
|
"logits/chosen": -158027867.42857143, |
|
"logits/rejected": -63834716.44444445, |
|
"logps/chosen": -347.48318917410717, |
|
"logps/rejected": -279.6531032986111, |
|
"loss": 0.342, |
|
"rewards/chosen": 2.9799742017473494, |
|
"rewards/margins": 11.362875711350215, |
|
"rewards/rejected": -8.382901509602865, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.5208658008658009, |
|
"grad_norm": 3.079713812899905, |
|
"kl": 112.68702697753906, |
|
"learning_rate": 2.3676913646995923e-06, |
|
"logits/chosen": -87418666.66666667, |
|
"logits/rejected": -104699712.0, |
|
"logps/chosen": -482.70909288194446, |
|
"logps/rejected": -167.60402134486608, |
|
"loss": 0.2445, |
|
"rewards/chosen": 4.543322245279948, |
|
"rewards/margins": 11.19434828985305, |
|
"rewards/rejected": -6.651026044573102, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.5236363636363637, |
|
"grad_norm": 3.384610966553019, |
|
"kl": 159.68157958984375, |
|
"learning_rate": 2.3456659741813945e-06, |
|
"logits/chosen": -69022734.22222222, |
|
"logits/rejected": -40000859.428571425, |
|
"logps/chosen": -371.64873589409723, |
|
"logps/rejected": -211.55555943080358, |
|
"loss": 0.3456, |
|
"rewards/chosen": 0.04310830434163412, |
|
"rewards/margins": 7.334953353518531, |
|
"rewards/rejected": -7.291845049176898, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.5264069264069264, |
|
"grad_norm": 3.8221129014555633, |
|
"kl": 226.33648681640625, |
|
"learning_rate": 2.3236526023942224e-06, |
|
"logits/chosen": -64279296.0, |
|
"logits/rejected": -94565772.8, |
|
"logps/chosen": -300.8207341974432, |
|
"logps/rejected": -186.9443603515625, |
|
"loss": 0.3614, |
|
"rewards/chosen": -3.407633001154119, |
|
"rewards/margins": 2.492927759343928, |
|
"rewards/rejected": -5.900560760498047, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5291774891774892, |
|
"grad_norm": 3.7771126966424564, |
|
"kl": 137.0946044921875, |
|
"learning_rate": 2.301652963624951e-06, |
|
"logits/chosen": -76113541.81818181, |
|
"logits/rejected": -51425171.2, |
|
"logps/chosen": -346.21928267045456, |
|
"logps/rejected": -229.1968994140625, |
|
"loss": 0.3241, |
|
"rewards/chosen": -2.7908023487437856, |
|
"rewards/margins": 1.7894470561634406, |
|
"rewards/rejected": -4.580249404907226, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.531948051948052, |
|
"grad_norm": 1.8598401806559461, |
|
"kl": 197.385986328125, |
|
"learning_rate": 2.2796687710909966e-06, |
|
"logits/chosen": -55358822.4, |
|
"logits/rejected": -102614474.66666667, |
|
"logps/chosen": -203.16923828125, |
|
"logps/rejected": -193.5582275390625, |
|
"loss": 0.3448, |
|
"rewards/chosen": -4.700377655029297, |
|
"rewards/margins": 0.5808376948038738, |
|
"rewards/rejected": -5.281215349833171, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.5347186147186147, |
|
"grad_norm": 3.834495630642973, |
|
"kl": 187.49928283691406, |
|
"learning_rate": 2.2577017368069017e-06, |
|
"logits/chosen": -108176749.71428572, |
|
"logits/rejected": -47750257.777777776, |
|
"logps/chosen": -469.99124581473217, |
|
"logps/rejected": -177.22574869791666, |
|
"loss": 0.2616, |
|
"rewards/chosen": 1.035844189780099, |
|
"rewards/margins": 8.450432270292252, |
|
"rewards/rejected": -7.414588080512153, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.5374891774891775, |
|
"grad_norm": 3.186977461146233, |
|
"kl": 90.20316314697266, |
|
"learning_rate": 2.235753571451018e-06, |
|
"logits/chosen": -72693686.85714285, |
|
"logits/rejected": -53111786.666666664, |
|
"logps/chosen": -186.34223284040178, |
|
"logps/rejected": -159.53831651475696, |
|
"loss": 0.3198, |
|
"rewards/chosen": -0.7966529982430595, |
|
"rewards/margins": 2.663518807244679, |
|
"rewards/rejected": -3.4601718054877386, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.5402597402597402, |
|
"grad_norm": 4.255977101246861, |
|
"kl": 178.103759765625, |
|
"learning_rate": 2.2138259842322794e-06, |
|
"logits/chosen": -94066453.33333333, |
|
"logits/rejected": -80498636.8, |
|
"logps/chosen": -396.0868733723958, |
|
"logps/rejected": -196.08753662109376, |
|
"loss": 0.2509, |
|
"rewards/chosen": -0.41888968149820965, |
|
"rewards/margins": 3.4590808232625325, |
|
"rewards/rejected": -3.877970504760742, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5430303030303031, |
|
"grad_norm": 3.6907369184366896, |
|
"kl": 121.10801696777344, |
|
"learning_rate": 2.191920682757104e-06, |
|
"logits/chosen": -55915576.0, |
|
"logits/rejected": -137895184.0, |
|
"logps/chosen": -360.009765625, |
|
"logps/rejected": -233.00111389160156, |
|
"loss": 0.3048, |
|
"rewards/chosen": -0.7659169435501099, |
|
"rewards/margins": 6.7380160093307495, |
|
"rewards/rejected": -7.503932952880859, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5458008658008658, |
|
"grad_norm": 2.2704628703365444, |
|
"kl": 160.8948974609375, |
|
"learning_rate": 2.170039372896409e-06, |
|
"logits/chosen": -96806220.8, |
|
"logits/rejected": -139429973.33333334, |
|
"logps/chosen": -165.101806640625, |
|
"logps/rejected": -218.95597330729166, |
|
"loss": 0.2765, |
|
"rewards/chosen": -0.4397267818450928, |
|
"rewards/margins": 5.83861387570699, |
|
"rewards/rejected": -6.278340657552083, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.5485714285714286, |
|
"grad_norm": 3.3593101598446586, |
|
"kl": 155.88333129882812, |
|
"learning_rate": 2.148183758652774e-06, |
|
"logits/chosen": -59763976.0, |
|
"logits/rejected": -74410616.0, |
|
"logps/chosen": -267.308837890625, |
|
"logps/rejected": -214.98797607421875, |
|
"loss": 0.2769, |
|
"rewards/chosen": -1.0068299770355225, |
|
"rewards/margins": 4.386720895767212, |
|
"rewards/rejected": -5.393550872802734, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.5513419913419914, |
|
"grad_norm": 3.348634978846618, |
|
"kl": 211.95455932617188, |
|
"learning_rate": 2.126355542027734e-06, |
|
"logits/chosen": -93054104.0, |
|
"logits/rejected": -36135216.0, |
|
"logps/chosen": -209.05706787109375, |
|
"logps/rejected": -142.19302368164062, |
|
"loss": 0.3478, |
|
"rewards/chosen": -2.386659622192383, |
|
"rewards/margins": 0.8848772048950195, |
|
"rewards/rejected": -3.2715368270874023, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.5541125541125541, |
|
"grad_norm": 2.6185958603566855, |
|
"kl": 133.95506286621094, |
|
"learning_rate": 2.1045564228892404e-06, |
|
"logits/chosen": -59569035.63636363, |
|
"logits/rejected": -37804576.0, |
|
"logps/chosen": -369.35862038352275, |
|
"logps/rejected": -124.1267578125, |
|
"loss": 0.2268, |
|
"rewards/chosen": 1.7300832921808416, |
|
"rewards/margins": 7.441509073430842, |
|
"rewards/rejected": -5.71142578125, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5568831168831169, |
|
"grad_norm": 3.1853022448974375, |
|
"kl": 239.44927978515625, |
|
"learning_rate": 2.0827880988392856e-06, |
|
"logits/chosen": -84826208.0, |
|
"logits/rejected": -94426064.0, |
|
"logps/chosen": -268.79168701171875, |
|
"logps/rejected": -142.93902587890625, |
|
"loss": 0.274, |
|
"rewards/chosen": 0.07495087385177612, |
|
"rewards/margins": 3.469887435436249, |
|
"rewards/rejected": -3.3949365615844727, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.5596536796536796, |
|
"grad_norm": 2.8423454716873917, |
|
"kl": 168.17323303222656, |
|
"learning_rate": 2.0610522650816985e-06, |
|
"logits/chosen": -68017907.2, |
|
"logits/rejected": -222357141.33333334, |
|
"logps/chosen": -231.947509765625, |
|
"logps/rejected": -189.9059855143229, |
|
"loss": 0.3558, |
|
"rewards/chosen": -0.16442975997924805, |
|
"rewards/margins": 6.447909895579021, |
|
"rewards/rejected": -6.6123396555582685, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.5624242424242424, |
|
"grad_norm": 3.7346854730125254, |
|
"kl": 104.60269927978516, |
|
"learning_rate": 2.0393506142901347e-06, |
|
"logits/chosen": -60994228.36363637, |
|
"logits/rejected": -107862118.4, |
|
"logps/chosen": -260.94948508522725, |
|
"logps/rejected": -245.2341552734375, |
|
"loss": 0.274, |
|
"rewards/chosen": 1.0195409601384944, |
|
"rewards/margins": 9.791877538507634, |
|
"rewards/rejected": -8.77233657836914, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.5651948051948052, |
|
"grad_norm": 2.8605120324430215, |
|
"kl": 205.68209838867188, |
|
"learning_rate": 2.017684836476258e-06, |
|
"logits/chosen": -49999822.222222224, |
|
"logits/rejected": -84995885.71428572, |
|
"logps/chosen": -266.4399685329861, |
|
"logps/rejected": -143.72217668805803, |
|
"loss": 0.3008, |
|
"rewards/chosen": 1.8411036597357855, |
|
"rewards/margins": 8.940965894668821, |
|
"rewards/rejected": -7.099862234933036, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.567965367965368, |
|
"grad_norm": 3.280948123262779, |
|
"kl": 216.97950744628906, |
|
"learning_rate": 1.9960566188581306e-06, |
|
"logits/chosen": -46048905.14285714, |
|
"logits/rejected": -57397656.88888889, |
|
"logps/chosen": -184.09369768415178, |
|
"logps/rejected": -143.51478407118054, |
|
"loss": 0.343, |
|
"rewards/chosen": -7.50201416015625, |
|
"rewards/margins": -3.1174642774793835, |
|
"rewards/rejected": -4.3845498826768665, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.5707359307359308, |
|
"grad_norm": 2.4558271657409336, |
|
"kl": 286.4020080566406, |
|
"learning_rate": 1.9744676457288225e-06, |
|
"logits/chosen": -77790198.85714285, |
|
"logits/rejected": -47510997.333333336, |
|
"logps/chosen": -345.818603515625, |
|
"logps/rejected": -197.7877197265625, |
|
"loss": 0.3601, |
|
"rewards/chosen": -1.7951624734061105, |
|
"rewards/margins": 2.253208311777266, |
|
"rewards/rejected": -4.048370785183376, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.5735064935064935, |
|
"grad_norm": 3.942002526387752, |
|
"kl": 206.5150146484375, |
|
"learning_rate": 1.952919598325247e-06, |
|
"logits/chosen": -55335221.333333336, |
|
"logits/rejected": -34377880.0, |
|
"logps/chosen": -207.23506673177084, |
|
"logps/rejected": -113.67945861816406, |
|
"loss": 0.3738, |
|
"rewards/chosen": -2.086402416229248, |
|
"rewards/margins": 5.298608779907227, |
|
"rewards/rejected": -7.385011196136475, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.5762770562770563, |
|
"grad_norm": 2.5572946277340742, |
|
"kl": 279.0215148925781, |
|
"learning_rate": 1.9314141546972345e-06, |
|
"logits/chosen": -59309312.0, |
|
"logits/rejected": -43267481.6, |
|
"logps/chosen": -198.94559733072916, |
|
"logps/rejected": -191.52454833984376, |
|
"loss": 0.2513, |
|
"rewards/chosen": -4.163699150085449, |
|
"rewards/margins": 1.1932497024536133, |
|
"rewards/rejected": -5.3569488525390625, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.579047619047619, |
|
"grad_norm": 3.792975813303454, |
|
"kl": 169.12713623046875, |
|
"learning_rate": 1.9099529895768552e-06, |
|
"logits/chosen": -41098888.0, |
|
"logits/rejected": -99659952.0, |
|
"logps/chosen": -248.0914306640625, |
|
"logps/rejected": -194.57171630859375, |
|
"loss": 0.2727, |
|
"rewards/chosen": 4.070443630218506, |
|
"rewards/margins": 10.523594379425049, |
|
"rewards/rejected": -6.453150749206543, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.5818181818181818, |
|
"grad_norm": 3.1456231702627186, |
|
"kl": 189.19842529296875, |
|
"learning_rate": 1.8885377742480005e-06, |
|
"logits/chosen": -86137941.33333333, |
|
"logits/rejected": -69529678.76923077, |
|
"logps/chosen": -357.3465169270833, |
|
"logps/rejected": -243.63138521634616, |
|
"loss": 0.3118, |
|
"rewards/chosen": 3.267169952392578, |
|
"rewards/margins": 9.234573657696064, |
|
"rewards/rejected": -5.967403705303486, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5845887445887445, |
|
"grad_norm": 3.255048348416602, |
|
"kl": 218.10623168945312, |
|
"learning_rate": 1.8671701764162287e-06, |
|
"logits/chosen": -59820352.0, |
|
"logits/rejected": -43202088.0, |
|
"logps/chosen": -235.07044982910156, |
|
"logps/rejected": -230.73135375976562, |
|
"loss": 0.2568, |
|
"rewards/chosen": -0.05381596088409424, |
|
"rewards/margins": 9.376083254814148, |
|
"rewards/rejected": -9.429899215698242, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.5873593073593074, |
|
"grad_norm": 2.54679273461964, |
|
"kl": 152.00927734375, |
|
"learning_rate": 1.8458518600788988e-06, |
|
"logits/chosen": -77701637.81818181, |
|
"logits/rejected": -50945308.8, |
|
"logps/chosen": -347.40553977272725, |
|
"logps/rejected": -185.97353515625, |
|
"loss": 0.277, |
|
"rewards/chosen": 4.390115217729048, |
|
"rewards/margins": 9.58318353132768, |
|
"rewards/rejected": -5.193068313598633, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.5901298701298702, |
|
"grad_norm": 2.766794898118744, |
|
"kl": 188.09686279296875, |
|
"learning_rate": 1.8245844853955786e-06, |
|
"logits/chosen": -61706139.428571425, |
|
"logits/rejected": -98877696.0, |
|
"logps/chosen": -208.24166434151786, |
|
"logps/rejected": -238.23269314236111, |
|
"loss": 0.2229, |
|
"rewards/chosen": -0.4899448667253767, |
|
"rewards/margins": 5.7206187929425925, |
|
"rewards/rejected": -6.210563659667969, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.5929004329004329, |
|
"grad_norm": 3.077063238549823, |
|
"kl": 192.8994140625, |
|
"learning_rate": 1.8033697085587698e-06, |
|
"logits/chosen": -44352933.333333336, |
|
"logits/rejected": -61902777.6, |
|
"logps/chosen": -228.66778564453125, |
|
"logps/rejected": -204.53642578125, |
|
"loss": 0.3359, |
|
"rewards/chosen": -4.820536295572917, |
|
"rewards/margins": 1.5907872517903643, |
|
"rewards/rejected": -6.411323547363281, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.5956709956709957, |
|
"grad_norm": 3.355279316331082, |
|
"kl": 348.7542724609375, |
|
"learning_rate": 1.782209181664924e-06, |
|
"logits/chosen": -79986400.0, |
|
"logits/rejected": -69224832.0, |
|
"logps/chosen": -353.9718017578125, |
|
"logps/rejected": -196.55494689941406, |
|
"loss": 0.2641, |
|
"rewards/chosen": 0.0889444351196289, |
|
"rewards/margins": 6.460943698883057, |
|
"rewards/rejected": -6.371999263763428, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.5984415584415584, |
|
"grad_norm": 3.652340653227592, |
|
"kl": 177.54664611816406, |
|
"learning_rate": 1.7611045525857902e-06, |
|
"logits/chosen": -77715251.2, |
|
"logits/rejected": -51552048.0, |
|
"logps/chosen": -340.14111328125, |
|
"logps/rejected": -178.1352335611979, |
|
"loss": 0.2633, |
|
"rewards/chosen": 0.6218101501464843, |
|
"rewards/margins": 7.701845041910808, |
|
"rewards/rejected": -7.080034891764323, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.5984415584415584, |
|
"eval_logits/chosen": -68801646.93333334, |
|
"eval_logits/rejected": -37986458.48275862, |
|
"eval_logps/chosen": -343.46266276041666, |
|
"eval_logps/rejected": -225.37659954202587, |
|
"eval_loss": 0.30848661065101624, |
|
"eval_rewards/chosen": 1.586359405517578, |
|
"eval_rewards/margins": 8.051734582309065, |
|
"eval_rewards/rejected": -6.465375176791487, |
|
"eval_runtime": 375.7911, |
|
"eval_samples_per_second": 1.243, |
|
"eval_steps_per_second": 0.157, |
|
"kl": 974.3778076171875, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.6012121212121212, |
|
"grad_norm": 2.6940300170171376, |
|
"kl": 296.03472900390625, |
|
"learning_rate": 1.740057464840088e-06, |
|
"logits/chosen": -72538086.4, |
|
"logits/rejected": -77256587.63636364, |
|
"logps/chosen": -338.1424560546875, |
|
"logps/rejected": -178.35451438210228, |
|
"loss": 0.2693, |
|
"rewards/chosen": 4.033015441894531, |
|
"rewards/margins": 9.458120796897195, |
|
"rewards/rejected": -5.425105355002663, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.603982683982684, |
|
"grad_norm": 5.880026752195334, |
|
"kl": 285.2386169433594, |
|
"learning_rate": 1.7190695574655147e-06, |
|
"logits/chosen": -67694520.8888889, |
|
"logits/rejected": -37378576.0, |
|
"logps/chosen": -280.1799045138889, |
|
"logps/rejected": -164.02565220424108, |
|
"loss": 0.3916, |
|
"rewards/chosen": -1.6728751924302843, |
|
"rewards/margins": 3.780970240396166, |
|
"rewards/rejected": -5.4538454328264505, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.6067532467532467, |
|
"grad_norm": 2.610552070109955, |
|
"kl": 240.72793579101562, |
|
"learning_rate": 1.6981424648911112e-06, |
|
"logits/chosen": -41839498.666666664, |
|
"logits/rejected": -50530960.0, |
|
"logps/chosen": -212.16925048828125, |
|
"logps/rejected": -200.35166015625, |
|
"loss": 0.308, |
|
"rewards/chosen": -0.23567096392313638, |
|
"rewards/margins": 5.863442881902059, |
|
"rewards/rejected": -6.099113845825196, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.6095238095238096, |
|
"grad_norm": 3.401876003409717, |
|
"kl": 323.20635986328125, |
|
"learning_rate": 1.677277816809975e-06, |
|
"logits/chosen": -67701248.0, |
|
"logits/rejected": -70669077.33333333, |
|
"logps/chosen": -345.4431396484375, |
|
"logps/rejected": -145.46737670898438, |
|
"loss": 0.336, |
|
"rewards/chosen": -4.474223327636719, |
|
"rewards/margins": -2.6156206448872883, |
|
"rewards/rejected": -1.8586026827494304, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6122943722943723, |
|
"grad_norm": 3.915703434239461, |
|
"kl": 216.21176147460938, |
|
"learning_rate": 1.6564772380523546e-06, |
|
"logits/chosen": -58600466.28571428, |
|
"logits/rejected": -74421880.8888889, |
|
"logps/chosen": -241.07268415178572, |
|
"logps/rejected": -211.32619900173611, |
|
"loss": 0.368, |
|
"rewards/chosen": 1.955857821873256, |
|
"rewards/margins": 9.50594394926041, |
|
"rewards/rejected": -7.550086127387153, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.6150649350649351, |
|
"grad_norm": 3.6577888277996102, |
|
"kl": 207.39048767089844, |
|
"learning_rate": 1.635742348459109e-06, |
|
"logits/chosen": -60414848.0, |
|
"logits/rejected": -129543833.6, |
|
"logps/chosen": -279.7829996744792, |
|
"logps/rejected": -161.88243408203124, |
|
"loss": 0.2651, |
|
"rewards/chosen": 1.968168576558431, |
|
"rewards/margins": 7.690291913350423, |
|
"rewards/rejected": -5.722123336791992, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.6178354978354978, |
|
"grad_norm": 4.7884656461897, |
|
"kl": 286.3252868652344, |
|
"learning_rate": 1.6150747627555713e-06, |
|
"logits/chosen": -74578944.0, |
|
"logits/rejected": -49674577.777777776, |
|
"logps/chosen": -284.42117745535717, |
|
"logps/rejected": -222.95830620659723, |
|
"loss": 0.3203, |
|
"rewards/chosen": -3.2675579616001675, |
|
"rewards/margins": 4.710056547134641, |
|
"rewards/rejected": -7.977614508734809, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.6206060606060606, |
|
"grad_norm": 3.378925338056134, |
|
"kl": 275.15087890625, |
|
"learning_rate": 1.5944760904257944e-06, |
|
"logits/chosen": -46858032.0, |
|
"logits/rejected": -68605488.0, |
|
"logps/chosen": -253.7530314127604, |
|
"logps/rejected": -271.87481689453125, |
|
"loss": 0.3245, |
|
"rewards/chosen": -1.6568760871887207, |
|
"rewards/margins": 7.310287952423096, |
|
"rewards/rejected": -8.967164039611816, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.6233766233766234, |
|
"grad_norm": 5.2707958999962266, |
|
"kl": 302.9314880371094, |
|
"learning_rate": 1.5739479355872162e-06, |
|
"logits/chosen": -47392557.71428572, |
|
"logits/rejected": -34227840.0, |
|
"logps/chosen": -175.62360491071428, |
|
"logps/rejected": -144.90973578559027, |
|
"loss": 0.3358, |
|
"rewards/chosen": -2.6706837245396207, |
|
"rewards/margins": 0.10068981231205054, |
|
"rewards/rejected": -2.771373536851671, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.6261471861471861, |
|
"grad_norm": 3.9348746172309466, |
|
"kl": 300.81634521484375, |
|
"learning_rate": 1.5534918968657423e-06, |
|
"logits/chosen": -88354311.1111111, |
|
"logits/rejected": -88495917.71428572, |
|
"logps/chosen": -483.0252278645833, |
|
"logps/rejected": -221.13459123883928, |
|
"loss": 0.2938, |
|
"rewards/chosen": 1.3839975992838542, |
|
"rewards/margins": 7.2059002830868675, |
|
"rewards/rejected": -5.821902683803013, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.6289177489177489, |
|
"grad_norm": 3.5239597024615374, |
|
"kl": 304.20819091796875, |
|
"learning_rate": 1.5331095672712463e-06, |
|
"logits/chosen": -59224921.6, |
|
"logits/rejected": -51953280.0, |
|
"logps/chosen": -307.050732421875, |
|
"logps/rejected": -200.07796223958334, |
|
"loss": 0.4037, |
|
"rewards/chosen": -1.031832218170166, |
|
"rewards/margins": 5.969323635101318, |
|
"rewards/rejected": -7.001155853271484, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.6316883116883117, |
|
"grad_norm": 3.3758964781856307, |
|
"kl": 177.49435424804688, |
|
"learning_rate": 1.5128025340735223e-06, |
|
"logits/chosen": -72111429.81818181, |
|
"logits/rejected": -45238502.4, |
|
"logps/chosen": -310.7448064630682, |
|
"logps/rejected": -189.208349609375, |
|
"loss": 0.3153, |
|
"rewards/chosen": -3.234787334095348, |
|
"rewards/margins": 1.2791733134876595, |
|
"rewards/rejected": -4.513960647583008, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.6344588744588745, |
|
"grad_norm": 3.4930191805210673, |
|
"kl": 288.73455810546875, |
|
"learning_rate": 1.4925723786786691e-06, |
|
"logits/chosen": -57290352.0, |
|
"logits/rejected": -64807464.0, |
|
"logps/chosen": -269.4695739746094, |
|
"logps/rejected": -178.97718811035156, |
|
"loss": 0.3023, |
|
"rewards/chosen": -3.9210712909698486, |
|
"rewards/margins": 1.226576566696167, |
|
"rewards/rejected": -5.147647857666016, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.6372294372294373, |
|
"grad_norm": 3.1177672935847545, |
|
"kl": 135.36849975585938, |
|
"learning_rate": 1.4724206765059456e-06, |
|
"logits/chosen": -54134994.28571428, |
|
"logits/rejected": -111949852.44444445, |
|
"logps/chosen": -204.20844377790178, |
|
"logps/rejected": -233.01820203993054, |
|
"loss": 0.2649, |
|
"rewards/chosen": 0.520991427557809, |
|
"rewards/margins": 6.7558784447019065, |
|
"rewards/rejected": -6.234887017144097, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 2.7643709324145864, |
|
"kl": 178.657958984375, |
|
"learning_rate": 1.4523489968650795e-06, |
|
"logits/chosen": -64459858.28571428, |
|
"logits/rejected": -85256739.55555555, |
|
"logps/chosen": -291.76614815848217, |
|
"logps/rejected": -213.23220486111111, |
|
"loss": 0.3014, |
|
"rewards/chosen": 2.469794682094029, |
|
"rewards/margins": 8.441367558070592, |
|
"rewards/rejected": -5.9715728759765625, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.6427705627705628, |
|
"grad_norm": 3.712587969836102, |
|
"kl": 143.9523162841797, |
|
"learning_rate": 1.4323589028340598e-06, |
|
"logits/chosen": -79929376.0, |
|
"logits/rejected": -155408866.46153846, |
|
"logps/chosen": -377.5170084635417, |
|
"logps/rejected": -220.03947566105768, |
|
"loss": 0.2185, |
|
"rewards/chosen": -0.14797210693359375, |
|
"rewards/margins": 6.270457341120793, |
|
"rewards/rejected": -6.418429448054387, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.6455411255411255, |
|
"grad_norm": 2.6928811967907924, |
|
"kl": 237.62255859375, |
|
"learning_rate": 1.4124519511374158e-06, |
|
"logits/chosen": -68120402.28571428, |
|
"logits/rejected": -84057173.33333333, |
|
"logps/chosen": -176.9976806640625, |
|
"logps/rejected": -196.41248914930554, |
|
"loss": 0.3564, |
|
"rewards/chosen": 2.186638968331473e-05, |
|
"rewards/margins": 8.546026267702619, |
|
"rewards/rejected": -8.546004401312935, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.6483116883116883, |
|
"grad_norm": 3.3451720915869685, |
|
"kl": 248.16729736328125, |
|
"learning_rate": 1.3926296920249796e-06, |
|
"logits/chosen": -69849329.77777778, |
|
"logits/rejected": -48679707.428571425, |
|
"logps/chosen": -240.88053385416666, |
|
"logps/rejected": -153.94259207589286, |
|
"loss": 0.2441, |
|
"rewards/chosen": 2.785261789957682, |
|
"rewards/margins": 9.85936500912621, |
|
"rewards/rejected": -7.074103219168527, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.651082251082251, |
|
"grad_norm": 4.331228652019405, |
|
"kl": 217.10455322265625, |
|
"learning_rate": 1.3728936691511704e-06, |
|
"logits/chosen": -53082216.72727273, |
|
"logits/rejected": -73758624.0, |
|
"logps/chosen": -257.35977450284093, |
|
"logps/rejected": -336.2916015625, |
|
"loss": 0.343, |
|
"rewards/chosen": 0.690484263680198, |
|
"rewards/margins": 6.507778193733909, |
|
"rewards/rejected": -5.817293930053711, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.6538528138528139, |
|
"grad_norm": 3.7377097810384403, |
|
"kl": 176.5330810546875, |
|
"learning_rate": 1.3532454194547734e-06, |
|
"logits/chosen": -114124352.0, |
|
"logits/rejected": -52099942.4, |
|
"logps/chosen": -380.898193359375, |
|
"logps/rejected": -236.4220703125, |
|
"loss": 0.2775, |
|
"rewards/chosen": -0.5208327770233154, |
|
"rewards/margins": 5.431049394607544, |
|
"rewards/rejected": -5.951882171630859, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.6566233766233767, |
|
"grad_norm": 4.191306253129776, |
|
"kl": 201.5948486328125, |
|
"learning_rate": 1.3336864730392587e-06, |
|
"logits/chosen": -98785644.8, |
|
"logits/rejected": -63730141.09090909, |
|
"logps/chosen": -268.1023193359375, |
|
"logps/rejected": -166.88303444602272, |
|
"loss": 0.3229, |
|
"rewards/chosen": -0.36386423110961913, |
|
"rewards/margins": 4.326531470905651, |
|
"rewards/rejected": -4.69039570201527, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.6593939393939394, |
|
"grad_norm": 4.9884577158326415, |
|
"kl": 275.1553955078125, |
|
"learning_rate": 1.314218353053619e-06, |
|
"logits/chosen": -50905499.428571425, |
|
"logits/rejected": -108700174.22222222, |
|
"logps/chosen": -168.27641950334822, |
|
"logps/rejected": -248.65044487847223, |
|
"loss": 0.374, |
|
"rewards/chosen": -5.029791695731027, |
|
"rewards/margins": -0.4220421806214345, |
|
"rewards/rejected": -4.607749515109592, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.6621645021645022, |
|
"grad_norm": 2.908368881048658, |
|
"kl": 235.3330078125, |
|
"learning_rate": 1.2948425755737592e-06, |
|
"logits/chosen": -48739173.333333336, |
|
"logits/rejected": -37064664.0, |
|
"logps/chosen": -217.1978759765625, |
|
"logps/rejected": -134.1108856201172, |
|
"loss": 0.3171, |
|
"rewards/chosen": -0.37347551186879474, |
|
"rewards/margins": 6.792223970095317, |
|
"rewards/rejected": -7.165699481964111, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.6649350649350649, |
|
"grad_norm": 4.311065760657517, |
|
"kl": 178.09713745117188, |
|
"learning_rate": 1.2755606494844294e-06, |
|
"logits/chosen": -59626680.0, |
|
"logits/rejected": -64623596.0, |
|
"logps/chosen": -316.20013427734375, |
|
"logps/rejected": -231.9442901611328, |
|
"loss": 0.2457, |
|
"rewards/chosen": 1.917290449142456, |
|
"rewards/margins": 9.681453466415405, |
|
"rewards/rejected": -7.764163017272949, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6677056277056277, |
|
"grad_norm": 3.0064005045422832, |
|
"kl": 202.15634155273438, |
|
"learning_rate": 1.2563740763617198e-06, |
|
"logits/chosen": -62175616.0, |
|
"logits/rejected": -42690928.0, |
|
"logps/chosen": -308.9885919744318, |
|
"logps/rejected": -176.0036865234375, |
|
"loss": 0.2843, |
|
"rewards/chosen": 1.3145668723366477, |
|
"rewards/margins": 2.3579565741799096, |
|
"rewards/rejected": -1.0433897018432616, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.6704761904761904, |
|
"grad_norm": 2.9232235110862885, |
|
"kl": 170.14356994628906, |
|
"learning_rate": 1.2372843503561318e-06, |
|
"logits/chosen": -41083064.88888889, |
|
"logits/rejected": -52004361.14285714, |
|
"logps/chosen": -193.79656304253473, |
|
"logps/rejected": -240.13106863839286, |
|
"loss": 0.3154, |
|
"rewards/chosen": 0.9401733610365126, |
|
"rewards/margins": 7.140057503230988, |
|
"rewards/rejected": -6.199884142194476, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.6732467532467532, |
|
"grad_norm": 3.2345169148154636, |
|
"kl": 306.8934020996094, |
|
"learning_rate": 1.218292958076213e-06, |
|
"logits/chosen": -43112040.0, |
|
"logits/rejected": -49439756.0, |
|
"logps/chosen": -206.4788055419922, |
|
"logps/rejected": -203.757080078125, |
|
"loss": 0.342, |
|
"rewards/chosen": 0.21420925855636597, |
|
"rewards/margins": 6.295262038707733, |
|
"rewards/rejected": -6.081052780151367, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.6760173160173161, |
|
"grad_norm": 3.5936976457100824, |
|
"kl": 290.11920166015625, |
|
"learning_rate": 1.1994013784727948e-06, |
|
"logits/chosen": -61215940.0, |
|
"logits/rejected": -35973080.0, |
|
"logps/chosen": -332.1205139160156, |
|
"logps/rejected": -155.35565185546875, |
|
"loss": 0.352, |
|
"rewards/chosen": 1.5890569686889648, |
|
"rewards/margins": 5.8782877922058105, |
|
"rewards/rejected": -4.289230823516846, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.6787878787878788, |
|
"grad_norm": 3.2106021999017913, |
|
"kl": 253.48971557617188, |
|
"learning_rate": 1.180611082723814e-06, |
|
"logits/chosen": -71740049.45454545, |
|
"logits/rejected": -95363673.6, |
|
"logps/chosen": -374.18319424715907, |
|
"logps/rejected": -203.96192626953126, |
|
"loss": 0.3024, |
|
"rewards/chosen": 0.7615150104869496, |
|
"rewards/margins": 9.2805946003307, |
|
"rewards/rejected": -8.51907958984375, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.6815584415584416, |
|
"grad_norm": 3.7801991502937464, |
|
"kl": 188.4849853515625, |
|
"learning_rate": 1.161923534119752e-06, |
|
"logits/chosen": -50279622.4, |
|
"logits/rejected": -42633162.666666664, |
|
"logps/chosen": -254.7062744140625, |
|
"logps/rejected": -164.59708658854166, |
|
"loss": 0.3034, |
|
"rewards/chosen": -1.0920855522155761, |
|
"rewards/margins": 4.658198006947835, |
|
"rewards/rejected": -5.750283559163411, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.6843290043290043, |
|
"grad_norm": 2.700192871685373, |
|
"kl": 223.2760772705078, |
|
"learning_rate": 1.1433401879496723e-06, |
|
"logits/chosen": -72866929.77777778, |
|
"logits/rejected": -46679538.28571428, |
|
"logps/chosen": -280.45448133680554, |
|
"logps/rejected": -190.09620884486608, |
|
"loss": 0.2797, |
|
"rewards/chosen": -1.5863361358642578, |
|
"rewards/margins": 4.47297477722168, |
|
"rewards/rejected": -6.0593109130859375, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.6870995670995671, |
|
"grad_norm": 3.7313916035101675, |
|
"kl": 229.77716064453125, |
|
"learning_rate": 1.1248624913878966e-06, |
|
"logits/chosen": -90124640.0, |
|
"logits/rejected": -49573136.0, |
|
"logps/chosen": -392.1523132324219, |
|
"logps/rejected": -185.72872924804688, |
|
"loss": 0.3145, |
|
"rewards/chosen": 2.3596444129943848, |
|
"rewards/margins": 9.85338306427002, |
|
"rewards/rejected": -7.493738651275635, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.6898701298701299, |
|
"grad_norm": 2.8892760595582017, |
|
"kl": 330.8048095703125, |
|
"learning_rate": 1.1064918833813073e-06, |
|
"logits/chosen": -51090944.0, |
|
"logits/rejected": -71529491.2, |
|
"logps/chosen": -175.30206298828125, |
|
"logps/rejected": -274.307275390625, |
|
"loss": 0.3581, |
|
"rewards/chosen": 0.6159710884094238, |
|
"rewards/margins": 6.964090061187744, |
|
"rewards/rejected": -6.3481189727783205, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.6926406926406926, |
|
"grad_norm": 3.5154507753168063, |
|
"kl": 262.57794189453125, |
|
"learning_rate": 1.088229794537283e-06, |
|
"logits/chosen": -64860172.8, |
|
"logits/rejected": -149981525.33333334, |
|
"logps/chosen": -288.94384765625, |
|
"logps/rejected": -265.95355224609375, |
|
"loss": 0.316, |
|
"rewards/chosen": 1.1107938766479493, |
|
"rewards/margins": 10.878010876973471, |
|
"rewards/rejected": -9.767217000325521, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6954112554112554, |
|
"grad_norm": 2.9905382664133535, |
|
"kl": 159.176513671875, |
|
"learning_rate": 1.0700776470122981e-06, |
|
"logits/chosen": -45575300.0, |
|
"logits/rejected": -83707800.0, |
|
"logps/chosen": -157.0391845703125, |
|
"logps/rejected": -256.8617858886719, |
|
"loss": 0.2896, |
|
"rewards/chosen": -0.197882741689682, |
|
"rewards/margins": 8.787455469369888, |
|
"rewards/rejected": -8.98533821105957, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.6981818181818182, |
|
"grad_norm": 3.2115303535516118, |
|
"kl": 212.965576171875, |
|
"learning_rate": 1.0520368544011661e-06, |
|
"logits/chosen": -56122581.333333336, |
|
"logits/rejected": -132167224.0, |
|
"logps/chosen": -262.070068359375, |
|
"logps/rejected": -305.2275390625, |
|
"loss": 0.2519, |
|
"rewards/chosen": -0.16033987204233804, |
|
"rewards/margins": 9.907416780789694, |
|
"rewards/rejected": -10.067756652832031, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.6981818181818182, |
|
"eval_logits/chosen": -69407142.4, |
|
"eval_logits/rejected": -34758311.72413793, |
|
"eval_logps/chosen": -343.69078776041664, |
|
"eval_logps/rejected": -225.02624932650863, |
|
"eval_loss": 0.3109191656112671, |
|
"eval_rewards/chosen": 1.5635496775309246, |
|
"eval_rewards/margins": 7.993889622304632, |
|
"eval_rewards/rejected": -6.430339944773707, |
|
"eval_runtime": 375.3266, |
|
"eval_samples_per_second": 1.244, |
|
"eval_steps_per_second": 0.157, |
|
"kl": 1106.7635498046875, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.700952380952381, |
|
"grad_norm": 3.408372034521783, |
|
"kl": 155.97579956054688, |
|
"learning_rate": 1.0341088216269625e-06, |
|
"logits/chosen": -65252832.0, |
|
"logits/rejected": -51106684.0, |
|
"logps/chosen": -388.51385498046875, |
|
"logps/rejected": -146.90829467773438, |
|
"loss": 0.2696, |
|
"rewards/chosen": 0.8967366218566895, |
|
"rewards/margins": 6.216433048248291, |
|
"rewards/rejected": -5.319696426391602, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.7037229437229438, |
|
"grad_norm": 4.383580312032361, |
|
"kl": 341.5067443847656, |
|
"learning_rate": 1.0162949448316089e-06, |
|
"logits/chosen": -86413928.72727273, |
|
"logits/rejected": -114341286.4, |
|
"logps/chosen": -367.32177734375, |
|
"logps/rejected": -190.59051513671875, |
|
"loss": 0.3508, |
|
"rewards/chosen": 1.0880462473089045, |
|
"rewards/margins": 3.699661619013006, |
|
"rewards/rejected": -2.6116153717041017, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.7064935064935065, |
|
"grad_norm": 3.055607123710733, |
|
"kl": 195.12664794921875, |
|
"learning_rate": 9.98596611267158e-07, |
|
"logits/chosen": -76725646.22222222, |
|
"logits/rejected": -64971469.71428572, |
|
"logps/chosen": -337.21875, |
|
"logps/rejected": -205.77462332589286, |
|
"loss": 0.3384, |
|
"rewards/chosen": -1.6779973771837022, |
|
"rewards/margins": 5.715687010023329, |
|
"rewards/rejected": -7.393684387207031, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.7092640692640693, |
|
"grad_norm": 2.3759956264666604, |
|
"kl": 273.7037353515625, |
|
"learning_rate": 9.81015199187753e-07, |
|
"logits/chosen": -72682016.0, |
|
"logits/rejected": -81808928.0, |
|
"logps/chosen": -385.7765197753906, |
|
"logps/rejected": -200.73654174804688, |
|
"loss": 0.2374, |
|
"rewards/chosen": 1.9999887943267822, |
|
"rewards/margins": 8.917597532272339, |
|
"rewards/rejected": -6.917608737945557, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.712034632034632, |
|
"grad_norm": 3.8057096837262425, |
|
"kl": 188.0250244140625, |
|
"learning_rate": 9.63552077742301e-07, |
|
"logits/chosen": -67763000.0, |
|
"logits/rejected": -90358536.0, |
|
"logps/chosen": -238.7766876220703, |
|
"logps/rejected": -257.0978698730469, |
|
"loss": 0.3161, |
|
"rewards/chosen": 1.9526047706604004, |
|
"rewards/margins": 8.665544509887695, |
|
"rewards/rejected": -6.712939739227295, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.7148051948051948, |
|
"grad_norm": 2.5951420032808077, |
|
"kl": 182.75180053710938, |
|
"learning_rate": 9.462086068678519e-07, |
|
"logits/chosen": -82555717.33333333, |
|
"logits/rejected": -83283424.0, |
|
"logps/chosen": -274.6826171875, |
|
"logps/rejected": -231.703662109375, |
|
"loss": 0.2752, |
|
"rewards/chosen": -4.324587504069011, |
|
"rewards/margins": 2.9592534383138016, |
|
"rewards/rejected": -7.283840942382812, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.7175757575757575, |
|
"grad_norm": 3.592536819677904, |
|
"kl": 372.02880859375, |
|
"learning_rate": 9.289861371836886e-07, |
|
"logits/chosen": -33494551.272727273, |
|
"logits/rejected": -43864403.2, |
|
"logps/chosen": -210.74216530539772, |
|
"logps/rejected": -146.0051513671875, |
|
"loss": 0.3954, |
|
"rewards/chosen": -2.606386184692383, |
|
"rewards/margins": -0.9926284790039062, |
|
"rewards/rejected": -1.6137577056884767, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.7203463203463204, |
|
"grad_norm": 3.611795674877251, |
|
"kl": 144.7232208251953, |
|
"learning_rate": 9.118860098861538e-07, |
|
"logits/chosen": -27257533.714285713, |
|
"logits/rejected": -92377756.44444445, |
|
"logps/chosen": -106.76185825892857, |
|
"logps/rejected": -172.93641493055554, |
|
"loss": 0.2925, |
|
"rewards/chosen": -4.36328969682966, |
|
"rewards/margins": 1.0060645512172153, |
|
"rewards/rejected": -5.369354248046875, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7231168831168832, |
|
"grad_norm": 3.426912310848221, |
|
"kl": 230.80148315429688, |
|
"learning_rate": 8.949095566441985e-07, |
|
"logits/chosen": -70854326.85714285, |
|
"logits/rejected": -106736128.0, |
|
"logps/chosen": -355.3104771205357, |
|
"logps/rejected": -143.09120008680554, |
|
"loss": 0.2628, |
|
"rewards/chosen": 4.951349530901227, |
|
"rewards/margins": 11.514222523522754, |
|
"rewards/rejected": -6.562872992621528, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.7258874458874459, |
|
"grad_norm": 3.9222498849988217, |
|
"kl": 275.5905456542969, |
|
"learning_rate": 8.78058099495685e-07, |
|
"logits/chosen": -63815808.0, |
|
"logits/rejected": -43907032.0, |
|
"logps/chosen": -221.944921875, |
|
"logps/rejected": -152.33535766601562, |
|
"loss": 0.3599, |
|
"rewards/chosen": 1.3001568794250489, |
|
"rewards/margins": 6.684520689646403, |
|
"rewards/rejected": -5.3843638102213545, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.7286580086580087, |
|
"grad_norm": 4.081637856436443, |
|
"kl": 205.53375244140625, |
|
"learning_rate": 8.613329507444274e-07, |
|
"logits/chosen": -70823014.4, |
|
"logits/rejected": 51152917.333333336, |
|
"logps/chosen": -416.84267578125, |
|
"logps/rejected": -218.20318603515625, |
|
"loss": 0.3067, |
|
"rewards/chosen": 2.8227375030517576, |
|
"rewards/margins": 11.075025685628255, |
|
"rewards/rejected": -8.252288182576498, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.7314285714285714, |
|
"grad_norm": 3.0956087519734283, |
|
"kl": 347.00433349609375, |
|
"learning_rate": 8.44735412857999e-07, |
|
"logits/chosen": -48914204.44444445, |
|
"logits/rejected": -54545723.428571425, |
|
"logps/chosen": -236.80449761284723, |
|
"logps/rejected": -212.97638811383928, |
|
"loss": 0.3157, |
|
"rewards/chosen": -2.7551773918999567, |
|
"rewards/margins": 2.7477452111622647, |
|
"rewards/rejected": -5.502922603062221, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.7341991341991342, |
|
"grad_norm": 3.6798026423098853, |
|
"kl": 378.4184265136719, |
|
"learning_rate": 8.282667783663056e-07, |
|
"logits/chosen": -47141145.6, |
|
"logits/rejected": -69143936.0, |
|
"logps/chosen": -230.9576171875, |
|
"logps/rejected": -178.14546342329547, |
|
"loss": 0.2581, |
|
"rewards/chosen": 1.3340899467468261, |
|
"rewards/margins": 5.900271476398815, |
|
"rewards/rejected": -4.566181529651988, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.7369696969696969, |
|
"grad_norm": 2.818485572402302, |
|
"kl": 211.65887451171875, |
|
"learning_rate": 8.119283297609238e-07, |
|
"logits/chosen": -63136617.14285714, |
|
"logits/rejected": -55634499.55555555, |
|
"logps/chosen": -260.20821707589283, |
|
"logps/rejected": -183.45157877604166, |
|
"loss": 0.2609, |
|
"rewards/chosen": 2.2578698566981723, |
|
"rewards/margins": 5.820503794957721, |
|
"rewards/rejected": -3.5626339382595487, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.7397402597402597, |
|
"grad_norm": 3.1897322815284683, |
|
"kl": 142.84185791015625, |
|
"learning_rate": 7.957213393952335e-07, |
|
"logits/chosen": -67597770.66666667, |
|
"logits/rejected": -44832944.0, |
|
"logps/chosen": -222.02327473958334, |
|
"logps/rejected": -255.47900390625, |
|
"loss": 0.2854, |
|
"rewards/chosen": -4.233989079793294, |
|
"rewards/margins": 4.390859285990397, |
|
"rewards/rejected": -8.624848365783691, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.7425108225108226, |
|
"grad_norm": 3.3891834518638553, |
|
"kl": 134.27809143066406, |
|
"learning_rate": 7.796470693853281e-07, |
|
"logits/chosen": -43992632.0, |
|
"logits/rejected": -94291280.0, |
|
"logps/chosen": -248.00100708007812, |
|
"logps/rejected": -233.19149780273438, |
|
"loss": 0.2848, |
|
"rewards/chosen": 0.43076658248901367, |
|
"rewards/margins": 6.797356128692627, |
|
"rewards/rejected": -6.366589546203613, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.7452813852813853, |
|
"grad_norm": 3.3252557869988606, |
|
"kl": 180.33831787109375, |
|
"learning_rate": 7.637067715117327e-07, |
|
"logits/chosen": -62336172.8, |
|
"logits/rejected": -46789130.666666664, |
|
"logps/chosen": -399.1670654296875, |
|
"logps/rejected": -212.2073771158854, |
|
"loss": 0.291, |
|
"rewards/chosen": 1.414572048187256, |
|
"rewards/margins": 7.036088593800862, |
|
"rewards/rejected": -5.6215165456136065, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.7480519480519481, |
|
"grad_norm": 3.566170694431363, |
|
"kl": 199.22201538085938, |
|
"learning_rate": 7.479016871219174e-07, |
|
"logits/chosen": -71036677.33333333, |
|
"logits/rejected": -59511052.8, |
|
"logps/chosen": -341.9608154296875, |
|
"logps/rejected": -148.16932373046876, |
|
"loss": 0.3713, |
|
"rewards/chosen": -0.5459572474161783, |
|
"rewards/margins": 2.417703183492025, |
|
"rewards/rejected": -2.9636604309082033, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7508225108225108, |
|
"grad_norm": 2.802819654299222, |
|
"kl": 356.5174560546875, |
|
"learning_rate": 7.322330470336314e-07, |
|
"logits/chosen": -53457590.85714286, |
|
"logits/rejected": -110052842.66666667, |
|
"logps/chosen": -213.4609375, |
|
"logps/rejected": -178.4493408203125, |
|
"loss": 0.3042, |
|
"rewards/chosen": 0.768862111227853, |
|
"rewards/margins": 6.479752139439658, |
|
"rewards/rejected": -5.710890028211805, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.7535930735930736, |
|
"grad_norm": 3.27403131274412, |
|
"kl": 300.19671630859375, |
|
"learning_rate": 7.167020714390502e-07, |
|
"logits/chosen": -79985589.33333333, |
|
"logits/rejected": -98586624.0, |
|
"logps/chosen": -271.8570963541667, |
|
"logps/rejected": -254.91591796875, |
|
"loss": 0.2824, |
|
"rewards/chosen": -0.687197208404541, |
|
"rewards/margins": 5.884105587005616, |
|
"rewards/rejected": -6.571302795410157, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.7563636363636363, |
|
"grad_norm": 4.796024627038505, |
|
"kl": 172.873291015625, |
|
"learning_rate": 7.013099698097539e-07, |
|
"logits/chosen": -58375436.8, |
|
"logits/rejected": -113968372.36363636, |
|
"logps/chosen": -200.2075927734375, |
|
"logps/rejected": -224.1153231534091, |
|
"loss": 0.2744, |
|
"rewards/chosen": 2.289317321777344, |
|
"rewards/margins": 9.3487773548473, |
|
"rewards/rejected": -7.059460033069957, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.7591341991341991, |
|
"grad_norm": 1.898707621563178, |
|
"kl": 275.5303955078125, |
|
"learning_rate": 6.860579408025436e-07, |
|
"logits/chosen": -66903751.11111111, |
|
"logits/rejected": -52988854.85714286, |
|
"logps/chosen": -210.03946940104166, |
|
"logps/rejected": -199.36795479910714, |
|
"loss": 0.351, |
|
"rewards/chosen": -4.433419969346788, |
|
"rewards/margins": 2.752294752332899, |
|
"rewards/rejected": -7.1857147216796875, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.7619047619047619, |
|
"grad_norm": 3.166222661314354, |
|
"kl": 249.98538208007812, |
|
"learning_rate": 6.709471721660904e-07, |
|
"logits/chosen": -46733385.14285714, |
|
"logits/rejected": -126729671.1111111, |
|
"logps/chosen": -265.08956473214283, |
|
"logps/rejected": -174.86515299479166, |
|
"loss": 0.3114, |
|
"rewards/chosen": -0.1910043443952288, |
|
"rewards/margins": 4.7248930249895364, |
|
"rewards/rejected": -4.915897369384766, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.7646753246753247, |
|
"grad_norm": 2.690930213427526, |
|
"kl": 212.82666015625, |
|
"learning_rate": 6.559788406484446e-07, |
|
"logits/chosen": -48945336.0, |
|
"logits/rejected": -61969256.0, |
|
"logps/chosen": -328.7871398925781, |
|
"logps/rejected": -133.97158813476562, |
|
"loss": 0.2748, |
|
"rewards/chosen": 3.2727508544921875, |
|
"rewards/margins": 6.7245166301727295, |
|
"rewards/rejected": -3.451765775680542, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.7674458874458875, |
|
"grad_norm": 2.7007228681678948, |
|
"kl": 120.11971282958984, |
|
"learning_rate": 6.41154111905393e-07, |
|
"logits/chosen": -91537863.1111111, |
|
"logits/rejected": -50415232.0, |
|
"logps/chosen": -287.26361762152777, |
|
"logps/rejected": -191.4361572265625, |
|
"loss": 0.2997, |
|
"rewards/chosen": 1.7716151343451605, |
|
"rewards/margins": 9.348053493197002, |
|
"rewards/rejected": -7.576438358851841, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.7702164502164502, |
|
"grad_norm": 2.072018372743311, |
|
"kl": 345.71630859375, |
|
"learning_rate": 6.264741404096875e-07, |
|
"logits/chosen": -50276706.90909091, |
|
"logits/rejected": -44993907.2, |
|
"logps/chosen": -155.4117986505682, |
|
"logps/rejected": -171.1499755859375, |
|
"loss": 0.3447, |
|
"rewards/chosen": -1.067693363536488, |
|
"rewards/margins": 4.660452998768199, |
|
"rewards/rejected": -5.728146362304687, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.772987012987013, |
|
"grad_norm": 2.9339434752000773, |
|
"kl": 322.894287109375, |
|
"learning_rate": 6.119400693611358e-07, |
|
"logits/chosen": -70990665.14285715, |
|
"logits/rejected": -43696771.55555555, |
|
"logps/chosen": -285.5392368861607, |
|
"logps/rejected": -176.08287217881946, |
|
"loss": 0.2985, |
|
"rewards/chosen": 2.104515620640346, |
|
"rewards/margins": 7.515042320130362, |
|
"rewards/rejected": -5.410526699490017, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.7757575757575758, |
|
"grad_norm": 3.562536584571455, |
|
"kl": 177.15957641601562, |
|
"learning_rate": 5.975530305975808e-07, |
|
"logits/chosen": -103062884.57142857, |
|
"logits/rejected": -50913994.666666664, |
|
"logps/chosen": -339.17027064732144, |
|
"logps/rejected": -184.08607313368054, |
|
"loss": 0.282, |
|
"rewards/chosen": 3.5254838126046315, |
|
"rewards/margins": 7.8241237458728605, |
|
"rewards/rejected": -4.2986399332682295, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7785281385281385, |
|
"grad_norm": 4.0306130164614, |
|
"kl": 167.3966064453125, |
|
"learning_rate": 5.833141445067541e-07, |
|
"logits/chosen": -100358954.66666667, |
|
"logits/rejected": -57243862.85714286, |
|
"logps/chosen": -231.44365776909723, |
|
"logps/rejected": -176.75859723772322, |
|
"loss": 0.2869, |
|
"rewards/chosen": -1.6125593185424805, |
|
"rewards/margins": 2.7696215765816827, |
|
"rewards/rejected": -4.382180895124163, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.7812987012987013, |
|
"grad_norm": 3.44873803212302, |
|
"kl": 348.943359375, |
|
"learning_rate": 5.692245199390281e-07, |
|
"logits/chosen": -68807224.0, |
|
"logits/rejected": -69575264.0, |
|
"logps/chosen": -225.96981811523438, |
|
"logps/rejected": -246.62388610839844, |
|
"loss": 0.3249, |
|
"rewards/chosen": -0.17798352241516113, |
|
"rewards/margins": 5.94242262840271, |
|
"rewards/rejected": -6.120406150817871, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.784069264069264, |
|
"grad_norm": 2.894002118538045, |
|
"kl": 241.6986083984375, |
|
"learning_rate": 5.552852541210651e-07, |
|
"logits/chosen": -47480483.55555555, |
|
"logits/rejected": -41506720.0, |
|
"logps/chosen": -136.6748046875, |
|
"logps/rejected": -164.92328752790178, |
|
"loss": 0.3215, |
|
"rewards/chosen": -4.108927408854167, |
|
"rewards/margins": 2.43545168922061, |
|
"rewards/rejected": -6.544379098074777, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.7868398268398269, |
|
"grad_norm": 3.2824108049566894, |
|
"kl": 170.58547973632812, |
|
"learning_rate": 5.414974325703687e-07, |
|
"logits/chosen": -58694203.428571425, |
|
"logits/rejected": -85210695.1111111, |
|
"logps/chosen": -287.21871512276783, |
|
"logps/rejected": -216.19121636284723, |
|
"loss": 0.2668, |
|
"rewards/chosen": -1.4370292936052596, |
|
"rewards/margins": 3.6460005593678306, |
|
"rewards/rejected": -5.08302985297309, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.7896103896103897, |
|
"grad_norm": 3.738104349195781, |
|
"kl": 140.03277587890625, |
|
"learning_rate": 5.278621290107533e-07, |
|
"logits/chosen": -48089952.0, |
|
"logits/rejected": -68618922.66666667, |
|
"logps/chosen": -210.25188337053572, |
|
"logps/rejected": -225.17115614149304, |
|
"loss": 0.3009, |
|
"rewards/chosen": -2.186547006879534, |
|
"rewards/margins": -1.190037031022329, |
|
"rewards/rejected": -0.9965099758572049, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.7923809523809524, |
|
"grad_norm": 3.4587778119224875, |
|
"kl": 338.2921142578125, |
|
"learning_rate": 5.143804052887228e-07, |
|
"logits/chosen": -87703588.57142857, |
|
"logits/rejected": -32304408.888888888, |
|
"logps/chosen": -481.19339425223217, |
|
"logps/rejected": -205.55504014756946, |
|
"loss": 0.3118, |
|
"rewards/chosen": 2.911403111049107, |
|
"rewards/margins": 8.781512245299325, |
|
"rewards/rejected": -5.870109134250217, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.7951515151515152, |
|
"grad_norm": 4.206081190690971, |
|
"kl": 144.6542510986328, |
|
"learning_rate": 5.010533112907845e-07, |
|
"logits/chosen": -35128680.0, |
|
"logits/rejected": -64883232.0, |
|
"logps/chosen": -138.93106079101562, |
|
"logps/rejected": -158.321044921875, |
|
"loss": 0.2868, |
|
"rewards/chosen": 2.1601052284240723, |
|
"rewards/margins": 8.834944248199463, |
|
"rewards/rejected": -6.674839019775391, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.7979220779220779, |
|
"grad_norm": 3.386798752207688, |
|
"kl": 239.18814086914062, |
|
"learning_rate": 4.878818848616861e-07, |
|
"logits/chosen": -80401216.0, |
|
"logits/rejected": -90698474.66666667, |
|
"logps/chosen": -238.15419224330358, |
|
"logps/rejected": -262.73231336805554, |
|
"loss": 0.2959, |
|
"rewards/chosen": 1.4890550885881697, |
|
"rewards/margins": 10.170775398375497, |
|
"rewards/rejected": -8.681720309787327, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.7979220779220779, |
|
"eval_logits/chosen": -69444923.73333333, |
|
"eval_logits/rejected": -36029797.51724138, |
|
"eval_logps/chosen": -342.69557291666666, |
|
"eval_logps/rejected": -230.78365773168105, |
|
"eval_loss": 0.3033340275287628, |
|
"eval_rewards/chosen": 1.6630688985188802, |
|
"eval_rewards/margins": 8.669149262877717, |
|
"eval_rewards/rejected": -7.006080364358836, |
|
"eval_runtime": 375.9399, |
|
"eval_samples_per_second": 1.242, |
|
"eval_steps_per_second": 0.157, |
|
"kl": 1082.5067138671875, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.8006926406926407, |
|
"grad_norm": 2.808240147886354, |
|
"kl": 238.10203552246094, |
|
"learning_rate": 4.748671517235948e-07, |
|
"logits/chosen": -62291260.0, |
|
"logits/rejected": -120199832.0, |
|
"logps/chosen": -252.74928283691406, |
|
"logps/rejected": -304.03900146484375, |
|
"loss": 0.3472, |
|
"rewards/chosen": -0.03838551044464111, |
|
"rewards/margins": 8.531028628349304, |
|
"rewards/rejected": -8.569414138793945, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.8034632034632034, |
|
"grad_norm": 4.382126448345103, |
|
"kl": 272.16949462890625, |
|
"learning_rate": 4.620101253962206e-07, |
|
"logits/chosen": -41600490.666666664, |
|
"logits/rejected": -59269593.6, |
|
"logps/chosen": -191.1187744140625, |
|
"logps/rejected": -142.35963134765626, |
|
"loss": 0.2236, |
|
"rewards/chosen": 0.45951588948567706, |
|
"rewards/margins": 5.3680478413899735, |
|
"rewards/rejected": -4.908531951904297, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.8062337662337662, |
|
"grad_norm": 3.351554288936717, |
|
"kl": 270.4270935058594, |
|
"learning_rate": 4.4931180711788537e-07, |
|
"logits/chosen": -163745781.33333334, |
|
"logits/rejected": -138789612.30769232, |
|
"logps/chosen": -429.6186930338542, |
|
"logps/rejected": -226.00240384615384, |
|
"loss": 0.2312, |
|
"rewards/chosen": 6.541826883951823, |
|
"rewards/margins": 12.985393426357172, |
|
"rewards/rejected": -6.443566542405349, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.8090043290043291, |
|
"grad_norm": 2.404015170456249, |
|
"kl": 400.7691650390625, |
|
"learning_rate": 4.3677318576755693e-07, |
|
"logits/chosen": -60730896.0, |
|
"logits/rejected": -195664464.0, |
|
"logps/chosen": -279.2489929199219, |
|
"logps/rejected": -246.13470458984375, |
|
"loss": 0.3219, |
|
"rewards/chosen": -3.9280588626861572, |
|
"rewards/margins": 2.784353017807007, |
|
"rewards/rejected": -6.712411880493164, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.8117748917748918, |
|
"grad_norm": 3.105770373370238, |
|
"kl": 166.30032348632812, |
|
"learning_rate": 4.243952377878338e-07, |
|
"logits/chosen": -77011364.57142857, |
|
"logits/rejected": -49117194.666666664, |
|
"logps/chosen": -285.13291713169644, |
|
"logps/rejected": -136.7578125, |
|
"loss": 0.2972, |
|
"rewards/chosen": -6.261838095528739, |
|
"rewards/margins": 0.7438669961596291, |
|
"rewards/rejected": -7.005705091688368, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.8145454545454546, |
|
"grad_norm": 4.000426081788337, |
|
"kl": 267.60205078125, |
|
"learning_rate": 4.1217892710891134e-07, |
|
"logits/chosen": -113192118.85714285, |
|
"logits/rejected": -43238922.666666664, |
|
"logps/chosen": -452.5059291294643, |
|
"logps/rejected": -208.38062879774304, |
|
"loss": 0.2971, |
|
"rewards/chosen": 0.20489086423601424, |
|
"rewards/margins": 7.392902033669608, |
|
"rewards/rejected": -7.188011169433594, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.8173160173160173, |
|
"grad_norm": 3.123408229018022, |
|
"kl": 218.33502197265625, |
|
"learning_rate": 4.001252050735102e-07, |
|
"logits/chosen": -38402567.11111111, |
|
"logits/rejected": -56519460.571428575, |
|
"logps/chosen": -252.67822265625, |
|
"logps/rejected": -268.72366768973217, |
|
"loss": 0.3443, |
|
"rewards/chosen": 0.5289336310492622, |
|
"rewards/margins": 9.533072032625714, |
|
"rewards/rejected": -9.004138401576451, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.8200865800865801, |
|
"grad_norm": 5.000927062881679, |
|
"kl": 338.04443359375, |
|
"learning_rate": 3.882350103627952e-07, |
|
"logits/chosen": -58505769.14285714, |
|
"logits/rejected": -57647619.55555555, |
|
"logps/chosen": -247.9539794921875, |
|
"logps/rejected": -206.06060112847223, |
|
"loss": 0.3395, |
|
"rewards/chosen": 3.2841903141566684, |
|
"rewards/margins": 11.585580220298162, |
|
"rewards/rejected": -8.301389906141493, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.8228571428571428, |
|
"grad_norm": 3.0362677210091022, |
|
"kl": 173.23562622070312, |
|
"learning_rate": 3.7650926892327297e-07, |
|
"logits/chosen": -44870958.222222224, |
|
"logits/rejected": -66984045.71428572, |
|
"logps/chosen": -256.7326388888889, |
|
"logps/rejected": -109.4141845703125, |
|
"loss": 0.2963, |
|
"rewards/chosen": -2.3694470723470054, |
|
"rewards/margins": 3.4614811851864764, |
|
"rewards/rejected": -5.830928257533482, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.8256277056277056, |
|
"grad_norm": 2.855189599099019, |
|
"kl": 210.4012451171875, |
|
"learning_rate": 3.649488938946844e-07, |
|
"logits/chosen": -45783491.55555555, |
|
"logits/rejected": -51585654.85714286, |
|
"logps/chosen": -244.33824327256946, |
|
"logps/rejected": -176.662841796875, |
|
"loss": 0.2647, |
|
"rewards/chosen": -2.904595692952474, |
|
"rewards/margins": 3.5319571722121466, |
|
"rewards/rejected": -6.436552865164621, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.8283982683982684, |
|
"grad_norm": 3.805562216682305, |
|
"kl": 276.049072265625, |
|
"learning_rate": 3.5355478553889626e-07, |
|
"logits/chosen": -79455768.0, |
|
"logits/rejected": 15600413.333333334, |
|
"logps/chosen": -300.433349609375, |
|
"logps/rejected": -213.483154296875, |
|
"loss": 0.2934, |
|
"rewards/chosen": 2.3578178882598877, |
|
"rewards/margins": 7.926704963048299, |
|
"rewards/rejected": -5.568887074788411, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.8311688311688312, |
|
"grad_norm": 2.8140310846836147, |
|
"kl": 196.21664428710938, |
|
"learning_rate": 3.4232783116978976e-07, |
|
"logits/chosen": -72521104.0, |
|
"logits/rejected": -60625048.0, |
|
"logps/chosen": -243.73028564453125, |
|
"logps/rejected": -262.26806640625, |
|
"loss": 0.3485, |
|
"rewards/chosen": -1.804718017578125, |
|
"rewards/margins": 1.0916314125061035, |
|
"rewards/rejected": -2.8963494300842285, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.833939393939394, |
|
"grad_norm": 3.9832279655691645, |
|
"kl": 228.42916870117188, |
|
"learning_rate": 3.312689050841658e-07, |
|
"logits/chosen": -93826016.0, |
|
"logits/rejected": -53009270.4, |
|
"logps/chosen": -299.4957682291667, |
|
"logps/rejected": -197.59713134765624, |
|
"loss": 0.2369, |
|
"rewards/chosen": -2.22921085357666, |
|
"rewards/margins": 4.334420585632325, |
|
"rewards/rejected": -6.563631439208985, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.8367099567099567, |
|
"grad_norm": 2.8538026273017025, |
|
"kl": 175.7204132080078, |
|
"learning_rate": 3.203788684936535e-07, |
|
"logits/chosen": -73006393.6, |
|
"logits/rejected": -61207109.81818182, |
|
"logps/chosen": -242.75419921875, |
|
"logps/rejected": -142.1380948153409, |
|
"loss": 0.3186, |
|
"rewards/chosen": 0.9609004974365234, |
|
"rewards/margins": 4.359611615267667, |
|
"rewards/rejected": -3.3987111178311435, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.8394805194805195, |
|
"grad_norm": 3.8683368268702805, |
|
"kl": 301.593505859375, |
|
"learning_rate": 3.096585694576498e-07, |
|
"logits/chosen": -70421624.8888889, |
|
"logits/rejected": -174282185.14285713, |
|
"logps/chosen": -437.4618869357639, |
|
"logps/rejected": -260.1992710658482, |
|
"loss": 0.3247, |
|
"rewards/chosen": 1.732922871907552, |
|
"rewards/margins": 9.504617600213914, |
|
"rewards/rejected": -7.771694728306362, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.8422510822510823, |
|
"grad_norm": 3.9680420344462726, |
|
"kl": 251.5784912109375, |
|
"learning_rate": 2.9910884281727225e-07, |
|
"logits/chosen": -71067492.57142857, |
|
"logits/rejected": -66364231.11111111, |
|
"logps/chosen": -239.69499860491072, |
|
"logps/rejected": -192.93386501736111, |
|
"loss": 0.3226, |
|
"rewards/chosen": 0.19936534336635045, |
|
"rewards/margins": 6.042430393279545, |
|
"rewards/rejected": -5.843065049913195, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.845021645021645, |
|
"grad_norm": 3.7746476943141687, |
|
"kl": 325.1376953125, |
|
"learning_rate": 2.8873051013034695e-07, |
|
"logits/chosen": -42786944.0, |
|
"logits/rejected": -62841930.666666664, |
|
"logps/chosen": -141.4681884765625, |
|
"logps/rejected": -199.77783203125, |
|
"loss": 0.2805, |
|
"rewards/chosen": -4.344178771972656, |
|
"rewards/margins": 2.449849192301433, |
|
"rewards/rejected": -6.794027964274089, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.8477922077922078, |
|
"grad_norm": 4.307089034749869, |
|
"kl": 240.35658264160156, |
|
"learning_rate": 2.785243796074333e-07, |
|
"logits/chosen": -57018339.55555555, |
|
"logits/rejected": -130973366.85714285, |
|
"logps/chosen": -227.49403211805554, |
|
"logps/rejected": -192.2506103515625, |
|
"loss": 0.2939, |
|
"rewards/chosen": -1.010747167799208, |
|
"rewards/margins": 2.9281449242243687, |
|
"rewards/rejected": -3.938892092023577, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.8505627705627705, |
|
"grad_norm": 2.1582717524376656, |
|
"kl": 225.83633422851562, |
|
"learning_rate": 2.6849124604887836e-07, |
|
"logits/chosen": -45088565.333333336, |
|
"logits/rejected": -65247955.2, |
|
"logps/chosen": -155.26885986328125, |
|
"logps/rejected": -231.8005859375, |
|
"loss": 0.2921, |
|
"rewards/chosen": -2.116344610850016, |
|
"rewards/margins": 5.6510290463765465, |
|
"rewards/rejected": -7.767373657226562, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 2.996078110648648, |
|
"kl": 217.82655334472656, |
|
"learning_rate": 2.5863189078292913e-07, |
|
"logits/chosen": -63356553.84615385, |
|
"logits/rejected": -39525973.333333336, |
|
"logps/chosen": -406.2390324519231, |
|
"logps/rejected": -163.10069783528647, |
|
"loss": 0.2882, |
|
"rewards/chosen": 1.0046427800105169, |
|
"rewards/margins": 3.491529758159931, |
|
"rewards/rejected": -2.486886978149414, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.8561038961038961, |
|
"grad_norm": 2.9594486176139214, |
|
"kl": 283.44134521484375, |
|
"learning_rate": 2.489470816048806e-07, |
|
"logits/chosen": -43881161.14285714, |
|
"logits/rejected": -29412579.555555556, |
|
"logps/chosen": -207.05925641741072, |
|
"logps/rejected": -212.45526801215277, |
|
"loss": 0.2987, |
|
"rewards/chosen": -1.6313300813947404, |
|
"rewards/margins": 5.493112125093974, |
|
"rewards/rejected": -7.124442206488715, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.8588744588744589, |
|
"grad_norm": 3.3978016460694382, |
|
"kl": 230.8690185546875, |
|
"learning_rate": 2.3943757271728816e-07, |
|
"logits/chosen": -66721024.0, |
|
"logits/rejected": -92142896.0, |
|
"logps/chosen": -311.16510009765625, |
|
"logps/rejected": -228.64767456054688, |
|
"loss": 0.3377, |
|
"rewards/chosen": -0.45139995217323303, |
|
"rewards/margins": 6.1784490048885345, |
|
"rewards/rejected": -6.629848957061768, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8616450216450217, |
|
"grad_norm": 2.8586263345070497, |
|
"kl": 321.99847412109375, |
|
"learning_rate": 2.30104104671231e-07, |
|
"logits/chosen": -90843675.42857143, |
|
"logits/rejected": -73359914.66666667, |
|
"logps/chosen": -273.88846261160717, |
|
"logps/rejected": -211.38840060763889, |
|
"loss": 0.3031, |
|
"rewards/chosen": 1.2099121638706751, |
|
"rewards/margins": 7.697730987791031, |
|
"rewards/rejected": -6.487818823920356, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.8644155844155844, |
|
"grad_norm": 3.629578412957439, |
|
"kl": 352.22943115234375, |
|
"learning_rate": 2.2094740430864569e-07, |
|
"logits/chosen": -93567232.0, |
|
"logits/rejected": -73416960.0, |
|
"logps/chosen": -474.8711344401042, |
|
"logps/rejected": -223.6056396484375, |
|
"loss": 0.3594, |
|
"rewards/chosen": 0.6456926663716634, |
|
"rewards/margins": 5.024157174428304, |
|
"rewards/rejected": -4.3784645080566404, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.8671861471861472, |
|
"grad_norm": 3.2593722533835514, |
|
"kl": 101.73670959472656, |
|
"learning_rate": 2.119681847057184e-07, |
|
"logits/chosen": -49168621.71428572, |
|
"logits/rejected": -64435868.44444445, |
|
"logps/chosen": -223.25570242745536, |
|
"logps/rejected": -224.44247775607639, |
|
"loss": 0.2976, |
|
"rewards/chosen": -1.141160488128662, |
|
"rewards/margins": 3.5539769596523705, |
|
"rewards/rejected": -4.695137447781033, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.8699567099567099, |
|
"grad_norm": 3.0561471018551187, |
|
"kl": 238.22964477539062, |
|
"learning_rate": 2.0316714511736002e-07, |
|
"logits/chosen": -73805504.0, |
|
"logits/rejected": -120474040.8888889, |
|
"logps/chosen": -281.7974330357143, |
|
"logps/rejected": -220.72686089409723, |
|
"loss": 0.2737, |
|
"rewards/chosen": 4.077202388218471, |
|
"rewards/margins": 5.297303744724819, |
|
"rewards/rejected": -1.2201013565063477, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.8727272727272727, |
|
"grad_norm": 4.05194504150993, |
|
"kl": 356.24725341796875, |
|
"learning_rate": 1.9454497092274565e-07, |
|
"logits/chosen": -104081649.77777778, |
|
"logits/rejected": -34870395.428571425, |
|
"logps/chosen": -365.76576063368054, |
|
"logps/rejected": -160.77207728794642, |
|
"loss": 0.3133, |
|
"rewards/chosen": 3.3444756401909723, |
|
"rewards/margins": 9.537121121845548, |
|
"rewards/rejected": -6.1926454816545755, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.8754978354978356, |
|
"grad_norm": 3.0299855668646036, |
|
"kl": 274.89227294921875, |
|
"learning_rate": 1.861023335719475e-07, |
|
"logits/chosen": -54783534.54545455, |
|
"logits/rejected": -23337324.8, |
|
"logps/chosen": -257.8592640269886, |
|
"logps/rejected": -76.99609375, |
|
"loss": 0.3657, |
|
"rewards/chosen": -1.5488752885298296, |
|
"rewards/margins": 3.5257962660356, |
|
"rewards/rejected": -5.0746715545654295, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.8782683982683983, |
|
"grad_norm": 3.4960756957856054, |
|
"kl": 216.21316528320312, |
|
"learning_rate": 1.7783989053363926e-07, |
|
"logits/chosen": -102276416.0, |
|
"logits/rejected": -56183384.0, |
|
"logps/chosen": -436.3009033203125, |
|
"logps/rejected": -253.29261779785156, |
|
"loss": 0.256, |
|
"rewards/chosen": -2.8814847469329834, |
|
"rewards/margins": 3.035329580307007, |
|
"rewards/rejected": -5.91681432723999, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.8810389610389611, |
|
"grad_norm": 3.255643273544522, |
|
"kl": 170.55194091796875, |
|
"learning_rate": 1.6975828524390116e-07, |
|
"logits/chosen": -25597888.0, |
|
"logits/rejected": -74746484.36363636, |
|
"logps/chosen": -149.9252685546875, |
|
"logps/rejected": -170.6590909090909, |
|
"loss": 0.3, |
|
"rewards/chosen": -3.4752635955810547, |
|
"rewards/margins": 3.1817431016401807, |
|
"rewards/rejected": -6.657006697221235, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.8838095238095238, |
|
"grad_norm": 3.211590528118705, |
|
"kl": 225.81451416015625, |
|
"learning_rate": 1.6185814705610926e-07, |
|
"logits/chosen": -80230812.44444445, |
|
"logits/rejected": -95820672.0, |
|
"logps/chosen": -365.86924913194446, |
|
"logps/rejected": -267.72134835379467, |
|
"loss": 0.3225, |
|
"rewards/chosen": -0.6288706461588541, |
|
"rewards/margins": 9.722852071126303, |
|
"rewards/rejected": -10.351722717285156, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.8865800865800866, |
|
"grad_norm": 4.968087897180119, |
|
"kl": 318.9226379394531, |
|
"learning_rate": 1.5414009119192635e-07, |
|
"logits/chosen": -59935155.2, |
|
"logits/rejected": -99951413.33333333, |
|
"logps/chosen": -189.9381591796875, |
|
"logps/rejected": -181.38252766927084, |
|
"loss": 0.2767, |
|
"rewards/chosen": -3.4159313201904298, |
|
"rewards/margins": 4.187530899047852, |
|
"rewards/rejected": -7.603462219238281, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.8893506493506493, |
|
"grad_norm": 2.710501799989454, |
|
"kl": 182.2108154296875, |
|
"learning_rate": 1.4660471869339056e-07, |
|
"logits/chosen": -60232084.0, |
|
"logits/rejected": -43643592.0, |
|
"logps/chosen": -188.66204833984375, |
|
"logps/rejected": -244.72280883789062, |
|
"loss": 0.3119, |
|
"rewards/chosen": -0.325054407119751, |
|
"rewards/margins": 7.316741704940796, |
|
"rewards/rejected": -7.641796112060547, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.8921212121212121, |
|
"grad_norm": 3.1125121761971637, |
|
"kl": 231.7778778076172, |
|
"learning_rate": 1.392526163761107e-07, |
|
"logits/chosen": -58159717.333333336, |
|
"logits/rejected": -44477772.8, |
|
"logps/chosen": -325.116943359375, |
|
"logps/rejected": -250.502734375, |
|
"loss": 0.3591, |
|
"rewards/chosen": -0.008535941441853842, |
|
"rewards/margins": 5.7585439840952555, |
|
"rewards/rejected": -5.767079925537109, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.8948917748917748, |
|
"grad_norm": 2.9951995159051656, |
|
"kl": 321.8599853515625, |
|
"learning_rate": 1.3208435678356612e-07, |
|
"logits/chosen": -84027865.6, |
|
"logits/rejected": -111909888.0, |
|
"logps/chosen": -349.3338134765625, |
|
"logps/rejected": -172.89811197916666, |
|
"loss": 0.3352, |
|
"rewards/chosen": -2.2276878356933594, |
|
"rewards/margins": 3.500701268513997, |
|
"rewards/rejected": -5.7283891042073565, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.8976623376623377, |
|
"grad_norm": 3.773179708058833, |
|
"kl": 320.73931884765625, |
|
"learning_rate": 1.2510049814252302e-07, |
|
"logits/chosen": -52532563.2, |
|
"logits/rejected": -46823627.63636363, |
|
"logps/chosen": -191.76851806640624, |
|
"logps/rejected": -189.61625532670453, |
|
"loss": 0.2921, |
|
"rewards/chosen": 0.10258646011352539, |
|
"rewards/margins": 5.660441546006636, |
|
"rewards/rejected": -5.55785508589311, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.8976623376623377, |
|
"eval_logits/chosen": -69711099.73333333, |
|
"eval_logits/rejected": -35742644.96551724, |
|
"eval_logps/chosen": -345.00416666666666, |
|
"eval_logps/rejected": -236.56349339978448, |
|
"eval_loss": 0.30222654342651367, |
|
"eval_rewards/chosen": 1.4322120666503906, |
|
"eval_rewards/margins": 9.016275892586544, |
|
"eval_rewards/rejected": -7.584063825936153, |
|
"eval_runtime": 376.6798, |
|
"eval_samples_per_second": 1.24, |
|
"eval_steps_per_second": 0.157, |
|
"kl": 1047.622314453125, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.9004329004329005, |
|
"grad_norm": 3.225048381245606, |
|
"kl": 162.58395385742188, |
|
"learning_rate": 1.1830158431955841e-07, |
|
"logits/chosen": -70002758.4, |
|
"logits/rejected": -68099543.27272727, |
|
"logps/chosen": -316.6424560546875, |
|
"logps/rejected": -205.49928977272728, |
|
"loss": 0.2455, |
|
"rewards/chosen": 1.4882720947265624, |
|
"rewards/margins": 9.835954700816762, |
|
"rewards/rejected": -8.3476826060902, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.9032034632034632, |
|
"grad_norm": 7.020489831387403, |
|
"kl": 154.04054260253906, |
|
"learning_rate": 1.1168814477871132e-07, |
|
"logits/chosen": -27315149.333333332, |
|
"logits/rejected": -40221536.0, |
|
"logps/chosen": -163.5007527669271, |
|
"logps/rejected": -205.08662109375, |
|
"loss": 0.3071, |
|
"rewards/chosen": 1.7751677831013997, |
|
"rewards/margins": 6.471413358052572, |
|
"rewards/rejected": -4.696245574951172, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.905974025974026, |
|
"grad_norm": 3.9910790804312772, |
|
"kl": 343.1492004394531, |
|
"learning_rate": 1.0526069454024651e-07, |
|
"logits/chosen": -69089873.45454545, |
|
"logits/rejected": -25067128.0, |
|
"logps/chosen": -184.99495072798297, |
|
"logps/rejected": -145.78553466796876, |
|
"loss": 0.4173, |
|
"rewards/chosen": -0.5141198418357156, |
|
"rewards/margins": 4.104613373496315, |
|
"rewards/rejected": -4.618733215332031, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.9087445887445887, |
|
"grad_norm": 2.3379723695453842, |
|
"kl": 200.3038787841797, |
|
"learning_rate": 9.901973414055188e-08, |
|
"logits/chosen": -43525616.0, |
|
"logits/rejected": -88932544.0, |
|
"logps/chosen": -189.66627502441406, |
|
"logps/rejected": -226.9715118408203, |
|
"loss": 0.2767, |
|
"rewards/chosen": -0.33219343423843384, |
|
"rewards/margins": 5.645094335079193, |
|
"rewards/rejected": -5.977287769317627, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.9115151515151515, |
|
"grad_norm": 3.368306425906985, |
|
"kl": 226.6565704345703, |
|
"learning_rate": 9.296574959315464e-08, |
|
"logits/chosen": -55839961.6, |
|
"logits/rejected": -1148263.2727272727, |
|
"logps/chosen": -202.15638427734376, |
|
"logps/rejected": -215.63878284801137, |
|
"loss": 0.2375, |
|
"rewards/chosen": 2.4019466400146485, |
|
"rewards/margins": 9.938241750543767, |
|
"rewards/rejected": -7.536295110529119, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.9142857142857143, |
|
"grad_norm": 3.5086087623333375, |
|
"kl": 212.34596252441406, |
|
"learning_rate": 8.709921235087598e-08, |
|
"logits/chosen": -115221869.71428572, |
|
"logits/rejected": -43847939.55555555, |
|
"logps/chosen": -409.99365234375, |
|
"logps/rejected": -195.90090603298611, |
|
"loss": 0.2772, |
|
"rewards/chosen": 1.8871876852852958, |
|
"rewards/margins": 8.114954781910729, |
|
"rewards/rejected": -6.227767096625434, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.917056277056277, |
|
"grad_norm": 3.1912878671336515, |
|
"kl": 276.59588623046875, |
|
"learning_rate": 8.142057926911722e-08, |
|
"logits/chosen": -54401212.44444445, |
|
"logits/rejected": -86659062.85714285, |
|
"logps/chosen": -228.07329644097223, |
|
"logps/rejected": -178.87850516183036, |
|
"loss": 0.247, |
|
"rewards/chosen": -1.1525320476955838, |
|
"rewards/margins": 4.903115666101849, |
|
"rewards/rejected": -6.055647713797433, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.9198268398268399, |
|
"grad_norm": 3.4518885090842457, |
|
"kl": 179.40066528320312, |
|
"learning_rate": 7.593029257027956e-08, |
|
"logits/chosen": -81397880.0, |
|
"logits/rejected": -65295944.0, |
|
"logps/chosen": -387.6104736328125, |
|
"logps/rejected": -264.7943420410156, |
|
"loss": 0.2784, |
|
"rewards/chosen": 0.17824554443359375, |
|
"rewards/margins": 7.819864273071289, |
|
"rewards/rejected": -7.641618728637695, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.9225974025974026, |
|
"grad_norm": 3.240667722331457, |
|
"kl": 273.7913818359375, |
|
"learning_rate": 7.062877980932914e-08, |
|
"logits/chosen": -69872288.0, |
|
"logits/rejected": -134056736.0, |
|
"logps/chosen": -258.5987548828125, |
|
"logps/rejected": -230.22084045410156, |
|
"loss": 0.2929, |
|
"rewards/chosen": 0.8421229124069214, |
|
"rewards/margins": 10.753296732902527, |
|
"rewards/rejected": -9.911173820495605, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.9253679653679654, |
|
"grad_norm": 2.999389757174662, |
|
"kl": 398.09234619140625, |
|
"learning_rate": 6.551645384049898e-08, |
|
"logits/chosen": -50461835.63636363, |
|
"logits/rejected": -68490873.6, |
|
"logps/chosen": -184.9519708806818, |
|
"logps/rejected": -173.1832763671875, |
|
"loss": 0.3399, |
|
"rewards/chosen": 0.13307727466930042, |
|
"rewards/margins": 7.3943916667591445, |
|
"rewards/rejected": -7.261314392089844, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.9281385281385282, |
|
"grad_norm": 4.041013131462937, |
|
"kl": 291.8245849609375, |
|
"learning_rate": 6.059371278513942e-08, |
|
"logits/chosen": -80652544.0, |
|
"logits/rejected": -60337960.0, |
|
"logps/chosen": -281.4577941894531, |
|
"logps/rejected": -140.62286376953125, |
|
"loss": 0.2597, |
|
"rewards/chosen": 2.074185848236084, |
|
"rewards/margins": 5.002105712890625, |
|
"rewards/rejected": -2.927919864654541, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.9309090909090909, |
|
"grad_norm": 3.6881342907625374, |
|
"kl": 281.2275085449219, |
|
"learning_rate": 5.5860940000714016e-08, |
|
"logits/chosen": -92601429.33333333, |
|
"logits/rejected": -64497465.6, |
|
"logps/chosen": -416.5006510416667, |
|
"logps/rejected": -220.225244140625, |
|
"loss": 0.334, |
|
"rewards/chosen": 5.220906575520833, |
|
"rewards/margins": 9.354856236775717, |
|
"rewards/rejected": -4.133949661254883, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.9336796536796537, |
|
"grad_norm": 3.2732744144842303, |
|
"kl": 135.52395629882812, |
|
"learning_rate": 5.131850405094535e-08, |
|
"logits/chosen": -66077589.333333336, |
|
"logits/rejected": -41077076.0, |
|
"logps/chosen": -265.8728434244792, |
|
"logps/rejected": -235.4630889892578, |
|
"loss": 0.2889, |
|
"rewards/chosen": -3.7092259724934897, |
|
"rewards/margins": 9.39453379313151, |
|
"rewards/rejected": -13.103759765625, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.9364502164502164, |
|
"grad_norm": 4.262613045998365, |
|
"kl": 127.82938385009766, |
|
"learning_rate": 4.6966758677113865e-08, |
|
"logits/chosen": -81822281.14285715, |
|
"logits/rejected": -62049500.44444445, |
|
"logps/chosen": -375.00167410714283, |
|
"logps/rejected": -196.10477701822916, |
|
"loss": 0.3371, |
|
"rewards/chosen": -0.4809649331229074, |
|
"rewards/margins": 6.648403319101485, |
|
"rewards/rejected": -7.129368252224392, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.9392207792207792, |
|
"grad_norm": 2.7567661957637566, |
|
"kl": 131.0496826171875, |
|
"learning_rate": 4.280604277050932e-08, |
|
"logits/chosen": -65746481.777777776, |
|
"logits/rejected": -71039881.14285715, |
|
"logps/chosen": -240.61138237847223, |
|
"logps/rejected": -201.98568289620536, |
|
"loss": 0.3823, |
|
"rewards/chosen": -0.5426181687249078, |
|
"rewards/margins": 1.088538616422623, |
|
"rewards/rejected": -1.6311567851475306, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.941991341991342, |
|
"grad_norm": 3.810451018052984, |
|
"kl": 188.156005859375, |
|
"learning_rate": 3.88366803460416e-08, |
|
"logits/chosen": -50966796.8, |
|
"logits/rejected": -45116309.333333336, |
|
"logps/chosen": -225.039013671875, |
|
"logps/rejected": -156.35626220703125, |
|
"loss": 0.3137, |
|
"rewards/chosen": 2.0323509216308593, |
|
"rewards/margins": 5.4068340301513675, |
|
"rewards/rejected": -3.374483108520508, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9447619047619048, |
|
"grad_norm": 4.023653516232581, |
|
"kl": 261.86407470703125, |
|
"learning_rate": 3.505898051700596e-08, |
|
"logits/chosen": -60868138.666666664, |
|
"logits/rejected": -103185574.4, |
|
"logps/chosen": -187.67252604166666, |
|
"logps/rejected": -203.42421875, |
|
"loss": 0.2495, |
|
"rewards/chosen": 0.37401819229125977, |
|
"rewards/margins": 7.960676670074463, |
|
"rewards/rejected": -7.586658477783203, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.9475324675324676, |
|
"grad_norm": 2.848154809605621, |
|
"kl": 333.7825927734375, |
|
"learning_rate": 3.147323747101222e-08, |
|
"logits/chosen": -42793696.0, |
|
"logits/rejected": -84218375.1111111, |
|
"logps/chosen": -240.01213727678572, |
|
"logps/rejected": -197.28348795572916, |
|
"loss": 0.318, |
|
"rewards/chosen": 3.4886975969587053, |
|
"rewards/margins": 10.118460882277716, |
|
"rewards/rejected": -6.629763285319011, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.9503030303030303, |
|
"grad_norm": 4.052057580524384, |
|
"kl": 295.6441650390625, |
|
"learning_rate": 2.8079730447073685e-08, |
|
"logits/chosen": -74955200.0, |
|
"logits/rejected": -61429477.333333336, |
|
"logps/chosen": -338.2581787109375, |
|
"logps/rejected": -154.8097941080729, |
|
"loss": 0.27, |
|
"rewards/chosen": -0.7783769607543946, |
|
"rewards/margins": 2.1431963602701822, |
|
"rewards/rejected": -2.9215733210245767, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.9530735930735931, |
|
"grad_norm": 2.588708817226156, |
|
"kl": 252.98692321777344, |
|
"learning_rate": 2.487872371386424e-08, |
|
"logits/chosen": -71219725.71428572, |
|
"logits/rejected": -135706026.66666666, |
|
"logps/chosen": -215.78688267299108, |
|
"logps/rejected": -177.41084798177084, |
|
"loss": 0.2689, |
|
"rewards/chosen": 0.5888588087899345, |
|
"rewards/margins": 6.814543928418841, |
|
"rewards/rejected": -6.225685119628906, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.9558441558441558, |
|
"grad_norm": 2.475512688880312, |
|
"kl": 287.192626953125, |
|
"learning_rate": 2.187046654913455e-08, |
|
"logits/chosen": -48774666.666666664, |
|
"logits/rejected": -141680844.8, |
|
"logps/chosen": -129.57332356770834, |
|
"logps/rejected": -232.22451171875, |
|
"loss": 0.3501, |
|
"rewards/chosen": -0.7337352434794108, |
|
"rewards/margins": 7.57529567082723, |
|
"rewards/rejected": -8.309030914306641, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.9586147186147186, |
|
"grad_norm": 2.345963196880215, |
|
"kl": 178.36175537109375, |
|
"learning_rate": 1.9055193220302582e-08, |
|
"logits/chosen": -74306917.33333333, |
|
"logits/rejected": -118739724.8, |
|
"logps/chosen": -419.848876953125, |
|
"logps/rejected": -203.6920166015625, |
|
"loss": 0.2536, |
|
"rewards/chosen": 4.156125386555989, |
|
"rewards/margins": 12.88521931966146, |
|
"rewards/rejected": -8.729093933105469, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.9613852813852813, |
|
"grad_norm": 3.535440511298158, |
|
"kl": 232.6870574951172, |
|
"learning_rate": 1.6433122966209303e-08, |
|
"logits/chosen": -52279899.428571425, |
|
"logits/rejected": -56559985.777777776, |
|
"logps/chosen": -284.62510463169644, |
|
"logps/rejected": -222.73961046006946, |
|
"loss": 0.3007, |
|
"rewards/chosen": -0.15411996841430664, |
|
"rewards/margins": 8.343976921505398, |
|
"rewards/rejected": -8.498096889919704, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.9641558441558442, |
|
"grad_norm": 2.7174709299005877, |
|
"kl": 179.14004516601562, |
|
"learning_rate": 1.4004459980045127e-08, |
|
"logits/chosen": -51113212.44444445, |
|
"logits/rejected": -70323492.57142857, |
|
"logps/chosen": -293.8728841145833, |
|
"logps/rejected": -163.85121372767858, |
|
"loss": 0.2698, |
|
"rewards/chosen": 0.6734237670898438, |
|
"rewards/margins": 6.350269862583706, |
|
"rewards/rejected": -5.676846095493862, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.966926406926407, |
|
"grad_norm": 2.7297787722044076, |
|
"kl": 299.56707763671875, |
|
"learning_rate": 1.1769393393448459e-08, |
|
"logits/chosen": -51520972.0, |
|
"logits/rejected": -111233944.0, |
|
"logps/chosen": -255.2079620361328, |
|
"logps/rejected": -269.553955078125, |
|
"loss": 0.2803, |
|
"rewards/chosen": -3.926908254623413, |
|
"rewards/margins": 3.280689001083374, |
|
"rewards/rejected": -7.207597255706787, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.9696969696969697, |
|
"grad_norm": 3.458726157837369, |
|
"kl": 194.5485076904297, |
|
"learning_rate": 9.728097261777202e-09, |
|
"logits/chosen": -69388939.63636364, |
|
"logits/rejected": -59349760.0, |
|
"logps/chosen": -200.84548117897728, |
|
"logps/rejected": -153.4105712890625, |
|
"loss": 0.3073, |
|
"rewards/chosen": -0.6452965302900835, |
|
"rewards/margins": 6.0389731840653855, |
|
"rewards/rejected": -6.684269714355469, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9724675324675325, |
|
"grad_norm": 3.043597739972342, |
|
"kl": 288.1514587402344, |
|
"learning_rate": 7.88073055055516e-09, |
|
"logits/chosen": -43270261.333333336, |
|
"logits/rejected": -62705388.0, |
|
"logps/chosen": -233.39241536458334, |
|
"logps/rejected": -178.01548767089844, |
|
"loss": 0.3206, |
|
"rewards/chosen": -0.9711050192515055, |
|
"rewards/margins": 4.855683883031209, |
|
"rewards/rejected": -5.826788902282715, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.9752380952380952, |
|
"grad_norm": 3.3453162638571334, |
|
"kl": 273.7534484863281, |
|
"learning_rate": 6.2274371230905405e-09, |
|
"logits/chosen": -51228424.0, |
|
"logits/rejected": -43057528.0, |
|
"logps/chosen": -257.15313720703125, |
|
"logps/rejected": -185.47024536132812, |
|
"loss": 0.2676, |
|
"rewards/chosen": 1.785893440246582, |
|
"rewards/margins": 11.457650184631348, |
|
"rewards/rejected": -9.671756744384766, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.978008658008658, |
|
"grad_norm": 2.82927755492849, |
|
"kl": 410.8206481933594, |
|
"learning_rate": 4.7683457292743705e-09, |
|
"logits/chosen": -95908460.8, |
|
"logits/rejected": -64794293.333333336, |
|
"logps/chosen": -242.651708984375, |
|
"logps/rejected": -233.58734130859375, |
|
"loss": 0.2688, |
|
"rewards/chosen": 4.084451293945312, |
|
"rewards/margins": 13.245009994506836, |
|
"rewards/rejected": -9.160558700561523, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.9807792207792208, |
|
"grad_norm": 3.9253391250529686, |
|
"kl": 298.74639892578125, |
|
"learning_rate": 3.503569995554068e-09, |
|
"logits/chosen": -42239523.2, |
|
"logits/rejected": -21779062.666666668, |
|
"logps/chosen": -217.557080078125, |
|
"logps/rejected": -178.1170654296875, |
|
"loss": 0.3393, |
|
"rewards/chosen": 1.7639793395996093, |
|
"rewards/margins": 7.103317642211914, |
|
"rewards/rejected": -5.339338302612305, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.9835497835497835, |
|
"grad_norm": 2.4850995996925946, |
|
"kl": 195.93548583984375, |
|
"learning_rate": 2.4332084160835766e-09, |
|
"logits/chosen": -47525056.0, |
|
"logits/rejected": -50351388.0, |
|
"logps/chosen": -262.4061279296875, |
|
"logps/rejected": -249.47586059570312, |
|
"loss": 0.3643, |
|
"rewards/chosen": -1.0697911977767944, |
|
"rewards/margins": 4.326021790504456, |
|
"rewards/rejected": -5.39581298828125, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.9863203463203464, |
|
"grad_norm": 3.454830902434755, |
|
"kl": 169.8162841796875, |
|
"learning_rate": 1.5573443450545012e-09, |
|
"logits/chosen": -66981845.333333336, |
|
"logits/rejected": -78846729.14285715, |
|
"logps/chosen": -282.89678276909723, |
|
"logps/rejected": -214.99260602678572, |
|
"loss": 0.3501, |
|
"rewards/chosen": 0.8235403696695963, |
|
"rewards/margins": 7.977521169753302, |
|
"rewards/rejected": -7.153980800083706, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.9890909090909091, |
|
"grad_norm": 3.5602617502447202, |
|
"kl": 369.330078125, |
|
"learning_rate": 8.760459902037998e-10, |
|
"logits/chosen": -86534136.8888889, |
|
"logits/rejected": -49817129.14285714, |
|
"logps/chosen": -335.5671115451389, |
|
"logps/rejected": -184.69977678571428, |
|
"loss": 0.2598, |
|
"rewards/chosen": 2.4023797776963978, |
|
"rewards/margins": 9.863903651161799, |
|
"rewards/rejected": -7.461523873465402, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.9918614718614719, |
|
"grad_norm": 3.652592934041048, |
|
"kl": 289.38031005859375, |
|
"learning_rate": 3.8936640750358856e-10, |
|
"logits/chosen": -53824308.36363637, |
|
"logits/rejected": -23989609.6, |
|
"logps/chosen": -296.4788707386364, |
|
"logps/rejected": -154.59755859375, |
|
"loss": 0.2426, |
|
"rewards/chosen": -2.885924599387429, |
|
"rewards/margins": 2.8186373970725316, |
|
"rewards/rejected": -5.704561996459961, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.9946320346320346, |
|
"grad_norm": 4.316881838802012, |
|
"kl": 284.5806884765625, |
|
"learning_rate": 9.734349702722468e-11, |
|
"logits/chosen": -87349336.0, |
|
"logits/rejected": -81604216.0, |
|
"logps/chosen": -296.7097473144531, |
|
"logps/rejected": -270.17901611328125, |
|
"loss": 0.3314, |
|
"rewards/chosen": -4.627019882202148, |
|
"rewards/margins": 2.630732536315918, |
|
"rewards/rejected": -7.257752418518066, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.9974025974025974, |
|
"grad_norm": 2.621055829790202, |
|
"kl": 262.32708740234375, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -90589222.4, |
|
"logits/rejected": -62416917.333333336, |
|
"logps/chosen": -352.624951171875, |
|
"logps/rejected": -277.27325439453125, |
|
"loss": 0.3122, |
|
"rewards/chosen": 0.6123371124267578, |
|
"rewards/margins": 8.145875930786133, |
|
"rewards/rejected": -7.533538818359375, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.9974025974025974, |
|
"eval_logits/chosen": -69679219.2, |
|
"eval_logits/rejected": -34451756.137931034, |
|
"eval_logps/chosen": -343.90514322916664, |
|
"eval_logps/rejected": -233.7683694773707, |
|
"eval_loss": 0.30314239859580994, |
|
"eval_rewards/chosen": 1.5421129862467449, |
|
"eval_rewards/margins": 8.846664735640603, |
|
"eval_rewards/rejected": -7.304551749393858, |
|
"eval_runtime": 375.3852, |
|
"eval_samples_per_second": 1.244, |
|
"eval_steps_per_second": 0.157, |
|
"kl": 1080.3172607421875, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.9974025974025974, |
|
"step": 360, |
|
"total_flos": 8.196772297546138e+16, |
|
"train_loss": 0.31702631492581634, |
|
"train_runtime": 54644.2785, |
|
"train_samples_per_second": 0.845, |
|
"train_steps_per_second": 0.007 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 360, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 360, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.196772297546138e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|