|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 2000, |
|
"global_step": 1911, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005232862375719519, |
|
"grad_norm": 9.407495523559882, |
|
"learning_rate": 2.6041666666666664e-09, |
|
"logits/chosen": -3.5152194499969482, |
|
"logits/rejected": -3.4632656574249268, |
|
"logps/chosen": -481.34503173828125, |
|
"logps/rejected": -587.6341552734375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0052328623757195184, |
|
"grad_norm": 9.368182608191617, |
|
"learning_rate": 2.6041666666666667e-08, |
|
"logits/chosen": -3.0836780071258545, |
|
"logits/rejected": -3.089723825454712, |
|
"logps/chosen": -324.0572814941406, |
|
"logps/rejected": -271.6081237792969, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00036163980257697403, |
|
"rewards/margins": 0.0006375669036060572, |
|
"rewards/rejected": -0.00027592710102908313, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010465724751439037, |
|
"grad_norm": 8.394791410826315, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -3.0828237533569336, |
|
"logits/rejected": -3.0825557708740234, |
|
"logps/chosen": -218.6238250732422, |
|
"logps/rejected": -207.8960418701172, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0005469070747494698, |
|
"rewards/margins": -0.000912196934223175, |
|
"rewards/rejected": 0.00036528988857753575, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015698587127158554, |
|
"grad_norm": 8.439650036364021, |
|
"learning_rate": 7.812499999999999e-08, |
|
"logits/chosen": -2.9580674171447754, |
|
"logits/rejected": -2.9382100105285645, |
|
"logps/chosen": -292.75689697265625, |
|
"logps/rejected": -271.7955322265625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -4.3508225644472986e-05, |
|
"rewards/margins": 0.00016178758232854307, |
|
"rewards/rejected": -0.00020529590256046504, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.020931449502878074, |
|
"grad_norm": 9.082347486356902, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -3.1704773902893066, |
|
"logits/rejected": -3.0822200775146484, |
|
"logps/chosen": -311.4849548339844, |
|
"logps/rejected": -300.40850830078125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.0013842134503647685, |
|
"rewards/margins": -0.001490533584728837, |
|
"rewards/rejected": 0.00010632032353896648, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.026164311878597593, |
|
"grad_norm": 9.372177486400643, |
|
"learning_rate": 1.3020833333333334e-07, |
|
"logits/chosen": -3.188302755355835, |
|
"logits/rejected": -3.0731282234191895, |
|
"logps/chosen": -290.1669921875, |
|
"logps/rejected": -265.48614501953125, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.00022276137315202504, |
|
"rewards/margins": 0.001895324676297605, |
|
"rewards/rejected": -0.0016725633759051561, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03139717425431711, |
|
"grad_norm": 9.02504176831172, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -3.0605216026306152, |
|
"logits/rejected": -3.0358798503875732, |
|
"logps/chosen": -264.3572692871094, |
|
"logps/rejected": -259.52154541015625, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0007779047591611743, |
|
"rewards/margins": -0.0010117461206391454, |
|
"rewards/rejected": 0.00023384133237414062, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03663003663003663, |
|
"grad_norm": 9.83446985377744, |
|
"learning_rate": 1.8229166666666666e-07, |
|
"logits/chosen": -3.186253786087036, |
|
"logits/rejected": -3.1285574436187744, |
|
"logps/chosen": -321.60968017578125, |
|
"logps/rejected": -260.00146484375, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0025626576971262693, |
|
"rewards/margins": 0.0030271965079009533, |
|
"rewards/rejected": -0.0004645389853976667, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04186289900575615, |
|
"grad_norm": 9.016157988853823, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -3.117447853088379, |
|
"logits/rejected": -3.089796543121338, |
|
"logps/chosen": -297.1509704589844, |
|
"logps/rejected": -296.9673156738281, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.001138419727794826, |
|
"rewards/margins": -0.0007983344839885831, |
|
"rewards/rejected": -0.00034008515649475157, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04709576138147567, |
|
"grad_norm": 8.986620311126446, |
|
"learning_rate": 2.3437499999999998e-07, |
|
"logits/chosen": -3.1331019401550293, |
|
"logits/rejected": -3.095825672149658, |
|
"logps/chosen": -238.6189422607422, |
|
"logps/rejected": -209.0961456298828, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.004111393354833126, |
|
"rewards/margins": 0.006361853331327438, |
|
"rewards/rejected": -0.002250460209324956, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.052328623757195186, |
|
"grad_norm": 8.436083042758137, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -3.1380972862243652, |
|
"logits/rejected": -3.033982753753662, |
|
"logps/chosen": -216.36532592773438, |
|
"logps/rejected": -193.93771362304688, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0019389173248782754, |
|
"rewards/margins": 0.0037226087879389524, |
|
"rewards/rejected": -0.0017836916958913207, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0575614861329147, |
|
"grad_norm": 8.52568146043833, |
|
"learning_rate": 2.864583333333333e-07, |
|
"logits/chosen": -3.0441882610321045, |
|
"logits/rejected": -2.9532344341278076, |
|
"logps/chosen": -251.6013641357422, |
|
"logps/rejected": -191.75926208496094, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.000411161599913612, |
|
"rewards/margins": 0.002570229349657893, |
|
"rewards/rejected": -0.0029813905712217093, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 8.445678366827929, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -3.1838107109069824, |
|
"logits/rejected": -3.192498207092285, |
|
"logps/chosen": -359.9312744140625, |
|
"logps/rejected": -331.210205078125, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0036508466582745314, |
|
"rewards/margins": 0.0020910254679620266, |
|
"rewards/rejected": 0.0015598213067278266, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06802721088435375, |
|
"grad_norm": 8.44639171787651, |
|
"learning_rate": 3.3854166666666667e-07, |
|
"logits/chosen": -3.0322957038879395, |
|
"logits/rejected": -3.0342295169830322, |
|
"logps/chosen": -216.66983032226562, |
|
"logps/rejected": -232.0410614013672, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0037786834873259068, |
|
"rewards/margins": 0.015015101060271263, |
|
"rewards/rejected": -0.011236417107284069, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07326007326007326, |
|
"grad_norm": 8.559455756582349, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -2.9691271781921387, |
|
"logits/rejected": -3.0017247200012207, |
|
"logps/chosen": -289.2220458984375, |
|
"logps/rejected": -299.85601806640625, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.010763688944280148, |
|
"rewards/margins": 0.03104465827345848, |
|
"rewards/rejected": -0.020280972123146057, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07849293563579278, |
|
"grad_norm": 9.486475102502801, |
|
"learning_rate": 3.9062499999999997e-07, |
|
"logits/chosen": -3.1427695751190186, |
|
"logits/rejected": -3.0511577129364014, |
|
"logps/chosen": -254.4264678955078, |
|
"logps/rejected": -243.5124053955078, |
|
"loss": 0.6834, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.009364760480821133, |
|
"rewards/margins": -0.0007057435577735305, |
|
"rewards/rejected": -0.008659017272293568, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0837257980115123, |
|
"grad_norm": 8.89210469731796, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -3.142425775527954, |
|
"logits/rejected": -3.034043073654175, |
|
"logps/chosen": -253.0974578857422, |
|
"logps/rejected": -231.36776733398438, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.007529817521572113, |
|
"rewards/margins": 0.029269713908433914, |
|
"rewards/rejected": -0.0217398963868618, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08895866038723181, |
|
"grad_norm": 9.900718750163009, |
|
"learning_rate": 4.427083333333333e-07, |
|
"logits/chosen": -3.10660982131958, |
|
"logits/rejected": -3.1103694438934326, |
|
"logps/chosen": -226.6344451904297, |
|
"logps/rejected": -257.1600036621094, |
|
"loss": 0.6762, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.007634393870830536, |
|
"rewards/margins": 0.022635120898485184, |
|
"rewards/rejected": -0.01500072330236435, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09419152276295134, |
|
"grad_norm": 7.853608706291147, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -3.1087143421173096, |
|
"logits/rejected": -3.011237621307373, |
|
"logps/chosen": -289.0339050292969, |
|
"logps/rejected": -264.6435546875, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.037013303488492966, |
|
"rewards/margins": 0.020231764763593674, |
|
"rewards/rejected": -0.05724506452679634, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09942438513867086, |
|
"grad_norm": 9.812737278822723, |
|
"learning_rate": 4.947916666666667e-07, |
|
"logits/chosen": -3.12058162689209, |
|
"logits/rejected": -3.022966146469116, |
|
"logps/chosen": -296.3725891113281, |
|
"logps/rejected": -256.09344482421875, |
|
"loss": 0.6691, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0012107163202017546, |
|
"rewards/margins": 0.051225028932094574, |
|
"rewards/rejected": -0.05001431703567505, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10465724751439037, |
|
"grad_norm": 8.738180842452081, |
|
"learning_rate": 4.999732803821339e-07, |
|
"logits/chosen": -3.0218393802642822, |
|
"logits/rejected": -2.9373745918273926, |
|
"logps/chosen": -268.26177978515625, |
|
"logps/rejected": -296.3043518066406, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.02693028375506401, |
|
"rewards/margins": 0.05266191437840462, |
|
"rewards/rejected": -0.07959219813346863, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10989010989010989, |
|
"grad_norm": 9.461191742617881, |
|
"learning_rate": 4.998647417232375e-07, |
|
"logits/chosen": -3.091495990753174, |
|
"logits/rejected": -3.0208005905151367, |
|
"logps/chosen": -209.35812377929688, |
|
"logps/rejected": -196.67874145507812, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02744319476187229, |
|
"rewards/margins": 0.011946072801947594, |
|
"rewards/rejected": -0.039389267563819885, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1151229722658294, |
|
"grad_norm": 8.44271373017445, |
|
"learning_rate": 4.996727502703357e-07, |
|
"logits/chosen": -3.131009817123413, |
|
"logits/rejected": -3.0901293754577637, |
|
"logps/chosen": -272.07928466796875, |
|
"logps/rejected": -248.29647827148438, |
|
"loss": 0.668, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.016782300546765327, |
|
"rewards/margins": 0.09954167157411575, |
|
"rewards/rejected": -0.08275936543941498, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12035583464154893, |
|
"grad_norm": 8.775453685675133, |
|
"learning_rate": 4.993973701470142e-07, |
|
"logits/chosen": -3.136620044708252, |
|
"logits/rejected": -3.1166210174560547, |
|
"logps/chosen": -241.89395141601562, |
|
"logps/rejected": -324.9334411621094, |
|
"loss": 0.6537, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.018461737781763077, |
|
"rewards/margins": 0.09357274323701859, |
|
"rewards/rejected": -0.11203447729349136, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 9.86269291461345, |
|
"learning_rate": 4.990386933279972e-07, |
|
"logits/chosen": -3.1064624786376953, |
|
"logits/rejected": -3.0300252437591553, |
|
"logps/chosen": -231.7109832763672, |
|
"logps/rejected": -224.2830810546875, |
|
"loss": 0.6568, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.08668054640293121, |
|
"rewards/margins": 0.05723382160067558, |
|
"rewards/rejected": -0.1439143866300583, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13082155939298795, |
|
"grad_norm": 10.212159212954173, |
|
"learning_rate": 4.985968396084284e-07, |
|
"logits/chosen": -3.0460567474365234, |
|
"logits/rejected": -3.044189214706421, |
|
"logps/chosen": -292.87689208984375, |
|
"logps/rejected": -261.9239196777344, |
|
"loss": 0.6528, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08876132220029831, |
|
"rewards/margins": 0.09951107949018478, |
|
"rewards/rejected": -0.1882723867893219, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1360544217687075, |
|
"grad_norm": 11.20369101929034, |
|
"learning_rate": 4.98071956563861e-07, |
|
"logits/chosen": -3.13952898979187, |
|
"logits/rejected": -3.0703797340393066, |
|
"logps/chosen": -291.0438537597656, |
|
"logps/rejected": -283.5665588378906, |
|
"loss": 0.6493, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.05392254516482353, |
|
"rewards/margins": 0.1665399819612503, |
|
"rewards/rejected": -0.22046248614788055, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.141287284144427, |
|
"grad_norm": 8.772172628691353, |
|
"learning_rate": 4.97464219500968e-07, |
|
"logits/chosen": -3.1266958713531494, |
|
"logits/rejected": -3.067598819732666, |
|
"logps/chosen": -276.22149658203125, |
|
"logps/rejected": -286.74560546875, |
|
"loss": 0.6296, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.15288567543029785, |
|
"rewards/margins": 0.0729939192533493, |
|
"rewards/rejected": -0.22587962448596954, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14652014652014653, |
|
"grad_norm": 10.816402025955542, |
|
"learning_rate": 4.967738313989918e-07, |
|
"logits/chosen": -3.081346035003662, |
|
"logits/rejected": -3.089521646499634, |
|
"logps/chosen": -311.0265808105469, |
|
"logps/rejected": -308.26239013671875, |
|
"loss": 0.6518, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11529602110385895, |
|
"rewards/margins": 0.16599558293819427, |
|
"rewards/rejected": -0.2812916338443756, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15175300889586604, |
|
"grad_norm": 9.73794525369063, |
|
"learning_rate": 4.960010228419499e-07, |
|
"logits/chosen": -3.1872620582580566, |
|
"logits/rejected": -3.0724518299102783, |
|
"logps/chosen": -333.497314453125, |
|
"logps/rejected": -263.186767578125, |
|
"loss": 0.6559, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.20890942215919495, |
|
"rewards/margins": 0.10614234209060669, |
|
"rewards/rejected": -0.31505173444747925, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15698587127158556, |
|
"grad_norm": 10.007837082019684, |
|
"learning_rate": 4.951460519416227e-07, |
|
"logits/chosen": -3.1770780086517334, |
|
"logits/rejected": -3.140578508377075, |
|
"logps/chosen": -326.86395263671875, |
|
"logps/rejected": -284.9017639160156, |
|
"loss": 0.6389, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2358376681804657, |
|
"rewards/margins": 0.13834154605865479, |
|
"rewards/rejected": -0.3741792142391205, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16221873364730507, |
|
"grad_norm": 10.330150006802821, |
|
"learning_rate": 4.942092042513458e-07, |
|
"logits/chosen": -3.22660493850708, |
|
"logits/rejected": -3.1128604412078857, |
|
"logps/chosen": -324.91644287109375, |
|
"logps/rejected": -321.35369873046875, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12580624222755432, |
|
"rewards/margins": 0.21708102524280548, |
|
"rewards/rejected": -0.3428873121738434, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1674515960230246, |
|
"grad_norm": 12.217335777313616, |
|
"learning_rate": 4.931907926706373e-07, |
|
"logits/chosen": -3.2435035705566406, |
|
"logits/rejected": -3.0931334495544434, |
|
"logps/chosen": -352.33209228515625, |
|
"logps/rejected": -265.0364685058594, |
|
"loss": 0.6201, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.315904825925827, |
|
"rewards/margins": 0.16817089915275574, |
|
"rewards/rejected": -0.4840756952762604, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1726844583987441, |
|
"grad_norm": 14.213391612337704, |
|
"learning_rate": 4.920911573406924e-07, |
|
"logits/chosen": -3.0946052074432373, |
|
"logits/rejected": -2.948371410369873, |
|
"logps/chosen": -278.31695556640625, |
|
"logps/rejected": -248.463134765625, |
|
"loss": 0.6283, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.32006198167800903, |
|
"rewards/margins": 0.2539673149585724, |
|
"rewards/rejected": -0.5740293264389038, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17791732077446362, |
|
"grad_norm": 14.132045995126276, |
|
"learning_rate": 4.909106655307787e-07, |
|
"logits/chosen": -3.1650452613830566, |
|
"logits/rejected": -3.1769704818725586, |
|
"logps/chosen": -310.7571105957031, |
|
"logps/rejected": -356.88214111328125, |
|
"loss": 0.5876, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.40563052892684937, |
|
"rewards/margins": 0.21457433700561523, |
|
"rewards/rejected": -0.6202049851417542, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18315018315018314, |
|
"grad_norm": 13.659908414137531, |
|
"learning_rate": 4.896497115155709e-07, |
|
"logits/chosen": -3.0945162773132324, |
|
"logits/rejected": -3.1463465690612793, |
|
"logps/chosen": -215.58175659179688, |
|
"logps/rejected": -293.8269348144531, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.37528592348098755, |
|
"rewards/margins": 0.4616432785987854, |
|
"rewards/rejected": -0.8369291424751282, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 14.108920174076033, |
|
"learning_rate": 4.883087164434672e-07, |
|
"logits/chosen": -3.1719155311584473, |
|
"logits/rejected": -3.0958831310272217, |
|
"logps/chosen": -303.5609130859375, |
|
"logps/rejected": -338.8874816894531, |
|
"loss": 0.5931, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4844132959842682, |
|
"rewards/margins": 0.25994834303855896, |
|
"rewards/rejected": -0.7443616390228271, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1936159079016222, |
|
"grad_norm": 30.77794803800273, |
|
"learning_rate": 4.868881281959282e-07, |
|
"logits/chosen": -3.1315181255340576, |
|
"logits/rejected": -3.0689644813537598, |
|
"logps/chosen": -350.0355224609375, |
|
"logps/rejected": -372.88226318359375, |
|
"loss": 0.5875, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.649621844291687, |
|
"rewards/margins": 0.5146955251693726, |
|
"rewards/rejected": -1.1643173694610596, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1988487702773417, |
|
"grad_norm": 17.909188378713296, |
|
"learning_rate": 4.853884212378889e-07, |
|
"logits/chosen": -2.978480815887451, |
|
"logits/rejected": -3.040266990661621, |
|
"logps/chosen": -269.56805419921875, |
|
"logps/rejected": -432.9898376464844, |
|
"loss": 0.578, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7455266714096069, |
|
"rewards/margins": 0.4270196557044983, |
|
"rewards/rejected": -1.172546148300171, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 17.871870749808622, |
|
"learning_rate": 4.838100964592904e-07, |
|
"logits/chosen": -3.112365245819092, |
|
"logits/rejected": -3.0050134658813477, |
|
"logps/chosen": -456.1292419433594, |
|
"logps/rejected": -380.1213073730469, |
|
"loss": 0.5877, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7795182466506958, |
|
"rewards/margins": 0.42506784200668335, |
|
"rewards/rejected": -1.204586148262024, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.20931449502878074, |
|
"grad_norm": 23.774138344079837, |
|
"learning_rate": 4.821536810077878e-07, |
|
"logits/chosen": -3.161402702331543, |
|
"logits/rejected": -3.0537612438201904, |
|
"logps/chosen": -365.30194091796875, |
|
"logps/rejected": -366.2790222167969, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8323550224304199, |
|
"rewards/margins": 0.44887009263038635, |
|
"rewards/rejected": -1.2812249660491943, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21454735740450026, |
|
"grad_norm": 23.899851886696286, |
|
"learning_rate": 4.804197281126862e-07, |
|
"logits/chosen": -3.041658878326416, |
|
"logits/rejected": -3.0123302936553955, |
|
"logps/chosen": -333.7040100097656, |
|
"logps/rejected": -387.1278381347656, |
|
"loss": 0.6164, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6120246052742004, |
|
"rewards/margins": 0.2642322778701782, |
|
"rewards/rejected": -0.8762569427490234, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"grad_norm": 27.092999180625082, |
|
"learning_rate": 4.786088169001671e-07, |
|
"logits/chosen": -3.0973618030548096, |
|
"logits/rejected": -3.074772834777832, |
|
"logps/chosen": -348.03253173828125, |
|
"logps/rejected": -382.26348876953125, |
|
"loss": 0.5759, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7947549819946289, |
|
"rewards/margins": 0.20850534737110138, |
|
"rewards/rejected": -1.0032602548599243, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2250130821559393, |
|
"grad_norm": 18.710028827309927, |
|
"learning_rate": 4.767215521998648e-07, |
|
"logits/chosen": -3.1823196411132812, |
|
"logits/rejected": -3.060070037841797, |
|
"logps/chosen": -365.1378479003906, |
|
"logps/rejected": -375.0210876464844, |
|
"loss": 0.5841, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6514087319374084, |
|
"rewards/margins": 0.7235566973686218, |
|
"rewards/rejected": -1.3749655485153198, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2302459445316588, |
|
"grad_norm": 19.160389427236, |
|
"learning_rate": 4.7475856434285853e-07, |
|
"logits/chosen": -3.0749258995056152, |
|
"logits/rejected": -3.038853883743286, |
|
"logps/chosen": -341.9783935546875, |
|
"logps/rejected": -360.7558898925781, |
|
"loss": 0.5653, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7828488349914551, |
|
"rewards/margins": 0.42785197496414185, |
|
"rewards/rejected": -1.2107007503509521, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23547880690737832, |
|
"grad_norm": 18.671007546111333, |
|
"learning_rate": 4.727205089511466e-07, |
|
"logits/chosen": -2.8811748027801514, |
|
"logits/rejected": -2.9171783924102783, |
|
"logps/chosen": -314.2234802246094, |
|
"logps/rejected": -364.9255676269531, |
|
"loss": 0.5332, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9071494936943054, |
|
"rewards/margins": 0.43959683179855347, |
|
"rewards/rejected": -1.3467462062835693, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24071166928309787, |
|
"grad_norm": 21.740623256705202, |
|
"learning_rate": 4.706080667186738e-07, |
|
"logits/chosen": -3.034032106399536, |
|
"logits/rejected": -2.936366558074951, |
|
"logps/chosen": -348.19818115234375, |
|
"logps/rejected": -367.1400146484375, |
|
"loss": 0.5763, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7079233527183533, |
|
"rewards/margins": 0.4359031617641449, |
|
"rewards/rejected": -1.1438263654708862, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24594453165881738, |
|
"grad_norm": 16.162917051078917, |
|
"learning_rate": 4.68421943183986e-07, |
|
"logits/chosen": -3.020691394805908, |
|
"logits/rejected": -3.0022692680358887, |
|
"logps/chosen": -361.812255859375, |
|
"logps/rejected": -411.245361328125, |
|
"loss": 0.5391, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8578980565071106, |
|
"rewards/margins": 0.5008631944656372, |
|
"rewards/rejected": -1.3587613105773926, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 18.008247568845675, |
|
"learning_rate": 4.661628684945851e-07, |
|
"logits/chosen": -3.017376661300659, |
|
"logits/rejected": -3.0296788215637207, |
|
"logps/chosen": -306.74578857421875, |
|
"logps/rejected": -355.64898681640625, |
|
"loss": 0.5624, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7197850346565247, |
|
"rewards/margins": 0.5877081751823425, |
|
"rewards/rejected": -1.3074933290481567, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 22.769589560997968, |
|
"learning_rate": 4.638315971630662e-07, |
|
"logits/chosen": -3.0244431495666504, |
|
"logits/rejected": -3.0133399963378906, |
|
"logps/chosen": -367.41778564453125, |
|
"logps/rejected": -381.48919677734375, |
|
"loss": 0.5374, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8502008318901062, |
|
"rewards/margins": 0.5050338506698608, |
|
"rewards/rejected": -1.3552347421646118, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2616431187859759, |
|
"grad_norm": 20.696725558242417, |
|
"learning_rate": 4.6142890781511635e-07, |
|
"logits/chosen": -2.9782967567443848, |
|
"logits/rejected": -2.982962131500244, |
|
"logps/chosen": -326.0328369140625, |
|
"logps/rejected": -403.2933349609375, |
|
"loss": 0.536, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7732757329940796, |
|
"rewards/margins": 0.6741504669189453, |
|
"rewards/rejected": -1.4474260807037354, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2668759811616955, |
|
"grad_norm": 18.257533796451074, |
|
"learning_rate": 4.5895560292945996e-07, |
|
"logits/chosen": -3.076591968536377, |
|
"logits/rejected": -2.978769302368164, |
|
"logps/chosen": -352.5182189941406, |
|
"logps/rejected": -368.4728088378906, |
|
"loss": 0.5645, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7696360945701599, |
|
"rewards/margins": 0.4942626953125, |
|
"rewards/rejected": -1.2638987302780151, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 14.913139673876909, |
|
"learning_rate": 4.5641250856983743e-07, |
|
"logits/chosen": -3.051644802093506, |
|
"logits/rejected": -3.033609390258789, |
|
"logps/chosen": -345.4510192871094, |
|
"logps/rejected": -381.84149169921875, |
|
"loss": 0.554, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.8861405253410339, |
|
"rewards/margins": 0.18610253930091858, |
|
"rewards/rejected": -1.072243094444275, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.2773417059131345, |
|
"grad_norm": 20.156563747913594, |
|
"learning_rate": 4.5380047410910655e-07, |
|
"logits/chosen": -2.9867942333221436, |
|
"logits/rejected": -2.976576089859009, |
|
"logps/chosen": -372.96954345703125, |
|
"logps/rejected": -352.43194580078125, |
|
"loss": 0.5551, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.49105939269065857, |
|
"rewards/margins": 0.6594048738479614, |
|
"rewards/rejected": -1.1504642963409424, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.282574568288854, |
|
"grad_norm": 18.267314148010716, |
|
"learning_rate": 4.5112037194555876e-07, |
|
"logits/chosen": -2.9626283645629883, |
|
"logits/rejected": -2.9342105388641357, |
|
"logps/chosen": -389.43304443359375, |
|
"logps/rejected": -416.20458984375, |
|
"loss": 0.5128, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0119785070419312, |
|
"rewards/margins": 0.5072382688522339, |
|
"rewards/rejected": -1.519216775894165, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.28780743066457354, |
|
"grad_norm": 22.245459444787983, |
|
"learning_rate": 4.4837309721154536e-07, |
|
"logits/chosen": -3.071584701538086, |
|
"logits/rejected": -2.9756951332092285, |
|
"logps/chosen": -405.8622741699219, |
|
"logps/rejected": -432.721923828125, |
|
"loss": 0.5558, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.832485020160675, |
|
"rewards/margins": 0.8155497312545776, |
|
"rewards/rejected": -1.648034691810608, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29304029304029305, |
|
"grad_norm": 20.317776276573877, |
|
"learning_rate": 4.4555956747451065e-07, |
|
"logits/chosen": -3.0436346530914307, |
|
"logits/rejected": -3.003649950027466, |
|
"logps/chosen": -354.3453063964844, |
|
"logps/rejected": -403.5852966308594, |
|
"loss": 0.5513, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.745491623878479, |
|
"rewards/margins": 0.7005642056465149, |
|
"rewards/rejected": -1.4460558891296387, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29827315541601257, |
|
"grad_norm": 23.15587894808025, |
|
"learning_rate": 4.426807224305315e-07, |
|
"logits/chosen": -3.079277753829956, |
|
"logits/rejected": -2.9669790267944336, |
|
"logps/chosen": -363.68408203125, |
|
"logps/rejected": -392.4742126464844, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9164775013923645, |
|
"rewards/margins": 0.5597431659698486, |
|
"rewards/rejected": -1.4762208461761475, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3035060177917321, |
|
"grad_norm": 22.081284981593797, |
|
"learning_rate": 4.397375235904669e-07, |
|
"logits/chosen": -3.0565836429595947, |
|
"logits/rejected": -2.9745848178863525, |
|
"logps/chosen": -407.9656677246094, |
|
"logps/rejected": -364.0897216796875, |
|
"loss": 0.5422, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0237916707992554, |
|
"rewards/margins": 0.5156306624412537, |
|
"rewards/rejected": -1.5394222736358643, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.3087388801674516, |
|
"grad_norm": 19.911353831828233, |
|
"learning_rate": 4.3673095395882074e-07, |
|
"logits/chosen": -2.8461005687713623, |
|
"logits/rejected": -2.873964786529541, |
|
"logps/chosen": -305.05712890625, |
|
"logps/rejected": -354.9154968261719, |
|
"loss": 0.5616, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9405692219734192, |
|
"rewards/margins": 0.4907251000404358, |
|
"rewards/rejected": -1.4312940835952759, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 20.62008098338957, |
|
"learning_rate": 4.3366201770542687e-07, |
|
"logits/chosen": -2.9502501487731934, |
|
"logits/rejected": -2.9600391387939453, |
|
"logps/chosen": -352.8346862792969, |
|
"logps/rejected": -389.2740783691406, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7951434254646301, |
|
"rewards/margins": 0.4735126495361328, |
|
"rewards/rejected": -1.2686560153961182, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31920460491889063, |
|
"grad_norm": 23.35259209843221, |
|
"learning_rate": 4.3053173983006395e-07, |
|
"logits/chosen": -2.997762441635132, |
|
"logits/rejected": -2.9072015285491943, |
|
"logps/chosen": -268.5374450683594, |
|
"logps/rejected": -340.7001953125, |
|
"loss": 0.5282, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7231623530387878, |
|
"rewards/margins": 0.5718687772750854, |
|
"rewards/rejected": -1.295031189918518, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32443746729461015, |
|
"grad_norm": 19.603010964113807, |
|
"learning_rate": 4.2734116582011403e-07, |
|
"logits/chosen": -3.0787484645843506, |
|
"logits/rejected": -2.917550563812256, |
|
"logps/chosen": -418.8789978027344, |
|
"logps/rejected": -343.0426025390625, |
|
"loss": 0.5538, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7103374004364014, |
|
"rewards/margins": 0.6512194871902466, |
|
"rewards/rejected": -1.361556887626648, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.32967032967032966, |
|
"grad_norm": 14.875758380403331, |
|
"learning_rate": 4.2409136130137845e-07, |
|
"logits/chosen": -2.9392685890197754, |
|
"logits/rejected": -2.946084499359131, |
|
"logps/chosen": -309.2885437011719, |
|
"logps/rejected": -416.11248779296875, |
|
"loss": 0.5898, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9219547510147095, |
|
"rewards/margins": 0.6291396021842957, |
|
"rewards/rejected": -1.5510942935943604, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3349031920460492, |
|
"grad_norm": 20.283142782761487, |
|
"learning_rate": 4.207834116821672e-07, |
|
"logits/chosen": -2.9835166931152344, |
|
"logits/rejected": -2.9256935119628906, |
|
"logps/chosen": -362.146728515625, |
|
"logps/rejected": -449.8570861816406, |
|
"loss": 0.5308, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8511027097702026, |
|
"rewards/margins": 0.7874690890312195, |
|
"rewards/rejected": -1.6385719776153564, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"grad_norm": 19.556656759837768, |
|
"learning_rate": 4.174184217907818e-07, |
|
"logits/chosen": -2.9476585388183594, |
|
"logits/rejected": -2.919732093811035, |
|
"logps/chosen": -363.3470458984375, |
|
"logps/rejected": -402.04034423828125, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0490022897720337, |
|
"rewards/margins": 0.5906215310096741, |
|
"rewards/rejected": -1.6396238803863525, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3453689167974882, |
|
"grad_norm": 23.647075588737607, |
|
"learning_rate": 4.1399751550651084e-07, |
|
"logits/chosen": -2.8941304683685303, |
|
"logits/rejected": -2.8757565021514893, |
|
"logps/chosen": -331.69989013671875, |
|
"logps/rejected": -365.02874755859375, |
|
"loss": 0.5379, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0188653469085693, |
|
"rewards/margins": 0.4434829354286194, |
|
"rewards/rejected": -1.462348222732544, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35060177917320773, |
|
"grad_norm": 18.058235844575755, |
|
"learning_rate": 4.1052183538426426e-07, |
|
"logits/chosen": -2.845398187637329, |
|
"logits/rejected": -2.820038080215454, |
|
"logps/chosen": -337.0357360839844, |
|
"logps/rejected": -356.2289123535156, |
|
"loss": 0.5665, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7971991896629333, |
|
"rewards/margins": 0.494406133890152, |
|
"rewards/rejected": -1.2916053533554077, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.35583464154892724, |
|
"grad_norm": 16.83232053209588, |
|
"learning_rate": 4.0699254227296884e-07, |
|
"logits/chosen": -2.760856866836548, |
|
"logits/rejected": -2.731729030609131, |
|
"logps/chosen": -341.89471435546875, |
|
"logps/rejected": -364.8521423339844, |
|
"loss": 0.5461, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.060369610786438, |
|
"rewards/margins": 0.510382354259491, |
|
"rewards/rejected": -1.5707520246505737, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36106750392464676, |
|
"grad_norm": 13.513720855490863, |
|
"learning_rate": 4.034108149278543e-07, |
|
"logits/chosen": -2.935300350189209, |
|
"logits/rejected": -2.8605048656463623, |
|
"logps/chosen": -317.07965087890625, |
|
"logps/rejected": -352.84625244140625, |
|
"loss": 0.5333, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8069983720779419, |
|
"rewards/margins": 0.5210608839988708, |
|
"rewards/rejected": -1.328059434890747, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.3663003663003663, |
|
"grad_norm": 17.11428808211033, |
|
"learning_rate": 3.9977784961675833e-07, |
|
"logits/chosen": -2.8885040283203125, |
|
"logits/rejected": -2.872298240661621, |
|
"logps/chosen": -337.2678527832031, |
|
"logps/rejected": -360.61981201171875, |
|
"loss": 0.5359, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9015989303588867, |
|
"rewards/margins": 0.44669684767723083, |
|
"rewards/rejected": -1.34829580783844, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3715332286760858, |
|
"grad_norm": 16.784256002039648, |
|
"learning_rate": 3.96094859720583e-07, |
|
"logits/chosen": -2.9169678688049316, |
|
"logits/rejected": -2.833569049835205, |
|
"logps/chosen": -405.4876403808594, |
|
"logps/rejected": -413.13128662109375, |
|
"loss": 0.4927, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8311295509338379, |
|
"rewards/margins": 0.6479163765907288, |
|
"rewards/rejected": -1.4790458679199219, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 21.042753731901662, |
|
"learning_rate": 3.923630753280357e-07, |
|
"logits/chosen": -2.7107739448547363, |
|
"logits/rejected": -2.7630531787872314, |
|
"logps/chosen": -302.10809326171875, |
|
"logps/rejected": -358.5349426269531, |
|
"loss": 0.5092, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9158231019973755, |
|
"rewards/margins": 0.47180694341659546, |
|
"rewards/rejected": -1.3876301050186157, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3819989534275249, |
|
"grad_norm": 26.12620605806209, |
|
"learning_rate": 3.8858374282478893e-07, |
|
"logits/chosen": -2.8153467178344727, |
|
"logits/rejected": -2.800262928009033, |
|
"logps/chosen": -358.6976623535156, |
|
"logps/rejected": -482.4383850097656, |
|
"loss": 0.5631, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0242103338241577, |
|
"rewards/margins": 0.9604707956314087, |
|
"rewards/rejected": -1.9846811294555664, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3872318158032444, |
|
"grad_norm": 16.423805209045664, |
|
"learning_rate": 3.8475812447719823e-07, |
|
"logits/chosen": -2.708292007446289, |
|
"logits/rejected": -2.718214511871338, |
|
"logps/chosen": -324.5830078125, |
|
"logps/rejected": -349.2548828125, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9263254404067993, |
|
"rewards/margins": 0.4629155099391937, |
|
"rewards/rejected": -1.3892408609390259, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3924646781789639, |
|
"grad_norm": 20.91696682314748, |
|
"learning_rate": 3.8088749801071496e-07, |
|
"logits/chosen": -2.7589619159698486, |
|
"logits/rejected": -2.72037935256958, |
|
"logps/chosen": -369.92535400390625, |
|
"logps/rejected": -428.5404357910156, |
|
"loss": 0.4876, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9816117286682129, |
|
"rewards/margins": 0.4242846369743347, |
|
"rewards/rejected": -1.4058964252471924, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3976975405546834, |
|
"grad_norm": 22.40132664163239, |
|
"learning_rate": 3.7697315618313644e-07, |
|
"logits/chosen": -2.8032193183898926, |
|
"logits/rejected": -2.7569103240966797, |
|
"logps/chosen": -296.2752990722656, |
|
"logps/rejected": -319.3106384277344, |
|
"loss": 0.578, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7733054161071777, |
|
"rewards/margins": 0.5528251528739929, |
|
"rewards/rejected": -1.3261306285858154, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.40293040293040294, |
|
"grad_norm": 16.787816149540767, |
|
"learning_rate": 3.7301640635283584e-07, |
|
"logits/chosen": -2.801471471786499, |
|
"logits/rejected": -2.789280652999878, |
|
"logps/chosen": -340.1591796875, |
|
"logps/rejected": -418.39581298828125, |
|
"loss": 0.5735, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0804179906845093, |
|
"rewards/margins": 0.42698368430137634, |
|
"rewards/rejected": -1.507401704788208, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 18.838820333043618, |
|
"learning_rate": 3.6901857004211443e-07, |
|
"logits/chosen": -2.7957890033721924, |
|
"logits/rejected": -2.7882323265075684, |
|
"logps/chosen": -319.119140625, |
|
"logps/rejected": -342.0630187988281, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8813630938529968, |
|
"rewards/margins": 0.6091907620429993, |
|
"rewards/rejected": -1.4905539751052856, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.413396127681842, |
|
"grad_norm": 16.632694713274564, |
|
"learning_rate": 3.6498098249582444e-07, |
|
"logits/chosen": -2.811414957046509, |
|
"logits/rejected": -2.8175008296966553, |
|
"logps/chosen": -307.6065979003906, |
|
"logps/rejected": -408.051513671875, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9321305155754089, |
|
"rewards/margins": 0.3750917315483093, |
|
"rewards/rejected": -1.3072223663330078, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4186289900575615, |
|
"grad_norm": 19.513068096670256, |
|
"learning_rate": 3.6090499223540757e-07, |
|
"logits/chosen": -2.834150791168213, |
|
"logits/rejected": -2.839689016342163, |
|
"logps/chosen": -388.6148376464844, |
|
"logps/rejected": -436.7425842285156, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.028540015220642, |
|
"rewards/margins": 0.46030083298683167, |
|
"rewards/rejected": -1.4888408184051514, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.423861852433281, |
|
"grad_norm": 22.912358541413862, |
|
"learning_rate": 3.5679196060850034e-07, |
|
"logits/chosen": -2.786160945892334, |
|
"logits/rejected": -2.722895383834839, |
|
"logps/chosen": -388.2081298828125, |
|
"logps/rejected": -410.9185485839844, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9616454839706421, |
|
"rewards/margins": 0.6216525435447693, |
|
"rewards/rejected": -1.5832980871200562, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4290947148090005, |
|
"grad_norm": 19.833630204521402, |
|
"learning_rate": 3.5264326133425464e-07, |
|
"logits/chosen": -2.770688533782959, |
|
"logits/rejected": -2.727165699005127, |
|
"logps/chosen": -395.0430908203125, |
|
"logps/rejected": -408.1805725097656, |
|
"loss": 0.5697, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2092084884643555, |
|
"rewards/margins": 0.6522542834281921, |
|
"rewards/rejected": -1.8614628314971924, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43432757718472004, |
|
"grad_norm": 18.90029316146338, |
|
"learning_rate": 3.4846028004452693e-07, |
|
"logits/chosen": -2.8430776596069336, |
|
"logits/rejected": -2.784043788909912, |
|
"logps/chosen": -323.55841064453125, |
|
"logps/rejected": -371.6314697265625, |
|
"loss": 0.6016, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9803129434585571, |
|
"rewards/margins": 0.5237395763397217, |
|
"rewards/rejected": -1.5040525197982788, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 22.646062520489927, |
|
"learning_rate": 3.4424441382108826e-07, |
|
"logits/chosen": -2.8667078018188477, |
|
"logits/rejected": -2.706662178039551, |
|
"logps/chosen": -484.07366943359375, |
|
"logps/rejected": -441.30517578125, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9760993123054504, |
|
"rewards/margins": 0.7057408094406128, |
|
"rewards/rejected": -1.681840181350708, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44479330193615907, |
|
"grad_norm": 25.16275542323138, |
|
"learning_rate": 3.399970707290105e-07, |
|
"logits/chosen": -2.803353786468506, |
|
"logits/rejected": -2.7015905380249023, |
|
"logps/chosen": -343.2978820800781, |
|
"logps/rejected": -361.6952209472656, |
|
"loss": 0.5843, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.014632225036621, |
|
"rewards/margins": 0.509315013885498, |
|
"rewards/rejected": -1.5239471197128296, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4500261643118786, |
|
"grad_norm": 22.215100069898874, |
|
"learning_rate": 3.3571966934638376e-07, |
|
"logits/chosen": -2.7740490436553955, |
|
"logits/rejected": -2.7684121131896973, |
|
"logps/chosen": -250.2857666015625, |
|
"logps/rejected": -405.693603515625, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7055439949035645, |
|
"rewards/margins": 0.9115999937057495, |
|
"rewards/rejected": -1.617143988609314, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4552590266875981, |
|
"grad_norm": 20.522143419864445, |
|
"learning_rate": 3.314136382905234e-07, |
|
"logits/chosen": -2.5709900856018066, |
|
"logits/rejected": -2.627197742462158, |
|
"logps/chosen": -310.1892395019531, |
|
"logps/rejected": -372.0884704589844, |
|
"loss": 0.5541, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9675874710083008, |
|
"rewards/margins": 0.6155644655227661, |
|
"rewards/rejected": -1.583151936531067, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.4604918890633176, |
|
"grad_norm": 20.852957312991986, |
|
"learning_rate": 3.270804157408225e-07, |
|
"logits/chosen": -2.776916980743408, |
|
"logits/rejected": -2.7587342262268066, |
|
"logps/chosen": -373.98858642578125, |
|
"logps/rejected": -383.79766845703125, |
|
"loss": 0.5241, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.1561435461044312, |
|
"rewards/margins": 0.31390270590782166, |
|
"rewards/rejected": -1.4700462818145752, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.46572475143903713, |
|
"grad_norm": 19.899689863222886, |
|
"learning_rate": 3.227214489584128e-07, |
|
"logits/chosen": -2.8240392208099365, |
|
"logits/rejected": -2.7805697917938232, |
|
"logps/chosen": -397.1903381347656, |
|
"logps/rejected": -392.92962646484375, |
|
"loss": 0.526, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9943920969963074, |
|
"rewards/margins": 0.6805135607719421, |
|
"rewards/rejected": -1.674905776977539, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47095761381475665, |
|
"grad_norm": 24.19854250084782, |
|
"learning_rate": 3.1833819380279023e-07, |
|
"logits/chosen": -2.6620640754699707, |
|
"logits/rejected": -2.636390209197998, |
|
"logps/chosen": -320.4856872558594, |
|
"logps/rejected": -480.1658630371094, |
|
"loss": 0.5258, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.8806970715522766, |
|
"rewards/margins": 1.154190182685852, |
|
"rewards/rejected": -2.0348870754241943, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 21.959932385171246, |
|
"learning_rate": 3.139321142455703e-07, |
|
"logits/chosen": -2.638817548751831, |
|
"logits/rejected": -2.5849947929382324, |
|
"logps/chosen": -250.20504760742188, |
|
"logps/rejected": -350.050537109375, |
|
"loss": 0.5616, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8337706327438354, |
|
"rewards/margins": 0.9122722744941711, |
|
"rewards/rejected": -1.7460429668426514, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48142333856619574, |
|
"grad_norm": 17.527217382567645, |
|
"learning_rate": 3.095046818815331e-07, |
|
"logits/chosen": -2.835466146469116, |
|
"logits/rejected": -2.7251462936401367, |
|
"logps/chosen": -399.1574401855469, |
|
"logps/rejected": -405.2040100097656, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0480482578277588, |
|
"rewards/margins": 0.6532463431358337, |
|
"rewards/rejected": -1.7012945413589478, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.48665620094191525, |
|
"grad_norm": 18.49674197452729, |
|
"learning_rate": 3.0505737543712275e-07, |
|
"logits/chosen": -2.7456107139587402, |
|
"logits/rejected": -2.7260804176330566, |
|
"logps/chosen": -359.6734619140625, |
|
"logps/rejected": -397.59722900390625, |
|
"loss": 0.5395, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.040319800376892, |
|
"rewards/margins": 0.45119404792785645, |
|
"rewards/rejected": -1.4915138483047485, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49188906331763477, |
|
"grad_norm": 22.29728853694896, |
|
"learning_rate": 3.0059168027656475e-07, |
|
"logits/chosen": -2.825249195098877, |
|
"logits/rejected": -2.746302366256714, |
|
"logps/chosen": -393.6676940917969, |
|
"logps/rejected": -406.54840087890625, |
|
"loss": 0.4804, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0840771198272705, |
|
"rewards/margins": 0.6638802289962769, |
|
"rewards/rejected": -1.747957468032837, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4971219256933543, |
|
"grad_norm": 21.77543545391954, |
|
"learning_rate": 2.9610908790576663e-07, |
|
"logits/chosen": -2.6952872276306152, |
|
"logits/rejected": -2.597888469696045, |
|
"logps/chosen": -366.60919189453125, |
|
"logps/rejected": -454.2276306152344, |
|
"loss": 0.5121, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9012922048568726, |
|
"rewards/margins": 1.0516788959503174, |
|
"rewards/rejected": -1.9529712200164795, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 17.77590681604852, |
|
"learning_rate": 2.9161109547416667e-07, |
|
"logits/chosen": -2.78910493850708, |
|
"logits/rejected": -2.7166571617126465, |
|
"logps/chosen": -389.7692565917969, |
|
"logps/rejected": -440.7074279785156, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2027783393859863, |
|
"rewards/margins": 0.6414567232131958, |
|
"rewards/rejected": -1.8442351818084717, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5075876504447933, |
|
"grad_norm": 21.568936575146136, |
|
"learning_rate": 2.8709920527469834e-07, |
|
"logits/chosen": -2.645921230316162, |
|
"logits/rejected": -2.5958800315856934, |
|
"logps/chosen": -382.9588623046875, |
|
"logps/rejected": -435.7353515625, |
|
"loss": 0.5336, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.0234426259994507, |
|
"rewards/margins": 0.9673296213150024, |
|
"rewards/rejected": -1.9907718896865845, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 14.819512071828433, |
|
"learning_rate": 2.8257492424203685e-07, |
|
"logits/chosen": -2.7956948280334473, |
|
"logits/rejected": -2.6334726810455322, |
|
"logps/chosen": -374.8362121582031, |
|
"logps/rejected": -393.6607666015625, |
|
"loss": 0.5127, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7595964074134827, |
|
"rewards/margins": 0.877772331237793, |
|
"rewards/rejected": -1.6373687982559204, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5180533751962323, |
|
"grad_norm": 19.427480376130767, |
|
"learning_rate": 2.780397634492949e-07, |
|
"logits/chosen": -2.6113762855529785, |
|
"logits/rejected": -2.550041675567627, |
|
"logps/chosen": -308.0559997558594, |
|
"logps/rejected": -378.0197448730469, |
|
"loss": 0.533, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7106773257255554, |
|
"rewards/margins": 0.9421442747116089, |
|
"rewards/rejected": -1.6528217792510986, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5232862375719518, |
|
"grad_norm": 19.563955582140014, |
|
"learning_rate": 2.7349523760333674e-07, |
|
"logits/chosen": -2.690460205078125, |
|
"logits/rejected": -2.630561590194702, |
|
"logps/chosen": -322.8216552734375, |
|
"logps/rejected": -375.9421691894531, |
|
"loss": 0.526, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0019958019256592, |
|
"rewards/margins": 0.6291624307632446, |
|
"rewards/rejected": -1.6311581134796143, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5285190999476713, |
|
"grad_norm": 22.498383582632794, |
|
"learning_rate": 2.6894286453887827e-07, |
|
"logits/chosen": -2.6777117252349854, |
|
"logits/rejected": -2.669342279434204, |
|
"logps/chosen": -321.1188049316406, |
|
"logps/rejected": -426.6979064941406, |
|
"loss": 0.529, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9470914006233215, |
|
"rewards/margins": 0.8681204915046692, |
|
"rewards/rejected": -1.8152118921279907, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.533751962323391, |
|
"grad_norm": 21.919257160932045, |
|
"learning_rate": 2.6438416471154273e-07, |
|
"logits/chosen": -2.7340927124023438, |
|
"logits/rejected": -2.6964974403381348, |
|
"logps/chosen": -366.81317138671875, |
|
"logps/rejected": -429.38348388671875, |
|
"loss": 0.4814, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.8678327798843384, |
|
"rewards/margins": 1.036704659461975, |
|
"rewards/rejected": -1.9045372009277344, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5389848246991105, |
|
"grad_norm": 19.911100659387785, |
|
"learning_rate": 2.598206606900406e-07, |
|
"logits/chosen": -2.726743459701538, |
|
"logits/rejected": -2.677971363067627, |
|
"logps/chosen": -372.01214599609375, |
|
"logps/rejected": -379.2396545410156, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.2678954601287842, |
|
"rewards/margins": 0.4320768415927887, |
|
"rewards/rejected": -1.69997239112854, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54421768707483, |
|
"grad_norm": 17.538470936971432, |
|
"learning_rate": 2.552538766476443e-07, |
|
"logits/chosen": -2.742051839828491, |
|
"logits/rejected": -2.794455051422119, |
|
"logps/chosen": -366.16021728515625, |
|
"logps/rejected": -434.7002868652344, |
|
"loss": 0.5653, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2499899864196777, |
|
"rewards/margins": 0.5716737508773804, |
|
"rewards/rejected": -1.8216636180877686, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5494505494505495, |
|
"grad_norm": 20.67966851697891, |
|
"learning_rate": 2.5068533785312666e-07, |
|
"logits/chosen": -2.7987608909606934, |
|
"logits/rejected": -2.7127881050109863, |
|
"logps/chosen": -398.8516540527344, |
|
"logps/rejected": -454.6826171875, |
|
"loss": 0.5117, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8307819366455078, |
|
"rewards/margins": 0.8939768075942993, |
|
"rewards/rejected": -1.7247587442398071, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.554683411826269, |
|
"grad_norm": 22.985339142449046, |
|
"learning_rate": 2.461165701613333e-07, |
|
"logits/chosen": -2.678114414215088, |
|
"logits/rejected": -2.677781105041504, |
|
"logps/chosen": -322.94134521484375, |
|
"logps/rejected": -438.4015197753906, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.6860709190368652, |
|
"rewards/margins": 1.1026661396026611, |
|
"rewards/rejected": -1.7887369394302368, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5599162742019885, |
|
"grad_norm": 16.295159140225806, |
|
"learning_rate": 2.415490995035596e-07, |
|
"logits/chosen": -2.6371679306030273, |
|
"logits/rejected": -2.6527652740478516, |
|
"logps/chosen": -420.98187255859375, |
|
"logps/rejected": -423.44085693359375, |
|
"loss": 0.5298, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0509339570999146, |
|
"rewards/margins": 0.5777655243873596, |
|
"rewards/rejected": -1.628699541091919, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 22.62097539281191, |
|
"learning_rate": 2.3698445137790258e-07, |
|
"logits/chosen": -2.7577648162841797, |
|
"logits/rejected": -2.6896519660949707, |
|
"logps/chosen": -318.0347900390625, |
|
"logps/rejected": -381.61053466796875, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.9332740902900696, |
|
"rewards/margins": 0.7067046165466309, |
|
"rewards/rejected": -1.6399787664413452, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5703819989534276, |
|
"grad_norm": 28.515591895174353, |
|
"learning_rate": 2.3242415033975575e-07, |
|
"logits/chosen": -2.651029109954834, |
|
"logits/rejected": -2.522026777267456, |
|
"logps/chosen": -425.49774169921875, |
|
"logps/rejected": -360.559326171875, |
|
"loss": 0.5498, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3192552328109741, |
|
"rewards/margins": 0.4480251371860504, |
|
"rewards/rejected": -1.7672803401947021, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5756148613291471, |
|
"grad_norm": 20.344909177572685, |
|
"learning_rate": 2.2786971949262134e-07, |
|
"logits/chosen": -2.621579885482788, |
|
"logits/rejected": -2.5863022804260254, |
|
"logps/chosen": -357.6981506347656, |
|
"logps/rejected": -448.5857849121094, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.055567979812622, |
|
"rewards/margins": 0.7904418706893921, |
|
"rewards/rejected": -1.8460098505020142, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5808477237048666, |
|
"grad_norm": 26.16837961253975, |
|
"learning_rate": 2.2332267997940513e-07, |
|
"logits/chosen": -2.412328004837036, |
|
"logits/rejected": -2.3921778202056885, |
|
"logps/chosen": -262.47528076171875, |
|
"logps/rejected": -381.80072021484375, |
|
"loss": 0.5524, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9449421167373657, |
|
"rewards/margins": 0.9657685160636902, |
|
"rewards/rejected": -1.9107106924057007, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5860805860805861, |
|
"grad_norm": 21.90364219974117, |
|
"learning_rate": 2.1878455047436753e-07, |
|
"logits/chosen": -2.5743680000305176, |
|
"logits/rejected": -2.5253610610961914, |
|
"logps/chosen": -378.9617004394531, |
|
"logps/rejected": -424.81768798828125, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1384319067001343, |
|
"rewards/margins": 0.6650327444076538, |
|
"rewards/rejected": -1.8034645318984985, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5913134484563056, |
|
"grad_norm": 21.49228929242445, |
|
"learning_rate": 2.1425684667589852e-07, |
|
"logits/chosen": -2.440842866897583, |
|
"logits/rejected": -2.395437717437744, |
|
"logps/chosen": -365.72039794921875, |
|
"logps/rejected": -471.58868408203125, |
|
"loss": 0.518, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.544072151184082, |
|
"rewards/margins": 0.6356275677680969, |
|
"rewards/rejected": -2.1797001361846924, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5965463108320251, |
|
"grad_norm": 30.123335979533568, |
|
"learning_rate": 2.0974108080028692e-07, |
|
"logits/chosen": -2.677391529083252, |
|
"logits/rejected": -2.6409811973571777, |
|
"logps/chosen": -364.96630859375, |
|
"logps/rejected": -449.03790283203125, |
|
"loss": 0.5191, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1608811616897583, |
|
"rewards/margins": 0.7243369817733765, |
|
"rewards/rejected": -1.8852180242538452, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6017791732077447, |
|
"grad_norm": 24.838443760030398, |
|
"learning_rate": 2.0523876107665194e-07, |
|
"logits/chosen": -2.6525421142578125, |
|
"logits/rejected": -2.4736952781677246, |
|
"logps/chosen": -368.06292724609375, |
|
"logps/rejected": -413.494140625, |
|
"loss": 0.5066, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.090054988861084, |
|
"rewards/margins": 0.8024319410324097, |
|
"rewards/rejected": -1.8924869298934937, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6070120355834642, |
|
"grad_norm": 25.160056131310814, |
|
"learning_rate": 2.0075139124320787e-07, |
|
"logits/chosen": -2.4324846267700195, |
|
"logits/rejected": -2.438986301422119, |
|
"logps/chosen": -315.16802978515625, |
|
"logps/rejected": -341.97711181640625, |
|
"loss": 0.561, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0802028179168701, |
|
"rewards/margins": 0.5685726404190063, |
|
"rewards/rejected": -1.6487756967544556, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 18.8022099775039, |
|
"learning_rate": 1.962804700450265e-07, |
|
"logits/chosen": -2.529344081878662, |
|
"logits/rejected": -2.4635815620422363, |
|
"logps/chosen": -359.16351318359375, |
|
"logps/rejected": -479.4518127441406, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9642885327339172, |
|
"rewards/margins": 1.158879041671753, |
|
"rewards/rejected": -2.1231675148010254, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6174777603349032, |
|
"grad_norm": 15.88759058302057, |
|
"learning_rate": 1.9182749073346943e-07, |
|
"logits/chosen": -2.613468885421753, |
|
"logits/rejected": -2.5453109741210938, |
|
"logps/chosen": -409.1913757324219, |
|
"logps/rejected": -419.2123107910156, |
|
"loss": 0.4683, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0642415285110474, |
|
"rewards/margins": 0.5339901447296143, |
|
"rewards/rejected": -1.5982316732406616, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6227106227106227, |
|
"grad_norm": 20.504471909733795, |
|
"learning_rate": 1.8739394056745372e-07, |
|
"logits/chosen": -2.630988597869873, |
|
"logits/rejected": -2.5344326496124268, |
|
"logps/chosen": -441.2276306152344, |
|
"logps/rejected": -423.215087890625, |
|
"loss": 0.4778, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8864222764968872, |
|
"rewards/margins": 0.7008800506591797, |
|
"rewards/rejected": -1.587302327156067, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 17.39552971038266, |
|
"learning_rate": 1.8298130031671972e-07, |
|
"logits/chosen": -2.344513177871704, |
|
"logits/rejected": -2.238572597503662, |
|
"logps/chosen": -408.66876220703125, |
|
"logps/rejected": -448.985107421875, |
|
"loss": 0.5002, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1611530780792236, |
|
"rewards/margins": 0.7518970370292664, |
|
"rewards/rejected": -1.9130502939224243, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6331763474620618, |
|
"grad_norm": 21.15724021287899, |
|
"learning_rate": 1.785910437672658e-07, |
|
"logits/chosen": -2.6438395977020264, |
|
"logits/rejected": -2.576352834701538, |
|
"logps/chosen": -396.1938171386719, |
|
"logps/rejected": -433.5428161621094, |
|
"loss": 0.53, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1291828155517578, |
|
"rewards/margins": 0.7359174489974976, |
|
"rewards/rejected": -1.8651002645492554, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6384092098377813, |
|
"grad_norm": 39.18766969494274, |
|
"learning_rate": 1.7422463722911624e-07, |
|
"logits/chosen": -2.577260971069336, |
|
"logits/rejected": -2.5167884826660156, |
|
"logps/chosen": -414.66424560546875, |
|
"logps/rejected": -479.02349853515625, |
|
"loss": 0.5176, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0800281763076782, |
|
"rewards/margins": 1.009826898574829, |
|
"rewards/rejected": -2.089855194091797, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6436420722135008, |
|
"grad_norm": 23.81382539223185, |
|
"learning_rate": 1.6988353904658492e-07, |
|
"logits/chosen": -2.5534214973449707, |
|
"logits/rejected": -2.4224610328674316, |
|
"logps/chosen": -446.97784423828125, |
|
"logps/rejected": -440.23065185546875, |
|
"loss": 0.4717, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.156835675239563, |
|
"rewards/margins": 1.0062631368637085, |
|
"rewards/rejected": -2.1630990505218506, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6488749345892203, |
|
"grad_norm": 20.494707381068807, |
|
"learning_rate": 1.6556919911120081e-07, |
|
"logits/chosen": -2.4493117332458496, |
|
"logits/rejected": -2.4358694553375244, |
|
"logps/chosen": -338.51947021484375, |
|
"logps/rejected": -394.2952880859375, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0877834558486938, |
|
"rewards/margins": 0.8277204632759094, |
|
"rewards/rejected": -1.915503740310669, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6541077969649398, |
|
"grad_norm": 22.013430293553068, |
|
"learning_rate": 1.6128305837745546e-07, |
|
"logits/chosen": -2.6435108184814453, |
|
"logits/rejected": -2.529101610183716, |
|
"logps/chosen": -368.5247497558594, |
|
"logps/rejected": -469.41180419921875, |
|
"loss": 0.5119, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0766557455062866, |
|
"rewards/margins": 0.8811095356941223, |
|
"rewards/rejected": -1.9577653408050537, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6593406593406593, |
|
"grad_norm": 18.76065624293858, |
|
"learning_rate": 1.570265483815364e-07, |
|
"logits/chosen": -2.5028953552246094, |
|
"logits/rejected": -2.4554786682128906, |
|
"logps/chosen": -369.2057800292969, |
|
"logps/rejected": -359.7743225097656, |
|
"loss": 0.4974, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1592403650283813, |
|
"rewards/margins": 0.7551459074020386, |
|
"rewards/rejected": -1.9143861532211304, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6645735217163788, |
|
"grad_norm": 20.023285735465176, |
|
"learning_rate": 1.5280109076320506e-07, |
|
"logits/chosen": -2.4673855304718018, |
|
"logits/rejected": -2.3919363021850586, |
|
"logps/chosen": -333.00347900390625, |
|
"logps/rejected": -398.36724853515625, |
|
"loss": 0.4895, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1060950756072998, |
|
"rewards/margins": 0.8667494654655457, |
|
"rewards/rejected": -1.9728447198867798, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6698063840920984, |
|
"grad_norm": 22.920565383895827, |
|
"learning_rate": 1.4860809679098158e-07, |
|
"logits/chosen": -2.4915287494659424, |
|
"logits/rejected": -2.357835292816162, |
|
"logps/chosen": -348.21612548828125, |
|
"logps/rejected": -404.42144775390625, |
|
"loss": 0.513, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.1405024528503418, |
|
"rewards/margins": 0.8587859272956848, |
|
"rewards/rejected": -1.9992883205413818, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6750392464678179, |
|
"grad_norm": 17.203581320538397, |
|
"learning_rate": 1.444489668907914e-07, |
|
"logits/chosen": -2.4215731620788574, |
|
"logits/rejected": -2.402763843536377, |
|
"logps/chosen": -325.80047607421875, |
|
"logps/rejected": -460.4720764160156, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9108318090438843, |
|
"rewards/margins": 1.03805673122406, |
|
"rewards/rejected": -1.9488885402679443, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 24.051434193292977, |
|
"learning_rate": 1.403250901782354e-07, |
|
"logits/chosen": -2.489076614379883, |
|
"logits/rejected": -2.5095272064208984, |
|
"logps/chosen": -372.8560485839844, |
|
"logps/rejected": -452.8060607910156, |
|
"loss": 0.4923, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1308987140655518, |
|
"rewards/margins": 0.6806281208992004, |
|
"rewards/rejected": -1.811526894569397, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6855049712192569, |
|
"grad_norm": 31.109127335146216, |
|
"learning_rate": 1.3623784399463584e-07, |
|
"logits/chosen": -2.57063364982605, |
|
"logits/rejected": -2.5198721885681152, |
|
"logps/chosen": -335.35308837890625, |
|
"logps/rejected": -378.9818420410156, |
|
"loss": 0.4936, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9832290410995483, |
|
"rewards/margins": 0.8171318173408508, |
|
"rewards/rejected": -1.800360918045044, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 22.598231331948284, |
|
"learning_rate": 1.3218859344701632e-07, |
|
"logits/chosen": -2.5168867111206055, |
|
"logits/rejected": -2.4900808334350586, |
|
"logps/chosen": -395.2281494140625, |
|
"logps/rejected": -472.9083557128906, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.190709114074707, |
|
"rewards/margins": 0.7413493990898132, |
|
"rewards/rejected": -1.932058572769165, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6959706959706959, |
|
"grad_norm": 24.40009596001316, |
|
"learning_rate": 1.2817869095216624e-07, |
|
"logits/chosen": -2.507204055786133, |
|
"logits/rejected": -2.500208616256714, |
|
"logps/chosen": -353.3048400878906, |
|
"logps/rejected": -471.82275390625, |
|
"loss": 0.4741, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9545682668685913, |
|
"rewards/margins": 0.9104995727539062, |
|
"rewards/rejected": -1.865067720413208, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7012035583464155, |
|
"grad_norm": 23.876873548713156, |
|
"learning_rate": 1.2420947578494522e-07, |
|
"logits/chosen": -2.421795129776001, |
|
"logits/rejected": -2.3207345008850098, |
|
"logps/chosen": -376.72003173828125, |
|
"logps/rejected": -414.78125, |
|
"loss": 0.5116, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.238006353378296, |
|
"rewards/margins": 0.9127354621887207, |
|
"rewards/rejected": -2.1507418155670166, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.706436420722135, |
|
"grad_norm": 28.316479792999264, |
|
"learning_rate": 1.202822736309758e-07, |
|
"logits/chosen": -2.473055601119995, |
|
"logits/rejected": -2.429914951324463, |
|
"logps/chosen": -388.6692810058594, |
|
"logps/rejected": -427.60760498046875, |
|
"loss": 0.5008, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1391390562057495, |
|
"rewards/margins": 0.7363253831863403, |
|
"rewards/rejected": -1.8754642009735107, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7116692830978545, |
|
"grad_norm": 24.502529904435256, |
|
"learning_rate": 1.1639839614387572e-07, |
|
"logits/chosen": -2.385852813720703, |
|
"logits/rejected": -2.3600668907165527, |
|
"logps/chosen": -451.0430603027344, |
|
"logps/rejected": -492.866455078125, |
|
"loss": 0.5544, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2927653789520264, |
|
"rewards/margins": 0.7592190504074097, |
|
"rewards/rejected": -2.0519843101501465, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.716902145473574, |
|
"grad_norm": 30.316895773560056, |
|
"learning_rate": 1.1255914050717552e-07, |
|
"logits/chosen": -2.51098370552063, |
|
"logits/rejected": -2.306142568588257, |
|
"logps/chosen": -411.83721923828125, |
|
"logps/rejected": -396.2197265625, |
|
"loss": 0.4598, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.124796748161316, |
|
"rewards/margins": 0.8249333500862122, |
|
"rewards/rejected": -1.9497301578521729, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7221350078492935, |
|
"grad_norm": 24.553311638523578, |
|
"learning_rate": 1.0876578900107053e-07, |
|
"logits/chosen": -2.4358837604522705, |
|
"logits/rejected": -2.3727383613586426, |
|
"logps/chosen": -299.39874267578125, |
|
"logps/rejected": -433.1842346191406, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.9375057220458984, |
|
"rewards/margins": 1.0826350450515747, |
|
"rewards/rejected": -2.0201408863067627, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.727367870225013, |
|
"grad_norm": 26.323754702369897, |
|
"learning_rate": 1.050196085741491e-07, |
|
"logits/chosen": -2.3632800579071045, |
|
"logits/rejected": -2.254556179046631, |
|
"logps/chosen": -361.5583801269531, |
|
"logps/rejected": -418.86053466796875, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.145715355873108, |
|
"rewards/margins": 0.9661849141120911, |
|
"rewards/rejected": -2.1119003295898438, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"grad_norm": 24.426038056159815, |
|
"learning_rate": 1.0132185042024246e-07, |
|
"logits/chosen": -2.3606743812561035, |
|
"logits/rejected": -2.3655900955200195, |
|
"logps/chosen": -371.5710754394531, |
|
"logps/rejected": -457.9090881347656, |
|
"loss": 0.5118, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.462693452835083, |
|
"rewards/margins": 0.7237628698348999, |
|
"rewards/rejected": -2.1864564418792725, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7378335949764521, |
|
"grad_norm": 24.39166898358583, |
|
"learning_rate": 9.767374956053584e-08, |
|
"logits/chosen": -2.3346943855285645, |
|
"logits/rejected": -2.2823240756988525, |
|
"logps/chosen": -356.44989013671875, |
|
"logps/rejected": -437.150634765625, |
|
"loss": 0.533, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1568176746368408, |
|
"rewards/margins": 0.9103207588195801, |
|
"rewards/rejected": -2.067138433456421, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7430664573521716, |
|
"grad_norm": 25.4325780084741, |
|
"learning_rate": 9.407652443108192e-08, |
|
"logits/chosen": -2.468954086303711, |
|
"logits/rejected": -2.366795063018799, |
|
"logps/chosen": -431.132568359375, |
|
"logps/rejected": -444.24658203125, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3367488384246826, |
|
"rewards/margins": 0.7185128927230835, |
|
"rewards/rejected": -2.0552616119384766, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7482993197278912, |
|
"grad_norm": 17.254780714933666, |
|
"learning_rate": 9.053137647585229e-08, |
|
"logits/chosen": -2.459615468978882, |
|
"logits/rejected": -2.320861339569092, |
|
"logps/chosen": -401.857177734375, |
|
"logps/rejected": -455.6214904785156, |
|
"loss": 0.4665, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2010602951049805, |
|
"rewards/margins": 0.9325162768363953, |
|
"rewards/rejected": -2.1335763931274414, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 24.87156211923429, |
|
"learning_rate": 8.70394897454659e-08, |
|
"logits/chosen": -2.500220775604248, |
|
"logits/rejected": -2.4100728034973145, |
|
"logps/chosen": -439.19598388671875, |
|
"logps/rejected": -489.41632080078125, |
|
"loss": 0.5137, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.9708153009414673, |
|
"rewards/margins": 0.9810572862625122, |
|
"rewards/rejected": -1.9518728256225586, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7587650444793302, |
|
"grad_norm": 28.435625187853297, |
|
"learning_rate": 8.360203050172488e-08, |
|
"logits/chosen": -2.435936450958252, |
|
"logits/rejected": -2.3032217025756836, |
|
"logps/chosen": -414.93896484375, |
|
"logps/rejected": -463.2115173339844, |
|
"loss": 0.5337, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3389027118682861, |
|
"rewards/margins": 0.8098167181015015, |
|
"rewards/rejected": -2.148719549179077, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7639979068550498, |
|
"grad_norm": 20.945371142423706, |
|
"learning_rate": 8.022014682809305e-08, |
|
"logits/chosen": -2.321044683456421, |
|
"logits/rejected": -2.3206310272216797, |
|
"logps/chosen": -322.90631103515625, |
|
"logps/rejected": -388.8543701171875, |
|
"loss": 0.5333, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.229504942893982, |
|
"rewards/margins": 0.540000855922699, |
|
"rewards/rejected": -1.7695058584213257, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 21.858128219395716, |
|
"learning_rate": 7.689496824624525e-08, |
|
"logits/chosen": -2.482635021209717, |
|
"logits/rejected": -2.2945468425750732, |
|
"logps/chosen": -430.18426513671875, |
|
"logps/rejected": -435.81793212890625, |
|
"loss": 0.5068, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.131752610206604, |
|
"rewards/margins": 0.9505308866500854, |
|
"rewards/rejected": -2.0822837352752686, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7744636316064888, |
|
"grad_norm": 23.5664381748856, |
|
"learning_rate": 7.362760533881649e-08, |
|
"logits/chosen": -2.314892292022705, |
|
"logits/rejected": -2.294574737548828, |
|
"logps/chosen": -357.79241943359375, |
|
"logps/rejected": -422.94671630859375, |
|
"loss": 0.5056, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2231976985931396, |
|
"rewards/margins": 0.8575330972671509, |
|
"rewards/rejected": -2.08073091506958, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7796964939822083, |
|
"grad_norm": 21.487160861873303, |
|
"learning_rate": 7.041914937847584e-08, |
|
"logits/chosen": -2.1950268745422363, |
|
"logits/rejected": -2.1895930767059326, |
|
"logps/chosen": -417.36358642578125, |
|
"logps/rejected": -491.13580322265625, |
|
"loss": 0.4863, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4860632419586182, |
|
"rewards/margins": 0.8507789373397827, |
|
"rewards/rejected": -2.3368420600891113, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7849293563579278, |
|
"grad_norm": 20.590290369465663, |
|
"learning_rate": 6.727067196345099e-08, |
|
"logits/chosen": -2.2567172050476074, |
|
"logits/rejected": -2.1981043815612793, |
|
"logps/chosen": -328.57794189453125, |
|
"logps/rejected": -491.99395751953125, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2706371545791626, |
|
"rewards/margins": 1.2046202421188354, |
|
"rewards/rejected": -2.475257396697998, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7901622187336473, |
|
"grad_norm": 21.983008302291243, |
|
"learning_rate": 6.418322465962233e-08, |
|
"logits/chosen": -2.303830623626709, |
|
"logits/rejected": -2.3280632495880127, |
|
"logps/chosen": -416.1875, |
|
"logps/rejected": -522.3909912109375, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.691542387008667, |
|
"rewards/margins": 0.7319270968437195, |
|
"rewards/rejected": -2.4234697818756104, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7953950811093669, |
|
"grad_norm": 18.891272454217166, |
|
"learning_rate": 6.115783864930905e-08, |
|
"logits/chosen": -2.318718433380127, |
|
"logits/rejected": -2.2490391731262207, |
|
"logps/chosen": -324.4350891113281, |
|
"logps/rejected": -456.79010009765625, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1330978870391846, |
|
"rewards/margins": 1.0510344505310059, |
|
"rewards/rejected": -2.1841323375701904, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8006279434850864, |
|
"grad_norm": 28.458231332719134, |
|
"learning_rate": 5.8195524386862374e-08, |
|
"logits/chosen": -2.5397121906280518, |
|
"logits/rejected": -2.4674534797668457, |
|
"logps/chosen": -437.0335998535156, |
|
"logps/rejected": -569.2704467773438, |
|
"loss": 0.4681, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3304742574691772, |
|
"rewards/margins": 0.8019523620605469, |
|
"rewards/rejected": -2.1324267387390137, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8058608058608059, |
|
"grad_norm": 16.005052758772024, |
|
"learning_rate": 5.529727126118228e-08, |
|
"logits/chosen": -2.4277865886688232, |
|
"logits/rejected": -2.42647385597229, |
|
"logps/chosen": -481.9827575683594, |
|
"logps/rejected": -496.3814392089844, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3508599996566772, |
|
"rewards/margins": 0.6191782355308533, |
|
"rewards/rejected": -1.9700381755828857, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8110936682365254, |
|
"grad_norm": 19.063592300085755, |
|
"learning_rate": 5.246404726526918e-08, |
|
"logits/chosen": -2.3657736778259277, |
|
"logits/rejected": -2.2265355587005615, |
|
"logps/chosen": -405.5267639160156, |
|
"logps/rejected": -399.3191833496094, |
|
"loss": 0.4936, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.9781464338302612, |
|
"rewards/margins": 0.8889120817184448, |
|
"rewards/rejected": -1.867058515548706, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 20.080667969775277, |
|
"learning_rate": 4.969679867292276e-08, |
|
"logits/chosen": -2.4136693477630615, |
|
"logits/rejected": -2.3041911125183105, |
|
"logps/chosen": -448.9518127441406, |
|
"logps/rejected": -465.6314392089844, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1425752639770508, |
|
"rewards/margins": 0.7552384734153748, |
|
"rewards/rejected": -1.8978137969970703, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.8215593929879644, |
|
"grad_norm": 19.624253248781688, |
|
"learning_rate": 4.6996449722693315e-08, |
|
"logits/chosen": -2.313758611679077, |
|
"logits/rejected": -2.242504119873047, |
|
"logps/chosen": -329.8020324707031, |
|
"logps/rejected": -422.8460388183594, |
|
"loss": 0.5036, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0686215162277222, |
|
"rewards/margins": 0.8141248822212219, |
|
"rewards/rejected": -1.8827463388442993, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.826792255363684, |
|
"grad_norm": 22.941851498921892, |
|
"learning_rate": 4.436390230919465e-08, |
|
"logits/chosen": -2.388063907623291, |
|
"logits/rejected": -2.2178738117218018, |
|
"logps/chosen": -390.03240966796875, |
|
"logps/rejected": -432.62286376953125, |
|
"loss": 0.5466, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1851922273635864, |
|
"rewards/margins": 0.8856752514839172, |
|
"rewards/rejected": -2.0708675384521484, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8320251177394035, |
|
"grad_norm": 27.563077232233614, |
|
"learning_rate": 4.180003568187776e-08, |
|
"logits/chosen": -2.1802332401275635, |
|
"logits/rejected": -2.0524086952209473, |
|
"logps/chosen": -330.8059387207031, |
|
"logps/rejected": -422.68792724609375, |
|
"loss": 0.483, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.052398920059204, |
|
"rewards/margins": 1.1795663833618164, |
|
"rewards/rejected": -2.2319653034210205, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.837257980115123, |
|
"grad_norm": 26.693506881144305, |
|
"learning_rate": 3.930570615136919e-08, |
|
"logits/chosen": -2.2117209434509277, |
|
"logits/rejected": -2.2403011322021484, |
|
"logps/chosen": -379.39813232421875, |
|
"logps/rejected": -477.84185791015625, |
|
"loss": 0.506, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3940322399139404, |
|
"rewards/margins": 0.792374849319458, |
|
"rewards/rejected": -2.1864070892333984, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8424908424908425, |
|
"grad_norm": 23.858811201784942, |
|
"learning_rate": 3.6881746803469756e-08, |
|
"logits/chosen": -2.4752418994903564, |
|
"logits/rejected": -2.336674451828003, |
|
"logps/chosen": -469.2521057128906, |
|
"logps/rejected": -528.3489990234375, |
|
"loss": 0.532, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1845686435699463, |
|
"rewards/margins": 1.0137733221054077, |
|
"rewards/rejected": -2.1983418464660645, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.847723704866562, |
|
"grad_norm": 20.482171267477, |
|
"learning_rate": 3.452896722091128e-08, |
|
"logits/chosen": -2.306239604949951, |
|
"logits/rejected": -2.1316704750061035, |
|
"logps/chosen": -406.58636474609375, |
|
"logps/rejected": -457.32977294921875, |
|
"loss": 0.512, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3768417835235596, |
|
"rewards/margins": 0.9604042172431946, |
|
"rewards/rejected": -2.3372457027435303, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8529565672422815, |
|
"grad_norm": 23.432589866796224, |
|
"learning_rate": 3.2248153212961677e-08, |
|
"logits/chosen": -2.4169299602508545, |
|
"logits/rejected": -2.4181785583496094, |
|
"logps/chosen": -338.3548889160156, |
|
"logps/rejected": -437.1444396972656, |
|
"loss": 0.4971, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.117004156112671, |
|
"rewards/margins": 0.9204137921333313, |
|
"rewards/rejected": -2.0374178886413574, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.858189429618001, |
|
"grad_norm": 21.532289716147215, |
|
"learning_rate": 3.004006655297209e-08, |
|
"logits/chosen": -2.3161380290985107, |
|
"logits/rejected": -2.294666290283203, |
|
"logps/chosen": -397.2449645996094, |
|
"logps/rejected": -463.8539123535156, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0399081707000732, |
|
"rewards/margins": 0.9116095304489136, |
|
"rewards/rejected": -1.9515174627304077, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.8634222919937206, |
|
"grad_norm": 21.883230218921838, |
|
"learning_rate": 2.7905444723949762e-08, |
|
"logits/chosen": -2.2656126022338867, |
|
"logits/rejected": -2.2227652072906494, |
|
"logps/chosen": -409.6884765625, |
|
"logps/rejected": -456.69158935546875, |
|
"loss": 0.5042, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.435335397720337, |
|
"rewards/margins": 0.49176207184791565, |
|
"rewards/rejected": -1.9270973205566406, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8686551543694401, |
|
"grad_norm": 21.314522940777362, |
|
"learning_rate": 2.5845000672245572e-08, |
|
"logits/chosen": -2.174999475479126, |
|
"logits/rejected": -2.1320688724517822, |
|
"logps/chosen": -317.60736083984375, |
|
"logps/rejected": -448.986328125, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1678593158721924, |
|
"rewards/margins": 1.0162231922149658, |
|
"rewards/rejected": -2.184082269668579, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8738880167451596, |
|
"grad_norm": 24.478159011731208, |
|
"learning_rate": 2.385942256943499e-08, |
|
"logits/chosen": -2.3983044624328613, |
|
"logits/rejected": -2.2930662631988525, |
|
"logps/chosen": -395.8145751953125, |
|
"logps/rejected": -459.20880126953125, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4424173831939697, |
|
"rewards/margins": 0.7109994292259216, |
|
"rewards/rejected": -2.153416395187378, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 21.588727933861673, |
|
"learning_rate": 2.194937358247506e-08, |
|
"logits/chosen": -2.2726895809173584, |
|
"logits/rejected": -2.274221658706665, |
|
"logps/chosen": -352.26654052734375, |
|
"logps/rejected": -466.99713134765625, |
|
"loss": 0.468, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1099419593811035, |
|
"rewards/margins": 0.9857978820800781, |
|
"rewards/rejected": -2.0957398414611816, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8843537414965986, |
|
"grad_norm": 28.823716137497588, |
|
"learning_rate": 2.011549165221127e-08, |
|
"logits/chosen": -2.2286887168884277, |
|
"logits/rejected": -2.160118818283081, |
|
"logps/chosen": -348.77398681640625, |
|
"logps/rejected": -447.3614196777344, |
|
"loss": 0.471, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0584328174591064, |
|
"rewards/margins": 1.235668420791626, |
|
"rewards/rejected": -2.2941012382507324, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8895866038723181, |
|
"grad_norm": 23.55017126257268, |
|
"learning_rate": 1.8358389280311303e-08, |
|
"logits/chosen": -2.305906057357788, |
|
"logits/rejected": -2.2248482704162598, |
|
"logps/chosen": -396.25830078125, |
|
"logps/rejected": -456.83026123046875, |
|
"loss": 0.5012, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3627065420150757, |
|
"rewards/margins": 0.7585186958312988, |
|
"rewards/rejected": -2.121225118637085, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8948194662480377, |
|
"grad_norm": 29.871299875754943, |
|
"learning_rate": 1.6678653324693787e-08, |
|
"logits/chosen": -2.4222495555877686, |
|
"logits/rejected": -2.3127567768096924, |
|
"logps/chosen": -438.91754150390625, |
|
"logps/rejected": -514.671875, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2327755689620972, |
|
"rewards/margins": 0.8849409222602844, |
|
"rewards/rejected": -2.1177165508270264, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9000523286237572, |
|
"grad_norm": 23.152363355076538, |
|
"learning_rate": 1.507684480352292e-08, |
|
"logits/chosen": -2.3555819988250732, |
|
"logits/rejected": -2.1849610805511475, |
|
"logps/chosen": -411.84759521484375, |
|
"logps/rejected": -418.66162109375, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3699020147323608, |
|
"rewards/margins": 0.48209986090660095, |
|
"rewards/rejected": -1.8520019054412842, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9052851909994767, |
|
"grad_norm": 25.47462876192454, |
|
"learning_rate": 1.3553498707832761e-08, |
|
"logits/chosen": -2.2519454956054688, |
|
"logits/rejected": -2.1864726543426514, |
|
"logps/chosen": -321.34515380859375, |
|
"logps/rejected": -380.0171203613281, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.048170566558838, |
|
"rewards/margins": 0.9982441067695618, |
|
"rewards/rejected": -2.046414375305176, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9105180533751962, |
|
"grad_norm": 20.7756799926154, |
|
"learning_rate": 1.2109123822844653e-08, |
|
"logits/chosen": -2.3004744052886963, |
|
"logits/rejected": -2.0912976264953613, |
|
"logps/chosen": -363.4620666503906, |
|
"logps/rejected": -392.9441833496094, |
|
"loss": 0.528, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2563717365264893, |
|
"rewards/margins": 0.6634771227836609, |
|
"rewards/rejected": -1.9198487997055054, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.9157509157509157, |
|
"grad_norm": 24.144868454470704, |
|
"learning_rate": 1.0744202558037014e-08, |
|
"logits/chosen": -2.461714267730713, |
|
"logits/rejected": -2.4172637462615967, |
|
"logps/chosen": -423.16168212890625, |
|
"logps/rejected": -468.603515625, |
|
"loss": 0.5437, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0971845388412476, |
|
"rewards/margins": 0.8608269691467285, |
|
"rewards/rejected": -1.9580116271972656, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9209837781266352, |
|
"grad_norm": 21.432675624907116, |
|
"learning_rate": 9.459190786024696e-09, |
|
"logits/chosen": -2.313619613647461, |
|
"logits/rejected": -2.2647922039031982, |
|
"logps/chosen": -333.91119384765625, |
|
"logps/rejected": -378.60614013671875, |
|
"loss": 0.462, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.180997371673584, |
|
"rewards/margins": 0.6096967458724976, |
|
"rewards/rejected": -1.790694236755371, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9262166405023547, |
|
"grad_norm": 21.247299924907377, |
|
"learning_rate": 8.254517690300944e-09, |
|
"logits/chosen": -2.212974786758423, |
|
"logits/rejected": -2.1130902767181396, |
|
"logps/chosen": -387.406494140625, |
|
"logps/rejected": -464.7867736816406, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1103359460830688, |
|
"rewards/margins": 1.155289649963379, |
|
"rewards/rejected": -2.265625476837158, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9314495028780743, |
|
"grad_norm": 19.250716248823366, |
|
"learning_rate": 7.130585621893809e-09, |
|
"logits/chosen": -2.2373719215393066, |
|
"logits/rejected": -2.1997036933898926, |
|
"logps/chosen": -354.9322814941406, |
|
"logps/rejected": -389.18292236328125, |
|
"loss": 0.5426, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.236328363418579, |
|
"rewards/margins": 0.680686891078949, |
|
"rewards/rejected": -1.9170153141021729, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9366823652537938, |
|
"grad_norm": 19.50028022672278, |
|
"learning_rate": 6.0877699649840574e-09, |
|
"logits/chosen": -2.3931384086608887, |
|
"logits/rejected": -2.4258694648742676, |
|
"logps/chosen": -420.37835693359375, |
|
"logps/rejected": -480.65924072265625, |
|
"loss": 0.5031, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.045689344406128, |
|
"rewards/margins": 0.6386779546737671, |
|
"rewards/rejected": -1.6843674182891846, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 23.48790296286995, |
|
"learning_rate": 5.126419011529992e-09, |
|
"logits/chosen": -2.1289491653442383, |
|
"logits/rejected": -2.050173044204712, |
|
"logps/chosen": -353.542236328125, |
|
"logps/rejected": -493.37451171875, |
|
"loss": 0.4805, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3237100839614868, |
|
"rewards/margins": 1.1159131526947021, |
|
"rewards/rejected": -2.4396231174468994, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9471480900052328, |
|
"grad_norm": 32.155549914624004, |
|
"learning_rate": 4.246853844940723e-09, |
|
"logits/chosen": -2.359478712081909, |
|
"logits/rejected": -2.2460904121398926, |
|
"logps/chosen": -371.8105773925781, |
|
"logps/rejected": -408.96112060546875, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2655609846115112, |
|
"rewards/margins": 0.6940416097640991, |
|
"rewards/rejected": -1.9596025943756104, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 33.331958953697935, |
|
"learning_rate": 3.449368232836869e-09, |
|
"logits/chosen": -2.1659417152404785, |
|
"logits/rejected": -2.0680341720581055, |
|
"logps/chosen": -313.62542724609375, |
|
"logps/rejected": -353.74163818359375, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0687602758407593, |
|
"rewards/margins": 0.7445224523544312, |
|
"rewards/rejected": -1.8132827281951904, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.957613814756672, |
|
"grad_norm": 28.68655911528865, |
|
"learning_rate": 2.734228528934679e-09, |
|
"logits/chosen": -2.3676817417144775, |
|
"logits/rejected": -2.2866101264953613, |
|
"logps/chosen": -378.5147399902344, |
|
"logps/rejected": -425.35284423828125, |
|
"loss": 0.5205, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0752251148223877, |
|
"rewards/margins": 0.8689172863960266, |
|
"rewards/rejected": -1.9441421031951904, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9628466771323915, |
|
"grad_norm": 26.019951513630883, |
|
"learning_rate": 2.1016735840859447e-09, |
|
"logits/chosen": -2.3697683811187744, |
|
"logits/rejected": -2.1446757316589355, |
|
"logps/chosen": -452.55517578125, |
|
"logps/rejected": -480.78509521484375, |
|
"loss": 0.5056, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.36159348487854, |
|
"rewards/margins": 0.8624799847602844, |
|
"rewards/rejected": -2.224073648452759, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.968079539508111, |
|
"grad_norm": 21.66758394561238, |
|
"learning_rate": 1.551914666503812e-09, |
|
"logits/chosen": -2.343492031097412, |
|
"logits/rejected": -2.2736129760742188, |
|
"logps/chosen": -451.70184326171875, |
|
"logps/rejected": -451.47296142578125, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9336226582527161, |
|
"rewards/margins": 0.7032037973403931, |
|
"rewards/rejected": -1.636826515197754, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9733124018838305, |
|
"grad_norm": 18.351052930792708, |
|
"learning_rate": 1.0851353912008642e-09, |
|
"logits/chosen": -2.3186964988708496, |
|
"logits/rejected": -2.1230340003967285, |
|
"logps/chosen": -431.39166259765625, |
|
"logps/rejected": -425.0224609375, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2134671211242676, |
|
"rewards/margins": 0.7942633628845215, |
|
"rewards/rejected": -2.00773024559021, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.97854526425955, |
|
"grad_norm": 24.960805339720416, |
|
"learning_rate": 7.014916586632336e-10, |
|
"logits/chosen": -2.3193869590759277, |
|
"logits/rejected": -2.18151593208313, |
|
"logps/chosen": -328.3359069824219, |
|
"logps/rejected": -389.898193359375, |
|
"loss": 0.5006, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9415175318717957, |
|
"rewards/margins": 0.7296713590621948, |
|
"rewards/rejected": -1.6711889505386353, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.9837781266352695, |
|
"grad_norm": 17.530421870773836, |
|
"learning_rate": 4.011116027811956e-10, |
|
"logits/chosen": -2.352574110031128, |
|
"logits/rejected": -2.441904306411743, |
|
"logps/chosen": -352.0825500488281, |
|
"logps/rejected": -482.823974609375, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.103983998298645, |
|
"rewards/margins": 0.7162514925003052, |
|
"rewards/rejected": -1.8202356100082397, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.989010989010989, |
|
"grad_norm": 19.76988570659372, |
|
"learning_rate": 1.840955480532924e-10, |
|
"logits/chosen": -2.428910732269287, |
|
"logits/rejected": -2.348252058029175, |
|
"logps/chosen": -479.7613220214844, |
|
"logps/rejected": -493.49237060546875, |
|
"loss": 0.4957, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.061830759048462, |
|
"rewards/margins": 0.696933925151825, |
|
"rewards/rejected": -1.758764624595642, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9942438513867086, |
|
"grad_norm": 26.527332550948614, |
|
"learning_rate": 5.051597607894087e-11, |
|
"logits/chosen": -2.3824334144592285, |
|
"logits/rejected": -2.238849639892578, |
|
"logps/chosen": -349.9302062988281, |
|
"logps/rejected": -428.8340759277344, |
|
"loss": 0.5137, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3673003911972046, |
|
"rewards/margins": 0.8380087018013, |
|
"rewards/rejected": -2.2053091526031494, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9994767137624281, |
|
"grad_norm": 29.034257494878712, |
|
"learning_rate": 4.1750135001961117e-13, |
|
"logits/chosen": -2.3000001907348633, |
|
"logits/rejected": -2.2838308811187744, |
|
"logps/chosen": -425.3233337402344, |
|
"logps/rejected": -523.2975463867188, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0534088611602783, |
|
"rewards/margins": 1.0553555488586426, |
|
"rewards/rejected": -2.108764410018921, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1911, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5515718292903051, |
|
"train_runtime": 10135.9715, |
|
"train_samples_per_second": 6.031, |
|
"train_steps_per_second": 0.189 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1911, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|