|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998519176662224, |
|
"eval_steps": 500, |
|
"global_step": 1688, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005923293351103213, |
|
"grad_norm": 504.85714834798955, |
|
"learning_rate": 2.9585798816568044e-08, |
|
"logits/chosen": -2.2128281593322754, |
|
"logits/rejected": -2.1649556159973145, |
|
"logps/chosen": -334.58282470703125, |
|
"logps/rejected": -174.33193969726562, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": 0.003443267662078142, |
|
"rewards/margins": 0.00374796474352479, |
|
"rewards/rejected": -0.0003046986530534923, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.011846586702206426, |
|
"grad_norm": 519.6103666653933, |
|
"learning_rate": 5.917159763313609e-08, |
|
"logits/chosen": -2.2768917083740234, |
|
"logits/rejected": -2.2541089057922363, |
|
"logps/chosen": -442.1549377441406, |
|
"logps/rejected": -184.51959228515625, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.013772351667284966, |
|
"rewards/margins": 0.025344645604491234, |
|
"rewards/rejected": -0.011572292074561119, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01776988005330964, |
|
"grad_norm": 433.16385797649355, |
|
"learning_rate": 8.875739644970414e-08, |
|
"logits/chosen": -2.2564902305603027, |
|
"logits/rejected": -2.263812780380249, |
|
"logps/chosen": -363.1085510253906, |
|
"logps/rejected": -180.28768920898438, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.08519863337278366, |
|
"rewards/margins": 0.12328235059976578, |
|
"rewards/rejected": -0.038083698600530624, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.023693173404412852, |
|
"grad_norm": 312.3325020676633, |
|
"learning_rate": 1.1834319526627217e-07, |
|
"logits/chosen": -2.258511543273926, |
|
"logits/rejected": -2.21596097946167, |
|
"logps/chosen": -358.8316650390625, |
|
"logps/rejected": -164.7125244140625, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.21487005054950714, |
|
"rewards/margins": 0.3081914782524109, |
|
"rewards/rejected": -0.09332143515348434, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.029616466755516067, |
|
"grad_norm": 290.61310964931636, |
|
"learning_rate": 1.4792899408284022e-07, |
|
"logits/chosen": -2.248854875564575, |
|
"logits/rejected": -2.2407047748565674, |
|
"logps/chosen": -362.57843017578125, |
|
"logps/rejected": -178.95758056640625, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.38509249687194824, |
|
"rewards/margins": 0.6190904974937439, |
|
"rewards/rejected": -0.23399806022644043, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03553976010661928, |
|
"grad_norm": 303.52300542572084, |
|
"learning_rate": 1.7751479289940827e-07, |
|
"logits/chosen": -2.21785044670105, |
|
"logits/rejected": -2.182326316833496, |
|
"logps/chosen": -348.1097412109375, |
|
"logps/rejected": -162.1182403564453, |
|
"loss": 0.4517, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.6382587552070618, |
|
"rewards/margins": 0.9943715929985046, |
|
"rewards/rejected": -0.3561127781867981, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04146305345772249, |
|
"grad_norm": 251.78388219726764, |
|
"learning_rate": 2.0710059171597633e-07, |
|
"logits/chosen": -2.2731988430023193, |
|
"logits/rejected": -2.23414945602417, |
|
"logps/chosen": -337.77349853515625, |
|
"logps/rejected": -182.30088806152344, |
|
"loss": 0.4179, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.6993478536605835, |
|
"rewards/margins": 1.2595398426055908, |
|
"rewards/rejected": -0.5601919889450073, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.047386346808825704, |
|
"grad_norm": 246.56195042754825, |
|
"learning_rate": 2.3668639053254435e-07, |
|
"logits/chosen": -2.2068114280700684, |
|
"logits/rejected": -2.209404468536377, |
|
"logps/chosen": -408.40484619140625, |
|
"logps/rejected": -202.7127685546875, |
|
"loss": 0.3483, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 0.9628815650939941, |
|
"rewards/margins": 1.7825168371200562, |
|
"rewards/rejected": -0.8196353912353516, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05330964015992892, |
|
"grad_norm": 188.01879345063887, |
|
"learning_rate": 2.662721893491124e-07, |
|
"logits/chosen": -2.2164900302886963, |
|
"logits/rejected": -2.2228798866271973, |
|
"logps/chosen": -360.1303405761719, |
|
"logps/rejected": -179.77536010742188, |
|
"loss": 0.338, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.9752426147460938, |
|
"rewards/margins": 1.890472650527954, |
|
"rewards/rejected": -0.9152299761772156, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.059232933511032135, |
|
"grad_norm": 381.23591220338665, |
|
"learning_rate": 2.9585798816568045e-07, |
|
"logits/chosen": -2.2488455772399902, |
|
"logits/rejected": -2.2343482971191406, |
|
"logps/chosen": -347.54180908203125, |
|
"logps/rejected": -175.11048889160156, |
|
"loss": 0.3559, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 1.015298843383789, |
|
"rewards/margins": 1.9129976034164429, |
|
"rewards/rejected": -0.897698700428009, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06515622686213535, |
|
"grad_norm": 183.36363122937976, |
|
"learning_rate": 3.254437869822485e-07, |
|
"logits/chosen": -2.3256828784942627, |
|
"logits/rejected": -2.2990269660949707, |
|
"logps/chosen": -384.78863525390625, |
|
"logps/rejected": -195.26393127441406, |
|
"loss": 0.2998, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 1.2486822605133057, |
|
"rewards/margins": 2.4611282348632812, |
|
"rewards/rejected": -1.2124459743499756, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07107952021323856, |
|
"grad_norm": 196.55190107882535, |
|
"learning_rate": 3.5502958579881655e-07, |
|
"logits/chosen": -2.2839980125427246, |
|
"logits/rejected": -2.2439706325531006, |
|
"logps/chosen": -316.72900390625, |
|
"logps/rejected": -187.08209228515625, |
|
"loss": 0.2897, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.0929539203643799, |
|
"rewards/margins": 2.364865779876709, |
|
"rewards/rejected": -1.2719120979309082, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07700281356434177, |
|
"grad_norm": 187.394753566322, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/chosen": -2.3191745281219482, |
|
"logits/rejected": -2.3147594928741455, |
|
"logps/chosen": -341.1308288574219, |
|
"logps/rejected": -192.97549438476562, |
|
"loss": 0.2532, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.4034380912780762, |
|
"rewards/margins": 3.091428756713867, |
|
"rewards/rejected": -1.6879905462265015, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08292610691544498, |
|
"grad_norm": 116.14861473966326, |
|
"learning_rate": 4.1420118343195265e-07, |
|
"logits/chosen": -2.361495018005371, |
|
"logits/rejected": -2.321951150894165, |
|
"logps/chosen": -333.0472412109375, |
|
"logps/rejected": -179.1328582763672, |
|
"loss": 0.2321, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 0.9758448600769043, |
|
"rewards/margins": 3.051640510559082, |
|
"rewards/rejected": -2.0757956504821777, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0888494002665482, |
|
"grad_norm": 259.05273624826594, |
|
"learning_rate": 4.437869822485207e-07, |
|
"logits/chosen": -2.3843483924865723, |
|
"logits/rejected": -2.3312084674835205, |
|
"logps/chosen": -323.70526123046875, |
|
"logps/rejected": -197.47764587402344, |
|
"loss": 0.2627, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.864323616027832, |
|
"rewards/margins": 3.313027858734131, |
|
"rewards/rejected": -2.448704242706299, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09477269361765141, |
|
"grad_norm": 260.57944130025356, |
|
"learning_rate": 4.733727810650887e-07, |
|
"logits/chosen": -2.3698296546936035, |
|
"logits/rejected": -2.3505420684814453, |
|
"logps/chosen": -360.91864013671875, |
|
"logps/rejected": -198.164794921875, |
|
"loss": 0.2367, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.216972827911377, |
|
"rewards/margins": 3.8299670219421387, |
|
"rewards/rejected": -2.612994432449341, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.10069598696875463, |
|
"grad_norm": 150.81084803740066, |
|
"learning_rate": 4.999994653198566e-07, |
|
"logits/chosen": -2.3464887142181396, |
|
"logits/rejected": -2.3302454948425293, |
|
"logps/chosen": -335.73529052734375, |
|
"logps/rejected": -194.806640625, |
|
"loss": 0.2132, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.9059770703315735, |
|
"rewards/margins": 3.8774585723876953, |
|
"rewards/rejected": -2.9714818000793457, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.10661928031985785, |
|
"grad_norm": 280.1531227472986, |
|
"learning_rate": 4.999353064699471e-07, |
|
"logits/chosen": -2.364449977874756, |
|
"logits/rejected": -2.3326315879821777, |
|
"logps/chosen": -389.11236572265625, |
|
"logps/rejected": -215.023681640625, |
|
"loss": 0.236, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.8287426233291626, |
|
"rewards/margins": 4.475917816162109, |
|
"rewards/rejected": -3.647174835205078, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.11254257367096106, |
|
"grad_norm": 175.0444339450233, |
|
"learning_rate": 4.99764243036258e-07, |
|
"logits/chosen": -2.3520586490631104, |
|
"logits/rejected": -2.3261897563934326, |
|
"logps/chosen": -359.8037109375, |
|
"logps/rejected": -211.98666381835938, |
|
"loss": 0.1991, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.8636599779129028, |
|
"rewards/margins": 5.269904136657715, |
|
"rewards/rejected": -4.40624475479126, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.11846586702206427, |
|
"grad_norm": 237.8569924192833, |
|
"learning_rate": 4.994863481875841e-07, |
|
"logits/chosen": -2.3697867393493652, |
|
"logits/rejected": -2.3272385597229004, |
|
"logps/chosen": -382.9794006347656, |
|
"logps/rejected": -224.5498046875, |
|
"loss": 0.2205, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.797897219657898, |
|
"rewards/margins": 5.273079872131348, |
|
"rewards/rejected": -4.47518253326416, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12438916037316748, |
|
"grad_norm": 283.96288222349347, |
|
"learning_rate": 4.991017407876165e-07, |
|
"logits/chosen": -2.3532214164733887, |
|
"logits/rejected": -2.2996296882629395, |
|
"logps/chosen": -370.9749450683594, |
|
"logps/rejected": -198.25497436523438, |
|
"loss": 0.194, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 0.7681259512901306, |
|
"rewards/margins": 5.055164337158203, |
|
"rewards/rejected": -4.2870378494262695, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1303124537242707, |
|
"grad_norm": 386.78866139109, |
|
"learning_rate": 4.98610585344102e-07, |
|
"logits/chosen": -2.400724172592163, |
|
"logits/rejected": -2.375039577484131, |
|
"logps/chosen": -351.29156494140625, |
|
"logps/rejected": -214.4847412109375, |
|
"loss": 0.2662, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.7083452939987183, |
|
"rewards/margins": 4.722594261169434, |
|
"rewards/rejected": -4.014249324798584, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.13623574707537392, |
|
"grad_norm": 441.8804995826954, |
|
"learning_rate": 4.980130919384768e-07, |
|
"logits/chosen": -2.423753261566162, |
|
"logits/rejected": -2.3783416748046875, |
|
"logps/chosen": -399.88494873046875, |
|
"logps/rejected": -210.5980682373047, |
|
"loss": 0.2184, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 0.6179746389389038, |
|
"rewards/margins": 5.342424392700195, |
|
"rewards/rejected": -4.72445011138916, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.14215904042647712, |
|
"grad_norm": 327.95375338533654, |
|
"learning_rate": 4.973095161360105e-07, |
|
"logits/chosen": -2.3384299278259277, |
|
"logits/rejected": -2.323362350463867, |
|
"logps/chosen": -333.81536865234375, |
|
"logps/rejected": -228.82601928710938, |
|
"loss": 0.1762, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.22780442237854004, |
|
"rewards/margins": 5.158236980438232, |
|
"rewards/rejected": -4.93043327331543, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.14808233377758034, |
|
"grad_norm": 200.53822627799985, |
|
"learning_rate": 4.965001588764913e-07, |
|
"logits/chosen": -2.385148525238037, |
|
"logits/rejected": -2.3672657012939453, |
|
"logps/chosen": -397.1217041015625, |
|
"logps/rejected": -228.70773315429688, |
|
"loss": 0.1876, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.1027851104736328, |
|
"rewards/margins": 5.892121315002441, |
|
"rewards/rejected": -4.789336204528809, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.15400562712868354, |
|
"grad_norm": 47.97910034361157, |
|
"learning_rate": 4.955853663455072e-07, |
|
"logits/chosen": -2.438140392303467, |
|
"logits/rejected": -2.4087047576904297, |
|
"logps/chosen": -373.9306945800781, |
|
"logps/rejected": -242.48764038085938, |
|
"loss": 0.1722, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.6679338216781616, |
|
"rewards/margins": 5.903395175933838, |
|
"rewards/rejected": -5.235461235046387, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.15992892047978677, |
|
"grad_norm": 131.31461307719647, |
|
"learning_rate": 4.945655298263713e-07, |
|
"logits/chosen": -2.3899612426757812, |
|
"logits/rejected": -2.356353282928467, |
|
"logps/chosen": -375.282470703125, |
|
"logps/rejected": -228.1762237548828, |
|
"loss": 0.1561, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.6925370097160339, |
|
"rewards/margins": 5.915196418762207, |
|
"rewards/rejected": -5.222658634185791, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.16585221383088997, |
|
"grad_norm": 216.04443101572159, |
|
"learning_rate": 4.934410855327585e-07, |
|
"logits/chosen": -2.4134862422943115, |
|
"logits/rejected": -2.404572010040283, |
|
"logps/chosen": -332.552490234375, |
|
"logps/rejected": -228.0837860107422, |
|
"loss": 0.1927, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.5202642679214478, |
|
"rewards/margins": 6.256397724151611, |
|
"rewards/rejected": -5.736133575439453, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1717755071819932, |
|
"grad_norm": 124.31837233503076, |
|
"learning_rate": 4.922125144221252e-07, |
|
"logits/chosen": -2.430438995361328, |
|
"logits/rejected": -2.398848056793213, |
|
"logps/chosen": -409.75347900390625, |
|
"logps/rejected": -227.51846313476562, |
|
"loss": 0.1461, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.5862516164779663, |
|
"rewards/margins": 6.339666843414307, |
|
"rewards/rejected": -5.753414630889893, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.1776988005330964, |
|
"grad_norm": 90.59877362683102, |
|
"learning_rate": 4.90880341989989e-07, |
|
"logits/chosen": -2.381986141204834, |
|
"logits/rejected": -2.354752540588379, |
|
"logps/chosen": -372.0549011230469, |
|
"logps/rejected": -238.38900756835938, |
|
"loss": 0.2123, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 0.7634764909744263, |
|
"rewards/margins": 6.4423956871032715, |
|
"rewards/rejected": -5.678919792175293, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.18362209388419962, |
|
"grad_norm": 214.55771845505996, |
|
"learning_rate": 4.894451380451589e-07, |
|
"logits/chosen": -2.4163644313812256, |
|
"logits/rejected": -2.3825597763061523, |
|
"logps/chosen": -325.26171875, |
|
"logps/rejected": -230.36776733398438, |
|
"loss": 0.173, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.395336776971817, |
|
"rewards/margins": 5.5172624588012695, |
|
"rewards/rejected": -5.121925354003906, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.18954538723530281, |
|
"grad_norm": 143.07726831372327, |
|
"learning_rate": 4.879075164660124e-07, |
|
"logits/chosen": -2.4872541427612305, |
|
"logits/rejected": -2.4530301094055176, |
|
"logps/chosen": -337.13592529296875, |
|
"logps/rejected": -217.49795532226562, |
|
"loss": 0.1866, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.2758009433746338, |
|
"rewards/margins": 5.386232852935791, |
|
"rewards/rejected": -5.110430717468262, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.19546868058640604, |
|
"grad_norm": 96.96348568375632, |
|
"learning_rate": 4.862681349379212e-07, |
|
"logits/chosen": -2.5405662059783936, |
|
"logits/rejected": -2.506821870803833, |
|
"logps/chosen": -343.34637451171875, |
|
"logps/rejected": -218.04287719726562, |
|
"loss": 0.1596, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.5433809161186218, |
|
"rewards/margins": 5.724984645843506, |
|
"rewards/rejected": -5.181603908538818, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.20139197393750927, |
|
"grad_norm": 236.3908842304634, |
|
"learning_rate": 4.8452769467194e-07, |
|
"logits/chosen": -2.4440038204193115, |
|
"logits/rejected": -2.4228639602661133, |
|
"logps/chosen": -398.2060852050781, |
|
"logps/rejected": -238.4206085205078, |
|
"loss": 0.161, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.39312082529067993, |
|
"rewards/margins": 6.064854145050049, |
|
"rewards/rejected": -5.6717329025268555, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.20731526728861246, |
|
"grad_norm": 262.31086254004623, |
|
"learning_rate": 4.82686940104879e-07, |
|
"logits/chosen": -2.439896583557129, |
|
"logits/rejected": -2.415922164916992, |
|
"logps/chosen": -339.29217529296875, |
|
"logps/rejected": -242.05789184570312, |
|
"loss": 0.1722, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.6545895338058472, |
|
"rewards/margins": 5.870488166809082, |
|
"rewards/rejected": -6.525076866149902, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2132385606397157, |
|
"grad_norm": 222.15082465425456, |
|
"learning_rate": 4.807466585808856e-07, |
|
"logits/chosen": -2.379359006881714, |
|
"logits/rejected": -2.360146999359131, |
|
"logps/chosen": -316.02349853515625, |
|
"logps/rejected": -233.2420654296875, |
|
"loss": 0.156, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.11594714224338531, |
|
"rewards/margins": 6.157732963562012, |
|
"rewards/rejected": -6.273680210113525, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2191618539908189, |
|
"grad_norm": 242.78071949212634, |
|
"learning_rate": 4.787076800146752e-07, |
|
"logits/chosen": -2.4132204055786133, |
|
"logits/rejected": -2.403979778289795, |
|
"logps/chosen": -359.83953857421875, |
|
"logps/rejected": -234.0885772705078, |
|
"loss": 0.1843, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.31123560667037964, |
|
"rewards/margins": 5.990903377532959, |
|
"rewards/rejected": -6.302138328552246, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.22508514734192211, |
|
"grad_norm": 199.5431768901427, |
|
"learning_rate": 4.765708765365526e-07, |
|
"logits/chosen": -2.3680121898651123, |
|
"logits/rejected": -2.37663197517395, |
|
"logps/chosen": -356.29052734375, |
|
"logps/rejected": -248.9377899169922, |
|
"loss": 0.1632, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.06563782691955566, |
|
"rewards/margins": 6.597121238708496, |
|
"rewards/rejected": -6.662759304046631, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2310084406930253, |
|
"grad_norm": 252.4218453368612, |
|
"learning_rate": 4.7433716211937587e-07, |
|
"logits/chosen": -2.3765156269073486, |
|
"logits/rejected": -2.3698158264160156, |
|
"logps/chosen": -355.7226867675781, |
|
"logps/rejected": -231.8157501220703, |
|
"loss": 0.1849, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.5422400236129761, |
|
"rewards/margins": 6.704817295074463, |
|
"rewards/rejected": -6.1625776290893555, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.23693173404412854, |
|
"grad_norm": 145.48012597761922, |
|
"learning_rate": 4.720074921876245e-07, |
|
"logits/chosen": -2.398963451385498, |
|
"logits/rejected": -2.394768238067627, |
|
"logps/chosen": -322.42864990234375, |
|
"logps/rejected": -235.3686065673828, |
|
"loss": 0.1707, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.2241075038909912, |
|
"rewards/margins": 6.238755226135254, |
|
"rewards/rejected": -6.462862491607666, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24285502739523174, |
|
"grad_norm": 292.71968928639006, |
|
"learning_rate": 4.6958286320873593e-07, |
|
"logits/chosen": -2.43870210647583, |
|
"logits/rejected": -2.3871872425079346, |
|
"logps/chosen": -397.65423583984375, |
|
"logps/rejected": -255.60720825195312, |
|
"loss": 0.1541, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2829202711582184, |
|
"rewards/margins": 7.782283782958984, |
|
"rewards/rejected": -7.499364376068115, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.24877832074633496, |
|
"grad_norm": 224.80127439466196, |
|
"learning_rate": 4.6706431226688804e-07, |
|
"logits/chosen": -2.416389226913452, |
|
"logits/rejected": -2.3854172229766846, |
|
"logps/chosen": -403.90765380859375, |
|
"logps/rejected": -268.164794921875, |
|
"loss": 0.1664, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.07748878002166748, |
|
"rewards/margins": 8.120170593261719, |
|
"rewards/rejected": -8.197659492492676, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2547016140974382, |
|
"grad_norm": 314.70972896563933, |
|
"learning_rate": 4.6445291661940777e-07, |
|
"logits/chosen": -2.3671650886535645, |
|
"logits/rejected": -2.3478543758392334, |
|
"logps/chosen": -364.58673095703125, |
|
"logps/rejected": -255.23056030273438, |
|
"loss": 0.2109, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.149729922413826, |
|
"rewards/margins": 7.63533878326416, |
|
"rewards/rejected": -7.485608100891113, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2606249074485414, |
|
"grad_norm": 123.44333079620083, |
|
"learning_rate": 4.6174979323599715e-07, |
|
"logits/chosen": -2.418224811553955, |
|
"logits/rejected": -2.4098281860351562, |
|
"logps/chosen": -365.10400390625, |
|
"logps/rejected": -231.907470703125, |
|
"loss": 0.2034, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.7242127060890198, |
|
"rewards/margins": 6.6777753829956055, |
|
"rewards/rejected": -5.9535627365112305, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.2665482007996446, |
|
"grad_norm": 185.88217229096983, |
|
"learning_rate": 4.5895609832097277e-07, |
|
"logits/chosen": -2.4694488048553467, |
|
"logits/rejected": -2.4495654106140137, |
|
"logps/chosen": -340.63934326171875, |
|
"logps/rejected": -227.07791137695312, |
|
"loss": 0.1729, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.8438789248466492, |
|
"rewards/margins": 6.674716949462891, |
|
"rewards/rejected": -5.830838680267334, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.27247149415074784, |
|
"grad_norm": 179.72268316546433, |
|
"learning_rate": 4.560730268187236e-07, |
|
"logits/chosen": -2.389660358428955, |
|
"logits/rejected": -2.396211624145508, |
|
"logps/chosen": -332.08612060546875, |
|
"logps/rejected": -227.017578125, |
|
"loss": 0.1946, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.7443017959594727, |
|
"rewards/margins": 6.060439586639404, |
|
"rewards/rejected": -5.316138744354248, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.27839478750185104, |
|
"grad_norm": 110.66902509862196, |
|
"learning_rate": 4.531018119025989e-07, |
|
"logits/chosen": -2.4120264053344727, |
|
"logits/rejected": -2.4028701782226562, |
|
"logps/chosen": -314.36895751953125, |
|
"logps/rejected": -218.5847930908203, |
|
"loss": 0.1598, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.032317258417606354, |
|
"rewards/margins": 5.494265556335449, |
|
"rewards/rejected": -5.461948871612549, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.28431808085295424, |
|
"grad_norm": 214.74519309520247, |
|
"learning_rate": 4.5004372444744376e-07, |
|
"logits/chosen": -2.426173686981201, |
|
"logits/rejected": -2.408202648162842, |
|
"logps/chosen": -353.477294921875, |
|
"logps/rejected": -240.3783721923828, |
|
"loss": 0.2256, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.20403608679771423, |
|
"rewards/margins": 6.32252311706543, |
|
"rewards/rejected": -6.526559352874756, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.29024137420405743, |
|
"grad_norm": 191.0207951068833, |
|
"learning_rate": 4.4690007248600967e-07, |
|
"logits/chosen": -2.422132968902588, |
|
"logits/rejected": -2.40049409866333, |
|
"logps/chosen": -396.6956481933594, |
|
"logps/rejected": -240.05593872070312, |
|
"loss": 0.1501, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.5469577312469482, |
|
"rewards/margins": 7.562119483947754, |
|
"rewards/rejected": -7.015161037445068, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2961646675551607, |
|
"grad_norm": 98.30324866742373, |
|
"learning_rate": 4.436722006494701e-07, |
|
"logits/chosen": -2.439347505569458, |
|
"logits/rejected": -2.4101297855377197, |
|
"logps/chosen": -355.91400146484375, |
|
"logps/rejected": -236.58407592773438, |
|
"loss": 0.1413, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.17143169045448303, |
|
"rewards/margins": 7.068209648132324, |
|
"rewards/rejected": -6.896778106689453, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3020879609062639, |
|
"grad_norm": 172.09787160323572, |
|
"learning_rate": 4.4036148959228356e-07, |
|
"logits/chosen": -2.4719622135162354, |
|
"logits/rejected": -2.4255988597869873, |
|
"logps/chosen": -408.50079345703125, |
|
"logps/rejected": -240.2022247314453, |
|
"loss": 0.1282, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.17151732742786407, |
|
"rewards/margins": 7.430660247802734, |
|
"rewards/rejected": -7.259142875671387, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3080112542573671, |
|
"grad_norm": 213.31450495313408, |
|
"learning_rate": 4.3696935540164705e-07, |
|
"logits/chosen": -2.4813694953918457, |
|
"logits/rejected": -2.4537417888641357, |
|
"logps/chosen": -305.8951110839844, |
|
"logps/rejected": -212.90701293945312, |
|
"loss": 0.2087, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.024075621739029884, |
|
"rewards/margins": 6.6571245193481445, |
|
"rewards/rejected": -6.6812005043029785, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3139345476084703, |
|
"grad_norm": 83.43235360842944, |
|
"learning_rate": 4.334972489917947e-07, |
|
"logits/chosen": -2.4770638942718506, |
|
"logits/rejected": -2.4384360313415527, |
|
"logps/chosen": -343.23797607421875, |
|
"logps/rejected": -231.5509033203125, |
|
"loss": 0.138, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.25519540905952454, |
|
"rewards/margins": 7.639869689941406, |
|
"rewards/rejected": -7.3846755027771, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.31985784095957354, |
|
"grad_norm": 48.96905584146253, |
|
"learning_rate": 4.299466554833997e-07, |
|
"logits/chosen": -2.462367534637451, |
|
"logits/rejected": -2.433013439178467, |
|
"logps/chosen": -331.65179443359375, |
|
"logps/rejected": -237.80703735351562, |
|
"loss": 0.2046, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.43259042501449585, |
|
"rewards/margins": 7.181746482849121, |
|
"rewards/rejected": -7.614336967468262, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.32578113431067673, |
|
"grad_norm": 157.94476432481386, |
|
"learning_rate": 4.263190935683449e-07, |
|
"logits/chosen": -2.454803943634033, |
|
"logits/rejected": -2.4091312885284424, |
|
"logps/chosen": -346.77508544921875, |
|
"logps/rejected": -241.5703125, |
|
"loss": 0.1686, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.16140493750572205, |
|
"rewards/margins": 8.025360107421875, |
|
"rewards/rejected": -7.863955497741699, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.33170442766177993, |
|
"grad_norm": 144.6392465996477, |
|
"learning_rate": 4.2261611486013437e-07, |
|
"logits/chosen": -2.361588478088379, |
|
"logits/rejected": -2.340947389602661, |
|
"logps/chosen": -376.0790710449219, |
|
"logps/rejected": -261.4156799316406, |
|
"loss": 0.1378, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.15833595395088196, |
|
"rewards/margins": 7.758933067321777, |
|
"rewards/rejected": -7.917269706726074, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3376277210128832, |
|
"grad_norm": 65.04958454200869, |
|
"learning_rate": 4.188393032302233e-07, |
|
"logits/chosen": -2.3772928714752197, |
|
"logits/rejected": -2.3414306640625, |
|
"logps/chosen": -364.4604187011719, |
|
"logps/rejected": -261.02935791015625, |
|
"loss": 0.1429, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.16394826769828796, |
|
"rewards/margins": 8.307961463928223, |
|
"rewards/rejected": -8.471909523010254, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3435510143639864, |
|
"grad_norm": 39.92370071225025, |
|
"learning_rate": 4.1499027413055e-07, |
|
"logits/chosen": -2.4158213138580322, |
|
"logits/rejected": -2.400824546813965, |
|
"logps/chosen": -355.3429260253906, |
|
"logps/rejected": -234.93331909179688, |
|
"loss": 0.1184, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.20681849122047424, |
|
"rewards/margins": 8.111043930053711, |
|
"rewards/rejected": -7.904225826263428, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.3494743077150896, |
|
"grad_norm": 113.63629001311939, |
|
"learning_rate": 4.1107067390256056e-07, |
|
"logits/chosen": -2.3974995613098145, |
|
"logits/rejected": -2.385651111602783, |
|
"logps/chosen": -371.2859802246094, |
|
"logps/rejected": -251.9286346435547, |
|
"loss": 0.1818, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.022083889693021774, |
|
"rewards/margins": 8.15199089050293, |
|
"rewards/rejected": -8.174076080322266, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3553976010661928, |
|
"grad_norm": 76.65150743146755, |
|
"learning_rate": 4.0708217907302047e-07, |
|
"logits/chosen": -2.446026563644409, |
|
"logits/rejected": -2.446206569671631, |
|
"logps/chosen": -390.41510009765625, |
|
"logps/rejected": -249.01260375976562, |
|
"loss": 0.1302, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1251598298549652, |
|
"rewards/margins": 8.266278266906738, |
|
"rewards/rejected": -8.141119003295898, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.36132089441729603, |
|
"grad_norm": 154.61613568075327, |
|
"learning_rate": 4.030264956369157e-07, |
|
"logits/chosen": -2.431563377380371, |
|
"logits/rejected": -2.402116298675537, |
|
"logps/chosen": -367.7977600097656, |
|
"logps/rejected": -256.6099548339844, |
|
"loss": 0.1303, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.1916094869375229, |
|
"rewards/margins": 8.200936317443848, |
|
"rewards/rejected": -8.392545700073242, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.36724418776839923, |
|
"grad_norm": 365.6788351549646, |
|
"learning_rate": 3.989053583277492e-07, |
|
"logits/chosen": -2.437472105026245, |
|
"logits/rejected": -2.424696445465088, |
|
"logps/chosen": -364.15374755859375, |
|
"logps/rejected": -273.16595458984375, |
|
"loss": 0.2123, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -1.120966911315918, |
|
"rewards/margins": 7.895718574523926, |
|
"rewards/rejected": -9.016683578491211, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.37316748111950243, |
|
"grad_norm": 170.20992171320333, |
|
"learning_rate": 3.947205298755447e-07, |
|
"logits/chosen": -2.448801279067993, |
|
"logits/rejected": -2.427565813064575, |
|
"logps/chosen": -411.0587463378906, |
|
"logps/rejected": -271.62652587890625, |
|
"loss": 0.139, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.17977580428123474, |
|
"rewards/margins": 8.981353759765625, |
|
"rewards/rejected": -9.161130905151367, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.37909077447060563, |
|
"grad_norm": 177.25293521329584, |
|
"learning_rate": 3.9047380025287634e-07, |
|
"logits/chosen": -2.372959613800049, |
|
"logits/rejected": -2.3525047302246094, |
|
"logps/chosen": -356.154296875, |
|
"logps/rejected": -274.4513854980469, |
|
"loss": 0.1744, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.8354924321174622, |
|
"rewards/margins": 8.103582382202148, |
|
"rewards/rejected": -8.93907356262207, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3850140678217089, |
|
"grad_norm": 360.5455787004992, |
|
"learning_rate": 3.8616698590924523e-07, |
|
"logits/chosen": -2.4072346687316895, |
|
"logits/rejected": -2.383105993270874, |
|
"logps/chosen": -351.6363220214844, |
|
"logps/rejected": -239.79788208007812, |
|
"loss": 0.2141, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.6761496663093567, |
|
"rewards/margins": 7.148809909820557, |
|
"rewards/rejected": -7.824960231781006, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3909373611728121, |
|
"grad_norm": 270.4274889017812, |
|
"learning_rate": 3.8180192899413123e-07, |
|
"logits/chosen": -2.4213879108428955, |
|
"logits/rejected": -2.4075303077697754, |
|
"logps/chosen": -417.3751525878906, |
|
"logps/rejected": -265.44854736328125, |
|
"loss": 0.1753, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.6145623326301575, |
|
"rewards/margins": 9.045318603515625, |
|
"rewards/rejected": -8.430756568908691, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.3968606545239153, |
|
"grad_norm": 167.55864604832982, |
|
"learning_rate": 3.7738049656905225e-07, |
|
"logits/chosen": -2.42012619972229, |
|
"logits/rejected": -2.407224416732788, |
|
"logps/chosen": -367.69439697265625, |
|
"logps/rejected": -252.48849487304688, |
|
"loss": 0.1777, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.16578371822834015, |
|
"rewards/margins": 8.239130020141602, |
|
"rewards/rejected": -8.073347091674805, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.40278394787501853, |
|
"grad_norm": 199.41539357853415, |
|
"learning_rate": 3.7290457980896787e-07, |
|
"logits/chosen": -2.481067180633545, |
|
"logits/rejected": -2.4814565181732178, |
|
"logps/chosen": -396.1634216308594, |
|
"logps/rejected": -250.05990600585938, |
|
"loss": 0.1805, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.06804431229829788, |
|
"rewards/margins": 7.441187381744385, |
|
"rewards/rejected": -7.5092315673828125, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.40870724122612173, |
|
"grad_norm": 127.14212704342435, |
|
"learning_rate": 3.68376093193369e-07, |
|
"logits/chosen": -2.447186231613159, |
|
"logits/rejected": -2.441771984100342, |
|
"logps/chosen": -361.73516845703125, |
|
"logps/rejected": -253.1171417236328, |
|
"loss": 0.2343, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 0.23877505958080292, |
|
"rewards/margins": 8.379014015197754, |
|
"rewards/rejected": -8.140238761901855, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.41463053457722493, |
|
"grad_norm": 156.38290182401465, |
|
"learning_rate": 3.637969736873992e-07, |
|
"logits/chosen": -2.468182325363159, |
|
"logits/rejected": -2.4434332847595215, |
|
"logps/chosen": -365.93463134765625, |
|
"logps/rejected": -235.07699584960938, |
|
"loss": 0.1846, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 0.2839972972869873, |
|
"rewards/margins": 7.469516754150391, |
|
"rewards/rejected": -7.185519218444824, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.42055382792832813, |
|
"grad_norm": 80.92449178173787, |
|
"learning_rate": 3.591691799133587e-07, |
|
"logits/chosen": -2.4627020359039307, |
|
"logits/rejected": -2.4086921215057373, |
|
"logps/chosen": -399.5291442871094, |
|
"logps/rejected": -245.04385375976562, |
|
"loss": 0.1367, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.26195061206817627, |
|
"rewards/margins": 8.103439331054688, |
|
"rewards/rejected": -7.841488838195801, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.4264771212794314, |
|
"grad_norm": 164.79458174222637, |
|
"learning_rate": 3.5449469131294476e-07, |
|
"logits/chosen": -2.4840645790100098, |
|
"logits/rejected": -2.4384331703186035, |
|
"logps/chosen": -356.69952392578125, |
|
"logps/rejected": -245.77041625976562, |
|
"loss": 0.1222, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.7162678837776184, |
|
"rewards/margins": 7.782705783843994, |
|
"rewards/rejected": -8.49897289276123, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.4324004146305346, |
|
"grad_norm": 172.5370543885783, |
|
"learning_rate": 3.497755073005868e-07, |
|
"logits/chosen": -2.47273850440979, |
|
"logits/rejected": -2.4551682472229004, |
|
"logps/chosen": -394.3757629394531, |
|
"logps/rejected": -252.3651885986328, |
|
"loss": 0.1914, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.40301981568336487, |
|
"rewards/margins": 7.574839115142822, |
|
"rewards/rejected": -7.9778594970703125, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.4383237079816378, |
|
"grad_norm": 133.2758942704595, |
|
"learning_rate": 3.4501364640823926e-07, |
|
"logits/chosen": -2.5142226219177246, |
|
"logits/rejected": -2.48344087600708, |
|
"logps/chosen": -351.15582275390625, |
|
"logps/rejected": -255.9786376953125, |
|
"loss": 0.1843, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.5328763723373413, |
|
"rewards/margins": 7.858250617980957, |
|
"rewards/rejected": -8.39112663269043, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.44424700133274103, |
|
"grad_norm": 151.90351812794674, |
|
"learning_rate": 3.402111454219966e-07, |
|
"logits/chosen": -2.4132637977600098, |
|
"logits/rejected": -2.3917102813720703, |
|
"logps/chosen": -364.5772705078125, |
|
"logps/rejected": -241.1504364013672, |
|
"loss": 0.2091, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.33990636467933655, |
|
"rewards/margins": 7.563208103179932, |
|
"rewards/rejected": -7.90311336517334, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.45017029468384423, |
|
"grad_norm": 175.38346833521052, |
|
"learning_rate": 3.353700585109005e-07, |
|
"logits/chosen": -2.4272594451904297, |
|
"logits/rejected": -2.4220120906829834, |
|
"logps/chosen": -374.5780334472656, |
|
"logps/rejected": -245.2948760986328, |
|
"loss": 0.1494, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.7049421668052673, |
|
"rewards/margins": 8.787199020385742, |
|
"rewards/rejected": -8.082255363464355, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.45609358803494743, |
|
"grad_norm": 105.96376988895295, |
|
"learning_rate": 3.304924563483129e-07, |
|
"logits/chosen": -2.398015260696411, |
|
"logits/rejected": -2.389971971511841, |
|
"logps/chosen": -383.84344482421875, |
|
"logps/rejected": -238.3228759765625, |
|
"loss": 0.1439, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.1009693592786789, |
|
"rewards/margins": 8.56285285949707, |
|
"rewards/rejected": -8.461882591247559, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.4620168813860506, |
|
"grad_norm": 210.20985866366246, |
|
"learning_rate": 3.255804252262283e-07, |
|
"logits/chosen": -2.450796604156494, |
|
"logits/rejected": -2.423675060272217, |
|
"logps/chosen": -340.67572021484375, |
|
"logps/rejected": -249.12155151367188, |
|
"loss": 0.1576, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.6850844621658325, |
|
"rewards/margins": 7.610543251037598, |
|
"rewards/rejected": -8.29562759399414, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.4679401747371539, |
|
"grad_norm": 38.37233058357723, |
|
"learning_rate": 3.2063606616290626e-07, |
|
"logits/chosen": -2.3822109699249268, |
|
"logits/rejected": -2.354607105255127, |
|
"logps/chosen": -382.82598876953125, |
|
"logps/rejected": -249.13320922851562, |
|
"loss": 0.1334, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.23880510032176971, |
|
"rewards/margins": 9.092573165893555, |
|
"rewards/rejected": -8.853767395019531, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4738634680882571, |
|
"grad_norm": 116.83040603007437, |
|
"learning_rate": 3.1566149400420523e-07, |
|
"logits/chosen": -2.448554515838623, |
|
"logits/rejected": -2.4384427070617676, |
|
"logps/chosen": -364.5985107421875, |
|
"logps/rejected": -256.18109130859375, |
|
"loss": 0.1401, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.08134280145168304, |
|
"rewards/margins": 8.44970989227295, |
|
"rewards/rejected": -8.368366241455078, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4797867614393603, |
|
"grad_norm": 120.81771343753893, |
|
"learning_rate": 3.1065883651900087e-07, |
|
"logits/chosen": -2.403247356414795, |
|
"logits/rejected": -2.3994150161743164, |
|
"logps/chosen": -369.07373046875, |
|
"logps/rejected": -260.661865234375, |
|
"loss": 0.1515, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.24362042546272278, |
|
"rewards/margins": 8.64600658416748, |
|
"rewards/rejected": -8.402385711669922, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4857100547904635, |
|
"grad_norm": 163.57574382904446, |
|
"learning_rate": 3.056302334890786e-07, |
|
"logits/chosen": -2.472740888595581, |
|
"logits/rejected": -2.447871685028076, |
|
"logps/chosen": -364.73382568359375, |
|
"logps/rejected": -243.4113311767578, |
|
"loss": 0.1651, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.1390383541584015, |
|
"rewards/margins": 7.885109901428223, |
|
"rewards/rejected": -7.746070861816406, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.49163334814156673, |
|
"grad_norm": 110.291493965559, |
|
"learning_rate": 3.0057783579388586e-07, |
|
"logits/chosen": -2.3814120292663574, |
|
"logits/rejected": -2.3707213401794434, |
|
"logps/chosen": -380.00115966796875, |
|
"logps/rejected": -259.52813720703125, |
|
"loss": 0.1149, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.010458474978804588, |
|
"rewards/margins": 8.064732551574707, |
|
"rewards/rejected": -8.075190544128418, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4975566414926699, |
|
"grad_norm": 149.32298193701925, |
|
"learning_rate": 2.9550380449053907e-07, |
|
"logits/chosen": -2.4556055068969727, |
|
"logits/rejected": -2.4424943923950195, |
|
"logps/chosen": -353.40087890625, |
|
"logps/rejected": -249.0240020751953, |
|
"loss": 0.1836, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.6178598403930664, |
|
"rewards/margins": 6.908316135406494, |
|
"rewards/rejected": -7.526176452636719, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5034799348437732, |
|
"grad_norm": 298.0495441928926, |
|
"learning_rate": 2.904103098894767e-07, |
|
"logits/chosen": -2.4620985984802246, |
|
"logits/rejected": -2.4430508613586426, |
|
"logps/chosen": -381.40338134765625, |
|
"logps/rejected": -244.94735717773438, |
|
"loss": 0.1312, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.1066194549202919, |
|
"rewards/margins": 8.322629928588867, |
|
"rewards/rejected": -8.42924976348877, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5094032281948764, |
|
"grad_norm": 44.65987676821851, |
|
"learning_rate": 2.852995306261545e-07, |
|
"logits/chosen": -2.4761202335357666, |
|
"logits/rejected": -2.458343982696533, |
|
"logps/chosen": -416.10015869140625, |
|
"logps/rejected": -269.155029296875, |
|
"loss": 0.1904, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5461829900741577, |
|
"rewards/margins": 8.20594310760498, |
|
"rewards/rejected": -8.75212574005127, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.5153265215459796, |
|
"grad_norm": 138.61047314539718, |
|
"learning_rate": 2.801736527291797e-07, |
|
"logits/chosen": -2.4353713989257812, |
|
"logits/rejected": -2.440162420272827, |
|
"logps/chosen": -368.56671142578125, |
|
"logps/rejected": -262.32000732421875, |
|
"loss": 0.1165, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.6010546684265137, |
|
"rewards/margins": 8.25703239440918, |
|
"rewards/rejected": -8.858087539672852, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.5212498148970828, |
|
"grad_norm": 32.99592732987078, |
|
"learning_rate": 2.750348686852836e-07, |
|
"logits/chosen": -2.4713826179504395, |
|
"logits/rejected": -2.4460389614105225, |
|
"logps/chosen": -395.4850769042969, |
|
"logps/rejected": -263.14697265625, |
|
"loss": 0.0963, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.47209230065345764, |
|
"rewards/margins": 8.793320655822754, |
|
"rewards/rejected": -9.265413284301758, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.527173108248186, |
|
"grad_norm": 225.08525680844807, |
|
"learning_rate": 2.69885376501531e-07, |
|
"logits/chosen": -2.4371562004089355, |
|
"logits/rejected": -2.4292545318603516, |
|
"logps/chosen": -368.02349853515625, |
|
"logps/rejected": -282.25390625, |
|
"loss": 0.0939, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.43533068895339966, |
|
"rewards/margins": 8.678312301635742, |
|
"rewards/rejected": -9.113642692565918, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.5330964015992892, |
|
"grad_norm": 117.84513266560047, |
|
"learning_rate": 2.647273787651687e-07, |
|
"logits/chosen": -2.4487619400024414, |
|
"logits/rejected": -2.401111125946045, |
|
"logps/chosen": -412.74359130859375, |
|
"logps/rejected": -252.1238250732422, |
|
"loss": 0.1775, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.4897252917289734, |
|
"rewards/margins": 8.060205459594727, |
|
"rewards/rejected": -8.549931526184082, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5390196949503924, |
|
"grad_norm": 109.88865766710877, |
|
"learning_rate": 2.5956308170151526e-07, |
|
"logits/chosen": -2.403801441192627, |
|
"logits/rejected": -2.3616394996643066, |
|
"logps/chosen": -408.7337951660156, |
|
"logps/rejected": -258.9072570800781, |
|
"loss": 0.1848, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.19426563382148743, |
|
"rewards/margins": 8.246191024780273, |
|
"rewards/rejected": -8.44045639038086, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.5449429883014957, |
|
"grad_norm": 129.4752751715463, |
|
"learning_rate": 2.543946942302944e-07, |
|
"logits/chosen": -2.4435765743255615, |
|
"logits/rejected": -2.440274477005005, |
|
"logps/chosen": -338.3968505859375, |
|
"logps/rejected": -252.08511352539062, |
|
"loss": 0.1796, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.5266432762145996, |
|
"rewards/margins": 7.261737823486328, |
|
"rewards/rejected": -7.7883806228637695, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5508662816525989, |
|
"grad_norm": 121.9458874503311, |
|
"learning_rate": 2.492244270208158e-07, |
|
"logits/chosen": -2.441021680831909, |
|
"logits/rejected": -2.4099669456481934, |
|
"logps/chosen": -378.41082763671875, |
|
"logps/rejected": -243.3146209716797, |
|
"loss": 0.1649, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.03578373044729233, |
|
"rewards/margins": 7.419234275817871, |
|
"rewards/rejected": -7.455018520355225, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5567895750037021, |
|
"grad_norm": 100.11022642436808, |
|
"learning_rate": 2.440544915464078e-07, |
|
"logits/chosen": -2.3852946758270264, |
|
"logits/rejected": -2.384293794631958, |
|
"logps/chosen": -395.0909118652344, |
|
"logps/rejected": -283.81109619140625, |
|
"loss": 0.1261, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.41745632886886597, |
|
"rewards/margins": 8.085180282592773, |
|
"rewards/rejected": -8.502635955810547, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5627128683548053, |
|
"grad_norm": 239.01205162989254, |
|
"learning_rate": 2.3888709913850593e-07, |
|
"logits/chosen": -2.415767192840576, |
|
"logits/rejected": -2.4106380939483643, |
|
"logps/chosen": -308.04681396484375, |
|
"logps/rejected": -246.5779266357422, |
|
"loss": 0.2266, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.8913130760192871, |
|
"rewards/margins": 7.0888872146606445, |
|
"rewards/rejected": -7.980200290679932, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5686361617059085, |
|
"grad_norm": 63.97848384116781, |
|
"learning_rate": 2.337244600408025e-07, |
|
"logits/chosen": -2.3705592155456543, |
|
"logits/rejected": -2.3697023391723633, |
|
"logps/chosen": -387.23468017578125, |
|
"logps/rejected": -259.0006408691406, |
|
"loss": 0.1191, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.3846256732940674, |
|
"rewards/margins": 8.353618621826172, |
|
"rewards/rejected": -8.738243103027344, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5745594550570117, |
|
"grad_norm": 117.6322527275228, |
|
"learning_rate": 2.2856878246386085e-07, |
|
"logits/chosen": -2.3312573432922363, |
|
"logits/rejected": -2.337036371231079, |
|
"logps/chosen": -396.6022644042969, |
|
"logps/rejected": -264.2213439941406, |
|
"loss": 0.1741, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.13234379887580872, |
|
"rewards/margins": 8.489058494567871, |
|
"rewards/rejected": -8.621402740478516, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5804827484081149, |
|
"grad_norm": 104.9036090682925, |
|
"learning_rate": 2.2342227164060035e-07, |
|
"logits/chosen": -2.3327765464782715, |
|
"logits/rejected": -2.3284411430358887, |
|
"logps/chosen": -389.34027099609375, |
|
"logps/rejected": -267.3720703125, |
|
"loss": 0.0761, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.05067775771021843, |
|
"rewards/margins": 8.64169692993164, |
|
"rewards/rejected": -8.591019630432129, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5864060417592182, |
|
"grad_norm": 159.50861121207825, |
|
"learning_rate": 2.182871288830533e-07, |
|
"logits/chosen": -2.286221742630005, |
|
"logits/rejected": -2.3094284534454346, |
|
"logps/chosen": -356.39630126953125, |
|
"logps/rejected": -273.9451599121094, |
|
"loss": 0.1836, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.7623409032821655, |
|
"rewards/margins": 7.787258148193359, |
|
"rewards/rejected": -8.549599647521973, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5923293351103214, |
|
"grad_norm": 133.61865184419023, |
|
"learning_rate": 2.131655506408007e-07, |
|
"logits/chosen": -2.295893430709839, |
|
"logits/rejected": -2.2877814769744873, |
|
"logps/chosen": -390.1469421386719, |
|
"logps/rejected": -258.03076171875, |
|
"loss": 0.1684, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.3687962293624878, |
|
"rewards/margins": 8.301212310791016, |
|
"rewards/rejected": -8.670007705688477, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5982526284614246, |
|
"grad_norm": 281.753571055999, |
|
"learning_rate": 2.0805972756148643e-07, |
|
"logits/chosen": -2.3332886695861816, |
|
"logits/rejected": -2.3315188884735107, |
|
"logps/chosen": -409.4767150878906, |
|
"logps/rejected": -261.0493469238281, |
|
"loss": 0.1785, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.04291856661438942, |
|
"rewards/margins": 8.258207321166992, |
|
"rewards/rejected": -8.301126480102539, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.6041759218125278, |
|
"grad_norm": 284.7843229605494, |
|
"learning_rate": 2.0297184355381432e-07, |
|
"logits/chosen": -2.3366830348968506, |
|
"logits/rejected": -2.3286259174346924, |
|
"logps/chosen": -380.1471862792969, |
|
"logps/rejected": -256.72039794921875, |
|
"loss": 0.1423, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.037542905658483505, |
|
"rewards/margins": 8.408895492553711, |
|
"rewards/rejected": -8.37135124206543, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.610099215163631, |
|
"grad_norm": 211.9761172407754, |
|
"learning_rate": 1.9790407485342638e-07, |
|
"logits/chosen": -2.2436161041259766, |
|
"logits/rejected": -2.2300117015838623, |
|
"logps/chosen": -398.33917236328125, |
|
"logps/rejected": -265.8940734863281, |
|
"loss": 0.1086, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.20479007065296173, |
|
"rewards/margins": 8.657182693481445, |
|
"rewards/rejected": -8.861973762512207, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.6160225085147342, |
|
"grad_norm": 170.42441601315315, |
|
"learning_rate": 1.928585890920641e-07, |
|
"logits/chosen": -2.310929536819458, |
|
"logits/rejected": -2.305142879486084, |
|
"logps/chosen": -329.87359619140625, |
|
"logps/rejected": -256.7166748046875, |
|
"loss": 0.1104, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.6634355783462524, |
|
"rewards/margins": 8.081941604614258, |
|
"rewards/rejected": -8.745377540588379, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.6219458018658374, |
|
"grad_norm": 128.54909030583076, |
|
"learning_rate": 1.8783754437040902e-07, |
|
"logits/chosen": -2.3843979835510254, |
|
"logits/rejected": -2.374628782272339, |
|
"logps/chosen": -384.445068359375, |
|
"logps/rejected": -262.91778564453125, |
|
"loss": 0.1279, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.14523005485534668, |
|
"rewards/margins": 9.181239128112793, |
|
"rewards/rejected": -9.326468467712402, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6278690952169406, |
|
"grad_norm": 307.8448068760573, |
|
"learning_rate": 1.8284308833500118e-07, |
|
"logits/chosen": -2.3700027465820312, |
|
"logits/rejected": -2.3581337928771973, |
|
"logps/chosen": -355.612060546875, |
|
"logps/rejected": -268.969970703125, |
|
"loss": 0.133, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.7148059010505676, |
|
"rewards/margins": 8.257380485534668, |
|
"rewards/rejected": -8.972186088562012, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.6337923885680439, |
|
"grad_norm": 214.38553119597148, |
|
"learning_rate": 1.7787735725962756e-07, |
|
"logits/chosen": -2.358398199081421, |
|
"logits/rejected": -2.3408474922180176, |
|
"logps/chosen": -337.56427001953125, |
|
"logps/rejected": -256.54119873046875, |
|
"loss": 0.2134, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -1.1531927585601807, |
|
"rewards/margins": 7.760645389556885, |
|
"rewards/rejected": -8.913838386535645, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.6397156819191471, |
|
"grad_norm": 175.56218680703643, |
|
"learning_rate": 1.7294247513157616e-07, |
|
"logits/chosen": -2.3990116119384766, |
|
"logits/rejected": -2.3715598583221436, |
|
"logps/chosen": -430.0047302246094, |
|
"logps/rejected": -277.5758361816406, |
|
"loss": 0.1585, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.28831297159194946, |
|
"rewards/margins": 9.353979110717773, |
|
"rewards/rejected": -9.642291069030762, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.6456389752702503, |
|
"grad_norm": 251.12704474839504, |
|
"learning_rate": 1.6804055274314494e-07, |
|
"logits/chosen": -2.3074963092803955, |
|
"logits/rejected": -2.3215384483337402, |
|
"logps/chosen": -360.34307861328125, |
|
"logps/rejected": -262.5448913574219, |
|
"loss": 0.144, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.527413010597229, |
|
"rewards/margins": 7.982884883880615, |
|
"rewards/rejected": -8.510297775268555, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.6515622686213535, |
|
"grad_norm": 199.1781794411605, |
|
"learning_rate": 1.6317368678879496e-07, |
|
"logits/chosen": -2.358985662460327, |
|
"logits/rejected": -2.3370273113250732, |
|
"logps/chosen": -427.634521484375, |
|
"logps/rejected": -271.89276123046875, |
|
"loss": 0.1672, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.5357122421264648, |
|
"rewards/margins": 8.577353477478027, |
|
"rewards/rejected": -9.113065719604492, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6574855619724567, |
|
"grad_norm": 117.97757864532069, |
|
"learning_rate": 1.5834395896833281e-07, |
|
"logits/chosen": -2.4176013469696045, |
|
"logits/rejected": -2.4249093532562256, |
|
"logps/chosen": -401.90667724609375, |
|
"logps/rejected": -275.45086669921875, |
|
"loss": 0.1541, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.13858655095100403, |
|
"rewards/margins": 9.566910743713379, |
|
"rewards/rejected": -9.705496788024902, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.6634088553235599, |
|
"grad_norm": 258.70885157576424, |
|
"learning_rate": 1.535534350965075e-07, |
|
"logits/chosen": -2.3172340393066406, |
|
"logits/rejected": -2.309823989868164, |
|
"logps/chosen": -340.2889709472656, |
|
"logps/rejected": -265.4190979003906, |
|
"loss": 0.1473, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.003075361251831, |
|
"rewards/margins": 8.131772994995117, |
|
"rewards/rejected": -9.134848594665527, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.6693321486746631, |
|
"grad_norm": 201.96502150689784, |
|
"learning_rate": 1.4880416421940154e-07, |
|
"logits/chosen": -2.4160609245300293, |
|
"logits/rejected": -2.4233767986297607, |
|
"logps/chosen": -405.70684814453125, |
|
"logps/rejected": -260.34912109375, |
|
"loss": 0.1724, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.08480462431907654, |
|
"rewards/margins": 8.918376922607422, |
|
"rewards/rejected": -8.833572387695312, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6752554420257664, |
|
"grad_norm": 181.74682648839334, |
|
"learning_rate": 1.4409817773799459e-07, |
|
"logits/chosen": -2.4215950965881348, |
|
"logits/rejected": -2.4049665927886963, |
|
"logps/chosen": -366.7375183105469, |
|
"logps/rejected": -254.3837432861328, |
|
"loss": 0.0756, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.5654062032699585, |
|
"rewards/margins": 8.568774223327637, |
|
"rewards/rejected": -9.134181022644043, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6811787353768696, |
|
"grad_norm": 67.27024810633608, |
|
"learning_rate": 1.3943748853927385e-07, |
|
"logits/chosen": -2.405170202255249, |
|
"logits/rejected": -2.371568202972412, |
|
"logps/chosen": -390.42718505859375, |
|
"logps/rejected": -272.73919677734375, |
|
"loss": 0.1351, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.37090998888015747, |
|
"rewards/margins": 8.646610260009766, |
|
"rewards/rejected": -9.017520904541016, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6871020287279728, |
|
"grad_norm": 55.76657618504751, |
|
"learning_rate": 1.3482409013526436e-07, |
|
"logits/chosen": -2.441098213195801, |
|
"logits/rejected": -2.4157867431640625, |
|
"logps/chosen": -384.2054443359375, |
|
"logps/rejected": -268.0479736328125, |
|
"loss": 0.1027, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.44497281312942505, |
|
"rewards/margins": 8.480072021484375, |
|
"rewards/rejected": -8.925044059753418, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.693025322079076, |
|
"grad_norm": 25.040104944811635, |
|
"learning_rate": 1.302599558103456e-07, |
|
"logits/chosen": -2.3982086181640625, |
|
"logits/rejected": -2.385756015777588, |
|
"logps/chosen": -382.56939697265625, |
|
"logps/rejected": -260.25616455078125, |
|
"loss": 0.1179, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5692565441131592, |
|
"rewards/margins": 8.688024520874023, |
|
"rewards/rejected": -9.257280349731445, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6989486154301792, |
|
"grad_norm": 296.46624172556943, |
|
"learning_rate": 1.257470377772214e-07, |
|
"logits/chosen": -2.3739066123962402, |
|
"logits/rejected": -2.3816215991973877, |
|
"logps/chosen": -385.3483581542969, |
|
"logps/rejected": -271.0584411621094, |
|
"loss": 0.1722, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.0011343419319018722, |
|
"rewards/margins": 9.203813552856445, |
|
"rewards/rejected": -9.202679634094238, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.7048719087812824, |
|
"grad_norm": 62.58357200837184, |
|
"learning_rate": 1.2128726634190046e-07, |
|
"logits/chosen": -2.4125571250915527, |
|
"logits/rejected": -2.3731584548950195, |
|
"logps/chosen": -360.48260498046875, |
|
"logps/rejected": -257.780517578125, |
|
"loss": 0.1151, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.00764580350369215, |
|
"rewards/margins": 8.383014678955078, |
|
"rewards/rejected": -8.375368118286133, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7107952021323856, |
|
"grad_norm": 104.3167050157556, |
|
"learning_rate": 1.1688254907804992e-07, |
|
"logits/chosen": -2.4090094566345215, |
|
"logits/rejected": -2.4037535190582275, |
|
"logps/chosen": -446.6856384277344, |
|
"logps/rejected": -272.03125, |
|
"loss": 0.1451, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.24328398704528809, |
|
"rewards/margins": 8.856546401977539, |
|
"rewards/rejected": -8.613263130187988, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7167184954834889, |
|
"grad_norm": 167.28701038200384, |
|
"learning_rate": 1.1253477001106956e-07, |
|
"logits/chosen": -2.4815921783447266, |
|
"logits/rejected": -2.4600720405578613, |
|
"logps/chosen": -373.46539306640625, |
|
"logps/rejected": -249.20217895507812, |
|
"loss": 0.1822, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.3091612756252289, |
|
"rewards/margins": 8.595071792602539, |
|
"rewards/rejected": -8.285909652709961, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.7226417888345921, |
|
"grad_norm": 103.53904259555343, |
|
"learning_rate": 1.0824578881224065e-07, |
|
"logits/chosen": -2.4788851737976074, |
|
"logits/rejected": -2.473902940750122, |
|
"logps/chosen": -348.9431457519531, |
|
"logps/rejected": -238.5637664794922, |
|
"loss": 0.1396, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.04494175314903259, |
|
"rewards/margins": 7.9483323097229, |
|
"rewards/rejected": -7.993273735046387, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.7285650821856953, |
|
"grad_norm": 60.68963545277756, |
|
"learning_rate": 1.0401744000328918e-07, |
|
"logits/chosen": -2.423682451248169, |
|
"logits/rejected": -2.424591302871704, |
|
"logps/chosen": -362.5735168457031, |
|
"logps/rejected": -250.6082305908203, |
|
"loss": 0.0947, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.03537973761558533, |
|
"rewards/margins": 8.45991325378418, |
|
"rewards/rejected": -8.42453384399414, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.7344883755367985, |
|
"grad_norm": 178.49917347104252, |
|
"learning_rate": 9.985153217170902e-08, |
|
"logits/chosen": -2.441357135772705, |
|
"logits/rejected": -2.414604902267456, |
|
"logps/chosen": -331.5675964355469, |
|
"logps/rejected": -249.1182861328125, |
|
"loss": 0.1617, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.4034241735935211, |
|
"rewards/margins": 7.731410026550293, |
|
"rewards/rejected": -8.134834289550781, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.7404116688879017, |
|
"grad_norm": 150.3556045078429, |
|
"learning_rate": 9.574984719717553e-08, |
|
"logits/chosen": -2.402959108352661, |
|
"logits/rejected": -2.3822813034057617, |
|
"logps/chosen": -344.8562316894531, |
|
"logps/rejected": -261.72198486328125, |
|
"loss": 0.1141, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.2789481282234192, |
|
"rewards/margins": 8.856972694396973, |
|
"rewards/rejected": -9.135921478271484, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.7463349622390049, |
|
"grad_norm": 107.9110175024558, |
|
"learning_rate": 9.171413948938459e-08, |
|
"logits/chosen": -2.4185938835144043, |
|
"logits/rejected": -2.4270195960998535, |
|
"logps/chosen": -351.55206298828125, |
|
"logps/rejected": -245.40463256835938, |
|
"loss": 0.1194, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.38523998856544495, |
|
"rewards/margins": 8.319580078125, |
|
"rewards/rejected": -8.70482063293457, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.7522582555901081, |
|
"grad_norm": 201.2050831618457, |
|
"learning_rate": 8.774613523764049e-08, |
|
"logits/chosen": -2.417402744293213, |
|
"logits/rejected": -2.3926000595092773, |
|
"logps/chosen": -295.12255859375, |
|
"logps/rejected": -243.2334442138672, |
|
"loss": 0.1174, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.7759536504745483, |
|
"rewards/margins": 7.477177619934082, |
|
"rewards/rejected": -8.253131866455078, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.7581815489412113, |
|
"grad_norm": 118.14466939117713, |
|
"learning_rate": 8.384753167251412e-08, |
|
"logits/chosen": -2.4332449436187744, |
|
"logits/rejected": -2.4204506874084473, |
|
"logps/chosen": -347.2571716308594, |
|
"logps/rejected": -239.56527709960938, |
|
"loss": 0.1154, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.4777204990386963, |
|
"rewards/margins": 7.958296775817871, |
|
"rewards/rejected": -8.436017990112305, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.7641048422923146, |
|
"grad_norm": 115.9052038218948, |
|
"learning_rate": 8.001999633988942e-08, |
|
"logits/chosen": -2.409921169281006, |
|
"logits/rejected": -2.38704776763916, |
|
"logps/chosen": -343.9907531738281, |
|
"logps/rejected": -241.80130004882812, |
|
"loss": 0.1142, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.26248881220817566, |
|
"rewards/margins": 8.137993812561035, |
|
"rewards/rejected": -8.400481224060059, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.7700281356434178, |
|
"grad_norm": 44.99756218547529, |
|
"learning_rate": 7.62651663877042e-08, |
|
"logits/chosen": -2.431091547012329, |
|
"logits/rejected": -2.4076178073883057, |
|
"logps/chosen": -413.41339111328125, |
|
"logps/rejected": -272.5387268066406, |
|
"loss": 0.1154, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.2713491916656494, |
|
"rewards/margins": 8.43775749206543, |
|
"rewards/rejected": -8.709107398986816, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.775951428994521, |
|
"grad_norm": 271.4269072656633, |
|
"learning_rate": 7.258464786569549e-08, |
|
"logits/chosen": -2.382718086242676, |
|
"logits/rejected": -2.3769688606262207, |
|
"logps/chosen": -327.7436218261719, |
|
"logps/rejected": -232.5410614013672, |
|
"loss": 0.1171, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.5554865002632141, |
|
"rewards/margins": 7.468624114990234, |
|
"rewards/rejected": -8.024110794067383, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.7818747223456242, |
|
"grad_norm": 101.64320308184249, |
|
"learning_rate": 6.898001503844483e-08, |
|
"logits/chosen": -2.417436361312866, |
|
"logits/rejected": -2.4156861305236816, |
|
"logps/chosen": -355.09088134765625, |
|
"logps/rejected": -256.650634765625, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.4817001223564148, |
|
"rewards/margins": 8.431524276733398, |
|
"rewards/rejected": -8.913224220275879, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7877980156967274, |
|
"grad_norm": 70.70476612750028, |
|
"learning_rate": 6.545280971202014e-08, |
|
"logits/chosen": -2.3889780044555664, |
|
"logits/rejected": -2.3656888008117676, |
|
"logps/chosen": -353.8454895019531, |
|
"logps/rejected": -263.7891540527344, |
|
"loss": 0.1696, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.36209869384765625, |
|
"rewards/margins": 8.301480293273926, |
|
"rewards/rejected": -8.663579940795898, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7937213090478306, |
|
"grad_norm": 115.20773379897521, |
|
"learning_rate": 6.200454057450022e-08, |
|
"logits/chosen": -2.4155004024505615, |
|
"logits/rejected": -2.3988840579986572, |
|
"logps/chosen": -358.6512756347656, |
|
"logps/rejected": -260.19317626953125, |
|
"loss": 0.1052, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.13175497949123383, |
|
"rewards/margins": 8.193792343139648, |
|
"rewards/rejected": -8.325546264648438, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.7996446023989338, |
|
"grad_norm": 348.05960789906396, |
|
"learning_rate": 5.863668255066492e-08, |
|
"logits/chosen": -2.415785312652588, |
|
"logits/rejected": -2.402935028076172, |
|
"logps/chosen": -328.21258544921875, |
|
"logps/rejected": -232.1653289794922, |
|
"loss": 0.0952, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.03695619851350784, |
|
"rewards/margins": 8.016546249389648, |
|
"rewards/rejected": -8.053503036499023, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.8055678957500371, |
|
"grad_norm": 164.95215219581857, |
|
"learning_rate": 5.53506761711274e-08, |
|
"logits/chosen": -2.4385123252868652, |
|
"logits/rejected": -2.410877227783203, |
|
"logps/chosen": -429.80712890625, |
|
"logps/rejected": -269.0475769042969, |
|
"loss": 0.184, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.22647914290428162, |
|
"rewards/margins": 9.079859733581543, |
|
"rewards/rejected": -8.85338020324707, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.8114911891011403, |
|
"grad_norm": 153.76387128912395, |
|
"learning_rate": 5.2147926956177174e-08, |
|
"logits/chosen": -2.400052309036255, |
|
"logits/rejected": -2.3625330924987793, |
|
"logps/chosen": -375.1744079589844, |
|
"logps/rejected": -251.15213012695312, |
|
"loss": 0.1689, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.37147119641304016, |
|
"rewards/margins": 7.78417444229126, |
|
"rewards/rejected": -8.155645370483398, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.8174144824522435, |
|
"grad_norm": 61.83899130563118, |
|
"learning_rate": 4.902980481459834e-08, |
|
"logits/chosen": -2.410595655441284, |
|
"logits/rejected": -2.3901000022888184, |
|
"logps/chosen": -425.96466064453125, |
|
"logps/rejected": -264.555908203125, |
|
"loss": 0.1532, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.039742302149534225, |
|
"rewards/margins": 8.688650131225586, |
|
"rewards/rejected": -8.648908615112305, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.8233377758033467, |
|
"grad_norm": 66.74057591923928, |
|
"learning_rate": 4.5997643457719646e-08, |
|
"logits/chosen": -2.343575954437256, |
|
"logits/rejected": -2.366262912750244, |
|
"logps/chosen": -375.5441589355469, |
|
"logps/rejected": -252.6354522705078, |
|
"loss": 0.1891, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.5915766954421997, |
|
"rewards/margins": 8.14863395690918, |
|
"rewards/rejected": -8.74021053314209, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.8292610691544499, |
|
"grad_norm": 114.35276279118753, |
|
"learning_rate": 4.305273982894772e-08, |
|
"logits/chosen": -2.423459529876709, |
|
"logits/rejected": -2.391444683074951, |
|
"logps/chosen": -352.93212890625, |
|
"logps/rejected": -244.15249633789062, |
|
"loss": 0.104, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.4639635980129242, |
|
"rewards/margins": 8.43293285369873, |
|
"rewards/rejected": -8.896896362304688, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8351843625055531, |
|
"grad_norm": 289.8483422636645, |
|
"learning_rate": 4.0196353549026786e-08, |
|
"logits/chosen": -2.4569976329803467, |
|
"logits/rejected": -2.421668529510498, |
|
"logps/chosen": -413.7970275878906, |
|
"logps/rejected": -260.17559814453125, |
|
"loss": 0.0994, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.029109030961990356, |
|
"rewards/margins": 8.420695304870605, |
|
"rewards/rejected": -8.449804306030273, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.8411076558566563, |
|
"grad_norm": 268.194865051895, |
|
"learning_rate": 3.742970637726181e-08, |
|
"logits/chosen": -2.401829242706299, |
|
"logits/rejected": -2.3853981494903564, |
|
"logps/chosen": -336.02154541015625, |
|
"logps/rejected": -260.9532775878906, |
|
"loss": 0.1105, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.7859910726547241, |
|
"rewards/margins": 7.957832336425781, |
|
"rewards/rejected": -8.743824005126953, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.8470309492077596, |
|
"grad_norm": 108.72556342354649, |
|
"learning_rate": 3.4753981688937284e-08, |
|
"logits/chosen": -2.4282424449920654, |
|
"logits/rejected": -2.4077298641204834, |
|
"logps/chosen": -372.74859619140625, |
|
"logps/rejected": -255.5635986328125, |
|
"loss": 0.1373, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.21870934963226318, |
|
"rewards/margins": 8.647294998168945, |
|
"rewards/rejected": -8.866004943847656, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.8529542425588628, |
|
"grad_norm": 91.89889805093581, |
|
"learning_rate": 3.217032396915265e-08, |
|
"logits/chosen": -2.392305850982666, |
|
"logits/rejected": -2.3840205669403076, |
|
"logps/chosen": -384.5450439453125, |
|
"logps/rejected": -271.7799987792969, |
|
"loss": 0.1026, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.14383617043495178, |
|
"rewards/margins": 8.799039840698242, |
|
"rewards/rejected": -8.655204772949219, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.858877535909966, |
|
"grad_norm": 110.87871865085356, |
|
"learning_rate": 2.9679838323293404e-08, |
|
"logits/chosen": -2.4049434661865234, |
|
"logits/rejected": -2.3713347911834717, |
|
"logps/chosen": -388.73480224609375, |
|
"logps/rejected": -259.99884033203125, |
|
"loss": 0.1238, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.4035407602787018, |
|
"rewards/margins": 8.943741798400879, |
|
"rewards/rejected": -9.347283363342285, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.8648008292610692, |
|
"grad_norm": 173.62149862701466, |
|
"learning_rate": 2.728359000434488e-08, |
|
"logits/chosen": -2.460850715637207, |
|
"logits/rejected": -2.4532432556152344, |
|
"logps/chosen": -408.2056884765625, |
|
"logps/rejected": -253.97256469726562, |
|
"loss": 0.1465, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.10670559108257294, |
|
"rewards/margins": 8.27380084991455, |
|
"rewards/rejected": -8.38050651550293, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.8707241226121724, |
|
"grad_norm": 71.42558720517053, |
|
"learning_rate": 2.498260395725302e-08, |
|
"logits/chosen": -2.43896484375, |
|
"logits/rejected": -2.4272048473358154, |
|
"logps/chosen": -360.89898681640625, |
|
"logps/rejected": -257.8056945800781, |
|
"loss": 0.1013, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.2727930545806885, |
|
"rewards/margins": 8.105804443359375, |
|
"rewards/rejected": -8.378597259521484, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.8766474159632756, |
|
"grad_norm": 101.27358766803283, |
|
"learning_rate": 2.2777864380525426e-08, |
|
"logits/chosen": -2.4488790035247803, |
|
"logits/rejected": -2.4199492931365967, |
|
"logps/chosen": -368.7762145996094, |
|
"logps/rejected": -244.88949584960938, |
|
"loss": 0.1181, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.07807255536317825, |
|
"rewards/margins": 8.640520095825195, |
|
"rewards/rejected": -8.56244945526123, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.8825707093143788, |
|
"grad_norm": 248.7079758081644, |
|
"learning_rate": 2.0670314305261423e-08, |
|
"logits/chosen": -2.443664073944092, |
|
"logits/rejected": -2.4383292198181152, |
|
"logps/chosen": -358.8103332519531, |
|
"logps/rejected": -255.68338012695312, |
|
"loss": 0.1486, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.19392237067222595, |
|
"rewards/margins": 8.529777526855469, |
|
"rewards/rejected": -8.723699569702148, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.8884940026654821, |
|
"grad_norm": 228.603556403638, |
|
"learning_rate": 1.866085519178995e-08, |
|
"logits/chosen": -2.430896282196045, |
|
"logits/rejected": -2.39713716506958, |
|
"logps/chosen": -370.627685546875, |
|
"logps/rejected": -263.00537109375, |
|
"loss": 0.147, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.3709534704685211, |
|
"rewards/margins": 8.486063003540039, |
|
"rewards/rejected": -8.85701847076416, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8944172960165853, |
|
"grad_norm": 98.42860918890575, |
|
"learning_rate": 1.675034654408894e-08, |
|
"logits/chosen": -2.434990167617798, |
|
"logits/rejected": -2.408191204071045, |
|
"logps/chosen": -367.3474426269531, |
|
"logps/rejected": -252.94888305664062, |
|
"loss": 0.0803, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.12121151387691498, |
|
"rewards/margins": 8.94743537902832, |
|
"rewards/rejected": -9.068646430969238, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.9003405893676885, |
|
"grad_norm": 139.6690107400583, |
|
"learning_rate": 1.4939605542150595e-08, |
|
"logits/chosen": -2.399888277053833, |
|
"logits/rejected": -2.36537504196167, |
|
"logps/chosen": -379.11102294921875, |
|
"logps/rejected": -256.906005859375, |
|
"loss": 0.0731, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.062218405306339264, |
|
"rewards/margins": 8.892073631286621, |
|
"rewards/rejected": -8.954293251037598, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.9062638827187917, |
|
"grad_norm": 67.8318737973644, |
|
"learning_rate": 1.3229406692449791e-08, |
|
"logits/chosen": -2.479100227355957, |
|
"logits/rejected": -2.4435877799987793, |
|
"logps/chosen": -313.84320068359375, |
|
"logps/rejected": -239.1044921875, |
|
"loss": 0.1053, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.45267003774642944, |
|
"rewards/margins": 7.700293064117432, |
|
"rewards/rejected": -8.152963638305664, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.9121871760698949, |
|
"grad_norm": 133.6230788419226, |
|
"learning_rate": 1.162048149666503e-08, |
|
"logits/chosen": -2.3948609828948975, |
|
"logits/rejected": -2.3574843406677246, |
|
"logps/chosen": -375.52130126953125, |
|
"logps/rejected": -248.8327178955078, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.5586664080619812, |
|
"rewards/margins": 8.581660270690918, |
|
"rewards/rejected": -9.140327453613281, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.918110469420998, |
|
"grad_norm": 81.24757118228953, |
|
"learning_rate": 1.0113518138794047e-08, |
|
"logits/chosen": -2.424410581588745, |
|
"logits/rejected": -2.394204616546631, |
|
"logps/chosen": -375.75787353515625, |
|
"logps/rejected": -253.07760620117188, |
|
"loss": 0.2091, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.5351217985153198, |
|
"rewards/margins": 7.9864182472229, |
|
"rewards/rejected": -8.521539688110352, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.9240337627721013, |
|
"grad_norm": 335.2650242487096, |
|
"learning_rate": 8.709161190797565e-09, |
|
"logits/chosen": -2.427302122116089, |
|
"logits/rejected": -2.419010639190674, |
|
"logps/chosen": -371.4006652832031, |
|
"logps/rejected": -262.81365966796875, |
|
"loss": 0.1524, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.2929847836494446, |
|
"rewards/margins": 8.156495094299316, |
|
"rewards/rejected": -8.449480056762695, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.9299570561232045, |
|
"grad_norm": 196.1060901854074, |
|
"learning_rate": 7.408011336897141e-09, |
|
"logits/chosen": -2.4616658687591553, |
|
"logits/rejected": -2.4268431663513184, |
|
"logps/chosen": -371.90484619140625, |
|
"logps/rejected": -251.7766876220703, |
|
"loss": 0.1275, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5105483531951904, |
|
"rewards/margins": 8.057133674621582, |
|
"rewards/rejected": -8.567682266235352, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.9358803494743078, |
|
"grad_norm": 58.4219582518985, |
|
"learning_rate": 6.210625116645135e-09, |
|
"logits/chosen": -2.403099775314331, |
|
"logits/rejected": -2.384831666946411, |
|
"logps/chosen": -417.583251953125, |
|
"logps/rejected": -272.24603271484375, |
|
"loss": 0.1594, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.04987464100122452, |
|
"rewards/margins": 8.769102096557617, |
|
"rewards/rejected": -8.818976402282715, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.941803642825411, |
|
"grad_norm": 144.546751800466, |
|
"learning_rate": 5.117514686876378e-09, |
|
"logits/chosen": -2.456322193145752, |
|
"logits/rejected": -2.416351795196533, |
|
"logps/chosen": -351.6046447753906, |
|
"logps/rejected": -273.55303955078125, |
|
"loss": 0.1241, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.4101320207118988, |
|
"rewards/margins": 9.183103561401367, |
|
"rewards/rejected": -9.593236923217773, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.9477269361765142, |
|
"grad_norm": 120.48952563193308, |
|
"learning_rate": 4.1291476026441565e-09, |
|
"logits/chosen": -2.456437110900879, |
|
"logits/rejected": -2.4311928749084473, |
|
"logps/chosen": -365.16168212890625, |
|
"logps/rejected": -253.9194793701172, |
|
"loss": 0.1579, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.06400365382432938, |
|
"rewards/margins": 8.453577041625977, |
|
"rewards/rejected": -8.51758098602295, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.9536502295276174, |
|
"grad_norm": 106.80281991069545, |
|
"learning_rate": 3.2459466172331253e-09, |
|
"logits/chosen": -2.3881659507751465, |
|
"logits/rejected": -2.397707939147949, |
|
"logps/chosen": -372.622314453125, |
|
"logps/rejected": -245.3422393798828, |
|
"loss": 0.1586, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.32000666856765747, |
|
"rewards/margins": 7.897205352783203, |
|
"rewards/rejected": -8.217211723327637, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.9595735228787206, |
|
"grad_norm": 193.24252371332162, |
|
"learning_rate": 2.4682895013354854e-09, |
|
"logits/chosen": -2.4109058380126953, |
|
"logits/rejected": -2.413801670074463, |
|
"logps/chosen": -360.99072265625, |
|
"logps/rejected": -256.06756591796875, |
|
"loss": 0.091, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.6014559268951416, |
|
"rewards/margins": 8.314714431762695, |
|
"rewards/rejected": -8.916170120239258, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.9654968162298238, |
|
"grad_norm": 182.40318024580077, |
|
"learning_rate": 1.7965088814675677e-09, |
|
"logits/chosen": -2.384791851043701, |
|
"logits/rejected": -2.382201671600342, |
|
"logps/chosen": -375.5494689941406, |
|
"logps/rejected": -269.6348876953125, |
|
"loss": 0.1368, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.9617233276367188, |
|
"rewards/margins": 8.054598808288574, |
|
"rewards/rejected": -9.016322135925293, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.971420109580927, |
|
"grad_norm": 142.9169068872898, |
|
"learning_rate": 1.2308920976958348e-09, |
|
"logits/chosen": -2.448244094848633, |
|
"logits/rejected": -2.4210681915283203, |
|
"logps/chosen": -364.89703369140625, |
|
"logps/rejected": -260.6488342285156, |
|
"loss": 0.1134, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.6372100114822388, |
|
"rewards/margins": 8.421440124511719, |
|
"rewards/rejected": -9.058650016784668, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.9773434029320303, |
|
"grad_norm": 72.39563192418329, |
|
"learning_rate": 7.716810807330276e-10, |
|
"logits/chosen": -2.4073166847229004, |
|
"logits/rejected": -2.365865707397461, |
|
"logps/chosen": -390.2394104003906, |
|
"logps/rejected": -262.54150390625, |
|
"loss": 0.0787, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.4426153302192688, |
|
"rewards/margins": 8.694158554077148, |
|
"rewards/rejected": -9.136773109436035, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.9832666962831335, |
|
"grad_norm": 109.48295555626903, |
|
"learning_rate": 4.190722484575804e-10, |
|
"logits/chosen": -2.4156811237335205, |
|
"logits/rejected": -2.381577968597412, |
|
"logps/chosen": -399.725341796875, |
|
"logps/rejected": -263.4638366699219, |
|
"loss": 0.1397, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.32644200325012207, |
|
"rewards/margins": 8.409704208374023, |
|
"rewards/rejected": -8.736146926879883, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.9891899896342367, |
|
"grad_norm": 137.41353285844747, |
|
"learning_rate": 1.732164218998522e-10, |
|
"logits/chosen": -2.3911380767822266, |
|
"logits/rejected": -2.386594533920288, |
|
"logps/chosen": -363.5224609375, |
|
"logps/rejected": -253.3739776611328, |
|
"loss": 0.125, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.11769469082355499, |
|
"rewards/margins": 8.378522872924805, |
|
"rewards/rejected": -8.496217727661133, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.9951132829853399, |
|
"grad_norm": 198.01011960831866, |
|
"learning_rate": 3.4218760731730136e-11, |
|
"logits/chosen": -2.465393543243408, |
|
"logits/rejected": -2.44226336479187, |
|
"logps/chosen": -390.7846984863281, |
|
"logps/rejected": -258.70721435546875, |
|
"loss": 0.1317, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.5431281924247742, |
|
"rewards/margins": 8.081016540527344, |
|
"rewards/rejected": -8.624144554138184, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.9998519176662224, |
|
"step": 1688, |
|
"total_flos": 0.0, |
|
"train_loss": 0.17761736296081995, |
|
"train_runtime": 39274.1948, |
|
"train_samples_per_second": 1.375, |
|
"train_steps_per_second": 0.043 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1688, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|