|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 1359, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 6.209654163226836, |
|
"learning_rate": 3.676470588235294e-09, |
|
"logits/chosen": -1.4681403636932373, |
|
"logits/rejected": -0.8821791410446167, |
|
"logps/chosen": -326.7279052734375, |
|
"logps/rejected": -393.66143798828125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 7.833527457219724, |
|
"learning_rate": 3.676470588235294e-08, |
|
"logits/chosen": -1.1554194688796997, |
|
"logits/rejected": -1.069737434387207, |
|
"logps/chosen": -260.11224365234375, |
|
"logps/rejected": -278.21954345703125, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": 0.001127632916904986, |
|
"rewards/margins": 0.001941706403158605, |
|
"rewards/margins_max": 0.0066660139709711075, |
|
"rewards/margins_min": -0.0027826009318232536, |
|
"rewards/margins_std": 0.006681179627776146, |
|
"rewards/rejected": -0.0008140733698382974, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.539912592900294, |
|
"learning_rate": 7.352941176470588e-08, |
|
"logits/chosen": -1.1387906074523926, |
|
"logits/rejected": -1.2151895761489868, |
|
"logps/chosen": -226.5954132080078, |
|
"logps/rejected": -194.97735595703125, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0004003068897873163, |
|
"rewards/margins": 0.0006232298910617828, |
|
"rewards/margins_max": 0.0029323583003133535, |
|
"rewards/margins_min": -0.0016858980525285006, |
|
"rewards/margins_std": 0.0032655999530106783, |
|
"rewards/rejected": -0.0002229233068646863, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 9.074297699065875, |
|
"learning_rate": 1.1029411764705881e-07, |
|
"logits/chosen": -0.9134622812271118, |
|
"logits/rejected": -1.1061055660247803, |
|
"logps/chosen": -286.9056091308594, |
|
"logps/rejected": -306.0609130859375, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0033608167432248592, |
|
"rewards/margins": 0.0010996473720297217, |
|
"rewards/margins_max": 0.003882316406816244, |
|
"rewards/margins_min": -0.0016830215463414788, |
|
"rewards/margins_std": 0.003935288172215223, |
|
"rewards/rejected": 0.0022611692547798157, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.630354149069055, |
|
"learning_rate": 1.4705882352941175e-07, |
|
"logits/chosen": -0.9963411092758179, |
|
"logits/rejected": -1.3301975727081299, |
|
"logps/chosen": -237.13650512695312, |
|
"logps/rejected": -233.420654296875, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0003052559623029083, |
|
"rewards/margins": -0.00031435777782462537, |
|
"rewards/margins_max": 0.003875983878970146, |
|
"rewards/margins_min": -0.0045046997256577015, |
|
"rewards/margins_std": 0.005926038138568401, |
|
"rewards/rejected": 9.101861905946862e-06, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.11233300662631, |
|
"learning_rate": 1.8382352941176472e-07, |
|
"logits/chosen": -0.9264333844184875, |
|
"logits/rejected": -1.0728222131729126, |
|
"logps/chosen": -219.332763671875, |
|
"logps/rejected": -220.7531280517578, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0008567962795495987, |
|
"rewards/margins": 0.0023684161715209484, |
|
"rewards/margins_max": 0.00644815806299448, |
|
"rewards/margins_min": -0.001711326651275158, |
|
"rewards/margins_std": 0.005769627168774605, |
|
"rewards/rejected": -0.001511619659140706, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.3634169311373245, |
|
"learning_rate": 2.2058823529411763e-07, |
|
"logits/chosen": -1.1445600986480713, |
|
"logits/rejected": -1.3254610300064087, |
|
"logps/chosen": -269.0830993652344, |
|
"logps/rejected": -234.78726196289062, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0016944237286224961, |
|
"rewards/margins": 0.004126362036913633, |
|
"rewards/margins_max": 0.006431617774069309, |
|
"rewards/margins_min": 0.0018211060669273138, |
|
"rewards/margins_std": 0.0032601244747638702, |
|
"rewards/rejected": -0.0024319379590451717, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.891925253538908, |
|
"learning_rate": 2.5735294117647057e-07, |
|
"logits/chosen": -1.414535403251648, |
|
"logits/rejected": -1.5020934343338013, |
|
"logps/chosen": -295.0069580078125, |
|
"logps/rejected": -283.39984130859375, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.003248781431466341, |
|
"rewards/margins": 0.005960130598396063, |
|
"rewards/margins_max": 0.01083610113710165, |
|
"rewards/margins_min": 0.0010841598268598318, |
|
"rewards/margins_std": 0.006895663682371378, |
|
"rewards/rejected": -0.0027113493997603655, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 27.08598358859191, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": -1.1046959161758423, |
|
"logits/rejected": -1.121512770652771, |
|
"logps/chosen": -233.47909545898438, |
|
"logps/rejected": -228.24447631835938, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.0034522090572863817, |
|
"rewards/margins": 0.010937942191958427, |
|
"rewards/margins_max": 0.015041169710457325, |
|
"rewards/margins_min": 0.006834716536104679, |
|
"rewards/margins_std": 0.005802837200462818, |
|
"rewards/rejected": -0.007485733367502689, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.043695997862729, |
|
"learning_rate": 3.3088235294117644e-07, |
|
"logits/chosen": -1.1739518642425537, |
|
"logits/rejected": -1.1855499744415283, |
|
"logps/chosen": -201.79940795898438, |
|
"logps/rejected": -239.0184783935547, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.005821605678647757, |
|
"rewards/margins": 0.016343099996447563, |
|
"rewards/margins_max": 0.022502990439534187, |
|
"rewards/margins_min": 0.010183211416006088, |
|
"rewards/margins_std": 0.008711399510502815, |
|
"rewards/rejected": -0.010521495714783669, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.240948244130348, |
|
"learning_rate": 3.6764705882352943e-07, |
|
"logits/chosen": -1.226905345916748, |
|
"logits/rejected": -1.402093529701233, |
|
"logps/chosen": -276.8337707519531, |
|
"logps/rejected": -248.4552459716797, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.008135917596518993, |
|
"rewards/margins": 0.024016622453927994, |
|
"rewards/margins_max": 0.033059027045965195, |
|
"rewards/margins_min": 0.014974219724535942, |
|
"rewards/margins_std": 0.012787890620529652, |
|
"rewards/rejected": -0.015880707651376724, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_logits/chosen": -1.1694660186767578, |
|
"eval_logits/rejected": -1.1956290006637573, |
|
"eval_logps/chosen": -345.8330993652344, |
|
"eval_logps/rejected": -336.38427734375, |
|
"eval_loss": 0.6919357776641846, |
|
"eval_rewards/accuracies": 0.5416666865348816, |
|
"eval_rewards/chosen": 2.3678861907683313e-05, |
|
"eval_rewards/margins": 0.002057413337752223, |
|
"eval_rewards/margins_max": 0.027664856985211372, |
|
"eval_rewards/margins_min": -0.02450541965663433, |
|
"eval_rewards/margins_std": 0.017513444647192955, |
|
"eval_rewards/rejected": -0.002033734694123268, |
|
"eval_runtime": 419.0939, |
|
"eval_samples_per_second": 9.544, |
|
"eval_steps_per_second": 0.15, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.566894084924568, |
|
"learning_rate": 4.044117647058823e-07, |
|
"logits/chosen": -1.3186091184616089, |
|
"logits/rejected": -1.2772490978240967, |
|
"logps/chosen": -379.5386657714844, |
|
"logps/rejected": -246.4805450439453, |
|
"loss": 0.6773, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.007909432053565979, |
|
"rewards/margins": 0.024571493268013, |
|
"rewards/margins_max": 0.036393627524375916, |
|
"rewards/margins_min": 0.012749359011650085, |
|
"rewards/margins_std": 0.016719024628400803, |
|
"rewards/rejected": -0.01666206307709217, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.5380569300473175, |
|
"learning_rate": 4.4117647058823526e-07, |
|
"logits/chosen": -0.9861418008804321, |
|
"logits/rejected": -1.2131096124649048, |
|
"logps/chosen": -280.57135009765625, |
|
"logps/rejected": -222.57217407226562, |
|
"loss": 0.6696, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.017649073153734207, |
|
"rewards/margins": 0.06698472797870636, |
|
"rewards/margins_max": 0.10137734562158585, |
|
"rewards/margins_min": 0.032592128962278366, |
|
"rewards/margins_std": 0.0486384816467762, |
|
"rewards/rejected": -0.04933566227555275, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 15.361998761868088, |
|
"learning_rate": 4.779411764705882e-07, |
|
"logits/chosen": -1.0785776376724243, |
|
"logits/rejected": -0.898257851600647, |
|
"logps/chosen": -283.1363525390625, |
|
"logps/rejected": -214.15316772460938, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02150227688252926, |
|
"rewards/margins": 0.08108994364738464, |
|
"rewards/margins_max": 0.10384353250265121, |
|
"rewards/margins_min": 0.05833636596798897, |
|
"rewards/margins_std": 0.032178424298763275, |
|
"rewards/rejected": -0.059587668627500534, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 5.451118326133565, |
|
"learning_rate": 4.999868030671756e-07, |
|
"logits/chosen": -0.9526296854019165, |
|
"logits/rejected": -0.9190389513969421, |
|
"logps/chosen": -236.9579620361328, |
|
"logps/rejected": -269.78240966796875, |
|
"loss": 0.6586, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.015391260385513306, |
|
"rewards/margins": 0.07113742083311081, |
|
"rewards/margins_max": 0.10363912582397461, |
|
"rewards/margins_min": 0.0386357307434082, |
|
"rewards/margins_std": 0.04596434161067009, |
|
"rewards/rejected": -0.0557461753487587, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 5.767935898839982, |
|
"learning_rate": 4.998383535732973e-07, |
|
"logits/chosen": -1.1545963287353516, |
|
"logits/rejected": -1.3083815574645996, |
|
"logps/chosen": -272.58392333984375, |
|
"logps/rejected": -251.5518798828125, |
|
"loss": 0.637, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.027133097872138023, |
|
"rewards/margins": 0.1410999596118927, |
|
"rewards/margins_max": 0.18325701355934143, |
|
"rewards/margins_min": 0.09894292801618576, |
|
"rewards/margins_std": 0.05961906909942627, |
|
"rewards/rejected": -0.11396688222885132, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 9.075069261969173, |
|
"learning_rate": 4.995250566954361e-07, |
|
"logits/chosen": -1.2339075803756714, |
|
"logits/rejected": -1.3427120447158813, |
|
"logps/chosen": -278.045654296875, |
|
"logps/rejected": -249.33016967773438, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.021242624148726463, |
|
"rewards/margins": 0.1352781355381012, |
|
"rewards/margins_max": 0.18264132738113403, |
|
"rewards/margins_min": 0.08791494369506836, |
|
"rewards/margins_std": 0.06698166579008102, |
|
"rewards/rejected": -0.11403550952672958, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 5.288881821825863, |
|
"learning_rate": 4.990471191519357e-07, |
|
"logits/chosen": -1.2296701669692993, |
|
"logits/rejected": -1.3137729167938232, |
|
"logps/chosen": -271.8497009277344, |
|
"logps/rejected": -257.36285400390625, |
|
"loss": 0.6231, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.027015607804059982, |
|
"rewards/margins": 0.2098924219608307, |
|
"rewards/margins_max": 0.28914040327072144, |
|
"rewards/margins_min": 0.13064439594745636, |
|
"rewards/margins_std": 0.1120736226439476, |
|
"rewards/rejected": -0.1828767955303192, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.722529871025577, |
|
"learning_rate": 4.984048562937129e-07, |
|
"logits/chosen": -1.104107141494751, |
|
"logits/rejected": -1.2799243927001953, |
|
"logps/chosen": -267.16131591796875, |
|
"logps/rejected": -320.7081298828125, |
|
"loss": 0.599, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.009208987466990948, |
|
"rewards/margins": 0.15969006717205048, |
|
"rewards/margins_max": 0.21888110041618347, |
|
"rewards/margins_min": 0.1004989966750145, |
|
"rewards/margins_std": 0.08370877802371979, |
|
"rewards/rejected": -0.15048107504844666, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 4.706882294745904, |
|
"learning_rate": 4.975986918961825e-07, |
|
"logits/chosen": -1.1564669609069824, |
|
"logits/rejected": -1.3084397315979004, |
|
"logps/chosen": -287.58294677734375, |
|
"logps/rejected": -235.0350799560547, |
|
"loss": 0.5751, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.015282683074474335, |
|
"rewards/margins": 0.2435847818851471, |
|
"rewards/margins_max": 0.34512418508529663, |
|
"rewards/margins_min": 0.14204536378383636, |
|
"rewards/margins_std": 0.14359840750694275, |
|
"rewards/rejected": -0.22830209136009216, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 6.10064839157769, |
|
"learning_rate": 4.966291578796448e-07, |
|
"logits/chosen": -1.2383778095245361, |
|
"logits/rejected": -1.2699321508407593, |
|
"logps/chosen": -246.54550170898438, |
|
"logps/rejected": -299.7005920410156, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.00011487379379104823, |
|
"rewards/margins": 0.3421292304992676, |
|
"rewards/margins_max": 0.5104770064353943, |
|
"rewards/margins_min": 0.17378148436546326, |
|
"rewards/margins_std": 0.23807969689369202, |
|
"rewards/rejected": -0.3420143723487854, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_logits/chosen": -1.1466065645217896, |
|
"eval_logits/rejected": -1.1508780717849731, |
|
"eval_logps/chosen": -357.1989440917969, |
|
"eval_logps/rejected": -350.5012512207031, |
|
"eval_loss": 0.679348349571228, |
|
"eval_rewards/accuracies": 0.579365074634552, |
|
"eval_rewards/chosen": -0.11363494396209717, |
|
"eval_rewards/margins": 0.029568513855338097, |
|
"eval_rewards/margins_max": 0.24946285784244537, |
|
"eval_rewards/margins_min": -0.1965206265449524, |
|
"eval_rewards/margins_std": 0.1510881930589676, |
|
"eval_rewards/rejected": -0.143203467130661, |
|
"eval_runtime": 417.1858, |
|
"eval_samples_per_second": 9.588, |
|
"eval_steps_per_second": 0.151, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 7.130935509068585, |
|
"learning_rate": 4.954968939583149e-07, |
|
"logits/chosen": -0.82276850938797, |
|
"logits/rejected": -1.0703377723693848, |
|
"logps/chosen": -308.36981201171875, |
|
"logps/rejected": -285.35321044921875, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.06309916079044342, |
|
"rewards/margins": 0.28919515013694763, |
|
"rewards/margins_max": 0.4261881709098816, |
|
"rewards/margins_min": 0.1522020846605301, |
|
"rewards/margins_std": 0.1937374323606491, |
|
"rewards/rejected": -0.35229426622390747, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 5.18218978578797, |
|
"learning_rate": 4.942026472182297e-07, |
|
"logits/chosen": -1.133894681930542, |
|
"logits/rejected": -0.9819344282150269, |
|
"logps/chosen": -357.5079345703125, |
|
"logps/rejected": -290.6125183105469, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.12442765384912491, |
|
"rewards/margins": 0.420942485332489, |
|
"rewards/margins_max": 0.6367592215538025, |
|
"rewards/margins_min": 0.20512573421001434, |
|
"rewards/margins_std": 0.30521097779273987, |
|
"rewards/rejected": -0.5453701615333557, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 6.451783890738213, |
|
"learning_rate": 4.92747271624308e-07, |
|
"logits/chosen": -1.1002264022827148, |
|
"logits/rejected": -1.1289845705032349, |
|
"logps/chosen": -307.14483642578125, |
|
"logps/rejected": -330.2859802246094, |
|
"loss": 0.494, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.14728474617004395, |
|
"rewards/margins": 0.6978201866149902, |
|
"rewards/margins_max": 1.030912160873413, |
|
"rewards/margins_min": 0.36472827196121216, |
|
"rewards/margins_std": 0.47106313705444336, |
|
"rewards/rejected": -0.845104992389679, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 6.092745398297892, |
|
"learning_rate": 4.911317274568909e-07, |
|
"logits/chosen": -1.1411150693893433, |
|
"logits/rejected": -1.1094478368759155, |
|
"logps/chosen": -294.82550048828125, |
|
"logps/rejected": -408.50970458984375, |
|
"loss": 0.4335, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.1695319563150406, |
|
"rewards/margins": 0.7641543745994568, |
|
"rewards/margins_max": 1.1791099309921265, |
|
"rewards/margins_min": 0.34919896721839905, |
|
"rewards/margins_std": 0.586835503578186, |
|
"rewards/rejected": -0.933686375617981, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 13.168192652840903, |
|
"learning_rate": 4.89357080678133e-07, |
|
"logits/chosen": -1.0950664281845093, |
|
"logits/rejected": -1.240697979927063, |
|
"logps/chosen": -269.51092529296875, |
|
"logps/rejected": -296.14837646484375, |
|
"loss": 0.4457, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.32782456278800964, |
|
"rewards/margins": 0.6738765835762024, |
|
"rewards/margins_max": 0.9242515563964844, |
|
"rewards/margins_min": 0.423501580953598, |
|
"rewards/margins_std": 0.35408374667167664, |
|
"rewards/rejected": -1.0017011165618896, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 5.63734344760071, |
|
"learning_rate": 4.874245022286637e-07, |
|
"logits/chosen": -1.1380219459533691, |
|
"logits/rejected": -0.8845139741897583, |
|
"logps/chosen": -245.44686889648438, |
|
"logps/rejected": -377.0203552246094, |
|
"loss": 0.4311, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39118385314941406, |
|
"rewards/margins": 0.8953431844711304, |
|
"rewards/margins_max": 1.3878755569458008, |
|
"rewards/margins_min": 0.40281087160110474, |
|
"rewards/margins_std": 0.6965457797050476, |
|
"rewards/rejected": -1.2865270376205444, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 5.2265046259602705, |
|
"learning_rate": 4.853352672549815e-07, |
|
"logits/chosen": -0.9493010640144348, |
|
"logits/rejected": -0.9017621874809265, |
|
"logps/chosen": -434.3206481933594, |
|
"logps/rejected": -370.5262451171875, |
|
"loss": 0.4015, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5151541829109192, |
|
"rewards/margins": 0.754838764667511, |
|
"rewards/margins_max": 1.1408073902130127, |
|
"rewards/margins_min": 0.3688700795173645, |
|
"rewards/margins_std": 0.5458420515060425, |
|
"rewards/rejected": -1.2699930667877197, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 9.133504257567045, |
|
"learning_rate": 4.830907542680918e-07, |
|
"logits/chosen": -1.0836373567581177, |
|
"logits/rejected": -0.9045012593269348, |
|
"logps/chosen": -264.9966125488281, |
|
"logps/rejected": -428.46539306640625, |
|
"loss": 0.3691, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5287370681762695, |
|
"rewards/margins": 2.0548110008239746, |
|
"rewards/margins_max": 3.620879650115967, |
|
"rewards/margins_min": 0.48874226212501526, |
|
"rewards/margins_std": 2.2147555351257324, |
|
"rewards/rejected": -2.583548069000244, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 5.917073426239516, |
|
"learning_rate": 4.806924442339425e-07, |
|
"logits/chosen": -1.0086328983306885, |
|
"logits/rejected": -0.8821426630020142, |
|
"logps/chosen": -305.4242248535156, |
|
"logps/rejected": -435.61737060546875, |
|
"loss": 0.3813, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40681153535842896, |
|
"rewards/margins": 1.1146458387374878, |
|
"rewards/margins_max": 1.7330601215362549, |
|
"rewards/margins_min": 0.4962318539619446, |
|
"rewards/margins_std": 0.8745697140693665, |
|
"rewards/rejected": -1.5214574337005615, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 7.285903481113855, |
|
"learning_rate": 4.781419195962598e-07, |
|
"logits/chosen": -0.997855544090271, |
|
"logits/rejected": -0.9541902542114258, |
|
"logps/chosen": -299.9017639160156, |
|
"logps/rejected": -388.34246826171875, |
|
"loss": 0.3597, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6117764711380005, |
|
"rewards/margins": 0.7978827953338623, |
|
"rewards/margins_max": 1.1113024950027466, |
|
"rewards/margins_min": 0.4844631552696228, |
|
"rewards/margins_std": 0.44324231147766113, |
|
"rewards/rejected": -1.4096593856811523, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_logits/chosen": -1.0628585815429688, |
|
"eval_logits/rejected": -1.051159143447876, |
|
"eval_logps/chosen": -439.3020324707031, |
|
"eval_logps/rejected": -442.590576171875, |
|
"eval_loss": 0.6787940859794617, |
|
"eval_rewards/accuracies": 0.5714285969734192, |
|
"eval_rewards/chosen": -0.9346656203269958, |
|
"eval_rewards/margins": 0.12943138182163239, |
|
"eval_rewards/margins_max": 1.008405089378357, |
|
"eval_rewards/margins_min": -0.7319620251655579, |
|
"eval_rewards/margins_std": 0.5778602361679077, |
|
"eval_rewards/rejected": -1.0640968084335327, |
|
"eval_runtime": 418.5023, |
|
"eval_samples_per_second": 9.558, |
|
"eval_steps_per_second": 0.151, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.782754556563047, |
|
"learning_rate": 4.754408632324253e-07, |
|
"logits/chosen": -1.1973422765731812, |
|
"logits/rejected": -0.9350277781486511, |
|
"logps/chosen": -318.24627685546875, |
|
"logps/rejected": -525.6881103515625, |
|
"loss": 0.3331, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7586840987205505, |
|
"rewards/margins": 2.185839891433716, |
|
"rewards/margins_max": 2.856729745864868, |
|
"rewards/margins_min": 1.5149496793746948, |
|
"rewards/margins_std": 0.9487816691398621, |
|
"rewards/rejected": -2.944523811340332, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 5.923061735298404, |
|
"learning_rate": 4.725910573430866e-07, |
|
"logits/chosen": -1.0679926872253418, |
|
"logits/rejected": -0.945013165473938, |
|
"logps/chosen": -365.65472412109375, |
|
"logps/rejected": -405.1241760253906, |
|
"loss": 0.3627, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.8854155540466309, |
|
"rewards/margins": 0.7383102178573608, |
|
"rewards/margins_max": 1.1103546619415283, |
|
"rewards/margins_min": 0.3662659227848053, |
|
"rewards/margins_std": 0.5261501669883728, |
|
"rewards/rejected": -1.6237256526947021, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 7.098229956454526, |
|
"learning_rate": 4.6959438227623293e-07, |
|
"logits/chosen": -1.1373931169509888, |
|
"logits/rejected": -0.862761378288269, |
|
"logps/chosen": -276.69671630859375, |
|
"logps/rejected": -535.3623046875, |
|
"loss": 0.2945, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7917782068252563, |
|
"rewards/margins": 2.616485595703125, |
|
"rewards/margins_max": 4.648871421813965, |
|
"rewards/margins_min": 0.5841000080108643, |
|
"rewards/margins_std": 2.874227523803711, |
|
"rewards/rejected": -3.40826416015625, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 8.024599779277368, |
|
"learning_rate": 4.664528152865105e-07, |
|
"logits/chosen": -0.7721256613731384, |
|
"logits/rejected": -0.8172466158866882, |
|
"logps/chosen": -349.3388671875, |
|
"logps/rejected": -486.68597412109375, |
|
"loss": 0.2734, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.1078470945358276, |
|
"rewards/margins": 1.6050605773925781, |
|
"rewards/margins_max": 2.6409249305725098, |
|
"rewards/margins_min": 0.5691961646080017, |
|
"rewards/margins_std": 1.4649332761764526, |
|
"rewards/rejected": -2.712907552719116, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 7.882556555729322, |
|
"learning_rate": 4.6316842923059816e-07, |
|
"logits/chosen": -1.0482970476150513, |
|
"logits/rejected": -0.8200104832649231, |
|
"logps/chosen": -331.43133544921875, |
|
"logps/rejected": -785.7190551757812, |
|
"loss": 0.3029, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.9469194412231445, |
|
"rewards/margins": 4.725480556488037, |
|
"rewards/margins_max": 8.024388313293457, |
|
"rewards/margins_min": 1.4265724420547485, |
|
"rewards/margins_std": 4.665360450744629, |
|
"rewards/rejected": -5.672399997711182, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 11.928720576155937, |
|
"learning_rate": 4.5974339119950334e-07, |
|
"logits/chosen": -0.9947048425674438, |
|
"logits/rejected": -0.8432388305664062, |
|
"logps/chosen": -433.1314392089844, |
|
"logps/rejected": -583.6080932617188, |
|
"loss": 0.2952, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.5369694232940674, |
|
"rewards/margins": 1.8583523035049438, |
|
"rewards/margins_max": 2.645268678665161, |
|
"rewards/margins_min": 1.0714359283447266, |
|
"rewards/margins_std": 1.1128677129745483, |
|
"rewards/rejected": -3.3953216075897217, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 5.540434948793406, |
|
"learning_rate": 4.5617996108867997e-07, |
|
"logits/chosen": -0.8581298589706421, |
|
"logits/rejected": -0.3961424231529236, |
|
"logps/chosen": -412.405517578125, |
|
"logps/rejected": -812.7819213867188, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.617913007736206, |
|
"rewards/margins": 4.383803844451904, |
|
"rewards/margins_max": 7.294039249420166, |
|
"rewards/margins_min": 1.4735687971115112, |
|
"rewards/margins_std": 4.115694522857666, |
|
"rewards/rejected": -6.001717567443848, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 17.370609516247765, |
|
"learning_rate": 4.5248049010691304e-07, |
|
"logits/chosen": -1.0891549587249756, |
|
"logits/rejected": -0.69083172082901, |
|
"logps/chosen": -347.2943420410156, |
|
"logps/rejected": -703.4866943359375, |
|
"loss": 0.2504, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3099722862243652, |
|
"rewards/margins": 3.3620052337646484, |
|
"rewards/margins_max": 5.631108283996582, |
|
"rewards/margins_min": 1.092902421951294, |
|
"rewards/margins_std": 3.208995819091797, |
|
"rewards/rejected": -4.6719770431518555, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 9.292751258662012, |
|
"learning_rate": 4.486474192249533e-07, |
|
"logits/chosen": -1.0247005224227905, |
|
"logits/rejected": -0.6028069853782654, |
|
"logps/chosen": -442.56671142578125, |
|
"logps/rejected": -660.4315185546875, |
|
"loss": 0.2063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4350707530975342, |
|
"rewards/margins": 2.895498514175415, |
|
"rewards/margins_max": 3.9468486309051514, |
|
"rewards/margins_min": 1.8441476821899414, |
|
"rewards/margins_std": 1.4868338108062744, |
|
"rewards/rejected": -4.330569267272949, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 5.8794814274178755, |
|
"learning_rate": 4.4468327756492504e-07, |
|
"logits/chosen": -0.7380314469337463, |
|
"logits/rejected": -0.5135469436645508, |
|
"logps/chosen": -366.635986328125, |
|
"logps/rejected": -607.7274780273438, |
|
"loss": 0.2059, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.5760023593902588, |
|
"rewards/margins": 2.5897469520568848, |
|
"rewards/margins_max": 3.876375913619995, |
|
"rewards/margins_min": 1.3031187057495117, |
|
"rewards/margins_std": 1.8195674419403076, |
|
"rewards/rejected": -4.165749549865723, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": -0.8807379603385925, |
|
"eval_logits/rejected": -0.8695055842399597, |
|
"eval_logps/chosen": -542.6320190429688, |
|
"eval_logps/rejected": -566.7861938476562, |
|
"eval_loss": 0.7172051072120667, |
|
"eval_rewards/accuracies": 0.5972222089767456, |
|
"eval_rewards/chosen": -1.9679654836654663, |
|
"eval_rewards/margins": 0.3380873501300812, |
|
"eval_rewards/margins_max": 2.344252109527588, |
|
"eval_rewards/margins_min": -1.388581395149231, |
|
"eval_rewards/margins_std": 1.2205023765563965, |
|
"eval_rewards/rejected": -2.3060529232025146, |
|
"eval_runtime": 415.548, |
|
"eval_samples_per_second": 9.626, |
|
"eval_steps_per_second": 0.152, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 11.887878225278437, |
|
"learning_rate": 4.405906807315705e-07, |
|
"logits/chosen": -0.7631363868713379, |
|
"logits/rejected": -0.14442148804664612, |
|
"logps/chosen": -412.6502990722656, |
|
"logps/rejected": -617.9203491210938, |
|
"loss": 0.1867, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8327445983886719, |
|
"rewards/margins": 2.406796932220459, |
|
"rewards/margins_max": 3.8745861053466797, |
|
"rewards/margins_min": 0.9390074014663696, |
|
"rewards/margins_std": 2.075767755508423, |
|
"rewards/rejected": -4.239541530609131, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 7.6667134274072195, |
|
"learning_rate": 4.363723290864314e-07, |
|
"logits/chosen": -0.8663452863693237, |
|
"logits/rejected": -0.10104439407587051, |
|
"logps/chosen": -507.49078369140625, |
|
"logps/rejected": -824.9513549804688, |
|
"loss": 0.221, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.1396355628967285, |
|
"rewards/margins": 4.1518402099609375, |
|
"rewards/margins_max": 6.126175403594971, |
|
"rewards/margins_min": 2.1775054931640625, |
|
"rewards/margins_std": 2.792131185531616, |
|
"rewards/rejected": -6.291476249694824, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 15.491732727187143, |
|
"learning_rate": 4.3203100596610723e-07, |
|
"logits/chosen": -0.5918745398521423, |
|
"logits/rejected": -0.1715858429670334, |
|
"logps/chosen": -453.0254821777344, |
|
"logps/rejected": -597.4471435546875, |
|
"loss": 0.1938, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7686259746551514, |
|
"rewards/margins": 2.167701005935669, |
|
"rewards/margins_max": 3.2129874229431152, |
|
"rewards/margins_min": 1.1224141120910645, |
|
"rewards/margins_std": 1.4782588481903076, |
|
"rewards/rejected": -3.9363269805908203, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 8.708872027507127, |
|
"learning_rate": 4.2756957584576436e-07, |
|
"logits/chosen": -0.584081768989563, |
|
"logits/rejected": 0.096702441573143, |
|
"logps/chosen": -451.47509765625, |
|
"logps/rejected": -978.1886596679688, |
|
"loss": 0.2022, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.9495502710342407, |
|
"rewards/margins": 5.0804595947265625, |
|
"rewards/margins_max": 8.66343879699707, |
|
"rewards/margins_min": 1.4974806308746338, |
|
"rewards/margins_std": 5.0670976638793945, |
|
"rewards/rejected": -7.0300092697143555, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 7.538469505929578, |
|
"learning_rate": 4.22990982449109e-07, |
|
"logits/chosen": -0.6104982495307922, |
|
"logits/rejected": -0.21484926342964172, |
|
"logps/chosen": -472.439453125, |
|
"logps/rejected": -757.8245849609375, |
|
"loss": 0.1417, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.049053192138672, |
|
"rewards/margins": 3.2169277667999268, |
|
"rewards/margins_max": 4.436863422393799, |
|
"rewards/margins_min": 1.9969921112060547, |
|
"rewards/margins_std": 1.7252495288848877, |
|
"rewards/rejected": -5.2659807205200195, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 10.825278124877386, |
|
"learning_rate": 4.1829824680607104e-07, |
|
"logits/chosen": -0.419607937335968, |
|
"logits/rejected": 0.11389993131160736, |
|
"logps/chosen": -435.0726623535156, |
|
"logps/rejected": -784.734130859375, |
|
"loss": 0.166, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.990121841430664, |
|
"rewards/margins": 3.3705692291259766, |
|
"rewards/margins_max": 5.105216026306152, |
|
"rewards/margins_min": 1.6359226703643799, |
|
"rewards/margins_std": 2.4531607627868652, |
|
"rewards/rejected": -5.360690593719482, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 9.234625136932591, |
|
"learning_rate": 4.134944652594794e-07, |
|
"logits/chosen": -0.5118550062179565, |
|
"logits/rejected": 0.10812608152627945, |
|
"logps/chosen": -453.38848876953125, |
|
"logps/rejected": -1031.4366455078125, |
|
"loss": 0.1243, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.346630096435547, |
|
"rewards/margins": 5.6441521644592285, |
|
"rewards/margins_max": 8.810213088989258, |
|
"rewards/margins_min": 2.4780914783477783, |
|
"rewards/margins_std": 4.4774861335754395, |
|
"rewards/rejected": -7.990782260894775, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 6.8345938121765775, |
|
"learning_rate": 4.085828074220451e-07, |
|
"logits/chosen": -0.4821593165397644, |
|
"logits/rejected": 0.33621591329574585, |
|
"logps/chosen": -612.152587890625, |
|
"logps/rejected": -944.8914794921875, |
|
"loss": 0.142, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.5179717540740967, |
|
"rewards/margins": 4.587340354919434, |
|
"rewards/margins_max": 7.297093868255615, |
|
"rewards/margins_min": 1.8775880336761475, |
|
"rewards/margins_std": 3.8321690559387207, |
|
"rewards/rejected": -7.105312347412109, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 11.122230946658236, |
|
"learning_rate": 4.035665140849994e-07, |
|
"logits/chosen": -0.2719888985157013, |
|
"logits/rejected": 0.40051668882369995, |
|
"logps/chosen": -519.849365234375, |
|
"logps/rejected": -941.7233276367188, |
|
"loss": 0.1233, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.556196689605713, |
|
"rewards/margins": 4.605846881866455, |
|
"rewards/margins_max": 6.096743106842041, |
|
"rewards/margins_min": 3.1149520874023438, |
|
"rewards/margins_std": 2.1084442138671875, |
|
"rewards/rejected": -7.162044525146484, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 4.543083572509446, |
|
"learning_rate": 3.984488950797678e-07, |
|
"logits/chosen": -0.19994431734085083, |
|
"logits/rejected": 0.6510161757469177, |
|
"logps/chosen": -450.4979553222656, |
|
"logps/rejected": -926.5679931640625, |
|
"loss": 0.1354, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4891421794891357, |
|
"rewards/margins": 4.708044052124023, |
|
"rewards/margins_max": 7.425878047943115, |
|
"rewards/margins_min": 1.9902098178863525, |
|
"rewards/margins_std": 3.8435981273651123, |
|
"rewards/rejected": -7.197185516357422, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": -0.25537678599357605, |
|
"eval_logits/rejected": -0.16171453893184662, |
|
"eval_logps/chosen": -661.367431640625, |
|
"eval_logps/rejected": -714.6080322265625, |
|
"eval_loss": 0.8081530928611755, |
|
"eval_rewards/accuracies": 0.6190476417541504, |
|
"eval_rewards/chosen": -3.155320167541504, |
|
"eval_rewards/margins": 0.6289510130882263, |
|
"eval_rewards/margins_max": 4.081821918487549, |
|
"eval_rewards/margins_min": -2.2017109394073486, |
|
"eval_rewards/margins_std": 2.03205943107605, |
|
"eval_rewards/rejected": -3.784270763397217, |
|
"eval_runtime": 416.2564, |
|
"eval_samples_per_second": 9.609, |
|
"eval_steps_per_second": 0.151, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 11.627659490001143, |
|
"learning_rate": 3.9323332709408904e-07, |
|
"logits/chosen": -0.09876732528209686, |
|
"logits/rejected": 1.3991271257400513, |
|
"logps/chosen": -600.6998291015625, |
|
"logps/rejected": -968.8531494140625, |
|
"loss": 0.1308, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.3052432537078857, |
|
"rewards/margins": 4.294064998626709, |
|
"rewards/margins_max": 6.973275184631348, |
|
"rewards/margins_min": 1.614854097366333, |
|
"rewards/margins_std": 3.788975954055786, |
|
"rewards/rejected": -7.599307060241699, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 8.873005540995397, |
|
"learning_rate": 3.879232514440227e-07, |
|
"logits/chosen": -0.3379233479499817, |
|
"logits/rejected": 0.6603206992149353, |
|
"logps/chosen": -618.7060546875, |
|
"logps/rejected": -1049.278076171875, |
|
"loss": 0.1475, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.1586403846740723, |
|
"rewards/margins": 4.834142208099365, |
|
"rewards/margins_max": 6.6787214279174805, |
|
"rewards/margins_min": 2.989562511444092, |
|
"rewards/margins_std": 2.6086299419403076, |
|
"rewards/rejected": -7.992783546447754, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 17.879342011641224, |
|
"learning_rate": 3.825221718033129e-07, |
|
"logits/chosen": 0.0034618079662323, |
|
"logits/rejected": 0.864820122718811, |
|
"logps/chosen": -471.9354553222656, |
|
"logps/rejected": -985.2346801757812, |
|
"loss": 0.1082, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.823118209838867, |
|
"rewards/margins": 5.269505023956299, |
|
"rewards/margins_max": 8.90275764465332, |
|
"rewards/margins_min": 1.6362518072128296, |
|
"rewards/margins_std": 5.138195991516113, |
|
"rewards/rejected": -8.092622756958008, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 6.390466873902363, |
|
"learning_rate": 3.7703365189160746e-07, |
|
"logits/chosen": -0.07338769733905792, |
|
"logits/rejected": 1.4749701023101807, |
|
"logps/chosen": -539.89697265625, |
|
"logps/rejected": -1210.6910400390625, |
|
"loss": 0.089, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.919481039047241, |
|
"rewards/margins": 7.211228370666504, |
|
"rewards/margins_max": 11.77415943145752, |
|
"rewards/margins_min": 2.6482949256896973, |
|
"rewards/margins_std": 6.452960968017578, |
|
"rewards/rejected": -10.130708694458008, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 12.554873275869042, |
|
"learning_rate": 3.714613131230587e-07, |
|
"logits/chosen": -0.22135767340660095, |
|
"logits/rejected": 1.1000282764434814, |
|
"logps/chosen": -720.9986572265625, |
|
"logps/rejected": -1223.421630859375, |
|
"loss": 0.1223, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.7738468647003174, |
|
"rewards/margins": 5.606228828430176, |
|
"rewards/margins_max": 8.233736038208008, |
|
"rewards/margins_min": 2.9787204265594482, |
|
"rewards/margins_std": 3.715857744216919, |
|
"rewards/rejected": -9.380073547363281, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 40.923616793220184, |
|
"learning_rate": 3.6580883221685533e-07, |
|
"logits/chosen": -0.0870949998497963, |
|
"logits/rejected": 1.078148603439331, |
|
"logps/chosen": -505.99774169921875, |
|
"logps/rejected": -1176.008544921875, |
|
"loss": 0.0862, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.822312831878662, |
|
"rewards/margins": 5.737250328063965, |
|
"rewards/margins_max": 8.857365608215332, |
|
"rewards/margins_min": 2.6171350479125977, |
|
"rewards/margins_std": 4.412509918212891, |
|
"rewards/rejected": -8.559562683105469, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.377000403316867, |
|
"learning_rate": 3.6007993877126386e-07, |
|
"logits/chosen": 0.25743845105171204, |
|
"logits/rejected": 2.0459682941436768, |
|
"logps/chosen": -640.0938110351562, |
|
"logps/rejected": -1272.0159912109375, |
|
"loss": 0.1269, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.934041976928711, |
|
"rewards/margins": 6.4811530113220215, |
|
"rewards/margins_max": 10.410442352294922, |
|
"rewards/margins_min": 2.5518646240234375, |
|
"rewards/margins_std": 5.556853294372559, |
|
"rewards/rejected": -10.415196418762207, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 6.765929979770598, |
|
"learning_rate": 3.5427841280277937e-07, |
|
"logits/chosen": 0.19738076627254486, |
|
"logits/rejected": 1.5706841945648193, |
|
"logps/chosen": -643.2400512695312, |
|
"logps/rejected": -1103.7618408203125, |
|
"loss": 0.1024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.439357280731201, |
|
"rewards/margins": 4.518318176269531, |
|
"rewards/margins_max": 6.311240196228027, |
|
"rewards/margins_min": 2.725395441055298, |
|
"rewards/margins_std": 2.5355746746063232, |
|
"rewards/rejected": -7.957674980163574, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.3572788749229394, |
|
"learning_rate": 3.484080822520096e-07, |
|
"logits/chosen": 0.4655560553073883, |
|
"logits/rejected": 1.286608099937439, |
|
"logps/chosen": -555.6957397460938, |
|
"logps/rejected": -1019.0916748046875, |
|
"loss": 0.1491, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.497023820877075, |
|
"rewards/margins": 4.476337432861328, |
|
"rewards/margins_max": 6.756206512451172, |
|
"rewards/margins_min": 2.1964690685272217, |
|
"rewards/margins_std": 3.2242209911346436, |
|
"rewards/rejected": -7.973361968994141, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 8.25918903118385, |
|
"learning_rate": 3.4247282045793797e-07, |
|
"logits/chosen": 0.2085554599761963, |
|
"logits/rejected": 1.3560742139816284, |
|
"logps/chosen": -595.1603393554688, |
|
"logps/rejected": -1199.1165771484375, |
|
"loss": 0.1327, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.877821445465088, |
|
"rewards/margins": 6.124663352966309, |
|
"rewards/margins_max": 9.755376815795898, |
|
"rewards/margins_min": 2.493950843811035, |
|
"rewards/margins_std": 5.134603500366211, |
|
"rewards/rejected": -10.002485275268555, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_logits/chosen": 0.017259376123547554, |
|
"eval_logits/rejected": 0.1599506437778473, |
|
"eval_logps/chosen": -731.00927734375, |
|
"eval_logps/rejected": -798.1055908203125, |
|
"eval_loss": 0.8436357378959656, |
|
"eval_rewards/accuracies": 0.6190476417541504, |
|
"eval_rewards/chosen": -3.851738452911377, |
|
"eval_rewards/margins": 0.7675079107284546, |
|
"eval_rewards/margins_max": 4.83132266998291, |
|
"eval_rewards/margins_min": -2.431659460067749, |
|
"eval_rewards/margins_std": 2.352627992630005, |
|
"eval_rewards/rejected": -4.619246482849121, |
|
"eval_runtime": 415.8421, |
|
"eval_samples_per_second": 9.619, |
|
"eval_steps_per_second": 0.151, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 13.982869383101132, |
|
"learning_rate": 3.3647654360223144e-07, |
|
"logits/chosen": -0.18186531960964203, |
|
"logits/rejected": 1.947683572769165, |
|
"logps/chosen": -636.12548828125, |
|
"logps/rejected": -1468.92333984375, |
|
"loss": 0.08, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.338965654373169, |
|
"rewards/margins": 8.887590408325195, |
|
"rewards/margins_max": 12.813148498535156, |
|
"rewards/margins_min": 4.962031364440918, |
|
"rewards/margins_std": 5.551577568054199, |
|
"rewards/rejected": -12.226556777954102, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 24.68214704261548, |
|
"learning_rate": 3.30423208125281e-07, |
|
"logits/chosen": -0.13235849142074585, |
|
"logits/rejected": 1.7915821075439453, |
|
"logps/chosen": -697.5199584960938, |
|
"logps/rejected": -1485.5936279296875, |
|
"loss": 0.0765, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.4117112159729004, |
|
"rewards/margins": 8.921293258666992, |
|
"rewards/margins_max": 12.249357223510742, |
|
"rewards/margins_min": 5.593228340148926, |
|
"rewards/margins_std": 4.7065935134887695, |
|
"rewards/rejected": -12.333003044128418, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 10.905617995495655, |
|
"learning_rate": 3.2431680811567833e-07, |
|
"logits/chosen": -0.12053610384464264, |
|
"logits/rejected": 1.8949730396270752, |
|
"logps/chosen": -630.9464111328125, |
|
"logps/rejected": -1220.925048828125, |
|
"loss": 0.1229, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.5450587272644043, |
|
"rewards/margins": 6.405971527099609, |
|
"rewards/margins_max": 10.655710220336914, |
|
"rewards/margins_min": 2.1562342643737793, |
|
"rewards/margins_std": 6.010036945343018, |
|
"rewards/rejected": -9.951030731201172, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 10.94150157360822, |
|
"learning_rate": 3.1816137267485136e-07, |
|
"logits/chosen": 0.027946263551712036, |
|
"logits/rejected": 1.485925555229187, |
|
"logps/chosen": -646.646728515625, |
|
"logps/rejected": -1238.3758544921875, |
|
"loss": 0.1477, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.6477348804473877, |
|
"rewards/margins": 6.220660209655762, |
|
"rewards/margins_max": 9.265599250793457, |
|
"rewards/margins_min": 3.1757211685180664, |
|
"rewards/margins_std": 4.306193828582764, |
|
"rewards/rejected": -9.86839485168457, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 17.595942677326722, |
|
"learning_rate": 3.1196096325859815e-07, |
|
"logits/chosen": -0.05433236435055733, |
|
"logits/rejected": 2.2038755416870117, |
|
"logps/chosen": -578.5730590820312, |
|
"logps/rejected": -1498.58203125, |
|
"loss": 0.1156, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.005871534347534, |
|
"rewards/margins": 9.676332473754883, |
|
"rewards/margins_max": 15.599041938781738, |
|
"rewards/margins_min": 3.753622531890869, |
|
"rewards/margins_std": 8.3759765625, |
|
"rewards/rejected": -12.68220329284668, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 7.356684331269297, |
|
"learning_rate": 3.057196709972727e-07, |
|
"logits/chosen": 0.11046739667654037, |
|
"logits/rejected": 2.175269365310669, |
|
"logps/chosen": -674.2919921875, |
|
"logps/rejected": -1267.6500244140625, |
|
"loss": 0.0959, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.1146275997161865, |
|
"rewards/margins": 7.452083587646484, |
|
"rewards/margins_max": 10.800088882446289, |
|
"rewards/margins_min": 4.104078769683838, |
|
"rewards/margins_std": 4.734793663024902, |
|
"rewards/rejected": -10.56671142578125, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 7.038311259187171, |
|
"learning_rate": 2.9944161399639086e-07, |
|
"logits/chosen": 0.21353694796562195, |
|
"logits/rejected": 1.7908731698989868, |
|
"logps/chosen": -616.1519165039062, |
|
"logps/rejected": -1157.595947265625, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.5106310844421387, |
|
"rewards/margins": 5.999436855316162, |
|
"rewards/margins_max": 8.261363983154297, |
|
"rewards/margins_min": 3.7375106811523438, |
|
"rewards/margins_std": 3.198847532272339, |
|
"rewards/rejected": -9.510068893432617, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.3985205014158293, |
|
"learning_rate": 2.9313093461943824e-07, |
|
"logits/chosen": 0.07152876257896423, |
|
"logits/rejected": 1.9080642461776733, |
|
"logps/chosen": -658.859619140625, |
|
"logps/rejected": -1418.1920166015625, |
|
"loss": 0.0643, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.6242880821228027, |
|
"rewards/margins": 8.017306327819824, |
|
"rewards/margins_max": 11.628385543823242, |
|
"rewards/margins_min": 4.40622615814209, |
|
"rewards/margins_std": 5.106837272644043, |
|
"rewards/rejected": -11.641593933105469, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 12.970507914933444, |
|
"learning_rate": 2.8679179675467104e-07, |
|
"logits/chosen": 0.5070677995681763, |
|
"logits/rejected": 2.8454136848449707, |
|
"logps/chosen": -661.779296875, |
|
"logps/rejected": -1588.948974609375, |
|
"loss": 0.0704, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.405068397521973, |
|
"rewards/margins": 9.361727714538574, |
|
"rewards/margins_max": 15.706764221191406, |
|
"rewards/margins_min": 3.016690731048584, |
|
"rewards/margins_std": 8.973237037658691, |
|
"rewards/rejected": -13.766797065734863, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 25.37176614242638, |
|
"learning_rate": 2.80428383067716e-07, |
|
"logits/chosen": -0.056868601590394974, |
|
"logits/rejected": 2.1195578575134277, |
|
"logps/chosen": -643.5035400390625, |
|
"logps/rejected": -1405.5491943359375, |
|
"loss": 0.0777, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.8768184185028076, |
|
"rewards/margins": 7.676672458648682, |
|
"rewards/margins_max": 11.38581657409668, |
|
"rewards/margins_min": 3.967529296875, |
|
"rewards/margins_std": 5.245521545410156, |
|
"rewards/rejected": -11.553489685058594, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 0.4162614345550537, |
|
"eval_logits/rejected": 0.6300503015518188, |
|
"eval_logps/chosen": -840.1605224609375, |
|
"eval_logps/rejected": -929.0051879882812, |
|
"eval_loss": 0.9893194437026978, |
|
"eval_rewards/accuracies": 0.6190476417541504, |
|
"eval_rewards/chosen": -4.943249225616455, |
|
"eval_rewards/margins": 0.9849926233291626, |
|
"eval_rewards/margins_max": 6.353243827819824, |
|
"eval_rewards/margins_min": -3.295872688293457, |
|
"eval_rewards/margins_std": 3.1250360012054443, |
|
"eval_rewards/rejected": -5.9282426834106445, |
|
"eval_runtime": 421.7747, |
|
"eval_samples_per_second": 9.484, |
|
"eval_steps_per_second": 0.149, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 8.759540837366416, |
|
"learning_rate": 2.7404489224177973e-07, |
|
"logits/chosen": 0.6560094356536865, |
|
"logits/rejected": 3.2553603649139404, |
|
"logps/chosen": -783.5775756835938, |
|
"logps/rejected": -1650.650146484375, |
|
"loss": 0.1101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.931197166442871, |
|
"rewards/margins": 8.854988098144531, |
|
"rewards/margins_max": 12.33712100982666, |
|
"rewards/margins_min": 5.372857093811035, |
|
"rewards/margins_std": 4.924478054046631, |
|
"rewards/rejected": -13.786186218261719, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 53.838974553307395, |
|
"learning_rate": 2.676455362072894e-07, |
|
"logits/chosen": 0.9320627450942993, |
|
"logits/rejected": 3.438016414642334, |
|
"logps/chosen": -699.7535400390625, |
|
"logps/rejected": -1655.8385009765625, |
|
"loss": 0.0852, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.045767307281494, |
|
"rewards/margins": 9.726736068725586, |
|
"rewards/margins_max": 13.513631820678711, |
|
"rewards/margins_min": 5.939839839935303, |
|
"rewards/margins_std": 5.355479717254639, |
|
"rewards/rejected": -14.772501945495605, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.8391615669250567, |
|
"learning_rate": 2.612345373627937e-07, |
|
"logits/chosen": 0.2621687650680542, |
|
"logits/rejected": 1.9230273962020874, |
|
"logps/chosen": -639.4342041015625, |
|
"logps/rejected": -1445.03271484375, |
|
"loss": 0.1804, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.050388336181641, |
|
"rewards/margins": 8.062755584716797, |
|
"rewards/margins_max": 11.662395477294922, |
|
"rewards/margins_min": 4.4631171226501465, |
|
"rewards/margins_std": 5.090658664703369, |
|
"rewards/rejected": -12.113143920898438, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 18.77671464276547, |
|
"learning_rate": 2.54816125788955e-07, |
|
"logits/chosen": 0.5534690022468567, |
|
"logits/rejected": 2.526615858078003, |
|
"logps/chosen": -709.9898681640625, |
|
"logps/rejected": -1459.970947265625, |
|
"loss": 0.1361, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.665217399597168, |
|
"rewards/margins": 7.5482282638549805, |
|
"rewards/margins_max": 12.083941459655762, |
|
"rewards/margins_min": 3.0125153064727783, |
|
"rewards/margins_std": 6.414466857910156, |
|
"rewards/rejected": -12.213445663452148, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 6.37822813578148, |
|
"learning_rate": 2.4839453645747467e-07, |
|
"logits/chosen": 0.2104567587375641, |
|
"logits/rejected": 1.8120098114013672, |
|
"logps/chosen": -643.4108276367188, |
|
"logps/rejected": -1417.44921875, |
|
"loss": 0.1312, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.7755866050720215, |
|
"rewards/margins": 8.031917572021484, |
|
"rewards/margins_max": 12.62381362915039, |
|
"rewards/margins_min": 3.440018892288208, |
|
"rewards/margins_std": 6.493924140930176, |
|
"rewards/rejected": -11.807502746582031, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 16.8248388008373, |
|
"learning_rate": 2.4197400643678987e-07, |
|
"logits/chosen": 0.24539189040660858, |
|
"logits/rejected": 1.6847679615020752, |
|
"logps/chosen": -639.7948608398438, |
|
"logps/rejected": -1011.7283935546875, |
|
"loss": 0.0821, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.634861707687378, |
|
"rewards/margins": 4.383325576782227, |
|
"rewards/margins_max": 7.218289852142334, |
|
"rewards/margins_min": 1.5483614206314087, |
|
"rewards/margins_std": 4.009244918823242, |
|
"rewards/rejected": -8.018186569213867, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 7.954736320138308, |
|
"learning_rate": 2.3555877209638726e-07, |
|
"logits/chosen": 0.0611066035926342, |
|
"logits/rejected": 1.33302640914917, |
|
"logps/chosen": -672.7412719726562, |
|
"logps/rejected": -1782.3125, |
|
"loss": 0.0906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.4519970417022705, |
|
"rewards/margins": 11.562549591064453, |
|
"rewards/margins_max": 20.07329559326172, |
|
"rewards/margins_min": 3.0518016815185547, |
|
"rewards/margins_std": 12.036015510559082, |
|
"rewards/rejected": -15.014546394348145, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 11.05108228058454, |
|
"learning_rate": 2.2915306631157817e-07, |
|
"logits/chosen": 0.2885664105415344, |
|
"logits/rejected": 2.206385612487793, |
|
"logps/chosen": -648.3999633789062, |
|
"logps/rejected": -1299.401123046875, |
|
"loss": 0.1085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.7839324474334717, |
|
"rewards/margins": 7.008673191070557, |
|
"rewards/margins_max": 9.869766235351562, |
|
"rewards/margins_min": 4.147579669952393, |
|
"rewards/margins_std": 4.046196937561035, |
|
"rewards/rejected": -10.792604446411133, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 32.49887802957626, |
|
"learning_rate": 2.2276111567057887e-07, |
|
"logits/chosen": 0.22940261662006378, |
|
"logits/rejected": 1.6958719491958618, |
|
"logps/chosen": -593.3724365234375, |
|
"logps/rejected": -1174.9674072265625, |
|
"loss": 0.1111, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.6294262409210205, |
|
"rewards/margins": 5.80316162109375, |
|
"rewards/margins_max": 8.796818733215332, |
|
"rewards/margins_min": 2.8095040321350098, |
|
"rewards/margins_std": 4.233671188354492, |
|
"rewards/rejected": -9.432588577270508, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 7.06163362995566, |
|
"learning_rate": 2.1638713768573936e-07, |
|
"logits/chosen": 0.06335971504449844, |
|
"logits/rejected": 1.4285287857055664, |
|
"logps/chosen": -595.5140380859375, |
|
"logps/rejected": -1295.677490234375, |
|
"loss": 0.0638, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.581036329269409, |
|
"rewards/margins": 7.286231994628906, |
|
"rewards/margins_max": 11.22960090637207, |
|
"rewards/margins_min": 3.342862606048584, |
|
"rewards/margins_std": 5.576765537261963, |
|
"rewards/rejected": -10.867268562316895, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_logits/chosen": 0.12438549101352692, |
|
"eval_logits/rejected": 0.28890377283096313, |
|
"eval_logps/chosen": -732.38525390625, |
|
"eval_logps/rejected": -799.7516479492188, |
|
"eval_loss": 0.8086485862731934, |
|
"eval_rewards/accuracies": 0.6190476417541504, |
|
"eval_rewards/chosen": -3.8654978275299072, |
|
"eval_rewards/margins": 0.7702099680900574, |
|
"eval_rewards/margins_max": 4.502103328704834, |
|
"eval_rewards/margins_min": -2.291940450668335, |
|
"eval_rewards/margins_std": 2.2426791191101074, |
|
"eval_rewards/rejected": -4.635707378387451, |
|
"eval_runtime": 417.0386, |
|
"eval_samples_per_second": 9.591, |
|
"eval_steps_per_second": 0.151, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 5.3968517354258125, |
|
"learning_rate": 2.100353380107609e-07, |
|
"logits/chosen": 0.23273587226867676, |
|
"logits/rejected": 1.9462811946868896, |
|
"logps/chosen": -776.3011474609375, |
|
"logps/rejected": -1441.837158203125, |
|
"loss": 0.1, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.670359134674072, |
|
"rewards/margins": 7.1968560218811035, |
|
"rewards/margins_max": 11.221755981445312, |
|
"rewards/margins_min": 3.171954393386841, |
|
"rewards/margins_std": 5.6920695304870605, |
|
"rewards/rejected": -11.86721420288086, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 15.03932252297939, |
|
"learning_rate": 2.0370990766573698e-07, |
|
"logits/chosen": -0.10733046382665634, |
|
"logits/rejected": 1.8043702840805054, |
|
"logps/chosen": -650.6616821289062, |
|
"logps/rejected": -1616.010986328125, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.1738812923431396, |
|
"rewards/margins": 10.541234970092773, |
|
"rewards/margins_max": 15.04127311706543, |
|
"rewards/margins_min": 6.041195392608643, |
|
"rewards/margins_std": 6.364017009735107, |
|
"rewards/rejected": -13.715115547180176, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 31.097886733723477, |
|
"learning_rate": 1.974150202718513e-07, |
|
"logits/chosen": 0.08039845526218414, |
|
"logits/rejected": 2.343336582183838, |
|
"logps/chosen": -534.8485717773438, |
|
"logps/rejected": -1418.825439453125, |
|
"loss": 0.0511, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.1201188564300537, |
|
"rewards/margins": 8.804471969604492, |
|
"rewards/margins_max": 12.42898178100586, |
|
"rewards/margins_min": 5.179962635040283, |
|
"rewards/margins_std": 5.12583065032959, |
|
"rewards/rejected": -11.924591064453125, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 16.60986174297272, |
|
"learning_rate": 1.9115482929755445e-07, |
|
"logits/chosen": 0.24223566055297852, |
|
"logits/rejected": 1.6932157278060913, |
|
"logps/chosen": -570.802978515625, |
|
"logps/rejected": -1331.71533203125, |
|
"loss": 0.0856, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.493419647216797, |
|
"rewards/margins": 7.847373962402344, |
|
"rewards/margins_max": 11.789865493774414, |
|
"rewards/margins_min": 3.9048819541931152, |
|
"rewards/margins_std": 5.575525760650635, |
|
"rewards/rejected": -11.34079360961914, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 13.502668975247548, |
|
"learning_rate": 1.8493346531803887e-07, |
|
"logits/chosen": 0.48027992248535156, |
|
"logits/rejected": 2.202148675918579, |
|
"logps/chosen": -596.4915161132812, |
|
"logps/rejected": -1282.6644287109375, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.9702115058898926, |
|
"rewards/margins": 6.948336124420166, |
|
"rewards/margins_max": 9.718558311462402, |
|
"rewards/margins_min": 4.178112506866455, |
|
"rewards/margins_std": 3.9176864624023438, |
|
"rewards/rejected": -10.918546676635742, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 23.139494234389517, |
|
"learning_rate": 1.7875503328981807e-07, |
|
"logits/chosen": 0.3601033091545105, |
|
"logits/rejected": 2.474608898162842, |
|
"logps/chosen": -652.9142456054688, |
|
"logps/rejected": -1604.696533203125, |
|
"loss": 0.0605, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.9101157188415527, |
|
"rewards/margins": 9.86131477355957, |
|
"rewards/margins_max": 14.181074142456055, |
|
"rewards/margins_min": 5.541555881500244, |
|
"rewards/margins_std": 6.1090617179870605, |
|
"rewards/rejected": -13.771429061889648, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 14.349796836286524, |
|
"learning_rate": 1.7262360984221006e-07, |
|
"logits/chosen": 0.012769157998263836, |
|
"logits/rejected": 1.9421314001083374, |
|
"logps/chosen": -664.3881225585938, |
|
"logps/rejected": -1434.8948974609375, |
|
"loss": 0.1274, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.80120849609375, |
|
"rewards/margins": 8.072778701782227, |
|
"rewards/margins_max": 11.85603141784668, |
|
"rewards/margins_min": 4.289526462554932, |
|
"rewards/margins_std": 5.350326061248779, |
|
"rewards/rejected": -11.873987197875977, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 15.485607186999017, |
|
"learning_rate": 1.6654324058751175e-07, |
|
"logits/chosen": 0.3775918483734131, |
|
"logits/rejected": 1.973515510559082, |
|
"logps/chosen": -713.2658081054688, |
|
"logps/rejected": -1631.826904296875, |
|
"loss": 0.0662, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.8164262771606445, |
|
"rewards/margins": 9.047931671142578, |
|
"rewards/margins_max": 12.981298446655273, |
|
"rewards/margins_min": 5.114563941955566, |
|
"rewards/margins_std": 5.562621116638184, |
|
"rewards/rejected": -13.864356994628906, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 15.607832046954224, |
|
"learning_rate": 1.6051793745163812e-07, |
|
"logits/chosen": 0.6472679376602173, |
|
"logits/rejected": 2.5574803352355957, |
|
"logps/chosen": -689.5281982421875, |
|
"logps/rejected": -1642.12109375, |
|
"loss": 0.1011, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.642444610595703, |
|
"rewards/margins": 9.717334747314453, |
|
"rewards/margins_max": 15.204099655151367, |
|
"rewards/margins_min": 4.230566501617432, |
|
"rewards/margins_std": 7.75946044921875, |
|
"rewards/rejected": -14.359777450561523, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 45.565136882193066, |
|
"learning_rate": 1.5455167602698915e-07, |
|
"logits/chosen": 0.06020700931549072, |
|
"logits/rejected": 2.2921700477600098, |
|
"logps/chosen": -727.0872192382812, |
|
"logps/rejected": -1482.33837890625, |
|
"loss": 0.0997, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.481110572814941, |
|
"rewards/margins": 7.914282321929932, |
|
"rewards/margins_max": 10.535211563110352, |
|
"rewards/margins_min": 5.293350696563721, |
|
"rewards/margins_std": 3.7065558433532715, |
|
"rewards/rejected": -12.395392417907715, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_logits/chosen": 0.20550121366977692, |
|
"eval_logits/rejected": 0.3917555809020996, |
|
"eval_logps/chosen": -789.8953857421875, |
|
"eval_logps/rejected": -866.7603149414062, |
|
"eval_loss": 0.8639366030693054, |
|
"eval_rewards/accuracies": 0.6269841194152832, |
|
"eval_rewards/chosen": -4.4405999183654785, |
|
"eval_rewards/margins": 0.8651944398880005, |
|
"eval_rewards/margins_max": 5.159237861633301, |
|
"eval_rewards/margins_min": -2.6377525329589844, |
|
"eval_rewards/margins_std": 2.5658202171325684, |
|
"eval_rewards/rejected": -5.305793762207031, |
|
"eval_runtime": 419.7425, |
|
"eval_samples_per_second": 9.53, |
|
"eval_steps_per_second": 0.15, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 7.254262825774854, |
|
"learning_rate": 1.4864839294928924e-07, |
|
"logits/chosen": 0.2960719168186188, |
|
"logits/rejected": 2.519636392593384, |
|
"logps/chosen": -667.0858154296875, |
|
"logps/rejected": -2054.51953125, |
|
"loss": 0.1092, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.099366664886475, |
|
"rewards/margins": 13.694231033325195, |
|
"rewards/margins_max": 20.22653579711914, |
|
"rewards/margins_min": 7.161923408508301, |
|
"rewards/margins_std": 9.238077163696289, |
|
"rewards/rejected": -17.793596267700195, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 19.00706315113973, |
|
"learning_rate": 1.428119833001315e-07, |
|
"logits/chosen": 0.011763498187065125, |
|
"logits/rejected": 2.5436980724334717, |
|
"logps/chosen": -683.8145751953125, |
|
"logps/rejected": -1476.839111328125, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.7376608848571777, |
|
"rewards/margins": 9.0157470703125, |
|
"rewards/margins_max": 13.42829418182373, |
|
"rewards/margins_min": 4.603199481964111, |
|
"rewards/margins_std": 6.2402849197387695, |
|
"rewards/rejected": -12.75340747833252, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.449628285920275, |
|
"learning_rate": 1.370462980369401e-07, |
|
"logits/chosen": 0.11705155670642853, |
|
"logits/rejected": 1.5357266664505005, |
|
"logps/chosen": -766.4974365234375, |
|
"logps/rejected": -1289.979248046875, |
|
"loss": 0.0636, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.693875312805176, |
|
"rewards/margins": 5.769114017486572, |
|
"rewards/margins_max": 7.741427421569824, |
|
"rewards/margins_min": 3.796800136566162, |
|
"rewards/margins_std": 2.7892730236053467, |
|
"rewards/rejected": -10.462987899780273, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 11.213992357762015, |
|
"learning_rate": 1.3135514145204606e-07, |
|
"logits/chosen": 0.21615874767303467, |
|
"logits/rejected": 2.0779476165771484, |
|
"logps/chosen": -605.4188232421875, |
|
"logps/rejected": -1608.945556640625, |
|
"loss": 0.0971, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.8029799461364746, |
|
"rewards/margins": 10.208128929138184, |
|
"rewards/margins_max": 17.389694213867188, |
|
"rewards/margins_min": 3.0265650749206543, |
|
"rewards/margins_std": 10.15626335144043, |
|
"rewards/rejected": -14.011110305786133, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 4.371361045173521, |
|
"learning_rate": 1.257422686625539e-07, |
|
"logits/chosen": 0.16180220246315002, |
|
"logits/rejected": 2.055144786834717, |
|
"logps/chosen": -682.2508544921875, |
|
"logps/rejected": -1589.9964599609375, |
|
"loss": 0.0906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.9322009086608887, |
|
"rewards/margins": 9.378369331359863, |
|
"rewards/margins_max": 14.781808853149414, |
|
"rewards/margins_min": 3.9749279022216797, |
|
"rewards/margins_std": 7.641619682312012, |
|
"rewards/rejected": -13.310567855834961, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 3.4893980542106102, |
|
"learning_rate": 1.2021138313265444e-07, |
|
"logits/chosen": 0.11532745510339737, |
|
"logits/rejected": 1.866121530532837, |
|
"logps/chosen": -634.554931640625, |
|
"logps/rejected": -1674.252197265625, |
|
"loss": 0.1202, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.853950023651123, |
|
"rewards/margins": 10.867055892944336, |
|
"rewards/margins_max": 18.760677337646484, |
|
"rewards/margins_min": 2.97343373298645, |
|
"rewards/margins_std": 11.163267135620117, |
|
"rewards/rejected": -14.7210054397583, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 14.447489915734623, |
|
"learning_rate": 1.1476613423001974e-07, |
|
"logits/chosen": 0.17886893451213837, |
|
"logits/rejected": 1.89533269405365, |
|
"logps/chosen": -677.0606079101562, |
|
"logps/rejected": -1261.5345458984375, |
|
"loss": 0.0658, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.194746971130371, |
|
"rewards/margins": 5.769103050231934, |
|
"rewards/margins_max": 7.97817325592041, |
|
"rewards/margins_min": 3.560032606124878, |
|
"rewards/margins_std": 3.1240971088409424, |
|
"rewards/rejected": -9.963850021362305, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 3.9798658979228856, |
|
"learning_rate": 1.0941011481789042e-07, |
|
"logits/chosen": 0.034214410930871964, |
|
"logits/rejected": 2.867272138595581, |
|
"logps/chosen": -702.2564697265625, |
|
"logps/rejected": -1857.8795166015625, |
|
"loss": 0.0935, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.056910037994385, |
|
"rewards/margins": 11.857443809509277, |
|
"rewards/margins_max": 18.30853843688965, |
|
"rewards/margins_min": 5.406346797943115, |
|
"rewards/margins_std": 9.123228073120117, |
|
"rewards/rejected": -15.91435432434082, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 27.849338662173917, |
|
"learning_rate": 1.041468588844476e-07, |
|
"logits/chosen": 0.4994427263736725, |
|
"logits/rejected": 2.539013385772705, |
|
"logps/chosen": -599.5453491210938, |
|
"logps/rejected": -1590.7774658203125, |
|
"loss": 0.0699, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.911860704421997, |
|
"rewards/margins": 10.174264907836914, |
|
"rewards/margins_max": 15.016085624694824, |
|
"rewards/margins_min": 5.332446098327637, |
|
"rewards/margins_std": 6.8473663330078125, |
|
"rewards/rejected": -14.086126327514648, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 4.2150247037639375, |
|
"learning_rate": 9.897983921102954e-08, |
|
"logits/chosen": -0.2390742003917694, |
|
"logits/rejected": 2.2101035118103027, |
|
"logps/chosen": -670.8737182617188, |
|
"logps/rejected": -1509.370361328125, |
|
"loss": 0.0708, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.90120005607605, |
|
"rewards/margins": 8.870689392089844, |
|
"rewards/margins_max": 11.843083381652832, |
|
"rewards/margins_min": 5.89829683303833, |
|
"rewards/margins_std": 4.203598976135254, |
|
"rewards/rejected": -12.771888732910156, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_logits/chosen": 0.21985697746276855, |
|
"eval_logits/rejected": 0.4062546491622925, |
|
"eval_logps/chosen": -791.2946166992188, |
|
"eval_logps/rejected": -865.1302490234375, |
|
"eval_loss": 0.8618067502975464, |
|
"eval_rewards/accuracies": 0.6230158805847168, |
|
"eval_rewards/chosen": -4.454591751098633, |
|
"eval_rewards/margins": 0.8349014520645142, |
|
"eval_rewards/margins_max": 5.060412406921387, |
|
"eval_rewards/margins_min": -2.622389078140259, |
|
"eval_rewards/margins_std": 2.52128529548645, |
|
"eval_rewards/rejected": -5.289493083953857, |
|
"eval_runtime": 419.5466, |
|
"eval_samples_per_second": 9.534, |
|
"eval_steps_per_second": 0.15, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 12.140211164365056, |
|
"learning_rate": 9.391246508073433e-08, |
|
"logits/chosen": 0.13034725189208984, |
|
"logits/rejected": 2.0794267654418945, |
|
"logps/chosen": -724.4019775390625, |
|
"logps/rejected": -1571.5511474609375, |
|
"loss": 0.0777, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.304561614990234, |
|
"rewards/margins": 8.91108512878418, |
|
"rewards/margins_max": 12.622480392456055, |
|
"rewards/margins_min": 5.199688911437988, |
|
"rewards/margins_std": 5.248705863952637, |
|
"rewards/rejected": -13.215646743774414, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 72.48315962813399, |
|
"learning_rate": 8.894808002892037e-08, |
|
"logits/chosen": 0.19714145362377167, |
|
"logits/rejected": 2.8781895637512207, |
|
"logps/chosen": -689.0614624023438, |
|
"logps/rejected": -1635.4539794921875, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.8019371032714844, |
|
"rewards/margins": 10.48505687713623, |
|
"rewards/margins_max": 15.840913772583008, |
|
"rewards/margins_min": 5.129199981689453, |
|
"rewards/margins_std": 7.5743255615234375, |
|
"rewards/rejected": -14.286993026733398, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 20.88616124929115, |
|
"learning_rate": 8.408995963708756e-08, |
|
"logits/chosen": -0.0833059698343277, |
|
"logits/rejected": 2.3186755180358887, |
|
"logps/chosen": -681.8640747070312, |
|
"logps/rejected": -1602.0863037109375, |
|
"loss": 0.0246, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.856149673461914, |
|
"rewards/margins": 9.128388404846191, |
|
"rewards/margins_max": 12.01569652557373, |
|
"rewards/margins_min": 6.241078853607178, |
|
"rewards/margins_std": 4.0832719802856445, |
|
"rewards/rejected": -12.984537124633789, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 9.834472583209813, |
|
"learning_rate": 7.934130937159508e-08, |
|
"logits/chosen": 0.17558620870113373, |
|
"logits/rejected": 2.297236442565918, |
|
"logps/chosen": -637.3060302734375, |
|
"logps/rejected": -1326.9390869140625, |
|
"loss": 0.0496, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.932767152786255, |
|
"rewards/margins": 7.218419075012207, |
|
"rewards/margins_max": 9.979570388793945, |
|
"rewards/margins_min": 4.457267761230469, |
|
"rewards/margins_std": 3.904857635498047, |
|
"rewards/rejected": -11.151185989379883, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 5.026095263611361, |
|
"learning_rate": 7.470526246864364e-08, |
|
"logits/chosen": 0.39160841703414917, |
|
"logits/rejected": 2.559542179107666, |
|
"logps/chosen": -693.7269287109375, |
|
"logps/rejected": -1849.744873046875, |
|
"loss": 0.0552, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.237768650054932, |
|
"rewards/margins": 12.45046329498291, |
|
"rewards/margins_max": 19.93360710144043, |
|
"rewards/margins_min": 4.967319488525391, |
|
"rewards/margins_std": 10.582763671875, |
|
"rewards/rejected": -16.688232421875, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.6591285293800628, |
|
"learning_rate": 7.018487786691512e-08, |
|
"logits/chosen": 0.43399763107299805, |
|
"logits/rejected": 2.060253381729126, |
|
"logps/chosen": -745.4591674804688, |
|
"logps/rejected": -1831.240478515625, |
|
"loss": 0.0678, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.024683475494385, |
|
"rewards/margins": 10.875140190124512, |
|
"rewards/margins_max": 17.002622604370117, |
|
"rewards/margins_min": 4.747661113739014, |
|
"rewards/margins_std": 8.66556453704834, |
|
"rewards/rejected": -15.899823188781738, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 4.119017563303306, |
|
"learning_rate": 6.578313818923559e-08, |
|
"logits/chosen": -0.07052882760763168, |
|
"logits/rejected": 1.8699405193328857, |
|
"logps/chosen": -909.0846557617188, |
|
"logps/rejected": -1548.6923828125, |
|
"loss": 0.0634, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.350946426391602, |
|
"rewards/margins": 7.820859432220459, |
|
"rewards/margins_max": 11.624895095825195, |
|
"rewards/margins_min": 4.016822338104248, |
|
"rewards/margins_std": 5.379720211029053, |
|
"rewards/rejected": -13.171804428100586, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 16.860241482971446, |
|
"learning_rate": 6.15029477745925e-08, |
|
"logits/chosen": 0.48959070444107056, |
|
"logits/rejected": 2.1462438106536865, |
|
"logps/chosen": -734.9025268554688, |
|
"logps/rejected": -1803.1939697265625, |
|
"loss": 0.0725, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.121659755706787, |
|
"rewards/margins": 10.34645938873291, |
|
"rewards/margins_max": 14.924234390258789, |
|
"rewards/margins_min": 5.768682479858398, |
|
"rewards/margins_std": 6.473954200744629, |
|
"rewards/rejected": -15.468118667602539, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 18.379765722708388, |
|
"learning_rate": 5.734713076180486e-08, |
|
"logits/chosen": 0.46901997923851013, |
|
"logits/rejected": 3.454606294631958, |
|
"logps/chosen": -741.1581420898438, |
|
"logps/rejected": -1905.183349609375, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.039034843444824, |
|
"rewards/margins": 12.11182975769043, |
|
"rewards/margins_max": 19.285795211791992, |
|
"rewards/margins_min": 4.937865257263184, |
|
"rewards/margins_std": 10.145517349243164, |
|
"rewards/rejected": -17.15086555480957, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 4.317359176747138, |
|
"learning_rate": 5.3318429226110875e-08, |
|
"logits/chosen": 0.19755136966705322, |
|
"logits/rejected": 2.050144672393799, |
|
"logps/chosen": -604.0868530273438, |
|
"logps/rejected": -1733.5550537109375, |
|
"loss": 0.141, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.046292304992676, |
|
"rewards/margins": 11.252501487731934, |
|
"rewards/margins_max": 16.934438705444336, |
|
"rewards/margins_min": 5.57056188583374, |
|
"rewards/margins_std": 8.03547477722168, |
|
"rewards/rejected": -15.298794746398926, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_logits/chosen": 0.3016913831233978, |
|
"eval_logits/rejected": 0.5082818865776062, |
|
"eval_logps/chosen": -832.3104858398438, |
|
"eval_logps/rejected": -915.954833984375, |
|
"eval_loss": 0.9049465656280518, |
|
"eval_rewards/accuracies": 0.6190476417541504, |
|
"eval_rewards/chosen": -4.864750385284424, |
|
"eval_rewards/margins": 0.9329892992973328, |
|
"eval_rewards/margins_max": 5.632690906524658, |
|
"eval_rewards/margins_min": -2.8439128398895264, |
|
"eval_rewards/margins_std": 2.7856106758117676, |
|
"eval_rewards/rejected": -5.7977399826049805, |
|
"eval_runtime": 414.0109, |
|
"eval_samples_per_second": 9.662, |
|
"eval_steps_per_second": 0.152, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 13.4435984156611, |
|
"learning_rate": 4.9419501369902026e-08, |
|
"logits/chosen": 0.08746049553155899, |
|
"logits/rejected": 2.6451172828674316, |
|
"logps/chosen": -771.4244384765625, |
|
"logps/rejected": -2024.484619140625, |
|
"loss": 0.1408, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.441189289093018, |
|
"rewards/margins": 13.507779121398926, |
|
"rewards/margins_max": 19.457698822021484, |
|
"rewards/margins_min": 7.557857513427734, |
|
"rewards/margins_std": 8.414458274841309, |
|
"rewards/rejected": -17.9489688873291, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.4128692999239585, |
|
"learning_rate": 4.5652919768798896e-08, |
|
"logits/chosen": 0.4677937924861908, |
|
"logits/rejected": 2.3705403804779053, |
|
"logps/chosen": -793.5311279296875, |
|
"logps/rejected": -1775.6380615234375, |
|
"loss": 0.0608, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.224045276641846, |
|
"rewards/margins": 10.134596824645996, |
|
"rewards/margins_max": 15.679702758789062, |
|
"rewards/margins_min": 4.589491844177246, |
|
"rewards/margins_std": 7.8419623374938965, |
|
"rewards/rejected": -15.358640670776367, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 16.039453526164788, |
|
"learning_rate": 4.2021169674223536e-08, |
|
"logits/chosen": 0.2930324077606201, |
|
"logits/rejected": 2.399545431137085, |
|
"logps/chosen": -655.0755615234375, |
|
"logps/rejected": -1648.030029296875, |
|
"loss": 0.0511, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.887598752975464, |
|
"rewards/margins": 10.443506240844727, |
|
"rewards/margins_max": 14.695414543151855, |
|
"rewards/margins_min": 6.191596984863281, |
|
"rewards/margins_std": 6.013107776641846, |
|
"rewards/rejected": -14.331106185913086, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 26.479285274862587, |
|
"learning_rate": 3.852664737359046e-08, |
|
"logits/chosen": 0.3496669828891754, |
|
"logits/rejected": 1.97479248046875, |
|
"logps/chosen": -852.40380859375, |
|
"logps/rejected": -1573.5230712890625, |
|
"loss": 0.0768, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.772242546081543, |
|
"rewards/margins": 7.9194207191467285, |
|
"rewards/margins_max": 12.818387985229492, |
|
"rewards/margins_min": 3.0204524993896484, |
|
"rewards/margins_std": 6.928186893463135, |
|
"rewards/rejected": -13.691662788391113, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.7263792166932626, |
|
"learning_rate": 3.5171658609197824e-08, |
|
"logits/chosen": 0.1613047868013382, |
|
"logits/rejected": 2.029664993286133, |
|
"logps/chosen": -742.6275024414062, |
|
"logps/rejected": -1609.7635498046875, |
|
"loss": 0.1096, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.6217732429504395, |
|
"rewards/margins": 8.67860221862793, |
|
"rewards/margins_max": 13.895421981811523, |
|
"rewards/margins_min": 3.4617819786071777, |
|
"rewards/margins_std": 7.377697944641113, |
|
"rewards/rejected": -13.300374984741211, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 3.13150099340305, |
|
"learning_rate": 3.195841705686139e-08, |
|
"logits/chosen": 0.460742712020874, |
|
"logits/rejected": 2.694736957550049, |
|
"logps/chosen": -821.4349365234375, |
|
"logps/rejected": -1898.295654296875, |
|
"loss": 0.0821, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.314339637756348, |
|
"rewards/margins": 11.54991626739502, |
|
"rewards/margins_max": 18.18251609802246, |
|
"rewards/margins_min": 4.917316436767578, |
|
"rewards/margins_std": 9.379911422729492, |
|
"rewards/rejected": -16.864253997802734, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 16.312675595535207, |
|
"learning_rate": 2.8889042865294837e-08, |
|
"logits/chosen": 0.13087859749794006, |
|
"logits/rejected": 2.484839916229248, |
|
"logps/chosen": -702.7008056640625, |
|
"logps/rejected": -1441.55078125, |
|
"loss": 0.0469, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.257961750030518, |
|
"rewards/margins": 7.842066287994385, |
|
"rewards/margins_max": 10.642562866210938, |
|
"rewards/margins_min": 5.041568756103516, |
|
"rewards/margins_std": 3.960501194000244, |
|
"rewards/rejected": -12.100028991699219, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 9.055687386628646, |
|
"learning_rate": 2.5965561257202036e-08, |
|
"logits/chosen": 0.1169591173529625, |
|
"logits/rejected": 2.362281560897827, |
|
"logps/chosen": -763.2276611328125, |
|
"logps/rejected": -1660.2099609375, |
|
"loss": 0.0572, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.720892906188965, |
|
"rewards/margins": 9.803942680358887, |
|
"rewards/margins_max": 15.876733779907227, |
|
"rewards/margins_min": 3.731149196624756, |
|
"rewards/margins_std": 8.588226318359375, |
|
"rewards/rejected": -14.524835586547852, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 22.841895074273324, |
|
"learning_rate": 2.318990119300218e-08, |
|
"logits/chosen": 0.10627205669879913, |
|
"logits/rejected": 1.2642805576324463, |
|
"logps/chosen": -798.917724609375, |
|
"logps/rejected": -2165.9775390625, |
|
"loss": 0.0395, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.4411396980285645, |
|
"rewards/margins": 13.126449584960938, |
|
"rewards/margins_max": 20.357501983642578, |
|
"rewards/margins_min": 5.8953962326049805, |
|
"rewards/margins_std": 10.226253509521484, |
|
"rewards/rejected": -18.567590713500977, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 17.973583296727792, |
|
"learning_rate": 2.0563894098070216e-08, |
|
"logits/chosen": 0.15934190154075623, |
|
"logits/rejected": 2.1497673988342285, |
|
"logps/chosen": -712.0560302734375, |
|
"logps/rejected": -1505.4547119140625, |
|
"loss": 0.0775, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.412214756011963, |
|
"rewards/margins": 8.303590774536133, |
|
"rewards/margins_max": 12.088435173034668, |
|
"rewards/margins_min": 4.5187482833862305, |
|
"rewards/margins_std": 5.352576732635498, |
|
"rewards/rejected": -12.715806007385254, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_logits/chosen": 0.30742567777633667, |
|
"eval_logits/rejected": 0.5172090530395508, |
|
"eval_logps/chosen": -836.2312622070312, |
|
"eval_logps/rejected": -922.0319213867188, |
|
"eval_loss": 0.9049317836761475, |
|
"eval_rewards/accuracies": 0.6210317611694336, |
|
"eval_rewards/chosen": -4.903958320617676, |
|
"eval_rewards/margins": 0.9545530080795288, |
|
"eval_rewards/margins_max": 5.713037014007568, |
|
"eval_rewards/margins_min": -2.831618309020996, |
|
"eval_rewards/margins_std": 2.813220262527466, |
|
"eval_rewards/rejected": -5.858510971069336, |
|
"eval_runtime": 422.5993, |
|
"eval_samples_per_second": 9.465, |
|
"eval_steps_per_second": 0.149, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 2.7225416780438763, |
|
"learning_rate": 1.8089272654333353e-08, |
|
"logits/chosen": 0.28706851601600647, |
|
"logits/rejected": 1.9062206745147705, |
|
"logps/chosen": -866.8541259765625, |
|
"logps/rejected": -1701.005615234375, |
|
"loss": 0.0693, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.099188804626465, |
|
"rewards/margins": 9.302727699279785, |
|
"rewards/margins_max": 13.995088577270508, |
|
"rewards/margins_min": 4.61036491394043, |
|
"rewards/margins_std": 6.6360015869140625, |
|
"rewards/rejected": -14.40191650390625, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 7.492427847668467, |
|
"learning_rate": 1.5767669657019005e-08, |
|
"logits/chosen": 0.21484322845935822, |
|
"logits/rejected": 2.9490137100219727, |
|
"logps/chosen": -665.4578857421875, |
|
"logps/rejected": -1718.431640625, |
|
"loss": 0.0694, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.03716516494751, |
|
"rewards/margins": 11.106006622314453, |
|
"rewards/margins_max": 14.720375061035156, |
|
"rewards/margins_min": 7.491639137268066, |
|
"rewards/margins_std": 5.11148738861084, |
|
"rewards/rejected": -15.143171310424805, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 14.252457056430137, |
|
"learning_rate": 1.3600616937310267e-08, |
|
"logits/chosen": 0.3399500250816345, |
|
"logits/rejected": 2.5051798820495605, |
|
"logps/chosen": -776.6029663085938, |
|
"logps/rejected": -1890.706298828125, |
|
"loss": 0.0533, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.925044059753418, |
|
"rewards/margins": 11.241573333740234, |
|
"rewards/margins_max": 16.437541961669922, |
|
"rewards/margins_min": 6.045604228973389, |
|
"rewards/margins_std": 7.348209381103516, |
|
"rewards/rejected": -16.166616439819336, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 3.254929425883996, |
|
"learning_rate": 1.1589544351619047e-08, |
|
"logits/chosen": 0.8039329648017883, |
|
"logits/rejected": 3.354154109954834, |
|
"logps/chosen": -724.2069091796875, |
|
"logps/rejected": -2016.739501953125, |
|
"loss": 0.0701, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.160962104797363, |
|
"rewards/margins": 12.87867259979248, |
|
"rewards/margins_max": 20.457225799560547, |
|
"rewards/margins_min": 5.300119400024414, |
|
"rewards/margins_std": 10.717691421508789, |
|
"rewards/rejected": -18.03963279724121, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 9.743725835120221, |
|
"learning_rate": 9.735778838143749e-09, |
|
"logits/chosen": 0.17006321251392365, |
|
"logits/rejected": 3.252281904220581, |
|
"logps/chosen": -771.3798828125, |
|
"logps/rejected": -2618.41943359375, |
|
"loss": 0.0852, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.744952201843262, |
|
"rewards/margins": 18.770408630371094, |
|
"rewards/margins_max": 27.798681259155273, |
|
"rewards/margins_min": 9.742134094238281, |
|
"rewards/margins_std": 12.767908096313477, |
|
"rewards/rejected": -23.51535987854004, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 9.000437498002796, |
|
"learning_rate": 8.040543541333655e-09, |
|
"logits/chosen": 0.2970607578754425, |
|
"logits/rejected": 3.4422898292541504, |
|
"logps/chosen": -716.0152587890625, |
|
"logps/rejected": -1930.673095703125, |
|
"loss": 0.0554, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.7243733406066895, |
|
"rewards/margins": 12.035941123962402, |
|
"rewards/margins_max": 17.656423568725586, |
|
"rewards/margins_min": 6.415456295013428, |
|
"rewards/margins_std": 7.9485650062561035, |
|
"rewards/rejected": -16.760313034057617, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 16.466144409333417, |
|
"learning_rate": 6.504957004838746e-09, |
|
"logits/chosen": -0.05619863420724869, |
|
"logits/rejected": 1.9224863052368164, |
|
"logps/chosen": -841.8850708007812, |
|
"logps/rejected": -1936.7113037109375, |
|
"loss": 0.0456, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.742644309997559, |
|
"rewards/margins": 11.516191482543945, |
|
"rewards/margins_max": 15.864044189453125, |
|
"rewards/margins_min": 7.168337821960449, |
|
"rewards/margins_std": 6.148792266845703, |
|
"rewards/rejected": -16.258834838867188, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 2.654070322101592, |
|
"learning_rate": 5.130032433476483e-09, |
|
"logits/chosen": 0.3038169741630554, |
|
"logits/rejected": 2.8313422203063965, |
|
"logps/chosen": -728.2089233398438, |
|
"logps/rejected": -1743.801513671875, |
|
"loss": 0.069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.6006269454956055, |
|
"rewards/margins": 11.062047004699707, |
|
"rewards/margins_max": 16.25905418395996, |
|
"rewards/margins_min": 5.8650407791137695, |
|
"rewards/margins_std": 7.349676609039307, |
|
"rewards/rejected": -15.662673950195312, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 4.94775999947406, |
|
"learning_rate": 3.916677024702858e-09, |
|
"logits/chosen": 0.1287023425102234, |
|
"logits/rejected": 2.0298779010772705, |
|
"logps/chosen": -667.8201904296875, |
|
"logps/rejected": -1396.031005859375, |
|
"loss": 0.0567, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.1791486740112305, |
|
"rewards/margins": 7.460590362548828, |
|
"rewards/margins_max": 10.305280685424805, |
|
"rewards/margins_min": 4.615899562835693, |
|
"rewards/margins_std": 4.023000240325928, |
|
"rewards/rejected": -11.639739990234375, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 62.41949761633163, |
|
"learning_rate": 2.865691370028761e-09, |
|
"logits/chosen": 0.3163800835609436, |
|
"logits/rejected": 2.587982416152954, |
|
"logps/chosen": -711.7886962890625, |
|
"logps/rejected": -1568.990234375, |
|
"loss": 0.0464, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.575112342834473, |
|
"rewards/margins": 9.028793334960938, |
|
"rewards/margins_max": 13.794398307800293, |
|
"rewards/margins_min": 4.263186454772949, |
|
"rewards/margins_std": 6.739584922790527, |
|
"rewards/rejected": -13.603904724121094, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/chosen": 0.2898733615875244, |
|
"eval_logits/rejected": 0.49572646617889404, |
|
"eval_logps/chosen": -832.4283447265625, |
|
"eval_logps/rejected": -916.66357421875, |
|
"eval_loss": 0.9016607403755188, |
|
"eval_rewards/accuracies": 0.6230158805847168, |
|
"eval_rewards/chosen": -4.8659281730651855, |
|
"eval_rewards/margins": 0.9388992786407471, |
|
"eval_rewards/margins_max": 5.651630401611328, |
|
"eval_rewards/margins_min": -2.8163363933563232, |
|
"eval_rewards/margins_std": 2.78544020652771, |
|
"eval_rewards/rejected": -5.8048272132873535, |
|
"eval_runtime": 417.6061, |
|
"eval_samples_per_second": 9.578, |
|
"eval_steps_per_second": 0.151, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.1392819952008515, |
|
"learning_rate": 1.977768926776896e-09, |
|
"logits/chosen": 0.29715052247047424, |
|
"logits/rejected": 2.052577018737793, |
|
"logps/chosen": -763.3764038085938, |
|
"logps/rejected": -1313.391845703125, |
|
"loss": 0.0621, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.99444055557251, |
|
"rewards/margins": 5.8611674308776855, |
|
"rewards/margins_max": 7.457464694976807, |
|
"rewards/margins_min": 4.264869213104248, |
|
"rewards/margins_std": 2.2575066089630127, |
|
"rewards/rejected": -10.855607986450195, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 2.4664515698911433, |
|
"learning_rate": 1.2534955605274233e-09, |
|
"logits/chosen": 0.4122096002101898, |
|
"logits/rejected": 3.4274659156799316, |
|
"logps/chosen": -771.2412719726562, |
|
"logps/rejected": -1840.775390625, |
|
"loss": 0.0786, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.00337028503418, |
|
"rewards/margins": 10.93709659576416, |
|
"rewards/margins_max": 16.795883178710938, |
|
"rewards/margins_min": 5.078312873840332, |
|
"rewards/margins_std": 8.285572052001953, |
|
"rewards/rejected": -15.940465927124023, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 5.80559652045183, |
|
"learning_rate": 6.933491585542351e-10, |
|
"logits/chosen": 0.37182289361953735, |
|
"logits/rejected": 3.1600046157836914, |
|
"logps/chosen": -680.2762451171875, |
|
"logps/rejected": -1665.175537109375, |
|
"loss": 0.1695, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.444371223449707, |
|
"rewards/margins": 10.164952278137207, |
|
"rewards/margins_max": 14.587198257446289, |
|
"rewards/margins_min": 5.742705821990967, |
|
"rewards/margins_std": 6.254001140594482, |
|
"rewards/rejected": -14.60932445526123, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 8.610945717452623, |
|
"learning_rate": 2.9769931450737694e-10, |
|
"logits/chosen": 0.1386619508266449, |
|
"logits/rejected": 2.0141379833221436, |
|
"logps/chosen": -799.5162353515625, |
|
"logps/rejected": -1834.666015625, |
|
"loss": 0.0767, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.182085990905762, |
|
"rewards/margins": 10.368020057678223, |
|
"rewards/margins_max": 15.239529609680176, |
|
"rewards/margins_min": 5.496510028839111, |
|
"rewards/margins_std": 6.889355659484863, |
|
"rewards/rejected": -15.550105094909668, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 33.03425737131523, |
|
"learning_rate": 6.680708454906425e-11, |
|
"logits/chosen": 0.2811238169670105, |
|
"logits/rejected": 1.941292405128479, |
|
"logps/chosen": -751.3411254882812, |
|
"logps/rejected": -1699.15625, |
|
"loss": 0.0687, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.085223197937012, |
|
"rewards/margins": 9.496126174926758, |
|
"rewards/margins_max": 13.1726713180542, |
|
"rewards/margins_min": 5.819581508636475, |
|
"rewards/margins_std": 5.199418544769287, |
|
"rewards/rejected": -14.58134937286377, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1359, |
|
"total_flos": 0.0, |
|
"train_loss": 0.21785820982226384, |
|
"train_runtime": 12082.0351, |
|
"train_samples_per_second": 1.8, |
|
"train_steps_per_second": 0.112 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1359, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|