|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9959925193694897, |
|
"eval_steps": 400, |
|
"global_step": 233, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02137323002938819, |
|
"grad_norm": 0.7561385035514832, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -1.7804944515228271, |
|
"logits/rejected": -1.6545133590698242, |
|
"logps/chosen": -0.7810468673706055, |
|
"logps/ref_chosen": -0.7813535928726196, |
|
"logps/ref_rejected": -0.8060104250907898, |
|
"logps/rejected": -0.805632472038269, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": 0.0007667625322937965, |
|
"rewards/margins": -0.00017807073891162872, |
|
"rewards/rejected": 0.0009448332712054253, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.04274646005877638, |
|
"grad_norm": 0.5518713593482971, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -1.7383073568344116, |
|
"logits/rejected": -1.7030198574066162, |
|
"logps/chosen": -0.8675562143325806, |
|
"logps/ref_chosen": -0.8662088513374329, |
|
"logps/ref_rejected": -0.9053529500961304, |
|
"logps/rejected": -0.907278835773468, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.003368514124304056, |
|
"rewards/margins": 0.0014460685197263956, |
|
"rewards/rejected": -0.004814582876861095, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06411969008816458, |
|
"grad_norm": 0.49651747941970825, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits/chosen": -1.928145170211792, |
|
"logits/rejected": -1.8129940032958984, |
|
"logps/chosen": -0.855307400226593, |
|
"logps/ref_chosen": -0.8494969606399536, |
|
"logps/ref_rejected": -0.8662179708480835, |
|
"logps/rejected": -0.8723622560501099, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.014526228420436382, |
|
"rewards/margins": 0.0008344631642103195, |
|
"rewards/rejected": -0.015360690653324127, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08549292011755276, |
|
"grad_norm": 0.27864935994148254, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": -1.7597744464874268, |
|
"logits/rejected": -1.6725934743881226, |
|
"logps/chosen": -0.9120359420776367, |
|
"logps/ref_chosen": -0.8935796618461609, |
|
"logps/ref_rejected": -0.8952409029006958, |
|
"logps/rejected": -0.9148454666137695, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0461406409740448, |
|
"rewards/margins": 0.0028707808814942837, |
|
"rewards/rejected": -0.04901142045855522, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.10686615014694095, |
|
"grad_norm": 0.4763440787792206, |
|
"learning_rate": 9.999435142363483e-07, |
|
"logits/chosen": -1.624091386795044, |
|
"logits/rejected": -1.5722483396530151, |
|
"logps/chosen": -0.9541429281234741, |
|
"logps/ref_chosen": -0.8983734846115112, |
|
"logps/ref_rejected": -0.9594888687133789, |
|
"logps/rejected": -1.035103440284729, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1394234150648117, |
|
"rewards/margins": 0.04961305111646652, |
|
"rewards/rejected": -0.18903647363185883, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.12823938017632916, |
|
"grad_norm": 0.3390841782093048, |
|
"learning_rate": 9.97967852255038e-07, |
|
"logits/chosen": -1.6577975749969482, |
|
"logits/rejected": -1.5775320529937744, |
|
"logps/chosen": -0.8317171931266785, |
|
"logps/ref_chosen": -0.7469085454940796, |
|
"logps/ref_rejected": -0.79144287109375, |
|
"logps/rejected": -0.8856765031814575, |
|
"loss": 0.4975, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.2120215892791748, |
|
"rewards/margins": 0.023562394082546234, |
|
"rewards/rejected": -0.23558397591114044, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.14961261020571734, |
|
"grad_norm": 0.3006940484046936, |
|
"learning_rate": 9.931806517013612e-07, |
|
"logits/chosen": -1.6243362426757812, |
|
"logits/rejected": -1.6471837759017944, |
|
"logps/chosen": -0.9220904111862183, |
|
"logps/ref_chosen": -0.7822158336639404, |
|
"logps/ref_rejected": -0.8102364540100098, |
|
"logps/rejected": -0.9670912027359009, |
|
"loss": 0.4963, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.34968677163124084, |
|
"rewards/margins": 0.04244992882013321, |
|
"rewards/rejected": -0.39213672280311584, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.17098584023510552, |
|
"grad_norm": 0.27084407210350037, |
|
"learning_rate": 9.856089412257604e-07, |
|
"logits/chosen": -1.6578766107559204, |
|
"logits/rejected": -1.6355148553848267, |
|
"logps/chosen": -1.05038583278656, |
|
"logps/ref_chosen": -0.8560595512390137, |
|
"logps/ref_rejected": -0.914546012878418, |
|
"logps/rejected": -1.1552728414535522, |
|
"loss": 0.4937, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.48581594228744507, |
|
"rewards/margins": 0.11600111424922943, |
|
"rewards/rejected": -0.6018170118331909, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.19235907026449373, |
|
"grad_norm": 0.49249762296676636, |
|
"learning_rate": 9.752954708892377e-07, |
|
"logits/chosen": -1.5577068328857422, |
|
"logits/rejected": -1.465714931488037, |
|
"logps/chosen": -1.0501822233200073, |
|
"logps/ref_chosen": -0.8724653124809265, |
|
"logps/ref_rejected": -0.8607926368713379, |
|
"logps/rejected": -1.0398765802383423, |
|
"loss": 0.4975, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.4442923665046692, |
|
"rewards/margins": 0.003417615545913577, |
|
"rewards/rejected": -0.4477098882198334, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2137323002938819, |
|
"grad_norm": 0.32699063420295715, |
|
"learning_rate": 9.62298470795473e-07, |
|
"logits/chosen": -1.7691097259521484, |
|
"logits/rejected": -1.7416681051254272, |
|
"logps/chosen": -0.9927698969841003, |
|
"logps/ref_chosen": -0.8696678280830383, |
|
"logps/ref_rejected": -0.8965504765510559, |
|
"logps/rejected": -1.030956506729126, |
|
"loss": 0.4967, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.3077549934387207, |
|
"rewards/margins": 0.028260568156838417, |
|
"rewards/rejected": -0.33601561188697815, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2351055303232701, |
|
"grad_norm": 0.31701868772506714, |
|
"learning_rate": 9.466913223222465e-07, |
|
"logits/chosen": -1.5519920587539673, |
|
"logits/rejected": -1.4699208736419678, |
|
"logps/chosen": -0.8616452217102051, |
|
"logps/ref_chosen": -0.7731812596321106, |
|
"logps/ref_rejected": -0.7838868498802185, |
|
"logps/rejected": -0.8863974809646606, |
|
"loss": 0.4956, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.22115974128246307, |
|
"rewards/margins": 0.03511647880077362, |
|
"rewards/rejected": -0.2562762200832367, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.2564787603526583, |
|
"grad_norm": 0.40317973494529724, |
|
"learning_rate": 9.285621438083997e-07, |
|
"logits/chosen": -1.601485252380371, |
|
"logits/rejected": -1.5545583963394165, |
|
"logps/chosen": -0.8779473304748535, |
|
"logps/ref_chosen": -0.7888692617416382, |
|
"logps/ref_rejected": -0.8163660168647766, |
|
"logps/rejected": -0.9202233552932739, |
|
"loss": 0.4936, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.22269515693187714, |
|
"rewards/margins": 0.03694819286465645, |
|
"rewards/rejected": -0.2596433460712433, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2778519903820465, |
|
"grad_norm": 0.5152870416641235, |
|
"learning_rate": 9.080132930355566e-07, |
|
"logits/chosen": -1.6490017175674438, |
|
"logits/rejected": -1.6716206073760986, |
|
"logps/chosen": -0.9653270840644836, |
|
"logps/ref_chosen": -0.8533055186271667, |
|
"logps/ref_rejected": -0.9036076664924622, |
|
"logps/rejected": -1.0383471250534058, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.28005388379096985, |
|
"rewards/margins": 0.05679459124803543, |
|
"rewards/rejected": -0.33684849739074707, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.2992252204114347, |
|
"grad_norm": 0.4174951910972595, |
|
"learning_rate": 8.851607893136064e-07, |
|
"logits/chosen": -1.728899598121643, |
|
"logits/rejected": -1.6759620904922485, |
|
"logps/chosen": -0.9261114001274109, |
|
"logps/ref_chosen": -0.8132463693618774, |
|
"logps/ref_rejected": -0.8208681344985962, |
|
"logps/rejected": -0.9566439390182495, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.28216248750686646, |
|
"rewards/margins": 0.057276882231235504, |
|
"rewards/rejected": -0.33943939208984375, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.32059845044082286, |
|
"grad_norm": 0.36295104026794434, |
|
"learning_rate": 8.601336584328658e-07, |
|
"logits/chosen": -1.7176015377044678, |
|
"logits/rejected": -1.7168267965316772, |
|
"logps/chosen": -0.9694639444351196, |
|
"logps/ref_chosen": -0.8283951878547668, |
|
"logps/ref_rejected": -0.8723212480545044, |
|
"logps/rejected": -1.0357882976531982, |
|
"loss": 0.4939, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.3526715338230133, |
|
"rewards/margins": 0.05599608272314072, |
|
"rewards/rejected": -0.4086676239967346, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.34197168047021104, |
|
"grad_norm": 0.6786319017410278, |
|
"learning_rate": 8.330732041813366e-07, |
|
"logits/chosen": -1.8271814584732056, |
|
"logits/rejected": -1.7772512435913086, |
|
"logps/chosen": -0.8929777145385742, |
|
"logps/ref_chosen": -0.8355891108512878, |
|
"logps/ref_rejected": -0.9002590179443359, |
|
"logps/rejected": -0.9975314140319824, |
|
"loss": 0.4901, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.14347167313098907, |
|
"rewards/margins": 0.09970954060554504, |
|
"rewards/rejected": -0.24318119883537292, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.36334491049959927, |
|
"grad_norm": 0.8407174348831177, |
|
"learning_rate": 8.041322105400921e-07, |
|
"logits/chosen": -1.706368088722229, |
|
"logits/rejected": -1.650854468345642, |
|
"logps/chosen": -0.8318307995796204, |
|
"logps/ref_chosen": -0.8256785273551941, |
|
"logps/ref_rejected": -0.8488883972167969, |
|
"logps/rejected": -0.9011926651000977, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.01538090594112873, |
|
"rewards/margins": 0.1153799295425415, |
|
"rewards/rejected": -0.13076083362102509, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.38471814052898745, |
|
"grad_norm": 1.0095113515853882, |
|
"learning_rate": 7.734740790612136e-07, |
|
"logits/chosen": -1.8660595417022705, |
|
"logits/rejected": -1.8641777038574219, |
|
"logps/chosen": -0.8596251606941223, |
|
"logps/ref_chosen": -0.9228288531303406, |
|
"logps/ref_rejected": -0.9406684637069702, |
|
"logps/rejected": -0.9123810529708862, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.15800921618938446, |
|
"rewards/margins": 0.08729076385498047, |
|
"rewards/rejected": 0.07071846723556519, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.40609137055837563, |
|
"grad_norm": 0.8767968416213989, |
|
"learning_rate": 7.412719062986631e-07, |
|
"logits/chosen": -1.9249579906463623, |
|
"logits/rejected": -1.8531602621078491, |
|
"logps/chosen": -0.8149619102478027, |
|
"logps/ref_chosen": -0.9041957855224609, |
|
"logps/ref_rejected": -0.914394736289978, |
|
"logps/rejected": -0.8845084309577942, |
|
"loss": 0.4859, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.22308464348316193, |
|
"rewards/margins": 0.14836890995502472, |
|
"rewards/rejected": 0.0747157484292984, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.4274646005877638, |
|
"grad_norm": 0.6044840812683105, |
|
"learning_rate": 7.077075065009433e-07, |
|
"logits/chosen": -1.731792688369751, |
|
"logits/rejected": -1.7363353967666626, |
|
"logps/chosen": -0.7217603921890259, |
|
"logps/ref_chosen": -0.8257284164428711, |
|
"logps/ref_rejected": -0.8479409217834473, |
|
"logps/rejected": -0.784611701965332, |
|
"loss": 0.4852, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.2599199414253235, |
|
"rewards/margins": 0.10159693658351898, |
|
"rewards/rejected": 0.1583230048418045, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.448837830617152, |
|
"grad_norm": 0.7751099467277527, |
|
"learning_rate": 6.72970385083438e-07, |
|
"logits/chosen": -1.9043171405792236, |
|
"logits/rejected": -1.789009690284729, |
|
"logps/chosen": -0.7011796236038208, |
|
"logps/ref_chosen": -0.8166704177856445, |
|
"logps/ref_rejected": -0.8361040949821472, |
|
"logps/rejected": -0.7531259655952454, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.2887269854545593, |
|
"rewards/margins": 0.0812816247344017, |
|
"rewards/rejected": 0.20744535326957703, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.4702110606465402, |
|
"grad_norm": 0.7444783449172974, |
|
"learning_rate": 6.372566686762426e-07, |
|
"logits/chosen": -1.8429124355316162, |
|
"logits/rejected": -1.760053277015686, |
|
"logps/chosen": -0.7318671941757202, |
|
"logps/ref_chosen": -0.8331576585769653, |
|
"logps/ref_rejected": -0.8635438084602356, |
|
"logps/rejected": -0.7953906059265137, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.253226101398468, |
|
"rewards/margins": 0.08284299075603485, |
|
"rewards/rejected": 0.17038312554359436, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.4915842906759284, |
|
"grad_norm": 0.8039557337760925, |
|
"learning_rate": 6.00767997788451e-07, |
|
"logits/chosen": -1.9033887386322021, |
|
"logits/rejected": -1.770939588546753, |
|
"logps/chosen": -0.7566145062446594, |
|
"logps/ref_chosen": -0.8713752627372742, |
|
"logps/ref_rejected": -0.8939735293388367, |
|
"logps/rejected": -0.8082603216171265, |
|
"loss": 0.4844, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.28690171241760254, |
|
"rewards/margins": 0.07261888682842255, |
|
"rewards/rejected": 0.2142828404903412, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.5129575207053166, |
|
"grad_norm": 1.1172226667404175, |
|
"learning_rate": 5.637103883409525e-07, |
|
"logits/chosen": -1.9406812191009521, |
|
"logits/rejected": -1.867462158203125, |
|
"logps/chosen": -0.8249004483222961, |
|
"logps/ref_chosen": -0.873686671257019, |
|
"logps/ref_rejected": -0.9026174545288086, |
|
"logps/rejected": -0.8801124691963196, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.1219654530286789, |
|
"rewards/margins": 0.06570279598236084, |
|
"rewards/rejected": 0.05626266077160835, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5343307507347048, |
|
"grad_norm": 0.8093484044075012, |
|
"learning_rate": 5.262930684955438e-07, |
|
"logits/chosen": -2.0165348052978516, |
|
"logits/rejected": -1.9574447870254517, |
|
"logps/chosen": -0.8015368580818176, |
|
"logps/ref_chosen": -0.815376877784729, |
|
"logps/ref_rejected": -0.8817696571350098, |
|
"logps/rejected": -0.9249275326728821, |
|
"loss": 0.4825, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.034600116312503815, |
|
"rewards/margins": 0.14249476790428162, |
|
"rewards/rejected": -0.1078946590423584, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.555703980764093, |
|
"grad_norm": 0.8614518642425537, |
|
"learning_rate": 4.88727297347654e-07, |
|
"logits/chosen": -1.951319694519043, |
|
"logits/rejected": -1.933098554611206, |
|
"logps/chosen": -0.7757576107978821, |
|
"logps/ref_chosen": -0.7751168608665466, |
|
"logps/ref_rejected": -0.8734749555587769, |
|
"logps/rejected": -0.9476582407951355, |
|
"loss": 0.4794, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.0016015321016311646, |
|
"rewards/margins": 0.18385668098926544, |
|
"rewards/rejected": -0.1854582130908966, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5770772107934812, |
|
"grad_norm": 1.054673194885254, |
|
"learning_rate": 4.512251721523659e-07, |
|
"logits/chosen": -2.005443811416626, |
|
"logits/rejected": -2.0154759883880615, |
|
"logps/chosen": -0.7493831515312195, |
|
"logps/ref_chosen": -0.7740285992622375, |
|
"logps/ref_rejected": -0.8138446807861328, |
|
"logps/rejected": -0.851770281791687, |
|
"loss": 0.4847, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.06161379814147949, |
|
"rewards/margins": 0.15642789006233215, |
|
"rewards/rejected": -0.09481407701969147, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.5984504408228694, |
|
"grad_norm": 1.669247031211853, |
|
"learning_rate": 4.139984308181708e-07, |
|
"logits/chosen": -1.9584558010101318, |
|
"logits/rejected": -1.8885042667388916, |
|
"logps/chosen": -0.7844404578208923, |
|
"logps/ref_chosen": -0.8231161236763, |
|
"logps/ref_rejected": -0.83356112241745, |
|
"logps/rejected": -0.8187839388847351, |
|
"loss": 0.481, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.09668895602226257, |
|
"rewards/margins": 0.0597461462020874, |
|
"rewards/rejected": 0.03694281354546547, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6198236708522575, |
|
"grad_norm": 1.0050734281539917, |
|
"learning_rate": 3.772572564296004e-07, |
|
"logits/chosen": -1.8883240222930908, |
|
"logits/rejected": -1.8240330219268799, |
|
"logps/chosen": -0.7662582397460938, |
|
"logps/ref_chosen": -0.8861669301986694, |
|
"logps/ref_rejected": -0.924543023109436, |
|
"logps/rejected": -0.8522801399230957, |
|
"loss": 0.4785, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.29977160692214966, |
|
"rewards/margins": 0.11911455541849136, |
|
"rewards/rejected": 0.18065707385540009, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.6411969008816457, |
|
"grad_norm": 0.9123177528381348, |
|
"learning_rate": 3.412090905484337e-07, |
|
"logits/chosen": -1.9726581573486328, |
|
"logits/rejected": -1.9151983261108398, |
|
"logps/chosen": -0.7370959520339966, |
|
"logps/ref_chosen": -0.866258978843689, |
|
"logps/ref_rejected": -0.8657606840133667, |
|
"logps/rejected": -0.8145822286605835, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3229079246520996, |
|
"rewards/margins": 0.19496168196201324, |
|
"rewards/rejected": 0.12794628739356995, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6625701309110339, |
|
"grad_norm": 0.7277486324310303, |
|
"learning_rate": 3.060574619936075e-07, |
|
"logits/chosen": -1.8861163854599, |
|
"logits/rejected": -1.9004647731781006, |
|
"logps/chosen": -0.7181011438369751, |
|
"logps/ref_chosen": -0.8273455500602722, |
|
"logps/ref_rejected": -0.8894198536872864, |
|
"logps/rejected": -0.8557901382446289, |
|
"loss": 0.4828, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.27311110496520996, |
|
"rewards/margins": 0.18903681635856628, |
|
"rewards/rejected": 0.08407425880432129, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.6839433609404221, |
|
"grad_norm": 1.0556169748306274, |
|
"learning_rate": 2.720008377125682e-07, |
|
"logits/chosen": -2.1498093605041504, |
|
"logits/rejected": -2.08402419090271, |
|
"logps/chosen": -0.7090437412261963, |
|
"logps/ref_chosen": -0.8103801012039185, |
|
"logps/ref_rejected": -0.8715206980705261, |
|
"logps/rejected": -0.7982600927352905, |
|
"loss": 0.4815, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.25334107875823975, |
|
"rewards/margins": 0.0701896995306015, |
|
"rewards/rejected": 0.18315134942531586, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7053165909698104, |
|
"grad_norm": 0.9068896770477295, |
|
"learning_rate": 2.3923150223207173e-07, |
|
"logits/chosen": -1.9448268413543701, |
|
"logits/rejected": -1.904314637184143, |
|
"logps/chosen": -0.7294695377349854, |
|
"logps/ref_chosen": -0.8327474594116211, |
|
"logps/ref_rejected": -0.9134753346443176, |
|
"logps/rejected": -0.8374517560005188, |
|
"loss": 0.4781, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.25819462537765503, |
|
"rewards/margins": 0.06813579052686691, |
|
"rewards/rejected": 0.1900588572025299, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.7266898209991985, |
|
"grad_norm": 0.9222660660743713, |
|
"learning_rate": 2.0793447201508286e-07, |
|
"logits/chosen": -1.9369819164276123, |
|
"logits/rejected": -1.9469242095947266, |
|
"logps/chosen": -0.6714679002761841, |
|
"logps/ref_chosen": -0.7705163359642029, |
|
"logps/ref_rejected": -0.8333786129951477, |
|
"logps/rejected": -0.7605674862861633, |
|
"loss": 0.4845, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.24762126803398132, |
|
"rewards/margins": 0.06559363007545471, |
|
"rewards/rejected": 0.18202762305736542, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7480630510285867, |
|
"grad_norm": 0.7805910110473633, |
|
"learning_rate": 1.7828645085333644e-07, |
|
"logits/chosen": -1.9515256881713867, |
|
"logits/rejected": -1.9057369232177734, |
|
"logps/chosen": -0.7692245244979858, |
|
"logps/ref_chosen": -0.8767744302749634, |
|
"logps/ref_rejected": -0.8914516568183899, |
|
"logps/rejected": -0.8368504643440247, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.2688748240470886, |
|
"rewards/margins": 0.13237187266349792, |
|
"rewards/rejected": 0.1365029364824295, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.7694362810579749, |
|
"grad_norm": 0.8946753740310669, |
|
"learning_rate": 1.5045483219344385e-07, |
|
"logits/chosen": -2.008927583694458, |
|
"logits/rejected": -1.9914191961288452, |
|
"logps/chosen": -0.7557133436203003, |
|
"logps/ref_chosen": -0.8390854597091675, |
|
"logps/ref_rejected": -0.8787837028503418, |
|
"logps/rejected": -0.853840708732605, |
|
"loss": 0.479, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.2084302008152008, |
|
"rewards/margins": 0.14607290923595428, |
|
"rewards/rejected": 0.062357254326343536, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7908095110873631, |
|
"grad_norm": 1.0278949737548828, |
|
"learning_rate": 1.2459675402943288e-07, |
|
"logits/chosen": -2.0313353538513184, |
|
"logits/rejected": -1.9398431777954102, |
|
"logps/chosen": -0.7753286361694336, |
|
"logps/ref_chosen": -0.8660305142402649, |
|
"logps/ref_rejected": -0.8604307174682617, |
|
"logps/rejected": -0.7984707951545715, |
|
"loss": 0.4765, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.22675485908985138, |
|
"rewards/margins": 0.07185501605272293, |
|
"rewards/rejected": 0.15489983558654785, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.8121827411167513, |
|
"grad_norm": 0.8263163566589355, |
|
"learning_rate": 1.0085821169782199e-07, |
|
"logits/chosen": -2.0978431701660156, |
|
"logits/rejected": -2.055168628692627, |
|
"logps/chosen": -0.7470763921737671, |
|
"logps/ref_chosen": -0.8603025674819946, |
|
"logps/ref_rejected": -0.9167188405990601, |
|
"logps/rejected": -0.8492987751960754, |
|
"loss": 0.4776, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.2830653786659241, |
|
"rewards/margins": 0.11451487243175507, |
|
"rewards/rejected": 0.1685505211353302, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8335559711461394, |
|
"grad_norm": 0.9788782596588135, |
|
"learning_rate": 7.937323358440934e-08, |
|
"logits/chosen": -2.13584041595459, |
|
"logits/rejected": -2.0809855461120605, |
|
"logps/chosen": -0.7344987988471985, |
|
"logps/ref_chosen": -0.8153272867202759, |
|
"logps/ref_rejected": -0.8524506688117981, |
|
"logps/rejected": -0.8127390742301941, |
|
"loss": 0.475, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.20207130908966064, |
|
"rewards/margins": 0.10279206931591034, |
|
"rewards/rejected": 0.09927921742200851, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.8549292011755276, |
|
"grad_norm": 1.1459167003631592, |
|
"learning_rate": 6.026312439675551e-08, |
|
"logits/chosen": -1.9530729055404663, |
|
"logits/rejected": -1.8388773202896118, |
|
"logps/chosen": -0.7524019479751587, |
|
"logps/ref_chosen": -0.832992672920227, |
|
"logps/ref_rejected": -0.8378564715385437, |
|
"logps/rejected": -0.7900499701499939, |
|
"loss": 0.4783, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.20147652924060822, |
|
"rewards/margins": 0.08196047693490982, |
|
"rewards/rejected": 0.11951601505279541, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8763024312049158, |
|
"grad_norm": 1.2217403650283813, |
|
"learning_rate": 4.3635780274861864e-08, |
|
"logits/chosen": -1.9760059118270874, |
|
"logits/rejected": -1.881757378578186, |
|
"logps/chosen": -0.754524827003479, |
|
"logps/ref_chosen": -0.8361877202987671, |
|
"logps/ref_rejected": -0.8636215329170227, |
|
"logps/rejected": -0.8430477380752563, |
|
"loss": 0.4761, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.20415742695331573, |
|
"rewards/margins": 0.15272292494773865, |
|
"rewards/rejected": 0.05143451690673828, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.897675661234304, |
|
"grad_norm": 0.9312232732772827, |
|
"learning_rate": 2.958507960694784e-08, |
|
"logits/chosen": -1.9826923608779907, |
|
"logits/rejected": -1.963322401046753, |
|
"logps/chosen": -0.7218400239944458, |
|
"logps/ref_chosen": -0.775153636932373, |
|
"logps/ref_rejected": -0.82710200548172, |
|
"logps/rejected": -0.8231655359268188, |
|
"loss": 0.4756, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.13328400254249573, |
|
"rewards/margins": 0.12344253063201904, |
|
"rewards/rejected": 0.009841480292379856, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9190488912636923, |
|
"grad_norm": 0.9846080541610718, |
|
"learning_rate": 1.8190352989793322e-08, |
|
"logits/chosen": -1.9855095148086548, |
|
"logits/rejected": -1.907576560974121, |
|
"logps/chosen": -0.7199736833572388, |
|
"logps/ref_chosen": -0.803063690662384, |
|
"logps/ref_rejected": -0.8516971468925476, |
|
"logps/rejected": -0.8365123867988586, |
|
"loss": 0.4776, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.2077248990535736, |
|
"rewards/margins": 0.16976311802864075, |
|
"rewards/rejected": 0.03796178475022316, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.9404221212930804, |
|
"grad_norm": 0.7335969805717468, |
|
"learning_rate": 9.515935326265378e-09, |
|
"logits/chosen": -2.0024361610412598, |
|
"logits/rejected": -1.9636704921722412, |
|
"logps/chosen": -0.7521845698356628, |
|
"logps/ref_chosen": -0.8253963589668274, |
|
"logps/ref_rejected": -0.849533200263977, |
|
"logps/rejected": -0.8330303430557251, |
|
"loss": 0.4757, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.1830292046070099, |
|
"rewards/margins": 0.14177197217941284, |
|
"rewards/rejected": 0.04125722497701645, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9617953513224686, |
|
"grad_norm": 1.1543854475021362, |
|
"learning_rate": 3.6108025888958447e-09, |
|
"logits/chosen": -1.9360746145248413, |
|
"logits/rejected": -1.911627173423767, |
|
"logps/chosen": -0.7084980010986328, |
|
"logps/ref_chosen": -0.7970255613327026, |
|
"logps/ref_rejected": -0.8132475018501282, |
|
"logps/rejected": -0.7666771411895752, |
|
"loss": 0.4765, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.22131893038749695, |
|
"rewards/margins": 0.10489317029714584, |
|
"rewards/rejected": 0.1164257749915123, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.9831685813518568, |
|
"grad_norm": 1.1417807340621948, |
|
"learning_rate": 5.082953003528456e-10, |
|
"logits/chosen": -2.013756513595581, |
|
"logits/rejected": -2.0383520126342773, |
|
"logps/chosen": -0.8249381184577942, |
|
"logps/ref_chosen": -0.8979822993278503, |
|
"logps/ref_rejected": -0.9172071218490601, |
|
"logps/rejected": -0.9182927012443542, |
|
"loss": 0.4774, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.1826106607913971, |
|
"rewards/margins": 0.18532457947731018, |
|
"rewards/rejected": -0.0027139366138726473, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.9959925193694897, |
|
"step": 233, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4860667634931245, |
|
"train_runtime": 16529.3176, |
|
"train_samples_per_second": 3.622, |
|
"train_steps_per_second": 0.014 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 233, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|