{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.998691442030882,
"eval_steps": 400,
"global_step": 477,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.010468463752944255,
"grad_norm": 89.9968305873071,
"learning_rate": 6.25e-08,
"logits/chosen": -0.7388366460800171,
"logits/rejected": -0.7827404141426086,
"logps/chosen": -1.15103280544281,
"logps/rejected": -1.2909390926361084,
"loss": 1.2935,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -2.30206561088562,
"rewards/margins": 0.2798125147819519,
"rewards/rejected": -2.581878185272217,
"step": 5
},
{
"epoch": 0.02093692750588851,
"grad_norm": 24.705919418070632,
"learning_rate": 1.25e-07,
"logits/chosen": -0.7937806844711304,
"logits/rejected": -0.8651958703994751,
"logps/chosen": -1.1529361009597778,
"logps/rejected": -1.3611778020858765,
"loss": 1.314,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -2.3058722019195557,
"rewards/margins": 0.41648340225219727,
"rewards/rejected": -2.722355604171753,
"step": 10
},
{
"epoch": 0.031405391258832765,
"grad_norm": 27.735520006717728,
"learning_rate": 1.875e-07,
"logits/chosen": -0.7491501569747925,
"logits/rejected": -0.8338179588317871,
"logps/chosen": -1.1712462902069092,
"logps/rejected": -1.270825743675232,
"loss": 1.2667,
"rewards/accuracies": 0.5625,
"rewards/chosen": -2.3424925804138184,
"rewards/margins": 0.19915875792503357,
"rewards/rejected": -2.541651487350464,
"step": 15
},
{
"epoch": 0.04187385501177702,
"grad_norm": 22.322171681204715,
"learning_rate": 2.5e-07,
"logits/chosen": -0.7619983553886414,
"logits/rejected": -0.9046538472175598,
"logps/chosen": -1.1294901371002197,
"logps/rejected": -1.2941240072250366,
"loss": 1.2696,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -2.2589802742004395,
"rewards/margins": 0.32926779985427856,
"rewards/rejected": -2.5882480144500732,
"step": 20
},
{
"epoch": 0.05234231876472128,
"grad_norm": 12.849323230827375,
"learning_rate": 3.125e-07,
"logits/chosen": -0.772399365901947,
"logits/rejected": -0.8519186973571777,
"logps/chosen": -1.077214002609253,
"logps/rejected": -1.2762653827667236,
"loss": 1.2362,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -2.154428005218506,
"rewards/margins": 0.39810293912887573,
"rewards/rejected": -2.5525307655334473,
"step": 25
},
{
"epoch": 0.06281078251766553,
"grad_norm": 84.84769866542291,
"learning_rate": 3.75e-07,
"logits/chosen": -0.7909184694290161,
"logits/rejected": -0.8215691447257996,
"logps/chosen": -1.059594988822937,
"logps/rejected": -1.0990025997161865,
"loss": 1.2897,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -2.119189977645874,
"rewards/margins": 0.0788152664899826,
"rewards/rejected": -2.198005199432373,
"step": 30
},
{
"epoch": 0.07327924627060979,
"grad_norm": 12.477109087394112,
"learning_rate": 4.3749999999999994e-07,
"logits/chosen": -0.7678741216659546,
"logits/rejected": -0.8405346870422363,
"logps/chosen": -0.9820269346237183,
"logps/rejected": -1.2532163858413696,
"loss": 1.2497,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -1.9640538692474365,
"rewards/margins": 0.5423787236213684,
"rewards/rejected": -2.5064327716827393,
"step": 35
},
{
"epoch": 0.08374771002355404,
"grad_norm": 10.85962784004132,
"learning_rate": 5e-07,
"logits/chosen": -0.7665027379989624,
"logits/rejected": -0.8336607217788696,
"logps/chosen": -0.9715523719787598,
"logps/rejected": -1.1505324840545654,
"loss": 1.2359,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -1.9431047439575195,
"rewards/margins": 0.35795995593070984,
"rewards/rejected": -2.301064968109131,
"step": 40
},
{
"epoch": 0.0942161737764983,
"grad_norm": 10.414385637292323,
"learning_rate": 5.625e-07,
"logits/chosen": -0.7420114874839783,
"logits/rejected": -0.8339902758598328,
"logps/chosen": -0.9872716665267944,
"logps/rejected": -1.1155823469161987,
"loss": 1.2267,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -1.9745433330535889,
"rewards/margins": 0.2566211223602295,
"rewards/rejected": -2.2311646938323975,
"step": 45
},
{
"epoch": 0.10468463752944256,
"grad_norm": 9.651448839940226,
"learning_rate": 5.999678242522831e-07,
"logits/chosen": -0.7927948832511902,
"logits/rejected": -0.8290635943412781,
"logps/chosen": -0.9459100961685181,
"logps/rejected": -1.2578647136688232,
"loss": 1.2207,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -1.8918201923370361,
"rewards/margins": 0.6239093542098999,
"rewards/rejected": -2.5157294273376465,
"step": 50
},
{
"epoch": 0.11515310128238682,
"grad_norm": 43.90472722310407,
"learning_rate": 5.996059263493219e-07,
"logits/chosen": -0.7944079637527466,
"logits/rejected": -0.9001775979995728,
"logps/chosen": -1.072819471359253,
"logps/rejected": -1.181773066520691,
"loss": 1.2551,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -2.145638942718506,
"rewards/margins": 0.21790704131126404,
"rewards/rejected": -2.363546133041382,
"step": 55
},
{
"epoch": 0.12562156503533106,
"grad_norm": 13.393066662370963,
"learning_rate": 5.988423976115163e-07,
"logits/chosen": -0.7826106548309326,
"logits/rejected": -0.8369284868240356,
"logps/chosen": -1.0628390312194824,
"logps/rejected": -1.2253072261810303,
"loss": 1.2246,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -2.125678062438965,
"rewards/margins": 0.32493603229522705,
"rewards/rejected": -2.4506144523620605,
"step": 60
},
{
"epoch": 0.1360900287882753,
"grad_norm": 26.206483702491475,
"learning_rate": 5.976782615723061e-07,
"logits/chosen": -0.7975456714630127,
"logits/rejected": -0.8562803268432617,
"logps/chosen": -1.0680768489837646,
"logps/rejected": -1.2204017639160156,
"loss": 1.2268,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -2.1361536979675293,
"rewards/margins": 0.3046496510505676,
"rewards/rejected": -2.4408035278320312,
"step": 65
},
{
"epoch": 0.14655849254121958,
"grad_norm": 13.41584537004533,
"learning_rate": 5.961150787913738e-07,
"logits/chosen": -0.8376196622848511,
"logits/rejected": -0.9019572138786316,
"logps/chosen": -1.0893644094467163,
"logps/rejected": -1.2784545421600342,
"loss": 1.1754,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -2.1787288188934326,
"rewards/margins": 0.37818047404289246,
"rewards/rejected": -2.5569090843200684,
"step": 70
},
{
"epoch": 0.15702695629416383,
"grad_norm": 32.22425187362688,
"learning_rate": 5.941549447626671e-07,
"logits/chosen": -0.804112434387207,
"logits/rejected": -0.845563530921936,
"logps/chosen": -1.0805425643920898,
"logps/rejected": -1.3212538957595825,
"loss": 1.209,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -2.1610851287841797,
"rewards/margins": 0.4814226031303406,
"rewards/rejected": -2.642507791519165,
"step": 75
},
{
"epoch": 0.16749542004710807,
"grad_norm": 8.981853488976475,
"learning_rate": 5.918004871053251e-07,
"logits/chosen": -0.7968226671218872,
"logits/rejected": -0.8211067318916321,
"logps/chosen": -1.026604413986206,
"logps/rejected": -1.3631267547607422,
"loss": 1.1624,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -2.053208827972412,
"rewards/margins": 0.673044741153717,
"rewards/rejected": -2.7262535095214844,
"step": 80
},
{
"epoch": 0.17796388380005235,
"grad_norm": 17.367470137588203,
"learning_rate": 5.890548620412763e-07,
"logits/chosen": -0.8126602172851562,
"logits/rejected": -0.8794834017753601,
"logps/chosen": -1.0674957036972046,
"logps/rejected": -1.3523355722427368,
"loss": 1.1625,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -2.134991407394409,
"rewards/margins": 0.5696790814399719,
"rewards/rejected": -2.7046711444854736,
"step": 85
},
{
"epoch": 0.1884323475529966,
"grad_norm": 17.833322868673477,
"learning_rate": 5.859217501642258e-07,
"logits/chosen": -0.840762734413147,
"logits/rejected": -0.9274584054946899,
"logps/chosen": -1.1602346897125244,
"logps/rejected": -1.5290915966033936,
"loss": 1.1734,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -2.320469379425049,
"rewards/margins": 0.7377143502235413,
"rewards/rejected": -3.058183193206787,
"step": 90
},
{
"epoch": 0.19890081130594087,
"grad_norm": 22.98307788140464,
"learning_rate": 5.824053515057091e-07,
"logits/chosen": -0.8092079162597656,
"logits/rejected": -0.8328098058700562,
"logps/chosen": -1.133385419845581,
"logps/rejected": -1.4298288822174072,
"loss": 1.1919,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -2.266770839691162,
"rewards/margins": 0.5928869247436523,
"rewards/rejected": -2.8596577644348145,
"step": 95
},
{
"epoch": 0.2093692750588851,
"grad_norm": 11.026437481785171,
"learning_rate": 5.785103799048218e-07,
"logits/chosen": -0.8240598440170288,
"logits/rejected": -0.8689464330673218,
"logps/chosen": -1.147385835647583,
"logps/rejected": -1.3535184860229492,
"loss": 1.2131,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -2.294771671295166,
"rewards/margins": 0.41226544976234436,
"rewards/rejected": -2.7070369720458984,
"step": 100
},
{
"epoch": 0.21983773881182936,
"grad_norm": 9.837343506686455,
"learning_rate": 5.742420566891749e-07,
"logits/chosen": -0.7966706156730652,
"logits/rejected": -0.878908634185791,
"logps/chosen": -1.1871858835220337,
"logps/rejected": -1.4869831800460815,
"loss": 1.1062,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -2.3743717670440674,
"rewards/margins": 0.5995948314666748,
"rewards/rejected": -2.973966360092163,
"step": 105
},
{
"epoch": 0.23030620256477363,
"grad_norm": 19.01097451640794,
"learning_rate": 5.696061036755478e-07,
"logits/chosen": -0.7402995228767395,
"logits/rejected": -0.8451690673828125,
"logps/chosen": -1.0870535373687744,
"logps/rejected": -1.3536127805709839,
"loss": 1.1368,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -2.174107074737549,
"rewards/margins": 0.5331184267997742,
"rewards/rejected": -2.7072255611419678,
"step": 110
},
{
"epoch": 0.24077466631771788,
"grad_norm": 89.427421788791,
"learning_rate": 5.64608735499618e-07,
"logits/chosen": -0.833459734916687,
"logits/rejected": -0.829018235206604,
"logps/chosen": -1.150940179824829,
"logps/rejected": -1.287229061126709,
"loss": 1.1596,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -2.301880359649658,
"rewards/margins": 0.2725774943828583,
"rewards/rejected": -2.574458122253418,
"step": 115
},
{
"epoch": 0.2512431300706621,
"grad_norm": 31.745365051153907,
"learning_rate": 5.592566512850545e-07,
"logits/chosen": -0.79100501537323,
"logits/rejected": -0.8663417100906372,
"logps/chosen": -1.0571635961532593,
"logps/rejected": -1.4087059497833252,
"loss": 1.1752,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -2.1143271923065186,
"rewards/margins": 0.703084409236908,
"rewards/rejected": -2.8174118995666504,
"step": 120
},
{
"epoch": 0.26171159382360637,
"grad_norm": 14.496796822119729,
"learning_rate": 5.535570256631384e-07,
"logits/chosen": -0.798068642616272,
"logits/rejected": -0.7694944143295288,
"logps/chosen": -1.171478271484375,
"logps/rejected": -1.5117442607879639,
"loss": 1.1603,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -2.34295654296875,
"rewards/margins": 0.6805320978164673,
"rewards/rejected": -3.0234885215759277,
"step": 125
},
{
"epoch": 0.2721800575765506,
"grad_norm": 11.15517991690276,
"learning_rate": 5.475174991549528e-07,
"logits/chosen": -0.7599740624427795,
"logits/rejected": -0.8051120638847351,
"logps/chosen": -1.1963175535202026,
"logps/rejected": -1.5290193557739258,
"loss": 1.1204,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -2.3926351070404053,
"rewards/margins": 0.6654035449028015,
"rewards/rejected": -3.0580387115478516,
"step": 130
},
{
"epoch": 0.2826485213294949,
"grad_norm": 13.030746243741968,
"learning_rate": 5.411461679290317e-07,
"logits/chosen": -0.7586075663566589,
"logits/rejected": -0.7899220585823059,
"logps/chosen": -1.0880517959594727,
"logps/rejected": -1.4661823511123657,
"loss": 1.1668,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -2.1761035919189453,
"rewards/margins": 0.7562611103057861,
"rewards/rejected": -2.9323647022247314,
"step": 135
},
{
"epoch": 0.29311698508243916,
"grad_norm": 12.738817253337984,
"learning_rate": 5.34451572948201e-07,
"logits/chosen": -0.8128818273544312,
"logits/rejected": -0.842110812664032,
"logps/chosen": -1.2075114250183105,
"logps/rejected": -1.4238183498382568,
"loss": 1.2141,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -2.415022850036621,
"rewards/margins": 0.4326140284538269,
"rewards/rejected": -2.8476366996765137,
"step": 140
},
{
"epoch": 0.3035854488353834,
"grad_norm": 24.983190739092922,
"learning_rate": 5.274426885201582e-07,
"logits/chosen": -0.7843077778816223,
"logits/rejected": -0.8767129182815552,
"logps/chosen": -1.1461377143859863,
"logps/rejected": -1.5009636878967285,
"loss": 1.1207,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -2.2922754287719727,
"rewards/margins": 0.7096518278121948,
"rewards/rejected": -3.001927375793457,
"step": 145
},
{
"epoch": 0.31405391258832765,
"grad_norm": 23.74860585722539,
"learning_rate": 5.201289102671411e-07,
"logits/chosen": -0.8561376333236694,
"logits/rejected": -0.8589056134223938,
"logps/chosen": -1.1982135772705078,
"logps/rejected": -1.5201013088226318,
"loss": 1.1476,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -2.3964271545410156,
"rewards/margins": 0.6437759399414062,
"rewards/rejected": -3.0402026176452637,
"step": 150
},
{
"epoch": 0.3245223763412719,
"grad_norm": 15.03909875634319,
"learning_rate": 5.12520042530811e-07,
"logits/chosen": -0.7681445479393005,
"logits/rejected": -0.8174452781677246,
"logps/chosen": -1.2068870067596436,
"logps/rejected": -1.6613304615020752,
"loss": 1.1179,
"rewards/accuracies": 0.65625,
"rewards/chosen": -2.413774013519287,
"rewards/margins": 0.908886730670929,
"rewards/rejected": -3.3226609230041504,
"step": 155
},
{
"epoch": 0.33499084009421615,
"grad_norm": 14.191169695059497,
"learning_rate": 5.046262852292346e-07,
"logits/chosen": -0.8029179573059082,
"logits/rejected": -0.8746109008789062,
"logps/chosen": -1.1898596286773682,
"logps/rejected": -1.6815717220306396,
"loss": 1.1138,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -2.3797192573547363,
"rewards/margins": 0.9834240674972534,
"rewards/rejected": -3.3631434440612793,
"step": 160
},
{
"epoch": 0.34545930384716045,
"grad_norm": 35.93680907186828,
"learning_rate": 4.964582201835856e-07,
"logits/chosen": -0.7598133087158203,
"logits/rejected": -0.7828689813613892,
"logps/chosen": -1.1410859823226929,
"logps/rejected": -1.5104478597640991,
"loss": 1.1132,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -2.2821719646453857,
"rewards/margins": 0.7387233972549438,
"rewards/rejected": -3.0208957195281982,
"step": 165
},
{
"epoch": 0.3559277676001047,
"grad_norm": 33.280459458949075,
"learning_rate": 4.880267969328908e-07,
"logits/chosen": -0.7489741444587708,
"logits/rejected": -0.8511075973510742,
"logps/chosen": -1.2344070672988892,
"logps/rejected": -1.6722608804702759,
"loss": 1.1051,
"rewards/accuracies": 0.65625,
"rewards/chosen": -2.4688141345977783,
"rewards/margins": 0.8757076263427734,
"rewards/rejected": -3.3445217609405518,
"step": 170
},
{
"epoch": 0.36639623135304894,
"grad_norm": 13.559524548726696,
"learning_rate": 4.793433180558423e-07,
"logits/chosen": -0.7471566796302795,
"logits/rejected": -0.8381919860839844,
"logps/chosen": -1.1587435007095337,
"logps/rejected": -1.5522888898849487,
"loss": 1.133,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -2.3174870014190674,
"rewards/margins": 0.7870910167694092,
"rewards/rejected": -3.1045777797698975,
"step": 175
},
{
"epoch": 0.3768646951059932,
"grad_norm": 19.60609504538111,
"learning_rate": 4.704194240193467e-07,
"logits/chosen": -0.7779995203018188,
"logits/rejected": -0.8208974599838257,
"logps/chosen": -1.1914243698120117,
"logps/rejected": -1.6478986740112305,
"loss": 1.0991,
"rewards/accuracies": 0.65625,
"rewards/chosen": -2.3828487396240234,
"rewards/margins": 0.9129486083984375,
"rewards/rejected": -3.295797348022461,
"step": 180
},
{
"epoch": 0.38733315885893743,
"grad_norm": 15.986798312827595,
"learning_rate": 4.6126707757412686e-07,
"logits/chosen": -0.7536464333534241,
"logits/rejected": -0.836445152759552,
"logps/chosen": -1.18105149269104,
"logps/rejected": -1.5753639936447144,
"loss": 1.0801,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -2.36210298538208,
"rewards/margins": 0.788625180721283,
"rewards/rejected": -3.1507279872894287,
"step": 185
},
{
"epoch": 0.39780162261188173,
"grad_norm": 11.085659412542848,
"learning_rate": 4.5189854771829086e-07,
"logits/chosen": -0.7779768705368042,
"logits/rejected": -0.860378623008728,
"logps/chosen": -1.174264907836914,
"logps/rejected": -1.5782097578048706,
"loss": 1.0897,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -2.348529815673828,
"rewards/margins": 0.8078898191452026,
"rewards/rejected": -3.156419515609741,
"step": 190
},
{
"epoch": 0.408270086364826,
"grad_norm": 19.478521042945726,
"learning_rate": 4.4232639325036807e-07,
"logits/chosen": -0.8138440251350403,
"logits/rejected": -0.888975977897644,
"logps/chosen": -1.1923892498016357,
"logps/rejected": -1.6592342853546143,
"loss": 1.1171,
"rewards/accuracies": 0.6875,
"rewards/chosen": -2.3847784996032715,
"rewards/margins": 0.933690071105957,
"rewards/rejected": -3.3184685707092285,
"step": 195
},
{
"epoch": 0.4187385501177702,
"grad_norm": 12.673420292445082,
"learning_rate": 4.32563445933859e-07,
"logits/chosen": -0.7443628311157227,
"logits/rejected": -0.7802754044532776,
"logps/chosen": -1.211715579032898,
"logps/rejected": -1.5577033758163452,
"loss": 1.0631,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -2.423431158065796,
"rewards/margins": 0.6919752955436707,
"rewards/rejected": -3.1154067516326904,
"step": 200
},
{
"epoch": 0.42920701387071447,
"grad_norm": 18.156036717162227,
"learning_rate": 4.226227932958664e-07,
"logits/chosen": -0.8596774935722351,
"logits/rejected": -0.8864806294441223,
"logps/chosen": -1.2197387218475342,
"logps/rejected": -1.706209421157837,
"loss": 1.0695,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -2.4394774436950684,
"rewards/margins": 0.9729412794113159,
"rewards/rejected": -3.412418842315674,
"step": 205
},
{
"epoch": 0.4396754776236587,
"grad_norm": 18.614311057711063,
"learning_rate": 4.1251776108286854e-07,
"logits/chosen": -0.7632856965065002,
"logits/rejected": -0.7707933187484741,
"logps/chosen": -1.2796884775161743,
"logps/rejected": -1.6428205966949463,
"loss": 1.1264,
"rewards/accuracies": 0.65625,
"rewards/chosen": -2.5593769550323486,
"rewards/margins": 0.7262641191482544,
"rewards/rejected": -3.2856411933898926,
"step": 210
},
{
"epoch": 0.45014394137660296,
"grad_norm": 19.070261616595026,
"learning_rate": 4.022618953971514e-07,
"logits/chosen": -0.7568240761756897,
"logits/rejected": -0.8358641862869263,
"logps/chosen": -1.308774709701538,
"logps/rejected": -1.6738483905792236,
"loss": 1.1102,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -2.617549419403076,
"rewards/margins": 0.7301470041275024,
"rewards/rejected": -3.3476967811584473,
"step": 215
},
{
"epoch": 0.46061240512954726,
"grad_norm": 13.160800920164423,
"learning_rate": 3.918689445378477e-07,
"logits/chosen": -0.7660185098648071,
"logits/rejected": -0.8393454551696777,
"logps/chosen": -1.2900028228759766,
"logps/rejected": -1.7106046676635742,
"loss": 1.0429,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -2.580005645751953,
"rewards/margins": 0.8412036895751953,
"rewards/rejected": -3.4212093353271484,
"step": 220
},
{
"epoch": 0.4710808688824915,
"grad_norm": 15.467772988868518,
"learning_rate": 3.813528405709251e-07,
"logits/chosen": -0.7320618629455566,
"logits/rejected": -0.7756307125091553,
"logps/chosen": -1.3943421840667725,
"logps/rejected": -1.8419634103775024,
"loss": 1.084,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -2.788684368133545,
"rewards/margins": 0.8952423334121704,
"rewards/rejected": -3.683926820755005,
"step": 225
},
{
"epoch": 0.48154933263543576,
"grad_norm": 23.599162652169078,
"learning_rate": 3.707276806528282e-07,
"logits/chosen": -0.7983018159866333,
"logits/rejected": -0.8536737561225891,
"logps/chosen": -1.3397753238677979,
"logps/rejected": -1.8982980251312256,
"loss": 1.0107,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -2.6795506477355957,
"rewards/margins": 1.1170451641082764,
"rewards/rejected": -3.796596050262451,
"step": 230
},
{
"epoch": 0.49201779638838,
"grad_norm": 22.745006961113983,
"learning_rate": 3.6000770813281334e-07,
"logits/chosen": -0.7526620626449585,
"logits/rejected": -0.7841376066207886,
"logps/chosen": -1.3173251152038574,
"logps/rejected": -1.6973741054534912,
"loss": 1.096,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -2.634650230407715,
"rewards/margins": 0.7600980401039124,
"rewards/rejected": -3.3947482109069824,
"step": 235
},
{
"epoch": 0.5024862601413242,
"grad_norm": 17.29631229132808,
"learning_rate": 3.4920729345930654e-07,
"logits/chosen": -0.8024924993515015,
"logits/rejected": -0.8705514669418335,
"logps/chosen": -1.3106586933135986,
"logps/rejected": -1.8416321277618408,
"loss": 1.0622,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -2.6213173866271973,
"rewards/margins": 1.0619468688964844,
"rewards/rejected": -3.6832642555236816,
"step": 240
},
{
"epoch": 0.5129547238942685,
"grad_norm": 15.697390709369445,
"learning_rate": 3.383409149158814e-07,
"logits/chosen": -0.8013178110122681,
"logits/rejected": -0.8261008262634277,
"logps/chosen": -1.2374125719070435,
"logps/rejected": -1.8463026285171509,
"loss": 1.0412,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -2.474825143814087,
"rewards/margins": 1.2177798748016357,
"rewards/rejected": -3.6926052570343018,
"step": 245
},
{
"epoch": 0.5234231876472127,
"grad_norm": 38.568029652024805,
"learning_rate": 3.2742313921268035e-07,
"logits/chosen": -0.7440148591995239,
"logits/rejected": -0.8371674418449402,
"logps/chosen": -1.3792295455932617,
"logps/rejected": -1.996372938156128,
"loss": 1.0533,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -2.7584590911865234,
"rewards/margins": 1.234286904335022,
"rewards/rejected": -3.992745876312256,
"step": 250
},
{
"epoch": 0.533891651400157,
"grad_norm": 31.29600689027817,
"learning_rate": 3.1646860195929825e-07,
"logits/chosen": -0.798254132270813,
"logits/rejected": -0.819698691368103,
"logps/chosen": -1.4148808717727661,
"logps/rejected": -1.9883480072021484,
"loss": 1.1126,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -2.8297617435455322,
"rewards/margins": 1.1469345092773438,
"rewards/rejected": -3.976696014404297,
"step": 255
},
{
"epoch": 0.5443601151531012,
"grad_norm": 21.255043892106038,
"learning_rate": 3.054919880453032e-07,
"logits/chosen": -0.8065778017044067,
"logits/rejected": -0.8200203776359558,
"logps/chosen": -1.3674335479736328,
"logps/rejected": -1.8728046417236328,
"loss": 1.0948,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -2.7348670959472656,
"rewards/margins": 1.0107421875,
"rewards/rejected": -3.7456092834472656,
"step": 260
},
{
"epoch": 0.5548285789060455,
"grad_norm": 15.283609874940026,
"learning_rate": 2.9450801195469686e-07,
"logits/chosen": -0.7686730027198792,
"logits/rejected": -0.7811926603317261,
"logps/chosen": -1.3809654712677002,
"logps/rejected": -1.8307151794433594,
"loss": 1.0502,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -2.7619309425354004,
"rewards/margins": 0.8994992971420288,
"rewards/rejected": -3.6614303588867188,
"step": 265
},
{
"epoch": 0.5652970426589898,
"grad_norm": 34.69673151716839,
"learning_rate": 2.835313980407017e-07,
"logits/chosen": -0.8522397875785828,
"logits/rejected": -0.8554953336715698,
"logps/chosen": -1.4796664714813232,
"logps/rejected": -1.868570327758789,
"loss": 1.11,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -2.9593329429626465,
"rewards/margins": 0.7778076529502869,
"rewards/rejected": -3.737140655517578,
"step": 270
},
{
"epoch": 0.575765506411934,
"grad_norm": 12.490257980809535,
"learning_rate": 2.7257686078731973e-07,
"logits/chosen": -0.8593546748161316,
"logits/rejected": -0.8926668167114258,
"logps/chosen": -1.2937114238739014,
"logps/rejected": -2.0442328453063965,
"loss": 0.9612,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -2.5874228477478027,
"rewards/margins": 1.5010432004928589,
"rewards/rejected": -4.088465690612793,
"step": 275
},
{
"epoch": 0.5862339701648783,
"grad_norm": 17.66798289482467,
"learning_rate": 2.6165908508411857e-07,
"logits/chosen": -0.7889951467514038,
"logits/rejected": -0.8469230532646179,
"logps/chosen": -1.3164467811584473,
"logps/rejected": -1.873552680015564,
"loss": 1.0829,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -2.6328935623168945,
"rewards/margins": 1.114211916923523,
"rewards/rejected": -3.747105360031128,
"step": 280
},
{
"epoch": 0.5967024339178225,
"grad_norm": 25.798144103608532,
"learning_rate": 2.5079270654069354e-07,
"logits/chosen": -0.7999380230903625,
"logits/rejected": -0.8465052843093872,
"logps/chosen": -1.4005292654037476,
"logps/rejected": -1.9563087224960327,
"loss": 1.0559,
"rewards/accuracies": 0.71875,
"rewards/chosen": -2.801058530807495,
"rewards/margins": 1.1115590333938599,
"rewards/rejected": -3.9126174449920654,
"step": 285
},
{
"epoch": 0.6071708976707668,
"grad_norm": 26.70646393830588,
"learning_rate": 2.399922918671867e-07,
"logits/chosen": -0.8188889622688293,
"logits/rejected": -0.8326479196548462,
"logps/chosen": -1.4042682647705078,
"logps/rejected": -1.8107773065567017,
"loss": 1.0877,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -2.8085365295410156,
"rewards/margins": 0.8130179643630981,
"rewards/rejected": -3.6215546131134033,
"step": 290
},
{
"epoch": 0.6176393614237111,
"grad_norm": 23.426122701316096,
"learning_rate": 2.2927231934717176e-07,
"logits/chosen": -0.8667086362838745,
"logits/rejected": -0.87919682264328,
"logps/chosen": -1.4516851902008057,
"logps/rejected": -1.7210047245025635,
"loss": 1.0425,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -2.9033703804016113,
"rewards/margins": 0.5386390686035156,
"rewards/rejected": -3.442009449005127,
"step": 295
},
{
"epoch": 0.6281078251766553,
"grad_norm": 26.456279591360094,
"learning_rate": 2.1864715942907487e-07,
"logits/chosen": -0.8121633529663086,
"logits/rejected": -0.8183205723762512,
"logps/chosen": -1.4428894519805908,
"logps/rejected": -1.9755233526229858,
"loss": 1.0841,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -2.8857789039611816,
"rewards/margins": 1.0652679204940796,
"rewards/rejected": -3.9510467052459717,
"step": 300
},
{
"epoch": 0.6385762889295996,
"grad_norm": 31.787422608248555,
"learning_rate": 2.081310554621522e-07,
"logits/chosen": -0.812918484210968,
"logits/rejected": -0.848720371723175,
"logps/chosen": -1.3704057931900024,
"logps/rejected": -1.7566410303115845,
"loss": 1.0211,
"rewards/accuracies": 0.6875,
"rewards/chosen": -2.740811586380005,
"rewards/margins": 0.7724703550338745,
"rewards/rejected": -3.513282060623169,
"step": 305
},
{
"epoch": 0.6490447526825438,
"grad_norm": 20.39803180345373,
"learning_rate": 1.9773810460284862e-07,
"logits/chosen": -0.7991079092025757,
"logits/rejected": -0.8711285591125488,
"logps/chosen": -1.4278900623321533,
"logps/rejected": -2.05625581741333,
"loss": 0.9925,
"rewards/accuracies": 0.71875,
"rewards/chosen": -2.8557801246643066,
"rewards/margins": 1.256731629371643,
"rewards/rejected": -4.11251163482666,
"step": 310
},
{
"epoch": 0.6595132164354881,
"grad_norm": 22.590110789535018,
"learning_rate": 1.874822389171314e-07,
"logits/chosen": -0.8574708700180054,
"logits/rejected": -0.9009912610054016,
"logps/chosen": -1.545143723487854,
"logps/rejected": -2.0895230770111084,
"loss": 1.0237,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -3.090287446975708,
"rewards/margins": 1.088758945465088,
"rewards/rejected": -4.179046154022217,
"step": 315
},
{
"epoch": 0.6699816801884323,
"grad_norm": 19.0520960322845,
"learning_rate": 1.7737720670413356e-07,
"logits/chosen": -0.8097273111343384,
"logits/rejected": -0.8335424661636353,
"logps/chosen": -1.5219576358795166,
"logps/rejected": -2.0950403213500977,
"loss": 1.0412,
"rewards/accuracies": 0.6875,
"rewards/chosen": -3.043915271759033,
"rewards/margins": 1.1461658477783203,
"rewards/rejected": -4.190080642700195,
"step": 320
},
{
"epoch": 0.6804501439413766,
"grad_norm": 28.978881064657845,
"learning_rate": 1.6743655406614095e-07,
"logits/chosen": -0.8851544260978699,
"logits/rejected": -0.8812357187271118,
"logps/chosen": -1.505824089050293,
"logps/rejected": -2.034778118133545,
"loss": 1.0881,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -3.011648178100586,
"rewards/margins": 1.0579078197479248,
"rewards/rejected": -4.06955623626709,
"step": 325
},
{
"epoch": 0.6909186076943209,
"grad_norm": 23.915843277630973,
"learning_rate": 1.5767360674963198e-07,
"logits/chosen": -0.870714008808136,
"logits/rejected": -0.8971943855285645,
"logps/chosen": -1.3601343631744385,
"logps/rejected": -2.0130364894866943,
"loss": 1.0087,
"rewards/accuracies": 0.75,
"rewards/chosen": -2.720268726348877,
"rewards/margins": 1.3058046102523804,
"rewards/rejected": -4.026072978973389,
"step": 330
},
{
"epoch": 0.7013870714472651,
"grad_norm": 26.514246744997322,
"learning_rate": 1.4810145228170922e-07,
"logits/chosen": -0.8225549459457397,
"logits/rejected": -0.8689346313476562,
"logps/chosen": -1.4374722242355347,
"logps/rejected": -1.9102426767349243,
"loss": 1.052,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -2.8749444484710693,
"rewards/margins": 0.9455404281616211,
"rewards/rejected": -3.8204853534698486,
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 27.7004551617753, |
|
"learning_rate": 1.3873292242587306e-07, |
|
"logits/chosen": -0.8165398836135864, |
|
"logits/rejected": -0.9100580215454102, |
|
"logps/chosen": -1.461507797241211, |
|
"logps/rejected": -2.0511550903320312, |
|
"loss": 1.0709, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.923015594482422, |
|
"rewards/margins": 1.1792947053909302, |
|
"rewards/rejected": -4.1023101806640625, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 24.617595975995133, |
|
"learning_rate": 1.295805759806533e-07, |
|
"logits/chosen": -0.8566834330558777, |
|
"logits/rejected": -0.8978926539421082, |
|
"logps/chosen": -1.5079203844070435, |
|
"logps/rejected": -2.044774293899536, |
|
"loss": 1.0388, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.015840768814087, |
|
"rewards/margins": 1.073707938194275, |
|
"rewards/rejected": -4.089548587799072, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 22.995198881906134, |
|
"learning_rate": 1.2065668194415777e-07, |
|
"logits/chosen": -0.8893098831176758, |
|
"logits/rejected": -0.9465163946151733, |
|
"logps/chosen": -1.5923842191696167, |
|
"logps/rejected": -2.066089153289795, |
|
"loss": 0.9896, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -3.1847684383392334, |
|
"rewards/margins": 0.947409987449646, |
|
"rewards/rejected": -4.13217830657959, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 22.355221430364576, |
|
"learning_rate": 1.1197320306710923e-07, |
|
"logits/chosen": -0.8776585459709167, |
|
"logits/rejected": -0.9053448438644409, |
|
"logps/chosen": -1.5153396129608154, |
|
"logps/rejected": -2.0724828243255615, |
|
"loss": 1.0507, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.030679225921631, |
|
"rewards/margins": 1.1142865419387817, |
|
"rewards/rejected": -4.144965648651123, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 23.090030368869293, |
|
"learning_rate": 1.035417798164145e-07, |
|
"logits/chosen": -0.8465662002563477, |
|
"logits/rejected": -0.9114416837692261, |
|
"logps/chosen": -1.5818672180175781, |
|
"logps/rejected": -2.124342441558838, |
|
"loss": 1.0082, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.1637344360351562, |
|
"rewards/margins": 1.0849504470825195, |
|
"rewards/rejected": -4.248684883117676, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 23.156782358223225, |
|
"learning_rate": 9.537371477076535e-08, |
|
"logits/chosen": -0.8677560687065125, |
|
"logits/rejected": -0.9061796069145203, |
|
"logps/chosen": -1.5915837287902832, |
|
"logps/rejected": -2.287815570831299, |
|
"loss": 0.9867, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.1831674575805664, |
|
"rewards/margins": 1.3924639225006104, |
|
"rewards/rejected": -4.575631141662598, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 20.328637763728924, |
|
"learning_rate": 8.747995746918898e-08, |
|
"logits/chosen": -0.8234347105026245, |
|
"logits/rejected": -0.8825669288635254, |
|
"logps/chosen": -1.5265567302703857, |
|
"logps/rejected": -2.1997315883636475, |
|
"loss": 0.9162, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.0531134605407715, |
|
"rewards/margins": 1.3463497161865234, |
|
"rewards/rejected": -4.399463176727295, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 38.958615879066635, |
|
"learning_rate": 7.987108973285888e-08, |
|
"logits/chosen": -0.8697785139083862, |
|
"logits/rejected": -0.8908045887947083, |
|
"logps/chosen": -1.508302927017212, |
|
"logps/rejected": -2.1442337036132812, |
|
"loss": 1.0045, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -3.016605854034424, |
|
"rewards/margins": 1.2718614339828491, |
|
"rewards/rejected": -4.2884674072265625, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 40.64376807024019, |
|
"learning_rate": 7.255731147984174e-08, |
|
"logits/chosen": -0.8699348568916321, |
|
"logits/rejected": -0.9192712903022766, |
|
"logps/chosen": -1.5248959064483643, |
|
"logps/rejected": -2.057331085205078, |
|
"loss": 1.0402, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -3.0497918128967285, |
|
"rewards/margins": 1.064869999885559, |
|
"rewards/rejected": -4.114662170410156, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 31.79789174489367, |
|
"learning_rate": 6.554842705179898e-08, |
|
"logits/chosen": -0.8611375093460083, |
|
"logits/rejected": -0.8788291215896606, |
|
"logps/chosen": -1.4700887203216553, |
|
"logps/rejected": -2.0618722438812256, |
|
"loss": 1.0386, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.9401774406433105, |
|
"rewards/margins": 1.183566927909851, |
|
"rewards/rejected": -4.123744487762451, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 27.699401276090363, |
|
"learning_rate": 5.885383207096832e-08, |
|
"logits/chosen": -0.8817920684814453, |
|
"logits/rejected": -0.9167042970657349, |
|
"logps/chosen": -1.5808578729629517, |
|
"logps/rejected": -2.0726354122161865, |
|
"loss": 1.0164, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -3.1617157459259033, |
|
"rewards/margins": 0.9835556149482727, |
|
"rewards/rejected": -4.145270824432373, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 22.291806094067294, |
|
"learning_rate": 5.2482500845047165e-08, |
|
"logits/chosen": -0.8046171069145203, |
|
"logits/rejected": -0.8632856607437134, |
|
"logps/chosen": -1.474746823310852, |
|
"logps/rejected": -2.074794292449951, |
|
"loss": 1.0014, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.949493646621704, |
|
"rewards/margins": 1.2000950574874878, |
|
"rewards/rejected": -4.149588584899902, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 32.14293789219742, |
|
"learning_rate": 4.644297433686162e-08, |
|
"logits/chosen": -0.8459577560424805, |
|
"logits/rejected": -0.8775212168693542, |
|
"logps/chosen": -1.5837218761444092, |
|
"logps/rejected": -2.0384469032287598, |
|
"loss": 1.0682, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -3.1674437522888184, |
|
"rewards/margins": 0.9094497561454773, |
|
"rewards/rejected": -4.0768938064575195, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"eval_logits/chosen": -0.9671933650970459, |
|
"eval_logits/rejected": -0.9966414570808411, |
|
"eval_logps/chosen": -1.536142349243164, |
|
"eval_logps/rejected": -2.0912911891937256, |
|
"eval_loss": 0.9954066276550293, |
|
"eval_rewards/accuracies": 0.7279999852180481, |
|
"eval_rewards/chosen": -3.072284698486328, |
|
"eval_rewards/margins": 1.1102983951568604, |
|
"eval_rewards/rejected": -4.182582378387451, |
|
"eval_runtime": 45.9263, |
|
"eval_samples_per_second": 43.548, |
|
"eval_steps_per_second": 2.722, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 23.581025931041157, |
|
"learning_rate": 4.074334871494558e-08, |
|
"logits/chosen": -0.8318978548049927, |
|
"logits/rejected": -0.9007453918457031, |
|
"logps/chosen": -1.597597360610962, |
|
"logps/rejected": -2.2467799186706543, |
|
"loss": 0.9898, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.195194721221924, |
|
"rewards/margins": 1.2983646392822266, |
|
"rewards/rejected": -4.493559837341309, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 37.34203846776795, |
|
"learning_rate": 3.5391264500382e-08, |
|
"logits/chosen": -0.8569322824478149, |
|
"logits/rejected": -0.8944110870361328, |
|
"logps/chosen": -1.6689296960830688, |
|
"logps/rejected": -2.2536518573760986, |
|
"loss": 0.9821, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -3.3378593921661377, |
|
"rewards/margins": 1.16944420337677, |
|
"rewards/rejected": -4.507303714752197, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 25.448649440851888, |
|
"learning_rate": 3.0393896324452226e-08, |
|
"logits/chosen": -0.8548834919929504, |
|
"logits/rejected": -0.8898690938949585, |
|
"logps/chosen": -1.6892175674438477, |
|
"logps/rejected": -2.1383655071258545, |
|
"loss": 1.0282, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -3.3784351348876953, |
|
"rewards/margins": 0.8982963562011719, |
|
"rewards/rejected": -4.276731014251709, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 22.81456603203954, |
|
"learning_rate": 2.5757943310825026e-08, |
|
"logits/chosen": -0.8120086789131165, |
|
"logits/rejected": -0.8377026319503784, |
|
"logps/chosen": -1.5306228399276733, |
|
"logps/rejected": -2.244910478591919, |
|
"loss": 0.9802, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.0612456798553467, |
|
"rewards/margins": 1.428574800491333, |
|
"rewards/rejected": -4.489820957183838, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 37.81119467654555, |
|
"learning_rate": 2.148962009517823e-08, |
|
"logits/chosen": -0.8621734380722046, |
|
"logits/rejected": -0.9295539855957031, |
|
"logps/chosen": -1.594923973083496, |
|
"logps/rejected": -2.202113389968872, |
|
"loss": 0.9772, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.189847946166992, |
|
"rewards/margins": 1.2143787145614624, |
|
"rewards/rejected": -4.404226779937744, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 23.35609170503276, |
|
"learning_rate": 1.759464849429082e-08, |
|
"logits/chosen": -0.8409427404403687, |
|
"logits/rejected": -0.8790140151977539, |
|
"logps/chosen": -1.6252171993255615, |
|
"logps/rejected": -2.1690993309020996, |
|
"loss": 0.9766, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -3.250434398651123, |
|
"rewards/margins": 1.087764024734497, |
|
"rewards/rejected": -4.338198661804199, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 42.92976213914578, |
|
"learning_rate": 1.4078249835774169e-08, |
|
"logits/chosen": -0.8287452459335327, |
|
"logits/rejected": -0.8296720385551453, |
|
"logps/chosen": -1.493123173713684, |
|
"logps/rejected": -2.055771827697754, |
|
"loss": 1.0029, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.986246347427368, |
|
"rewards/margins": 1.1252974271774292, |
|
"rewards/rejected": -4.111543655395508, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 32.7360124305529, |
|
"learning_rate": 1.0945137958723705e-08, |
|
"logits/chosen": -0.8666203618049622, |
|
"logits/rejected": -0.9023343920707703, |
|
"logps/chosen": -1.6795040369033813, |
|
"logps/rejected": -2.055238962173462, |
|
"loss": 1.0619, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -3.3590080738067627, |
|
"rewards/margins": 0.7514694929122925, |
|
"rewards/rejected": -4.110477924346924, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 25.809975837885126, |
|
"learning_rate": 8.19951289467482e-09, |
|
"logits/chosen": -0.8226273655891418, |
|
"logits/rejected": -0.8915680646896362, |
|
"logps/chosen": -1.6063209772109985, |
|
"logps/rejected": -2.2188549041748047, |
|
"loss": 1.0036, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.212641954421997, |
|
"rewards/margins": 1.2250680923461914, |
|
"rewards/rejected": -4.437709808349609, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 27.907394126837357, |
|
"learning_rate": 5.84505523733293e-09, |
|
"logits/chosen": -0.8590003848075867, |
|
"logits/rejected": -0.9254142642021179, |
|
"logps/chosen": -1.5489723682403564, |
|
"logps/rejected": -2.138707160949707, |
|
"loss": 1.0026, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.097944736480713, |
|
"rewards/margins": 1.1794699430465698, |
|
"rewards/rejected": -4.277414321899414, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 26.194546776590737, |
|
"learning_rate": 3.8849212086261466e-09, |
|
"logits/chosen": -0.8426074981689453, |
|
"logits/rejected": -0.8449162244796753, |
|
"logps/chosen": -1.5749680995941162, |
|
"logps/rejected": -2.065624475479126, |
|
"loss": 1.0628, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -3.1499361991882324, |
|
"rewards/margins": 0.9813130497932434, |
|
"rewards/rejected": -4.131248950958252, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 28.647656191366522, |
|
"learning_rate": 2.3217384276938756e-09, |
|
"logits/chosen": -0.7687999606132507, |
|
"logits/rejected": -0.8947674036026001, |
|
"logps/chosen": -1.4748101234436035, |
|
"logps/rejected": -2.2467246055603027, |
|
"loss": 1.0081, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.949620246887207, |
|
"rewards/margins": 1.5438289642333984, |
|
"rewards/rejected": -4.4934492111206055, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 25.297804062883948, |
|
"learning_rate": 1.1576023884836472e-09, |
|
"logits/chosen": -0.8351796269416809, |
|
"logits/rejected": -0.8887630701065063, |
|
"logps/chosen": -1.5146936178207397, |
|
"logps/rejected": -2.2188751697540283, |
|
"loss": 0.9987, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.0293872356414795, |
|
"rewards/margins": 1.4083633422851562, |
|
"rewards/rejected": -4.437750339508057, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 19.986270660762962, |
|
"learning_rate": 3.940736506780395e-10, |
|
"logits/chosen": -0.7743644118309021, |
|
"logits/rejected": -0.788620114326477, |
|
"logps/chosen": -1.4425890445709229, |
|
"logps/rejected": -2.27103853225708, |
|
"loss": 1.0166, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.8851780891418457, |
|
"rewards/margins": 1.6568992137908936, |
|
"rewards/rejected": -4.54207706451416, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 26.273630707088135, |
|
"learning_rate": 3.2175747716822744e-11, |
|
"logits/chosen": -0.8468500971794128, |
|
"logits/rejected": -0.9172460436820984, |
|
"logps/chosen": -1.5344510078430176, |
|
"logps/rejected": -2.111969470977783, |
|
"loss": 0.9858, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.068902015686035, |
|
"rewards/margins": 1.1550369262695312, |
|
"rewards/rejected": -4.223938941955566, |
|
"step": 475 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 477, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 225, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|