|
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 20.425531914893618,
|
|
"eval_steps": 300,
|
|
"global_step": 720,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.9078014184397163,
|
|
"grad_norm": 132.1505126953125,
|
|
"learning_rate": 2.222222222222222e-07,
|
|
"log_odds_chosen": 0.05492939054965973,
|
|
"log_odds_ratio": -0.7323614954948425,
|
|
"logits/chosen": -4.740067958831787,
|
|
"logits/rejected": -4.963461399078369,
|
|
"logps/chosen": -2.374514579772949,
|
|
"logps/rejected": -2.4533467292785645,
|
|
"loss": 2.8785,
|
|
"nll_loss": 2.7699854373931885,
|
|
"rewards/accuracies": 0.48828125,
|
|
"rewards/chosen": -0.3561772108078003,
|
|
"rewards/margins": 0.011824802495539188,
|
|
"rewards/rejected": -0.3680019676685333,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 1.8156028368794326,
|
|
"grad_norm": 45.387813568115234,
|
|
"learning_rate": 4.444444444444444e-07,
|
|
"log_odds_chosen": 0.22124934196472168,
|
|
"log_odds_ratio": -0.6725601553916931,
|
|
"logits/chosen": -5.0936760902404785,
|
|
"logits/rejected": -5.33966588973999,
|
|
"logps/chosen": -1.732269287109375,
|
|
"logps/rejected": -1.9329001903533936,
|
|
"loss": 2.2151,
|
|
"nll_loss": 2.172783851623535,
|
|
"rewards/accuracies": 0.6015625,
|
|
"rewards/chosen": -0.2598403990268707,
|
|
"rewards/margins": 0.030094601213932037,
|
|
"rewards/rejected": -0.28993502259254456,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 2.723404255319149,
|
|
"grad_norm": 19.709226608276367,
|
|
"learning_rate": 4.983095894354857e-07,
|
|
"log_odds_chosen": 0.2306685447692871,
|
|
"log_odds_ratio": -0.658535361289978,
|
|
"logits/chosen": -4.936949729919434,
|
|
"logits/rejected": -5.155893802642822,
|
|
"logps/chosen": -1.4097586870193481,
|
|
"logps/rejected": -1.5965328216552734,
|
|
"loss": 1.8908,
|
|
"nll_loss": 1.8126921653747559,
|
|
"rewards/accuracies": 0.6171875,
|
|
"rewards/chosen": -0.2114638090133667,
|
|
"rewards/margins": 0.028016118332743645,
|
|
"rewards/rejected": -0.2394799143075943,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 3.631205673758865,
|
|
"grad_norm": 20.5742130279541,
|
|
"learning_rate": 4.908427196539701e-07,
|
|
"log_odds_chosen": 0.3416966497898102,
|
|
"log_odds_ratio": -0.5978461503982544,
|
|
"logits/chosen": -4.871417999267578,
|
|
"logits/rejected": -5.006246566772461,
|
|
"logps/chosen": -1.3338335752487183,
|
|
"logps/rejected": -1.595802903175354,
|
|
"loss": 1.7492,
|
|
"nll_loss": 1.6234831809997559,
|
|
"rewards/accuracies": 0.66015625,
|
|
"rewards/chosen": -0.20007506012916565,
|
|
"rewards/margins": 0.039295390248298645,
|
|
"rewards/rejected": -0.2393704503774643,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 4.539007092198582,
|
|
"grad_norm": 19.219451904296875,
|
|
"learning_rate": 4.775907352415367e-07,
|
|
"log_odds_chosen": 0.4098852872848511,
|
|
"log_odds_ratio": -0.5668885111808777,
|
|
"logits/chosen": -4.7395429611206055,
|
|
"logits/rejected": -4.919832229614258,
|
|
"logps/chosen": -1.251634955406189,
|
|
"logps/rejected": -1.5617362260818481,
|
|
"loss": 1.6591,
|
|
"nll_loss": 1.5759321451187134,
|
|
"rewards/accuracies": 0.72265625,
|
|
"rewards/chosen": -0.18774525821208954,
|
|
"rewards/margins": 0.04651518166065216,
|
|
"rewards/rejected": -0.2342604398727417,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 5.446808510638298,
|
|
"grad_norm": 25.844369888305664,
|
|
"learning_rate": 4.588719528532341e-07,
|
|
"log_odds_chosen": 0.4376165568828583,
|
|
"log_odds_ratio": -0.5644897222518921,
|
|
"logits/chosen": -4.674585342407227,
|
|
"logits/rejected": -4.810555934906006,
|
|
"logps/chosen": -1.2456402778625488,
|
|
"logps/rejected": -1.5732855796813965,
|
|
"loss": 1.5996,
|
|
"nll_loss": 1.4974051713943481,
|
|
"rewards/accuracies": 0.73828125,
|
|
"rewards/chosen": -0.1868460476398468,
|
|
"rewards/margins": 0.04914678633213043,
|
|
"rewards/rejected": -0.23599283397197723,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 6.3546099290780145,
|
|
"grad_norm": 23.09563636779785,
|
|
"learning_rate": 4.3513600327725117e-07,
|
|
"log_odds_chosen": 0.3738464415073395,
|
|
"log_odds_ratio": -0.5867234468460083,
|
|
"logits/chosen": -4.663087844848633,
|
|
"logits/rejected": -4.844013214111328,
|
|
"logps/chosen": -1.3138737678527832,
|
|
"logps/rejected": -1.5835403203964233,
|
|
"loss": 1.5411,
|
|
"nll_loss": 1.4685286283493042,
|
|
"rewards/accuracies": 0.7109375,
|
|
"rewards/chosen": -0.197081059217453,
|
|
"rewards/margins": 0.04045000299811363,
|
|
"rewards/rejected": -0.23753106594085693,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 7.26241134751773,
|
|
"grad_norm": 23.646638870239258,
|
|
"learning_rate": 4.0695303116802467e-07,
|
|
"log_odds_chosen": 0.46367794275283813,
|
|
"log_odds_ratio": -0.547984778881073,
|
|
"logits/chosen": -4.620482921600342,
|
|
"logits/rejected": -4.765042781829834,
|
|
"logps/chosen": -1.307213544845581,
|
|
"logps/rejected": -1.65544593334198,
|
|
"loss": 1.498,
|
|
"nll_loss": 1.4461973905563354,
|
|
"rewards/accuracies": 0.75390625,
|
|
"rewards/chosen": -0.19608205556869507,
|
|
"rewards/margins": 0.05223485454916954,
|
|
"rewards/rejected": -0.2483169138431549,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 8.170212765957446,
|
|
"grad_norm": 23.585857391357422,
|
|
"learning_rate": 3.75e-07,
|
|
"log_odds_chosen": 0.4542897343635559,
|
|
"log_odds_ratio": -0.5773134827613831,
|
|
"logits/chosen": -4.633105278015137,
|
|
"logits/rejected": -4.810471057891846,
|
|
"logps/chosen": -1.3886733055114746,
|
|
"logps/rejected": -1.7219102382659912,
|
|
"loss": 1.4686,
|
|
"nll_loss": 1.3969916105270386,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": -0.20830100774765015,
|
|
"rewards/margins": 0.049985550343990326,
|
|
"rewards/rejected": -0.2582865357398987,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 8.51063829787234,
|
|
"eval_log_odds_chosen": 1.6898525953292847,
|
|
"eval_log_odds_ratio": -0.19230316579341888,
|
|
"eval_logits/chosen": -4.930749416351318,
|
|
"eval_logits/rejected": -4.758046627044678,
|
|
"eval_logps/chosen": -1.4714246988296509,
|
|
"eval_logps/rejected": -2.9621574878692627,
|
|
"eval_loss": 1.3246647119522095,
|
|
"eval_nll_loss": 1.4632530212402344,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.22071371972560883,
|
|
"eval_rewards/margins": 0.22360996901988983,
|
|
"eval_rewards/rejected": -0.44432368874549866,
|
|
"eval_runtime": 0.6144,
|
|
"eval_samples_per_second": 222.965,
|
|
"eval_steps_per_second": 4.882,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 9.078014184397164,
|
|
"grad_norm": 20.154146194458008,
|
|
"learning_rate": 3.400444312011776e-07,
|
|
"log_odds_chosen": 0.409349262714386,
|
|
"log_odds_ratio": -0.5844379663467407,
|
|
"logits/chosen": -4.599703311920166,
|
|
"logits/rejected": -4.766429901123047,
|
|
"logps/chosen": -1.3831363916397095,
|
|
"logps/rejected": -1.6733564138412476,
|
|
"loss": 1.4397,
|
|
"nll_loss": 1.4065345525741577,
|
|
"rewards/accuracies": 0.74609375,
|
|
"rewards/chosen": -0.20747046172618866,
|
|
"rewards/margins": 0.04353303089737892,
|
|
"rewards/rejected": -0.2510034739971161,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 9.98581560283688,
|
|
"grad_norm": 23.09050750732422,
|
|
"learning_rate": 3.029259680573527e-07,
|
|
"log_odds_chosen": 0.43665847182273865,
|
|
"log_odds_ratio": -0.5905143618583679,
|
|
"logits/chosen": -4.58922815322876,
|
|
"logits/rejected": -4.685288906097412,
|
|
"logps/chosen": -1.458475112915039,
|
|
"logps/rejected": -1.7894960641860962,
|
|
"loss": 1.4285,
|
|
"nll_loss": 1.3732693195343018,
|
|
"rewards/accuracies": 0.73046875,
|
|
"rewards/chosen": -0.21877126395702362,
|
|
"rewards/margins": 0.04965316504240036,
|
|
"rewards/rejected": -0.26842445135116577,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 10.893617021276595,
|
|
"grad_norm": 35.926055908203125,
|
|
"learning_rate": 2.6453620722761895e-07,
|
|
"log_odds_chosen": 0.6511461138725281,
|
|
"log_odds_ratio": -0.49195483326911926,
|
|
"logits/chosen": -4.608173370361328,
|
|
"logits/rejected": -4.685794830322266,
|
|
"logps/chosen": -1.3694053888320923,
|
|
"logps/rejected": -1.8711962699890137,
|
|
"loss": 1.4144,
|
|
"nll_loss": 1.374709129333496,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": -0.20541077852249146,
|
|
"rewards/margins": 0.07526866346597672,
|
|
"rewards/rejected": -0.28067946434020996,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 11.801418439716311,
|
|
"grad_norm": 33.8105583190918,
|
|
"learning_rate": 2.2579728232420523e-07,
|
|
"log_odds_chosen": 0.5499828457832336,
|
|
"log_odds_ratio": -0.5233615040779114,
|
|
"logits/chosen": -4.537787437438965,
|
|
"logits/rejected": -4.662774085998535,
|
|
"logps/chosen": -1.3898181915283203,
|
|
"logps/rejected": -1.7920804023742676,
|
|
"loss": 1.4016,
|
|
"nll_loss": 1.3631547689437866,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": -0.20847272872924805,
|
|
"rewards/margins": 0.06033932417631149,
|
|
"rewards/rejected": -0.26881206035614014,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 12.709219858156029,
|
|
"grad_norm": 25.557348251342773,
|
|
"learning_rate": 1.8763971398550467e-07,
|
|
"log_odds_chosen": 0.5377756357192993,
|
|
"log_odds_ratio": -0.5508320927619934,
|
|
"logits/chosen": -4.532352447509766,
|
|
"logits/rejected": -4.629130840301514,
|
|
"logps/chosen": -1.3977127075195312,
|
|
"logps/rejected": -1.7851612567901611,
|
|
"loss": 1.3943,
|
|
"nll_loss": 1.3304414749145508,
|
|
"rewards/accuracies": 0.77734375,
|
|
"rewards/chosen": -0.20965692400932312,
|
|
"rewards/margins": 0.05811727046966553,
|
|
"rewards/rejected": -0.26777422428131104,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 13.617021276595745,
|
|
"grad_norm": 28.700815200805664,
|
|
"learning_rate": 1.5098005849021078e-07,
|
|
"log_odds_chosen": 0.5411101579666138,
|
|
"log_odds_ratio": -0.5445564985275269,
|
|
"logits/chosen": -4.501680850982666,
|
|
"logits/rejected": -4.677550315856934,
|
|
"logps/chosen": -1.3654242753982544,
|
|
"logps/rejected": -1.7541980743408203,
|
|
"loss": 1.401,
|
|
"nll_loss": 1.2766036987304688,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": -0.2048136293888092,
|
|
"rewards/margins": 0.05831605941057205,
|
|
"rewards/rejected": -0.26312971115112305,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 14.52482269503546,
|
|
"grad_norm": 35.40031814575195,
|
|
"learning_rate": 1.1669889179957723e-07,
|
|
"log_odds_chosen": 0.7372524738311768,
|
|
"log_odds_ratio": -0.46363916993141174,
|
|
"logits/chosen": -4.513700485229492,
|
|
"logits/rejected": -4.619227886199951,
|
|
"logps/chosen": -1.3301138877868652,
|
|
"logps/rejected": -1.8859204053878784,
|
|
"loss": 1.3839,
|
|
"nll_loss": 1.219886302947998,
|
|
"rewards/accuracies": 0.87109375,
|
|
"rewards/chosen": -0.19951710104942322,
|
|
"rewards/margins": 0.08337096124887466,
|
|
"rewards/rejected": -0.2828880548477173,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 15.432624113475176,
|
|
"grad_norm": 62.16829299926758,
|
|
"learning_rate": 8.561965785773412e-08,
|
|
"log_odds_chosen": 0.661382794380188,
|
|
"log_odds_ratio": -0.4891131520271301,
|
|
"logits/chosen": -4.506048202514648,
|
|
"logits/rejected": -4.587852478027344,
|
|
"logps/chosen": -1.3864898681640625,
|
|
"logps/rejected": -1.8775601387023926,
|
|
"loss": 1.3876,
|
|
"nll_loss": 1.2974672317504883,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": -0.20797351002693176,
|
|
"rewards/margins": 0.07366053014993668,
|
|
"rewards/rejected": -0.28163403272628784,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 16.340425531914892,
|
|
"grad_norm": 29.107358932495117,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"log_odds_chosen": 0.6269708275794983,
|
|
"log_odds_ratio": -0.49757176637649536,
|
|
"logits/chosen": -4.460994243621826,
|
|
"logits/rejected": -4.661521911621094,
|
|
"logps/chosen": -1.3339214324951172,
|
|
"logps/rejected": -1.7924858331680298,
|
|
"loss": 1.3923,
|
|
"nll_loss": 1.2958626747131348,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.200088232755661,
|
|
"rewards/margins": 0.06878463923931122,
|
|
"rewards/rejected": -0.26887285709381104,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 17.02127659574468,
|
|
"eval_log_odds_chosen": 1.7557824850082397,
|
|
"eval_log_odds_ratio": -0.18494771420955658,
|
|
"eval_logits/chosen": -4.790639400482178,
|
|
"eval_logits/rejected": -4.577674865722656,
|
|
"eval_logps/chosen": -1.626247763633728,
|
|
"eval_logps/rejected": -3.2115631103515625,
|
|
"eval_loss": 1.2619013786315918,
|
|
"eval_nll_loss": 1.4078196287155151,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.2439371794462204,
|
|
"eval_rewards/margins": 0.2377973347902298,
|
|
"eval_rewards/rejected": -0.4817345142364502,
|
|
"eval_runtime": 0.6115,
|
|
"eval_samples_per_second": 224.05,
|
|
"eval_steps_per_second": 4.906,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 17.24822695035461,
|
|
"grad_norm": 29.23589515686035,
|
|
"learning_rate": 3.5958275117433404e-08,
|
|
"log_odds_chosen": 0.5763309001922607,
|
|
"log_odds_ratio": -0.5261004567146301,
|
|
"logits/chosen": -4.398637294769287,
|
|
"logits/rejected": -4.560643672943115,
|
|
"logps/chosen": -1.3885968923568726,
|
|
"logps/rejected": -1.8019691705703735,
|
|
"loss": 1.3886,
|
|
"nll_loss": 1.3023698329925537,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": -0.20828954875469208,
|
|
"rewards/margins": 0.06200582906603813,
|
|
"rewards/rejected": -0.2702953815460205,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 18.156028368794328,
|
|
"grad_norm": 27.693330764770508,
|
|
"learning_rate": 1.8569007682777415e-08,
|
|
"log_odds_chosen": 0.7424343824386597,
|
|
"log_odds_ratio": -0.46295538544654846,
|
|
"logits/chosen": -4.579552173614502,
|
|
"logits/rejected": -4.691650390625,
|
|
"logps/chosen": -1.3507909774780273,
|
|
"logps/rejected": -1.9150110483169556,
|
|
"loss": 1.3865,
|
|
"nll_loss": 1.3111711740493774,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": -0.20261868834495544,
|
|
"rewards/margins": 0.084633007645607,
|
|
"rewards/rejected": -0.28725165128707886,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 19.06382978723404,
|
|
"grad_norm": 37.925621032714844,
|
|
"learning_rate": 6.738782355044048e-09,
|
|
"log_odds_chosen": 0.6857459545135498,
|
|
"log_odds_ratio": -0.4916977882385254,
|
|
"logits/chosen": -4.52652645111084,
|
|
"logits/rejected": -4.689857482910156,
|
|
"logps/chosen": -1.341786503791809,
|
|
"logps/rejected": -1.8565285205841064,
|
|
"loss": 1.3794,
|
|
"nll_loss": 1.2754034996032715,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": -0.20126797258853912,
|
|
"rewards/margins": 0.07721129059791565,
|
|
"rewards/rejected": -0.27847927808761597,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 19.97163120567376,
|
|
"grad_norm": 24.041799545288086,
|
|
"learning_rate": 7.51764708051994e-10,
|
|
"log_odds_chosen": 0.6411248445510864,
|
|
"log_odds_ratio": -0.5100895762443542,
|
|
"logits/chosen": -4.384097099304199,
|
|
"logits/rejected": -4.515219688415527,
|
|
"logps/chosen": -1.3920109272003174,
|
|
"logps/rejected": -1.8711614608764648,
|
|
"loss": 1.3805,
|
|
"nll_loss": 1.2700397968292236,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": -0.20880162715911865,
|
|
"rewards/margins": 0.07187257707118988,
|
|
"rewards/rejected": -0.2806742191314697,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 20.425531914893618,
|
|
"grad_norm": 28.226720809936523,
|
|
"learning_rate": 0.0,
|
|
"log_odds_chosen": 0.6200518608093262,
|
|
"log_odds_ratio": -0.529932975769043,
|
|
"logits/chosen": -4.434691905975342,
|
|
"logits/rejected": -4.575813293457031,
|
|
"logps/chosen": -1.416117548942566,
|
|
"logps/rejected": -1.8715832233428955,
|
|
"loss": 1.3893,
|
|
"nll_loss": 1.2817054986953735,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": -0.2124176323413849,
|
|
"rewards/margins": 0.0683198943734169,
|
|
"rewards/rejected": -0.2807375192642212,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 20.425531914893618,
|
|
"eval_log_odds_chosen": 1.7479673624038696,
|
|
"eval_log_odds_ratio": -0.1867920309305191,
|
|
"eval_logits/chosen": -4.75565767288208,
|
|
"eval_logits/rejected": -4.538194179534912,
|
|
"eval_logps/chosen": -1.6300764083862305,
|
|
"eval_logps/rejected": -3.2087719440460205,
|
|
"eval_loss": 1.2522811889648438,
|
|
"eval_nll_loss": 1.4028778076171875,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.24451148509979248,
|
|
"eval_rewards/margins": 0.23680436611175537,
|
|
"eval_rewards/rejected": -0.48131585121154785,
|
|
"eval_runtime": 0.615,
|
|
"eval_samples_per_second": 222.778,
|
|
"eval_steps_per_second": 4.878,
|
|
"step": 720
|
|
}
|
|
],
|
|
"logging_steps": 32,
|
|
"max_steps": 720,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 21,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|