avemio-digital's picture
Upload 15 files
924aae5 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.425531914893618,
"eval_steps": 300,
"global_step": 720,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9078014184397163,
"grad_norm": 132.1505126953125,
"learning_rate": 2.222222222222222e-07,
"log_odds_chosen": 0.05492939054965973,
"log_odds_ratio": -0.7323614954948425,
"logits/chosen": -4.740067958831787,
"logits/rejected": -4.963461399078369,
"logps/chosen": -2.374514579772949,
"logps/rejected": -2.4533467292785645,
"loss": 2.8785,
"nll_loss": 2.7699854373931885,
"rewards/accuracies": 0.48828125,
"rewards/chosen": -0.3561772108078003,
"rewards/margins": 0.011824802495539188,
"rewards/rejected": -0.3680019676685333,
"step": 32
},
{
"epoch": 1.8156028368794326,
"grad_norm": 45.387813568115234,
"learning_rate": 4.444444444444444e-07,
"log_odds_chosen": 0.22124934196472168,
"log_odds_ratio": -0.6725601553916931,
"logits/chosen": -5.0936760902404785,
"logits/rejected": -5.33966588973999,
"logps/chosen": -1.732269287109375,
"logps/rejected": -1.9329001903533936,
"loss": 2.2151,
"nll_loss": 2.172783851623535,
"rewards/accuracies": 0.6015625,
"rewards/chosen": -0.2598403990268707,
"rewards/margins": 0.030094601213932037,
"rewards/rejected": -0.28993502259254456,
"step": 64
},
{
"epoch": 2.723404255319149,
"grad_norm": 19.709226608276367,
"learning_rate": 4.983095894354857e-07,
"log_odds_chosen": 0.2306685447692871,
"log_odds_ratio": -0.658535361289978,
"logits/chosen": -4.936949729919434,
"logits/rejected": -5.155893802642822,
"logps/chosen": -1.4097586870193481,
"logps/rejected": -1.5965328216552734,
"loss": 1.8908,
"nll_loss": 1.8126921653747559,
"rewards/accuracies": 0.6171875,
"rewards/chosen": -0.2114638090133667,
"rewards/margins": 0.028016118332743645,
"rewards/rejected": -0.2394799143075943,
"step": 96
},
{
"epoch": 3.631205673758865,
"grad_norm": 20.5742130279541,
"learning_rate": 4.908427196539701e-07,
"log_odds_chosen": 0.3416966497898102,
"log_odds_ratio": -0.5978461503982544,
"logits/chosen": -4.871417999267578,
"logits/rejected": -5.006246566772461,
"logps/chosen": -1.3338335752487183,
"logps/rejected": -1.595802903175354,
"loss": 1.7492,
"nll_loss": 1.6234831809997559,
"rewards/accuracies": 0.66015625,
"rewards/chosen": -0.20007506012916565,
"rewards/margins": 0.039295390248298645,
"rewards/rejected": -0.2393704503774643,
"step": 128
},
{
"epoch": 4.539007092198582,
"grad_norm": 19.219451904296875,
"learning_rate": 4.775907352415367e-07,
"log_odds_chosen": 0.4098852872848511,
"log_odds_ratio": -0.5668885111808777,
"logits/chosen": -4.7395429611206055,
"logits/rejected": -4.919832229614258,
"logps/chosen": -1.251634955406189,
"logps/rejected": -1.5617362260818481,
"loss": 1.6591,
"nll_loss": 1.5759321451187134,
"rewards/accuracies": 0.72265625,
"rewards/chosen": -0.18774525821208954,
"rewards/margins": 0.04651518166065216,
"rewards/rejected": -0.2342604398727417,
"step": 160
},
{
"epoch": 5.446808510638298,
"grad_norm": 25.844369888305664,
"learning_rate": 4.588719528532341e-07,
"log_odds_chosen": 0.4376165568828583,
"log_odds_ratio": -0.5644897222518921,
"logits/chosen": -4.674585342407227,
"logits/rejected": -4.810555934906006,
"logps/chosen": -1.2456402778625488,
"logps/rejected": -1.5732855796813965,
"loss": 1.5996,
"nll_loss": 1.4974051713943481,
"rewards/accuracies": 0.73828125,
"rewards/chosen": -0.1868460476398468,
"rewards/margins": 0.04914678633213043,
"rewards/rejected": -0.23599283397197723,
"step": 192
},
{
"epoch": 6.3546099290780145,
"grad_norm": 23.09563636779785,
"learning_rate": 4.3513600327725117e-07,
"log_odds_chosen": 0.3738464415073395,
"log_odds_ratio": -0.5867234468460083,
"logits/chosen": -4.663087844848633,
"logits/rejected": -4.844013214111328,
"logps/chosen": -1.3138737678527832,
"logps/rejected": -1.5835403203964233,
"loss": 1.5411,
"nll_loss": 1.4685286283493042,
"rewards/accuracies": 0.7109375,
"rewards/chosen": -0.197081059217453,
"rewards/margins": 0.04045000299811363,
"rewards/rejected": -0.23753106594085693,
"step": 224
},
{
"epoch": 7.26241134751773,
"grad_norm": 23.646638870239258,
"learning_rate": 4.0695303116802467e-07,
"log_odds_chosen": 0.46367794275283813,
"log_odds_ratio": -0.547984778881073,
"logits/chosen": -4.620482921600342,
"logits/rejected": -4.765042781829834,
"logps/chosen": -1.307213544845581,
"logps/rejected": -1.65544593334198,
"loss": 1.498,
"nll_loss": 1.4461973905563354,
"rewards/accuracies": 0.75390625,
"rewards/chosen": -0.19608205556869507,
"rewards/margins": 0.05223485454916954,
"rewards/rejected": -0.2483169138431549,
"step": 256
},
{
"epoch": 8.170212765957446,
"grad_norm": 23.585857391357422,
"learning_rate": 3.75e-07,
"log_odds_chosen": 0.4542897343635559,
"log_odds_ratio": -0.5773134827613831,
"logits/chosen": -4.633105278015137,
"logits/rejected": -4.810471057891846,
"logps/chosen": -1.3886733055114746,
"logps/rejected": -1.7219102382659912,
"loss": 1.4686,
"nll_loss": 1.3969916105270386,
"rewards/accuracies": 0.7734375,
"rewards/chosen": -0.20830100774765015,
"rewards/margins": 0.049985550343990326,
"rewards/rejected": -0.2582865357398987,
"step": 288
},
{
"epoch": 8.51063829787234,
"eval_log_odds_chosen": 1.6898525953292847,
"eval_log_odds_ratio": -0.19230316579341888,
"eval_logits/chosen": -4.930749416351318,
"eval_logits/rejected": -4.758046627044678,
"eval_logps/chosen": -1.4714246988296509,
"eval_logps/rejected": -2.9621574878692627,
"eval_loss": 1.3246647119522095,
"eval_nll_loss": 1.4632530212402344,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.22071371972560883,
"eval_rewards/margins": 0.22360996901988983,
"eval_rewards/rejected": -0.44432368874549866,
"eval_runtime": 0.6144,
"eval_samples_per_second": 222.965,
"eval_steps_per_second": 4.882,
"step": 300
},
{
"epoch": 9.078014184397164,
"grad_norm": 20.154146194458008,
"learning_rate": 3.400444312011776e-07,
"log_odds_chosen": 0.409349262714386,
"log_odds_ratio": -0.5844379663467407,
"logits/chosen": -4.599703311920166,
"logits/rejected": -4.766429901123047,
"logps/chosen": -1.3831363916397095,
"logps/rejected": -1.6733564138412476,
"loss": 1.4397,
"nll_loss": 1.4065345525741577,
"rewards/accuracies": 0.74609375,
"rewards/chosen": -0.20747046172618866,
"rewards/margins": 0.04353303089737892,
"rewards/rejected": -0.2510034739971161,
"step": 320
},
{
"epoch": 9.98581560283688,
"grad_norm": 23.09050750732422,
"learning_rate": 3.029259680573527e-07,
"log_odds_chosen": 0.43665847182273865,
"log_odds_ratio": -0.5905143618583679,
"logits/chosen": -4.58922815322876,
"logits/rejected": -4.685288906097412,
"logps/chosen": -1.458475112915039,
"logps/rejected": -1.7894960641860962,
"loss": 1.4285,
"nll_loss": 1.3732693195343018,
"rewards/accuracies": 0.73046875,
"rewards/chosen": -0.21877126395702362,
"rewards/margins": 0.04965316504240036,
"rewards/rejected": -0.26842445135116577,
"step": 352
},
{
"epoch": 10.893617021276595,
"grad_norm": 35.926055908203125,
"learning_rate": 2.6453620722761895e-07,
"log_odds_chosen": 0.6511461138725281,
"log_odds_ratio": -0.49195483326911926,
"logits/chosen": -4.608173370361328,
"logits/rejected": -4.685794830322266,
"logps/chosen": -1.3694053888320923,
"logps/rejected": -1.8711962699890137,
"loss": 1.4144,
"nll_loss": 1.374709129333496,
"rewards/accuracies": 0.8359375,
"rewards/chosen": -0.20541077852249146,
"rewards/margins": 0.07526866346597672,
"rewards/rejected": -0.28067946434020996,
"step": 384
},
{
"epoch": 11.801418439716311,
"grad_norm": 33.8105583190918,
"learning_rate": 2.2579728232420523e-07,
"log_odds_chosen": 0.5499828457832336,
"log_odds_ratio": -0.5233615040779114,
"logits/chosen": -4.537787437438965,
"logits/rejected": -4.662774085998535,
"logps/chosen": -1.3898181915283203,
"logps/rejected": -1.7920804023742676,
"loss": 1.4016,
"nll_loss": 1.3631547689437866,
"rewards/accuracies": 0.7890625,
"rewards/chosen": -0.20847272872924805,
"rewards/margins": 0.06033932417631149,
"rewards/rejected": -0.26881206035614014,
"step": 416
},
{
"epoch": 12.709219858156029,
"grad_norm": 25.557348251342773,
"learning_rate": 1.8763971398550467e-07,
"log_odds_chosen": 0.5377756357192993,
"log_odds_ratio": -0.5508320927619934,
"logits/chosen": -4.532352447509766,
"logits/rejected": -4.629130840301514,
"logps/chosen": -1.3977127075195312,
"logps/rejected": -1.7851612567901611,
"loss": 1.3943,
"nll_loss": 1.3304414749145508,
"rewards/accuracies": 0.77734375,
"rewards/chosen": -0.20965692400932312,
"rewards/margins": 0.05811727046966553,
"rewards/rejected": -0.26777422428131104,
"step": 448
},
{
"epoch": 13.617021276595745,
"grad_norm": 28.700815200805664,
"learning_rate": 1.5098005849021078e-07,
"log_odds_chosen": 0.5411101579666138,
"log_odds_ratio": -0.5445564985275269,
"logits/chosen": -4.501680850982666,
"logits/rejected": -4.677550315856934,
"logps/chosen": -1.3654242753982544,
"logps/rejected": -1.7541980743408203,
"loss": 1.401,
"nll_loss": 1.2766036987304688,
"rewards/accuracies": 0.8046875,
"rewards/chosen": -0.2048136293888092,
"rewards/margins": 0.05831605941057205,
"rewards/rejected": -0.26312971115112305,
"step": 480
},
{
"epoch": 14.52482269503546,
"grad_norm": 35.40031814575195,
"learning_rate": 1.1669889179957723e-07,
"log_odds_chosen": 0.7372524738311768,
"log_odds_ratio": -0.46363916993141174,
"logits/chosen": -4.513700485229492,
"logits/rejected": -4.619227886199951,
"logps/chosen": -1.3301138877868652,
"logps/rejected": -1.8859204053878784,
"loss": 1.3839,
"nll_loss": 1.219886302947998,
"rewards/accuracies": 0.87109375,
"rewards/chosen": -0.19951710104942322,
"rewards/margins": 0.08337096124887466,
"rewards/rejected": -0.2828880548477173,
"step": 512
},
{
"epoch": 15.432624113475176,
"grad_norm": 62.16829299926758,
"learning_rate": 8.561965785773412e-08,
"log_odds_chosen": 0.661382794380188,
"log_odds_ratio": -0.4891131520271301,
"logits/chosen": -4.506048202514648,
"logits/rejected": -4.587852478027344,
"logps/chosen": -1.3864898681640625,
"logps/rejected": -1.8775601387023926,
"loss": 1.3876,
"nll_loss": 1.2974672317504883,
"rewards/accuracies": 0.8359375,
"rewards/chosen": -0.20797351002693176,
"rewards/margins": 0.07366053014993668,
"rewards/rejected": -0.28163403272628784,
"step": 544
},
{
"epoch": 16.340425531914892,
"grad_norm": 29.107358932495117,
"learning_rate": 5.848888922025552e-08,
"log_odds_chosen": 0.6269708275794983,
"log_odds_ratio": -0.49757176637649536,
"logits/chosen": -4.460994243621826,
"logits/rejected": -4.661521911621094,
"logps/chosen": -1.3339214324951172,
"logps/rejected": -1.7924858331680298,
"loss": 1.3923,
"nll_loss": 1.2958626747131348,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.200088232755661,
"rewards/margins": 0.06878463923931122,
"rewards/rejected": -0.26887285709381104,
"step": 576
},
{
"epoch": 17.02127659574468,
"eval_log_odds_chosen": 1.7557824850082397,
"eval_log_odds_ratio": -0.18494771420955658,
"eval_logits/chosen": -4.790639400482178,
"eval_logits/rejected": -4.577674865722656,
"eval_logps/chosen": -1.626247763633728,
"eval_logps/rejected": -3.2115631103515625,
"eval_loss": 1.2619013786315918,
"eval_nll_loss": 1.4078196287155151,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.2439371794462204,
"eval_rewards/margins": 0.2377973347902298,
"eval_rewards/rejected": -0.4817345142364502,
"eval_runtime": 0.6115,
"eval_samples_per_second": 224.05,
"eval_steps_per_second": 4.906,
"step": 600
},
{
"epoch": 17.24822695035461,
"grad_norm": 29.23589515686035,
"learning_rate": 3.5958275117433404e-08,
"log_odds_chosen": 0.5763309001922607,
"log_odds_ratio": -0.5261004567146301,
"logits/chosen": -4.398637294769287,
"logits/rejected": -4.560643672943115,
"logps/chosen": -1.3885968923568726,
"logps/rejected": -1.8019691705703735,
"loss": 1.3886,
"nll_loss": 1.3023698329925537,
"rewards/accuracies": 0.8046875,
"rewards/chosen": -0.20828954875469208,
"rewards/margins": 0.06200582906603813,
"rewards/rejected": -0.2702953815460205,
"step": 608
},
{
"epoch": 18.156028368794328,
"grad_norm": 27.693330764770508,
"learning_rate": 1.8569007682777415e-08,
"log_odds_chosen": 0.7424343824386597,
"log_odds_ratio": -0.46295538544654846,
"logits/chosen": -4.579552173614502,
"logits/rejected": -4.691650390625,
"logps/chosen": -1.3507909774780273,
"logps/rejected": -1.9150110483169556,
"loss": 1.3865,
"nll_loss": 1.3111711740493774,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.20261868834495544,
"rewards/margins": 0.084633007645607,
"rewards/rejected": -0.28725165128707886,
"step": 640
},
{
"epoch": 19.06382978723404,
"grad_norm": 37.925621032714844,
"learning_rate": 6.738782355044048e-09,
"log_odds_chosen": 0.6857459545135498,
"log_odds_ratio": -0.4916977882385254,
"logits/chosen": -4.52652645111084,
"logits/rejected": -4.689857482910156,
"logps/chosen": -1.341786503791809,
"logps/rejected": -1.8565285205841064,
"loss": 1.3794,
"nll_loss": 1.2754034996032715,
"rewards/accuracies": 0.8046875,
"rewards/chosen": -0.20126797258853912,
"rewards/margins": 0.07721129059791565,
"rewards/rejected": -0.27847927808761597,
"step": 672
},
{
"epoch": 19.97163120567376,
"grad_norm": 24.041799545288086,
"learning_rate": 7.51764708051994e-10,
"log_odds_chosen": 0.6411248445510864,
"log_odds_ratio": -0.5100895762443542,
"logits/chosen": -4.384097099304199,
"logits/rejected": -4.515219688415527,
"logps/chosen": -1.3920109272003174,
"logps/rejected": -1.8711614608764648,
"loss": 1.3805,
"nll_loss": 1.2700397968292236,
"rewards/accuracies": 0.8203125,
"rewards/chosen": -0.20880162715911865,
"rewards/margins": 0.07187257707118988,
"rewards/rejected": -0.2806742191314697,
"step": 704
},
{
"epoch": 20.425531914893618,
"grad_norm": 28.226720809936523,
"learning_rate": 0.0,
"log_odds_chosen": 0.6200518608093262,
"log_odds_ratio": -0.529932975769043,
"logits/chosen": -4.434691905975342,
"logits/rejected": -4.575813293457031,
"logps/chosen": -1.416117548942566,
"logps/rejected": -1.8715832233428955,
"loss": 1.3893,
"nll_loss": 1.2817054986953735,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.2124176323413849,
"rewards/margins": 0.0683198943734169,
"rewards/rejected": -0.2807375192642212,
"step": 720
},
{
"epoch": 20.425531914893618,
"eval_log_odds_chosen": 1.7479673624038696,
"eval_log_odds_ratio": -0.1867920309305191,
"eval_logits/chosen": -4.75565767288208,
"eval_logits/rejected": -4.538194179534912,
"eval_logps/chosen": -1.6300764083862305,
"eval_logps/rejected": -3.2087719440460205,
"eval_loss": 1.2522811889648438,
"eval_nll_loss": 1.4028778076171875,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.24451148509979248,
"eval_rewards/margins": 0.23680436611175537,
"eval_rewards/rejected": -0.48131585121154785,
"eval_runtime": 0.615,
"eval_samples_per_second": 222.778,
"eval_steps_per_second": 4.878,
"step": 720
}
],
"logging_steps": 32,
"max_steps": 720,
"num_input_tokens_seen": 0,
"num_train_epochs": 21,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}