approx_nash_3temp_iter_2 / trainer_state.json
YYYYYYibo's picture
Model save
87c9ccd verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9945,
"eval_steps": 500,
"global_step": 153,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 17.363519218688417,
"learning_rate": 3.125e-08,
"logits/chosen": -2.205641031265259,
"logits/rejected": -2.2929024696350098,
"logps/chosen": -215.50050354003906,
"logps/rejected": -237.99966430664062,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.07,
"grad_norm": 18.34878510832685,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -2.382091522216797,
"logits/rejected": -2.295259952545166,
"logps/chosen": -222.111328125,
"logps/rejected": -210.6314697265625,
"loss": 0.693,
"rewards/accuracies": 0.4615384638309479,
"rewards/chosen": 0.0012403662549331784,
"rewards/margins": 0.0005746442475356162,
"rewards/rejected": 0.0006657222402282059,
"step": 10
},
{
"epoch": 0.13,
"grad_norm": 18.337158741008302,
"learning_rate": 4.989490450759331e-07,
"logits/chosen": -2.383176803588867,
"logits/rejected": -2.422689199447632,
"logps/chosen": -212.94821166992188,
"logps/rejected": -232.4334259033203,
"loss": 0.6895,
"rewards/accuracies": 0.5384615659713745,
"rewards/chosen": -0.08582816272974014,
"rewards/margins": 0.0013778842985630035,
"rewards/rejected": -0.08720605075359344,
"step": 20
},
{
"epoch": 0.2,
"grad_norm": 22.542054212888594,
"learning_rate": 4.872270441827174e-07,
"logits/chosen": -2.3914377689361572,
"logits/rejected": -2.3520281314849854,
"logps/chosen": -215.9875030517578,
"logps/rejected": -227.70399475097656,
"loss": 0.6861,
"rewards/accuracies": 0.5384615659713745,
"rewards/chosen": -0.25440388917922974,
"rewards/margins": 0.03694874048233032,
"rewards/rejected": -0.29135259985923767,
"step": 30
},
{
"epoch": 0.26,
"grad_norm": 18.6076942310058,
"learning_rate": 4.6308512113530063e-07,
"logits/chosen": -2.4768216609954834,
"logits/rejected": -2.4233009815216064,
"logps/chosen": -229.41555786132812,
"logps/rejected": -242.26214599609375,
"loss": 0.6925,
"rewards/accuracies": 0.5384615659713745,
"rewards/chosen": -0.19061818718910217,
"rewards/margins": 0.01797662116587162,
"rewards/rejected": -0.20859479904174805,
"step": 40
},
{
"epoch": 0.33,
"grad_norm": 15.540618809394221,
"learning_rate": 4.277872161641681e-07,
"logits/chosen": -2.5903377532958984,
"logits/rejected": -2.5592682361602783,
"logps/chosen": -225.72836303710938,
"logps/rejected": -240.36595153808594,
"loss": 0.6897,
"rewards/accuracies": 0.5307692289352417,
"rewards/chosen": -0.033847782760858536,
"rewards/margins": 0.001607205718755722,
"rewards/rejected": -0.03545498102903366,
"step": 50
},
{
"epoch": 0.39,
"grad_norm": 24.02478451571149,
"learning_rate": 3.8318133624280046e-07,
"logits/chosen": -2.6142632961273193,
"logits/rejected": -2.6310534477233887,
"logps/chosen": -223.6808624267578,
"logps/rejected": -255.2255401611328,
"loss": 0.6903,
"rewards/accuracies": 0.5423076748847961,
"rewards/chosen": -0.10960451513528824,
"rewards/margins": 0.02808019518852234,
"rewards/rejected": -0.13768470287322998,
"step": 60
},
{
"epoch": 0.46,
"grad_norm": 20.288409772533704,
"learning_rate": 3.316028034595861e-07,
"logits/chosen": -2.5127460956573486,
"logits/rejected": -2.5037307739257812,
"logps/chosen": -230.75833129882812,
"logps/rejected": -256.0094909667969,
"loss": 0.6832,
"rewards/accuracies": 0.4961538314819336,
"rewards/chosen": -0.2965443730354309,
"rewards/margins": 0.03154058754444122,
"rewards/rejected": -0.3280849754810333,
"step": 70
},
{
"epoch": 0.52,
"grad_norm": 22.680428500041128,
"learning_rate": 2.7575199021178855e-07,
"logits/chosen": -2.5435211658477783,
"logits/rejected": -2.5064070224761963,
"logps/chosen": -251.8400421142578,
"logps/rejected": -273.2138671875,
"loss": 0.6778,
"rewards/accuracies": 0.5538461804389954,
"rewards/chosen": -0.28813430666923523,
"rewards/margins": 0.06685086339712143,
"rewards/rejected": -0.3549851179122925,
"step": 80
},
{
"epoch": 0.58,
"grad_norm": 18.86897478499211,
"learning_rate": 2.1855294234408068e-07,
"logits/chosen": -2.5077600479125977,
"logits/rejected": -2.503957986831665,
"logps/chosen": -251.26556396484375,
"logps/rejected": -242.37310791015625,
"loss": 0.6842,
"rewards/accuracies": 0.5307692289352417,
"rewards/chosen": -0.24721869826316833,
"rewards/margins": 0.009251880459487438,
"rewards/rejected": -0.25647059082984924,
"step": 90
},
{
"epoch": 0.65,
"grad_norm": 20.56846597020704,
"learning_rate": 1.6300029195778453e-07,
"logits/chosen": -2.492692708969116,
"logits/rejected": -2.3162038326263428,
"logps/chosen": -253.9385223388672,
"logps/rejected": -268.18414306640625,
"loss": 0.6821,
"rewards/accuracies": 0.5538461804389954,
"rewards/chosen": -0.4339679181575775,
"rewards/margins": 0.06273461133241653,
"rewards/rejected": -0.49670252203941345,
"step": 100
},
{
"epoch": 0.71,
"grad_norm": 29.852940409061166,
"learning_rate": 1.1200247470632392e-07,
"logits/chosen": -2.409569501876831,
"logits/rejected": -2.3867456912994385,
"logps/chosen": -297.3172607421875,
"logps/rejected": -285.6565246582031,
"loss": 0.6823,
"rewards/accuracies": 0.4923076927661896,
"rewards/chosen": -0.73952317237854,
"rewards/margins": 0.020068956539034843,
"rewards/rejected": -0.7595921754837036,
"step": 110
},
{
"epoch": 0.78,
"grad_norm": 20.239291104683563,
"learning_rate": 6.822945986946385e-08,
"logits/chosen": -2.3316874504089355,
"logits/rejected": -2.2675819396972656,
"logps/chosen": -275.67767333984375,
"logps/rejected": -291.9703674316406,
"loss": 0.6858,
"rewards/accuracies": 0.5384615659713745,
"rewards/chosen": -0.5500468611717224,
"rewards/margins": 0.06261468678712845,
"rewards/rejected": -0.6126615405082703,
"step": 120
},
{
"epoch": 0.84,
"grad_norm": 24.27835409910822,
"learning_rate": 3.397296523427806e-08,
"logits/chosen": -2.4436075687408447,
"logits/rejected": -2.309699296951294,
"logps/chosen": -233.41680908203125,
"logps/rejected": -262.0289611816406,
"loss": 0.6782,
"rewards/accuracies": 0.5807692408561707,
"rewards/chosen": -0.36947229504585266,
"rewards/margins": 0.0935312956571579,
"rewards/rejected": -0.46300360560417175,
"step": 130
},
{
"epoch": 0.91,
"grad_norm": 24.790097756991855,
"learning_rate": 1.1026475173977978e-08,
"logits/chosen": -2.3928282260894775,
"logits/rejected": -2.382559299468994,
"logps/chosen": -244.34410095214844,
"logps/rejected": -248.87876892089844,
"loss": 0.6769,
"rewards/accuracies": 0.5615384578704834,
"rewards/chosen": -0.27959996461868286,
"rewards/margins": 0.04778864234685898,
"rewards/rejected": -0.32738858461380005,
"step": 140
},
{
"epoch": 0.97,
"grad_norm": 24.029709323211232,
"learning_rate": 5.913435276374834e-10,
"logits/chosen": -2.4940154552459717,
"logits/rejected": -2.3921005725860596,
"logps/chosen": -241.1715545654297,
"logps/rejected": -262.944580078125,
"loss": 0.6737,
"rewards/accuracies": 0.6192307472229004,
"rewards/chosen": -0.2436859905719757,
"rewards/margins": 0.09733694791793823,
"rewards/rejected": -0.34102290868759155,
"step": 150
},
{
"epoch": 0.99,
"step": 153,
"total_flos": 0.0,
"train_loss": 0.6839175197034101,
"train_runtime": 39806.5992,
"train_samples_per_second": 0.502,
"train_steps_per_second": 0.004
}
],
"logging_steps": 10,
"max_steps": 153,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}