approx_nash_1_iter_2 / trainer_state.json
YYYYYYibo's picture
Model save
7623a78 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9945,
"eval_steps": 500,
"global_step": 153,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 13.312925409718954,
"learning_rate": 3.125e-08,
"logits/chosen": -2.1492395401000977,
"logits/rejected": -2.139173746109009,
"logps/chosen": -189.41439819335938,
"logps/rejected": -184.15049743652344,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.07,
"grad_norm": 15.630000847331686,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -2.3999834060668945,
"logits/rejected": -2.346851348876953,
"logps/chosen": -178.99545288085938,
"logps/rejected": -177.0459747314453,
"loss": 0.6934,
"rewards/accuracies": 0.42307692766189575,
"rewards/chosen": -0.0037847168277949095,
"rewards/margins": 0.0002747862017713487,
"rewards/rejected": -0.004059503320604563,
"step": 10
},
{
"epoch": 0.13,
"grad_norm": 14.549393173612225,
"learning_rate": 4.989490450759331e-07,
"logits/chosen": -2.4151172637939453,
"logits/rejected": -2.356534004211426,
"logps/chosen": -179.75003051757812,
"logps/rejected": -179.4581756591797,
"loss": 0.6909,
"rewards/accuracies": 0.5461538434028625,
"rewards/chosen": -0.055207282304763794,
"rewards/margins": 0.006152572110295296,
"rewards/rejected": -0.06135985627770424,
"step": 20
},
{
"epoch": 0.2,
"grad_norm": 22.390116207007786,
"learning_rate": 4.872270441827174e-07,
"logits/chosen": -2.312279224395752,
"logits/rejected": -2.211397886276245,
"logps/chosen": -206.32656860351562,
"logps/rejected": -211.81321716308594,
"loss": 0.6929,
"rewards/accuracies": 0.557692289352417,
"rewards/chosen": -0.3904457688331604,
"rewards/margins": 0.03509727492928505,
"rewards/rejected": -0.42554304003715515,
"step": 30
},
{
"epoch": 0.26,
"grad_norm": 15.933088854619298,
"learning_rate": 4.6308512113530063e-07,
"logits/chosen": -2.2958626747131348,
"logits/rejected": -2.3168814182281494,
"logps/chosen": -236.7042999267578,
"logps/rejected": -244.78851318359375,
"loss": 0.6981,
"rewards/accuracies": 0.5461538434028625,
"rewards/chosen": -0.6312862038612366,
"rewards/margins": 0.015706488862633705,
"rewards/rejected": -0.6469926238059998,
"step": 40
},
{
"epoch": 0.33,
"grad_norm": 14.014878007482002,
"learning_rate": 4.277872161641681e-07,
"logits/chosen": -2.368952512741089,
"logits/rejected": -2.4042294025421143,
"logps/chosen": -214.369384765625,
"logps/rejected": -220.7718505859375,
"loss": 0.6913,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.2600650191307068,
"rewards/margins": 0.018586795777082443,
"rewards/rejected": -0.2786518335342407,
"step": 50
},
{
"epoch": 0.39,
"grad_norm": 14.767139513110513,
"learning_rate": 3.8318133624280046e-07,
"logits/chosen": -2.4127275943756104,
"logits/rejected": -2.434305191040039,
"logps/chosen": -217.94210815429688,
"logps/rejected": -227.94302368164062,
"loss": 0.6919,
"rewards/accuracies": 0.5384615659713745,
"rewards/chosen": -0.2718888223171234,
"rewards/margins": 0.015998326241970062,
"rewards/rejected": -0.2878871560096741,
"step": 60
},
{
"epoch": 0.46,
"grad_norm": 15.91144067203442,
"learning_rate": 3.316028034595861e-07,
"logits/chosen": -2.264232635498047,
"logits/rejected": -2.299992322921753,
"logps/chosen": -194.38172912597656,
"logps/rejected": -205.9635009765625,
"loss": 0.6833,
"rewards/accuracies": 0.5884615182876587,
"rewards/chosen": -0.17818714678287506,
"rewards/margins": 0.02437894232571125,
"rewards/rejected": -0.20256608724594116,
"step": 70
},
{
"epoch": 0.52,
"grad_norm": 18.074689046967872,
"learning_rate": 2.7575199021178855e-07,
"logits/chosen": -2.299180746078491,
"logits/rejected": -2.182999610900879,
"logps/chosen": -231.85098266601562,
"logps/rejected": -236.9989776611328,
"loss": 0.6842,
"rewards/accuracies": 0.5730769038200378,
"rewards/chosen": -0.3959502577781677,
"rewards/margins": 0.03195538371801376,
"rewards/rejected": -0.4279056191444397,
"step": 80
},
{
"epoch": 0.58,
"grad_norm": 19.634321191048826,
"learning_rate": 2.1855294234408068e-07,
"logits/chosen": -2.232875347137451,
"logits/rejected": -2.2362263202667236,
"logps/chosen": -208.51087951660156,
"logps/rejected": -207.45663452148438,
"loss": 0.689,
"rewards/accuracies": 0.5461538434028625,
"rewards/chosen": -0.22500069439411163,
"rewards/margins": 0.003552414011210203,
"rewards/rejected": -0.22855311632156372,
"step": 90
},
{
"epoch": 0.65,
"grad_norm": 17.473494481507956,
"learning_rate": 1.6300029195778453e-07,
"logits/chosen": -2.236097812652588,
"logits/rejected": -2.0412774085998535,
"logps/chosen": -213.67514038085938,
"logps/rejected": -206.89111328125,
"loss": 0.6881,
"rewards/accuracies": 0.5038461685180664,
"rewards/chosen": -0.2356816679239273,
"rewards/margins": 0.003031224012374878,
"rewards/rejected": -0.23871289193630219,
"step": 100
},
{
"epoch": 0.71,
"grad_norm": 15.544936822002546,
"learning_rate": 1.1200247470632392e-07,
"logits/chosen": -2.103285789489746,
"logits/rejected": -2.1786677837371826,
"logps/chosen": -224.00047302246094,
"logps/rejected": -220.13726806640625,
"loss": 0.6848,
"rewards/accuracies": 0.5615384578704834,
"rewards/chosen": -0.3865113854408264,
"rewards/margins": 0.03568296507000923,
"rewards/rejected": -0.42219436168670654,
"step": 110
},
{
"epoch": 0.78,
"grad_norm": 17.169881927493602,
"learning_rate": 6.822945986946385e-08,
"logits/chosen": -1.9218517541885376,
"logits/rejected": -2.109549045562744,
"logps/chosen": -220.54318237304688,
"logps/rejected": -231.7896270751953,
"loss": 0.6813,
"rewards/accuracies": 0.5769230723381042,
"rewards/chosen": -0.4736253619194031,
"rewards/margins": 0.03084597922861576,
"rewards/rejected": -0.5044713020324707,
"step": 120
},
{
"epoch": 0.84,
"grad_norm": 17.60589291870986,
"learning_rate": 3.397296523427806e-08,
"logits/chosen": -2.146359920501709,
"logits/rejected": -2.1425552368164062,
"logps/chosen": -221.13165283203125,
"logps/rejected": -225.94419860839844,
"loss": 0.6816,
"rewards/accuracies": 0.5615384578704834,
"rewards/chosen": -0.4886237382888794,
"rewards/margins": 0.03550608828663826,
"rewards/rejected": -0.5241298675537109,
"step": 130
},
{
"epoch": 0.91,
"grad_norm": 18.707751355883822,
"learning_rate": 1.1026475173977978e-08,
"logits/chosen": -2.1278481483459473,
"logits/rejected": -2.0320982933044434,
"logps/chosen": -220.7178192138672,
"logps/rejected": -217.0054931640625,
"loss": 0.6837,
"rewards/accuracies": 0.5923076868057251,
"rewards/chosen": -0.3798917829990387,
"rewards/margins": 0.05050484091043472,
"rewards/rejected": -0.4303966164588928,
"step": 140
},
{
"epoch": 0.97,
"grad_norm": 18.697426009812567,
"learning_rate": 5.913435276374834e-10,
"logits/chosen": -2.186318874359131,
"logits/rejected": -2.1368911266326904,
"logps/chosen": -221.08029174804688,
"logps/rejected": -230.6654052734375,
"loss": 0.6744,
"rewards/accuracies": 0.5961538553237915,
"rewards/chosen": -0.38067081570625305,
"rewards/margins": 0.07161368429660797,
"rewards/rejected": -0.4522845447063446,
"step": 150
},
{
"epoch": 0.99,
"step": 153,
"total_flos": 0.0,
"train_loss": 0.6871888306405809,
"train_runtime": 39835.0539,
"train_samples_per_second": 0.502,
"train_steps_per_second": 0.004
}
],
"logging_steps": 10,
"max_steps": 153,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}