approx_nash_again_1_iter_3 / trainer_state.json
YYYYYYibo's picture
Model save
a6fec24 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9992429977289932,
"eval_steps": 500,
"global_step": 165,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"eta": 0.0010000000474974513,
"grad_norm": 20.876303783345758,
"learning_rate": 2.941176470588235e-08,
"logits/chosen": -2.3177952766418457,
"logits/rejected": -2.3340206146240234,
"logps/chosen": -185.6923828125,
"logps/pi_response": -319.5942687988281,
"logps/ref_response": -319.5942687988281,
"logps/rejected": -187.8241729736328,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"eta": 0.0010000000474974513,
"grad_norm": 21.2222058398942,
"learning_rate": 2.941176470588235e-07,
"logits/chosen": -2.3264760971069336,
"logits/rejected": -2.349726676940918,
"logps/chosen": -202.65338134765625,
"logps/pi_response": -336.1994934082031,
"logps/ref_response": -334.70989990234375,
"logps/rejected": -215.5554962158203,
"loss": 0.6929,
"rewards/accuracies": 0.4340277910232544,
"rewards/chosen": -0.010369324125349522,
"rewards/margins": -0.00030602168408222497,
"rewards/rejected": -0.010063301771879196,
"step": 10
},
{
"epoch": 0.12,
"eta": 0.0010000000474974513,
"grad_norm": 26.302559308113064,
"learning_rate": 4.994932636402031e-07,
"logits/chosen": -2.1999268531799316,
"logits/rejected": -2.2739574909210205,
"logps/chosen": -218.8612823486328,
"logps/pi_response": -366.29144287109375,
"logps/ref_response": -332.369140625,
"logps/rejected": -232.3873291015625,
"loss": 0.6878,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": -0.22311308979988098,
"rewards/margins": 0.029964953660964966,
"rewards/rejected": -0.25307804346084595,
"step": 20
},
{
"epoch": 0.18,
"eta": 0.0010000000474974513,
"grad_norm": 30.864420908439598,
"learning_rate": 4.905416503522123e-07,
"logits/chosen": -2.021660804748535,
"logits/rejected": -2.0993196964263916,
"logps/chosen": -218.07278442382812,
"logps/pi_response": -377.771240234375,
"logps/ref_response": -329.1590881347656,
"logps/rejected": -238.0624542236328,
"loss": 0.6867,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.26833224296569824,
"rewards/margins": 0.06877782940864563,
"rewards/rejected": -0.33711010217666626,
"step": 30
},
{
"epoch": 0.24,
"eta": 0.0010000000474974513,
"grad_norm": 31.326684541726582,
"learning_rate": 4.707922373336523e-07,
"logits/chosen": -1.9741312265396118,
"logits/rejected": -2.056077718734741,
"logps/chosen": -245.257080078125,
"logps/pi_response": -370.0182800292969,
"logps/ref_response": -330.54022216796875,
"logps/rejected": -257.7065734863281,
"loss": 0.6926,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.3541467487812042,
"rewards/margins": 0.01397724449634552,
"rewards/rejected": -0.36812400817871094,
"step": 40
},
{
"epoch": 0.3,
"eta": 0.0010000000474974513,
"grad_norm": 20.607657828492968,
"learning_rate": 4.4113156629677313e-07,
"logits/chosen": -2.001324415206909,
"logits/rejected": -2.078733444213867,
"logps/chosen": -261.01226806640625,
"logps/pi_response": -389.60235595703125,
"logps/ref_response": -332.9416809082031,
"logps/rejected": -265.627197265625,
"loss": 0.6907,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.4424280524253845,
"rewards/margins": 0.030498838052153587,
"rewards/rejected": -0.4729268550872803,
"step": 50
},
{
"epoch": 0.36,
"eta": 0.0010000000474974513,
"grad_norm": 22.30008036950616,
"learning_rate": 4.0289109058972283e-07,
"logits/chosen": -1.991970419883728,
"logits/rejected": -1.952013373374939,
"logps/chosen": -233.18075561523438,
"logps/pi_response": -373.1258850097656,
"logps/ref_response": -330.88116455078125,
"logps/rejected": -238.22116088867188,
"loss": 0.6848,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": -0.2081402838230133,
"rewards/margins": 0.026575928553938866,
"rewards/rejected": -0.23471620678901672,
"step": 60
},
{
"epoch": 0.42,
"eta": 0.0010000000474974513,
"grad_norm": 27.064787151807383,
"learning_rate": 3.577874068920446e-07,
"logits/chosen": -1.8990647792816162,
"logits/rejected": -1.8150758743286133,
"logps/chosen": -249.4824676513672,
"logps/pi_response": -400.8092346191406,
"logps/ref_response": -332.44757080078125,
"logps/rejected": -266.2792053222656,
"loss": 0.6848,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.4582160413265228,
"rewards/margins": 0.06068809702992439,
"rewards/rejected": -0.5189040899276733,
"step": 70
},
{
"epoch": 0.48,
"eta": 0.0010000000474974513,
"grad_norm": 24.274894592001466,
"learning_rate": 3.078451980100854e-07,
"logits/chosen": -1.836126685142517,
"logits/rejected": -1.9199883937835693,
"logps/chosen": -258.8869934082031,
"logps/pi_response": -400.2823181152344,
"logps/ref_response": -320.3209533691406,
"logps/rejected": -275.9152526855469,
"loss": 0.686,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.6283607482910156,
"rewards/margins": 0.07945629954338074,
"rewards/rejected": -0.707817018032074,
"step": 80
},
{
"epoch": 0.55,
"eta": 0.0010000000474974513,
"grad_norm": 20.666129111777522,
"learning_rate": 2.553063458334059e-07,
"logits/chosen": -1.9520018100738525,
"logits/rejected": -1.8479654788970947,
"logps/chosen": -227.83200073242188,
"logps/pi_response": -360.124755859375,
"logps/ref_response": -310.69232177734375,
"logps/rejected": -236.7330780029297,
"loss": 0.6897,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.32836586236953735,
"rewards/margins": 0.03321167081594467,
"rewards/rejected": -0.36157751083374023,
"step": 90
},
{
"epoch": 0.61,
"eta": 0.0010000000474974513,
"grad_norm": 22.034281983565204,
"learning_rate": 2.0252929432814287e-07,
"logits/chosen": -1.8997596502304077,
"logits/rejected": -2.0015318393707275,
"logps/chosen": -244.6271514892578,
"logps/pi_response": -392.9565734863281,
"logps/ref_response": -338.5196533203125,
"logps/rejected": -259.35247802734375,
"loss": 0.6864,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.3552432060241699,
"rewards/margins": 0.03130738437175751,
"rewards/rejected": -0.3865506052970886,
"step": 100
},
{
"epoch": 0.67,
"eta": 0.0010000000474974513,
"grad_norm": 21.117382385719814,
"learning_rate": 1.5188318011445906e-07,
"logits/chosen": -1.6617428064346313,
"logits/rejected": -1.7959445714950562,
"logps/chosen": -265.4134216308594,
"logps/pi_response": -426.07366943359375,
"logps/ref_response": -338.72222900390625,
"logps/rejected": -281.7206115722656,
"loss": 0.6857,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.6667092442512512,
"rewards/margins": 0.049205854535102844,
"rewards/rejected": -0.7159152030944824,
"step": 110
},
{
"epoch": 0.73,
"eta": 0.0010000000474974513,
"grad_norm": 24.334142813588905,
"learning_rate": 1.0564148305586295e-07,
"logits/chosen": -1.6403900384902954,
"logits/rejected": -1.6945642232894897,
"logps/chosen": -272.6028747558594,
"logps/pi_response": -434.99560546875,
"logps/ref_response": -341.4128112792969,
"logps/rejected": -284.1662902832031,
"loss": 0.6843,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": -0.6931222677230835,
"rewards/margins": 0.0071820830926299095,
"rewards/rejected": -0.7003043293952942,
"step": 120
},
{
"epoch": 0.79,
"eta": 0.0010000000474974513,
"grad_norm": 23.99305159279906,
"learning_rate": 6.587997083462196e-08,
"logits/chosen": -1.64206063747406,
"logits/rejected": -1.6930261850357056,
"logps/chosen": -256.47344970703125,
"logps/pi_response": -414.23004150390625,
"logps/ref_response": -325.36041259765625,
"logps/rejected": -267.9649658203125,
"loss": 0.6719,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.5717727541923523,
"rewards/margins": 0.054326076060533524,
"rewards/rejected": -0.6260988116264343,
"step": 130
},
{
"epoch": 0.85,
"eta": 0.0010000000474974513,
"grad_norm": 28.645522892437054,
"learning_rate": 3.438351873250492e-08,
"logits/chosen": -1.672249436378479,
"logits/rejected": -1.775399923324585,
"logps/chosen": -246.2197723388672,
"logps/pi_response": -418.77423095703125,
"logps/ref_response": -333.43292236328125,
"logps/rejected": -264.335693359375,
"loss": 0.6812,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.45865583419799805,
"rewards/margins": 0.0741017609834671,
"rewards/rejected": -0.5327576398849487,
"step": 140
},
{
"epoch": 0.91,
"eta": 0.0010000000474974513,
"grad_norm": 22.513812489103497,
"learning_rate": 1.256598743236703e-08,
"logits/chosen": -1.620234489440918,
"logits/rejected": -1.7270009517669678,
"logps/chosen": -238.0688934326172,
"logps/pi_response": -416.77471923828125,
"logps/ref_response": -327.03955078125,
"logps/rejected": -263.03558349609375,
"loss": 0.6734,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.5202730298042297,
"rewards/margins": 0.08573532104492188,
"rewards/rejected": -0.6060083508491516,
"step": 150
},
{
"epoch": 0.97,
"eta": 0.0010000000474974513,
"grad_norm": 20.44841408463752,
"learning_rate": 1.406755487774386e-09,
"logits/chosen": -1.6769297122955322,
"logits/rejected": -1.6653327941894531,
"logps/chosen": -257.4883728027344,
"logps/pi_response": -424.09088134765625,
"logps/ref_response": -330.8675842285156,
"logps/rejected": -271.10101318359375,
"loss": 0.6702,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -0.5272750854492188,
"rewards/margins": 0.07395146042108536,
"rewards/rejected": -0.6012265086174011,
"step": 160
},
{
"epoch": 1.0,
"step": 165,
"total_flos": 0.0,
"train_loss": 0.684309244517124,
"train_runtime": 33858.6959,
"train_samples_per_second": 0.624,
"train_steps_per_second": 0.005
}
],
"logging_steps": 10,
"max_steps": 165,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}