selm_ours_iter_2 / trainer_state.json
YYYYYYibo's picture
Model save
ee17db0 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 500,
"global_step": 156,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"eta": 0.0010000000474974513,
"grad_norm": 21.712186498887842,
"learning_rate": 3.125e-08,
"logits/chosen": -1.4551665782928467,
"logits/rejected": -1.606083869934082,
"logps/chosen": -144.822265625,
"logps/pi_response": -243.71868896484375,
"logps/ref_response": -243.71868896484375,
"logps/rejected": -162.54443359375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"eta": 0.0010000000474974513,
"grad_norm": 15.364437403667353,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -1.8610674142837524,
"logits/rejected": -1.8439589738845825,
"logps/chosen": -181.88343811035156,
"logps/pi_response": -265.68951416015625,
"logps/ref_response": -263.9686584472656,
"logps/rejected": -183.3703155517578,
"loss": 0.693,
"rewards/accuracies": 0.4652777910232544,
"rewards/chosen": -0.005265166517347097,
"rewards/margins": -0.0010041914647445083,
"rewards/rejected": -0.004260974936187267,
"step": 10
},
{
"epoch": 0.13,
"eta": 0.0010000000474974513,
"grad_norm": 18.345421633966485,
"learning_rate": 4.989935734988097e-07,
"logits/chosen": -1.6329383850097656,
"logits/rejected": -1.6391578912734985,
"logps/chosen": -178.48463439941406,
"logps/pi_response": -288.0892639160156,
"logps/ref_response": -259.4861755371094,
"logps/rejected": -181.0362091064453,
"loss": 0.6907,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.13044723868370056,
"rewards/margins": 0.01964881829917431,
"rewards/rejected": -0.1500960886478424,
"step": 20
},
{
"epoch": 0.19,
"eta": 0.0010000000474974513,
"grad_norm": 17.13627597968646,
"learning_rate": 4.877641290737883e-07,
"logits/chosen": -1.431341290473938,
"logits/rejected": -1.421644687652588,
"logps/chosen": -222.0276641845703,
"logps/pi_response": -336.2035827636719,
"logps/ref_response": -273.8411865234375,
"logps/rejected": -218.7247314453125,
"loss": 0.6957,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": -0.4222361147403717,
"rewards/margins": 0.015909332782030106,
"rewards/rejected": -0.43814539909362793,
"step": 30
},
{
"epoch": 0.26,
"eta": 0.0010000000474974513,
"grad_norm": 14.253311206515136,
"learning_rate": 4.646121984004665e-07,
"logits/chosen": -1.4934971332550049,
"logits/rejected": -1.447249412536621,
"logps/chosen": -192.37478637695312,
"logps/pi_response": -272.644775390625,
"logps/ref_response": -250.92996215820312,
"logps/rejected": -193.36312866210938,
"loss": 0.6915,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": -0.19741061329841614,
"rewards/margins": 0.010645559057593346,
"rewards/rejected": -0.20805618166923523,
"step": 40
},
{
"epoch": 0.32,
"eta": 0.0010000000474974513,
"grad_norm": 16.796841718291038,
"learning_rate": 4.3069871595684787e-07,
"logits/chosen": -1.7099899053573608,
"logits/rejected": -1.7535244226455688,
"logps/chosen": -194.2742462158203,
"logps/pi_response": -277.76318359375,
"logps/ref_response": -266.2118835449219,
"logps/rejected": -201.6314239501953,
"loss": 0.6895,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.14295700192451477,
"rewards/margins": 0.017321351915597916,
"rewards/rejected": -0.1602783501148224,
"step": 50
},
{
"epoch": 0.38,
"eta": 0.0010000000474974513,
"grad_norm": 14.319074470267418,
"learning_rate": 3.877242453630256e-07,
"logits/chosen": -1.2345731258392334,
"logits/rejected": -1.2892169952392578,
"logps/chosen": -206.3093719482422,
"logps/pi_response": -289.80902099609375,
"logps/ref_response": -256.5821533203125,
"logps/rejected": -212.19924926757812,
"loss": 0.6896,
"rewards/accuracies": 0.503125011920929,
"rewards/chosen": -0.30671426653862,
"rewards/margins": 0.010099029168486595,
"rewards/rejected": -0.31681329011917114,
"step": 60
},
{
"epoch": 0.45,
"eta": 0.0010000000474974513,
"grad_norm": 24.50253677275901,
"learning_rate": 3.378437060203357e-07,
"logits/chosen": -1.03169584274292,
"logits/rejected": -1.021615743637085,
"logps/chosen": -221.443359375,
"logps/pi_response": -319.42242431640625,
"logps/ref_response": -262.21759033203125,
"logps/rejected": -225.0926971435547,
"loss": 0.6847,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": -0.44103240966796875,
"rewards/margins": 0.028266970068216324,
"rewards/rejected": -0.4692993760108948,
"step": 70
},
{
"epoch": 0.51,
"eta": 0.0010000000474974513,
"grad_norm": 19.526479759830394,
"learning_rate": 2.8355831645441387e-07,
"logits/chosen": -0.8754630088806152,
"logits/rejected": -0.9458340406417847,
"logps/chosen": -225.5301971435547,
"logps/pi_response": -346.0572204589844,
"logps/ref_response": -269.8856201171875,
"logps/rejected": -233.25045776367188,
"loss": 0.689,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.513852596282959,
"rewards/margins": 0.01622053235769272,
"rewards/rejected": -0.5300731658935547,
"step": 80
},
{
"epoch": 0.58,
"eta": 0.0010000000474974513,
"grad_norm": 21.868311725000435,
"learning_rate": 2.2759017277414164e-07,
"logits/chosen": -0.814942479133606,
"logits/rejected": -0.8241022825241089,
"logps/chosen": -234.712646484375,
"logps/pi_response": -357.76568603515625,
"logps/ref_response": -260.8216552734375,
"logps/rejected": -250.9972686767578,
"loss": 0.6791,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -0.6007817983627319,
"rewards/margins": 0.07638835906982422,
"rewards/rejected": -0.6771702170372009,
"step": 90
},
{
"epoch": 0.64,
"eta": 0.0010000000474974513,
"grad_norm": 20.43299106630643,
"learning_rate": 1.7274575140626315e-07,
"logits/chosen": -0.897548496723175,
"logits/rejected": -0.932096004486084,
"logps/chosen": -236.4069061279297,
"logps/pi_response": -367.8551940917969,
"logps/ref_response": -274.1803283691406,
"logps/rejected": -245.63119506835938,
"loss": 0.6895,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": -0.5783185958862305,
"rewards/margins": 0.0737391859292984,
"rewards/rejected": -0.6520577669143677,
"step": 100
},
{
"epoch": 0.7,
"eta": 0.0010000000474974513,
"grad_norm": 20.07983919936344,
"learning_rate": 1.2177518064852348e-07,
"logits/chosen": -0.7010077834129333,
"logits/rejected": -0.7780792713165283,
"logps/chosen": -239.2875518798828,
"logps/pi_response": -375.39691162109375,
"logps/ref_response": -266.8659362792969,
"logps/rejected": -249.2735595703125,
"loss": 0.6817,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.6955354809761047,
"rewards/margins": 0.06171605736017227,
"rewards/rejected": -0.7572515606880188,
"step": 110
},
{
"epoch": 0.77,
"eta": 0.0010000000474974513,
"grad_norm": 18.440065783036147,
"learning_rate": 7.723433775328384e-08,
"logits/chosen": -0.7951020002365112,
"logits/rejected": -0.8204092979431152,
"logps/chosen": -261.5052795410156,
"logps/pi_response": -384.1082458496094,
"logps/ref_response": -273.2558288574219,
"logps/rejected": -263.2559509277344,
"loss": 0.678,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.7765410542488098,
"rewards/margins": 0.03736639395356178,
"rewards/rejected": -0.8139075040817261,
"step": 120
},
{
"epoch": 0.83,
"eta": 0.0010000000474974513,
"grad_norm": 20.63429522696927,
"learning_rate": 4.1356686569674335e-08,
"logits/chosen": -0.6491920351982117,
"logits/rejected": -0.6609460711479187,
"logps/chosen": -245.2913055419922,
"logps/pi_response": -372.6864929199219,
"logps/ref_response": -255.30258178710938,
"logps/rejected": -250.04989624023438,
"loss": 0.6865,
"rewards/accuracies": 0.515625,
"rewards/chosen": -0.7680090665817261,
"rewards/margins": 0.01968952640891075,
"rewards/rejected": -0.78769850730896,
"step": 130
},
{
"epoch": 0.9,
"eta": 0.0010000000474974513,
"grad_norm": 18.194304704086697,
"learning_rate": 1.5941282340065697e-08,
"logits/chosen": -0.8394004702568054,
"logits/rejected": -0.8077179789543152,
"logps/chosen": -243.73538208007812,
"logps/pi_response": -391.2145690917969,
"logps/ref_response": -279.54559326171875,
"logps/rejected": -255.0926513671875,
"loss": 0.6744,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.700167179107666,
"rewards/margins": 0.07673807442188263,
"rewards/rejected": -0.7769052386283875,
"step": 140
},
{
"epoch": 0.96,
"eta": 0.0010000000474974513,
"grad_norm": 17.724132159919495,
"learning_rate": 2.2625595580163247e-09,
"logits/chosen": -0.7538624405860901,
"logits/rejected": -0.7211672067642212,
"logps/chosen": -247.0625457763672,
"logps/pi_response": -371.68939208984375,
"logps/ref_response": -256.93328857421875,
"logps/rejected": -252.12765502929688,
"loss": 0.6781,
"rewards/accuracies": 0.5093749761581421,
"rewards/chosen": -0.7719146609306335,
"rewards/margins": 0.03903389722108841,
"rewards/rejected": -0.8109486699104309,
"step": 150
},
{
"epoch": 1.0,
"step": 156,
"total_flos": 0.0,
"train_loss": 0.6867941472774897,
"train_runtime": 22649.1506,
"train_samples_per_second": 0.883,
"train_steps_per_second": 0.007
}
],
"logging_steps": 10,
"max_steps": 156,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}