{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9905213270142181,
"eval_steps": 100,
"global_step": 210,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 177.89952669184834,
"learning_rate": 5e-09,
"logits/chosen": 129.0,
"logits/rejected": 125.5,
"logps/chosen": -428.0,
"logps/rejected": -470.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.09,
"grad_norm": 173.49613288568952,
"learning_rate": 5e-08,
"logits/chosen": 125.5,
"logits/rejected": 132.0,
"logps/chosen": -414.0,
"logps/rejected": -450.0,
"loss": 0.7165,
"rewards/accuracies": 0.3888888955116272,
"rewards/chosen": -0.00750732421875,
"rewards/margins": -0.0037841796875,
"rewards/rejected": -0.003875732421875,
"step": 10
},
{
"epoch": 0.19,
"grad_norm": 181.136506215671,
"learning_rate": 1e-07,
"logits/chosen": 136.0,
"logits/rejected": 136.0,
"logps/chosen": -400.0,
"logps/rejected": -430.0,
"loss": 0.7186,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.0147705078125,
"rewards/margins": -0.026123046875,
"rewards/rejected": 0.01153564453125,
"step": 20
},
{
"epoch": 0.28,
"grad_norm": 158.92632659059979,
"learning_rate": 1.5e-07,
"logits/chosen": 130.0,
"logits/rejected": 130.0,
"logps/chosen": -374.0,
"logps/rejected": -390.0,
"loss": 0.6979,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.08251953125,
"rewards/margins": 0.08154296875,
"rewards/rejected": 0.0013427734375,
"step": 30
},
{
"epoch": 0.38,
"grad_norm": 172.98309546766845,
"learning_rate": 2e-07,
"logits/chosen": 115.0,
"logits/rejected": 128.0,
"logps/chosen": -338.0,
"logps/rejected": -430.0,
"loss": 0.6703,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": 0.236328125,
"rewards/margins": 0.1875,
"rewards/rejected": 0.049072265625,
"step": 40
},
{
"epoch": 0.47,
"grad_norm": 171.55866229471363,
"learning_rate": 2.5e-07,
"logits/chosen": 124.5,
"logits/rejected": 124.5,
"logps/chosen": -370.0,
"logps/rejected": -388.0,
"loss": 0.6286,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": 0.2294921875,
"rewards/margins": 0.318359375,
"rewards/rejected": -0.08935546875,
"step": 50
},
{
"epoch": 0.57,
"grad_norm": 175.84854471984127,
"learning_rate": 3e-07,
"logits/chosen": 116.0,
"logits/rejected": 124.0,
"logps/chosen": -342.0,
"logps/rejected": -416.0,
"loss": 0.5816,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.035400390625,
"rewards/margins": 0.640625,
"rewards/rejected": -0.67578125,
"step": 60
},
{
"epoch": 0.66,
"grad_norm": 134.077271259303,
"learning_rate": 3.5e-07,
"logits/chosen": 121.5,
"logits/rejected": 117.5,
"logps/chosen": -386.0,
"logps/rejected": -394.0,
"loss": 0.5647,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.1162109375,
"rewards/margins": 0.77734375,
"rewards/rejected": -0.89453125,
"step": 70
},
{
"epoch": 0.76,
"grad_norm": 145.7372885946958,
"learning_rate": 4e-07,
"logits/chosen": 117.0,
"logits/rejected": 117.0,
"logps/chosen": -342.0,
"logps/rejected": -408.0,
"loss": 0.5371,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.4765625,
"rewards/margins": 0.91796875,
"rewards/rejected": -1.3984375,
"step": 80
},
{
"epoch": 0.85,
"grad_norm": 156.14756793621152,
"learning_rate": 4.5e-07,
"logits/chosen": 132.0,
"logits/rejected": 134.0,
"logps/chosen": -410.0,
"logps/rejected": -444.0,
"loss": 0.4753,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.77734375,
"rewards/margins": 1.0234375,
"rewards/rejected": -1.8046875,
"step": 90
},
{
"epoch": 0.95,
"grad_norm": 142.7704392560794,
"learning_rate": 5e-07,
"logits/chosen": 120.0,
"logits/rejected": 129.0,
"logps/chosen": -390.0,
"logps/rejected": -490.0,
"loss": 0.5155,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.94140625,
"rewards/margins": 1.3359375,
"rewards/rejected": -2.28125,
"step": 100
},
{
"epoch": 0.95,
"eval_logits/chosen": 95.0,
"eval_logits/rejected": 97.5,
"eval_logps/chosen": -378.0,
"eval_logps/rejected": -456.0,
"eval_loss": 0.500314474105835,
"eval_rewards/accuracies": 0.686170220375061,
"eval_rewards/chosen": -1.2421875,
"eval_rewards/margins": 1.265625,
"eval_rewards/rejected": -2.515625,
"eval_runtime": 135.3377,
"eval_samples_per_second": 5.542,
"eval_steps_per_second": 0.347,
"step": 100
},
{
"epoch": 1.04,
"grad_norm": 70.76245987345193,
"learning_rate": 4.898732434036243e-07,
"logits/chosen": 123.5,
"logits/rejected": 118.5,
"logps/chosen": -406.0,
"logps/rejected": -460.0,
"loss": 0.4082,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -1.1328125,
"rewards/margins": 1.7265625,
"rewards/rejected": -2.859375,
"step": 110
},
{
"epoch": 1.14,
"grad_norm": 83.18318775000992,
"learning_rate": 4.603133832077953e-07,
"logits/chosen": 117.5,
"logits/rejected": 126.0,
"logps/chosen": -362.0,
"logps/rejected": -480.0,
"loss": 0.2535,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.546875,
"rewards/margins": 2.828125,
"rewards/rejected": -3.375,
"step": 120
},
{
"epoch": 1.23,
"grad_norm": 80.23145875736671,
"learning_rate": 4.137151834863213e-07,
"logits/chosen": 111.0,
"logits/rejected": 111.5,
"logps/chosen": -400.0,
"logps/rejected": -470.0,
"loss": 0.2203,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -1.125,
"rewards/margins": 2.75,
"rewards/rejected": -3.875,
"step": 130
},
{
"epoch": 1.33,
"grad_norm": 92.00162024415927,
"learning_rate": 3.5385375325047163e-07,
"logits/chosen": 118.0,
"logits/rejected": 125.5,
"logps/chosen": -398.0,
"logps/rejected": -512.0,
"loss": 0.2065,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -1.8359375,
"rewards/margins": 2.84375,
"rewards/rejected": -4.6875,
"step": 140
},
{
"epoch": 1.42,
"grad_norm": 68.98201787598855,
"learning_rate": 2.8557870956832133e-07,
"logits/chosen": 107.0,
"logits/rejected": 110.0,
"logps/chosen": -394.0,
"logps/rejected": -494.0,
"loss": 0.1678,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -1.703125,
"rewards/margins": 3.21875,
"rewards/rejected": -4.9375,
"step": 150
},
{
"epoch": 1.52,
"grad_norm": 80.45634124880881,
"learning_rate": 2.1442129043167873e-07,
"logits/chosen": 105.5,
"logits/rejected": 125.0,
"logps/chosen": -394.0,
"logps/rejected": -506.0,
"loss": 0.194,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": -1.28125,
"rewards/margins": 3.671875,
"rewards/rejected": -4.9375,
"step": 160
},
{
"epoch": 1.61,
"grad_norm": 82.98758335477153,
"learning_rate": 1.461462467495284e-07,
"logits/chosen": 115.0,
"logits/rejected": 116.0,
"logps/chosen": -406.0,
"logps/rejected": -472.0,
"loss": 0.169,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.95703125,
"rewards/margins": 3.234375,
"rewards/rejected": -4.1875,
"step": 170
},
{
"epoch": 1.71,
"grad_norm": 87.6903501592833,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": 111.0,
"logits/rejected": 123.0,
"logps/chosen": -406.0,
"logps/rejected": -532.0,
"loss": 0.1641,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.73828125,
"rewards/margins": 3.28125,
"rewards/rejected": -4.0,
"step": 180
},
{
"epoch": 1.8,
"grad_norm": 65.93189701960165,
"learning_rate": 3.968661679220467e-08,
"logits/chosen": 118.5,
"logits/rejected": 119.5,
"logps/chosen": -414.0,
"logps/rejected": -510.0,
"loss": 0.1756,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.8203125,
"rewards/margins": 3.5,
"rewards/rejected": -4.3125,
"step": 190
},
{
"epoch": 1.9,
"grad_norm": 82.38035369049618,
"learning_rate": 1.0126756596375685e-08,
"logits/chosen": 100.5,
"logits/rejected": 108.5,
"logps/chosen": -384.0,
"logps/rejected": -462.0,
"loss": 0.1902,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -1.2890625,
"rewards/margins": 3.0,
"rewards/rejected": -4.28125,
"step": 200
},
{
"epoch": 1.9,
"eval_logits/chosen": 87.0,
"eval_logits/rejected": 89.5,
"eval_logps/chosen": -388.0,
"eval_logps/rejected": -476.0,
"eval_loss": 0.44982096552848816,
"eval_rewards/accuracies": 0.7393617033958435,
"eval_rewards/chosen": -1.796875,
"eval_rewards/margins": 1.71875,
"eval_rewards/rejected": -3.515625,
"eval_runtime": 135.9196,
"eval_samples_per_second": 5.518,
"eval_steps_per_second": 0.346,
"step": 200
},
{
"epoch": 1.99,
"grad_norm": 54.35126708805902,
"learning_rate": 0.0,
"logits/chosen": 100.5,
"logits/rejected": 102.5,
"logps/chosen": -390.0,
"logps/rejected": -444.0,
"loss": 0.2165,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -1.109375,
"rewards/margins": 3.078125,
"rewards/rejected": -4.1875,
"step": 210
},
{
"epoch": 1.99,
"step": 210,
"total_flos": 0.0,
"train_loss": 0.4032986215182713,
"train_runtime": 4950.8357,
"train_samples_per_second": 2.727,
"train_steps_per_second": 0.042
}
],
"logging_steps": 10,
"max_steps": 210,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}