{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9905213270142181,
  "eval_steps": 100,
  "global_step": 210,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 177.89952669184834,
      "learning_rate": 5e-09,
      "logits/chosen": 129.0,
      "logits/rejected": 125.5,
      "logps/chosen": -428.0,
      "logps/rejected": -470.0,
      "loss": 0.6914,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.09,
      "grad_norm": 173.49613288568952,
      "learning_rate": 5e-08,
      "logits/chosen": 125.5,
      "logits/rejected": 132.0,
      "logps/chosen": -414.0,
      "logps/rejected": -450.0,
      "loss": 0.7165,
      "rewards/accuracies": 0.3888888955116272,
      "rewards/chosen": -0.00750732421875,
      "rewards/margins": -0.0037841796875,
      "rewards/rejected": -0.003875732421875,
      "step": 10
    },
    {
      "epoch": 0.19,
      "grad_norm": 181.136506215671,
      "learning_rate": 1e-07,
      "logits/chosen": 136.0,
      "logits/rejected": 136.0,
      "logps/chosen": -400.0,
      "logps/rejected": -430.0,
      "loss": 0.7186,
      "rewards/accuracies": 0.45625001192092896,
      "rewards/chosen": -0.0147705078125,
      "rewards/margins": -0.026123046875,
      "rewards/rejected": 0.01153564453125,
      "step": 20
    },
    {
      "epoch": 0.28,
      "grad_norm": 158.92632659059979,
      "learning_rate": 1.5e-07,
      "logits/chosen": 130.0,
      "logits/rejected": 130.0,
      "logps/chosen": -374.0,
      "logps/rejected": -390.0,
      "loss": 0.6979,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.08251953125,
      "rewards/margins": 0.08154296875,
      "rewards/rejected": 0.0013427734375,
      "step": 30
    },
    {
      "epoch": 0.38,
      "grad_norm": 172.98309546766845,
      "learning_rate": 2e-07,
      "logits/chosen": 115.0,
      "logits/rejected": 128.0,
      "logps/chosen": -338.0,
      "logps/rejected": -430.0,
      "loss": 0.6703,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": 0.236328125,
      "rewards/margins": 0.1875,
      "rewards/rejected": 0.049072265625,
      "step": 40
    },
    {
      "epoch": 0.47,
      "grad_norm": 171.55866229471363,
      "learning_rate": 2.5e-07,
      "logits/chosen": 124.5,
      "logits/rejected": 124.5,
      "logps/chosen": -370.0,
      "logps/rejected": -388.0,
      "loss": 0.6286,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": 0.2294921875,
      "rewards/margins": 0.318359375,
      "rewards/rejected": -0.08935546875,
      "step": 50
    },
    {
      "epoch": 0.57,
      "grad_norm": 175.84854471984127,
      "learning_rate": 3e-07,
      "logits/chosen": 116.0,
      "logits/rejected": 124.0,
      "logps/chosen": -342.0,
      "logps/rejected": -416.0,
      "loss": 0.5816,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.035400390625,
      "rewards/margins": 0.640625,
      "rewards/rejected": -0.67578125,
      "step": 60
    },
    {
      "epoch": 0.66,
      "grad_norm": 134.077271259303,
      "learning_rate": 3.5e-07,
      "logits/chosen": 121.5,
      "logits/rejected": 117.5,
      "logps/chosen": -386.0,
      "logps/rejected": -394.0,
      "loss": 0.5647,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.1162109375,
      "rewards/margins": 0.77734375,
      "rewards/rejected": -0.89453125,
      "step": 70
    },
    {
      "epoch": 0.76,
      "grad_norm": 145.7372885946958,
      "learning_rate": 4e-07,
      "logits/chosen": 117.0,
      "logits/rejected": 117.0,
      "logps/chosen": -342.0,
      "logps/rejected": -408.0,
      "loss": 0.5371,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.4765625,
      "rewards/margins": 0.91796875,
      "rewards/rejected": -1.3984375,
      "step": 80
    },
    {
      "epoch": 0.85,
      "grad_norm": 156.14756793621152,
      "learning_rate": 4.5e-07,
      "logits/chosen": 132.0,
      "logits/rejected": 134.0,
      "logps/chosen": -410.0,
      "logps/rejected": -444.0,
      "loss": 0.4753,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.77734375,
      "rewards/margins": 1.0234375,
      "rewards/rejected": -1.8046875,
      "step": 90
    },
    {
      "epoch": 0.95,
      "grad_norm": 142.7704392560794,
      "learning_rate": 5e-07,
      "logits/chosen": 120.0,
      "logits/rejected": 129.0,
      "logps/chosen": -390.0,
      "logps/rejected": -490.0,
      "loss": 0.5155,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.94140625,
      "rewards/margins": 1.3359375,
      "rewards/rejected": -2.28125,
      "step": 100
    },
    {
      "epoch": 0.95,
      "eval_logits/chosen": 95.0,
      "eval_logits/rejected": 97.5,
      "eval_logps/chosen": -378.0,
      "eval_logps/rejected": -456.0,
      "eval_loss": 0.500314474105835,
      "eval_rewards/accuracies": 0.686170220375061,
      "eval_rewards/chosen": -1.2421875,
      "eval_rewards/margins": 1.265625,
      "eval_rewards/rejected": -2.515625,
      "eval_runtime": 135.3377,
      "eval_samples_per_second": 5.542,
      "eval_steps_per_second": 0.347,
      "step": 100
    },
    {
      "epoch": 1.04,
      "grad_norm": 70.76245987345193,
      "learning_rate": 4.898732434036243e-07,
      "logits/chosen": 123.5,
      "logits/rejected": 118.5,
      "logps/chosen": -406.0,
      "logps/rejected": -460.0,
      "loss": 0.4082,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -1.1328125,
      "rewards/margins": 1.7265625,
      "rewards/rejected": -2.859375,
      "step": 110
    },
    {
      "epoch": 1.14,
      "grad_norm": 83.18318775000992,
      "learning_rate": 4.603133832077953e-07,
      "logits/chosen": 117.5,
      "logits/rejected": 126.0,
      "logps/chosen": -362.0,
      "logps/rejected": -480.0,
      "loss": 0.2535,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -0.546875,
      "rewards/margins": 2.828125,
      "rewards/rejected": -3.375,
      "step": 120
    },
    {
      "epoch": 1.23,
      "grad_norm": 80.23145875736671,
      "learning_rate": 4.137151834863213e-07,
      "logits/chosen": 111.0,
      "logits/rejected": 111.5,
      "logps/chosen": -400.0,
      "logps/rejected": -470.0,
      "loss": 0.2203,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -1.125,
      "rewards/margins": 2.75,
      "rewards/rejected": -3.875,
      "step": 130
    },
    {
      "epoch": 1.33,
      "grad_norm": 92.00162024415927,
      "learning_rate": 3.5385375325047163e-07,
      "logits/chosen": 118.0,
      "logits/rejected": 125.5,
      "logps/chosen": -398.0,
      "logps/rejected": -512.0,
      "loss": 0.2065,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -1.8359375,
      "rewards/margins": 2.84375,
      "rewards/rejected": -4.6875,
      "step": 140
    },
    {
      "epoch": 1.42,
      "grad_norm": 68.98201787598855,
      "learning_rate": 2.8557870956832133e-07,
      "logits/chosen": 107.0,
      "logits/rejected": 110.0,
      "logps/chosen": -394.0,
      "logps/rejected": -494.0,
      "loss": 0.1678,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": -1.703125,
      "rewards/margins": 3.21875,
      "rewards/rejected": -4.9375,
      "step": 150
    },
    {
      "epoch": 1.52,
      "grad_norm": 80.45634124880881,
      "learning_rate": 2.1442129043167873e-07,
      "logits/chosen": 105.5,
      "logits/rejected": 125.0,
      "logps/chosen": -394.0,
      "logps/rejected": -506.0,
      "loss": 0.194,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": -1.28125,
      "rewards/margins": 3.671875,
      "rewards/rejected": -4.9375,
      "step": 160
    },
    {
      "epoch": 1.61,
      "grad_norm": 82.98758335477153,
      "learning_rate": 1.461462467495284e-07,
      "logits/chosen": 115.0,
      "logits/rejected": 116.0,
      "logps/chosen": -406.0,
      "logps/rejected": -472.0,
      "loss": 0.169,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -0.95703125,
      "rewards/margins": 3.234375,
      "rewards/rejected": -4.1875,
      "step": 170
    },
    {
      "epoch": 1.71,
      "grad_norm": 87.6903501592833,
      "learning_rate": 8.628481651367875e-08,
      "logits/chosen": 111.0,
      "logits/rejected": 123.0,
      "logps/chosen": -406.0,
      "logps/rejected": -532.0,
      "loss": 0.1641,
      "rewards/accuracies": 0.956250011920929,
      "rewards/chosen": -0.73828125,
      "rewards/margins": 3.28125,
      "rewards/rejected": -4.0,
      "step": 180
    },
    {
      "epoch": 1.8,
      "grad_norm": 65.93189701960165,
      "learning_rate": 3.968661679220467e-08,
      "logits/chosen": 118.5,
      "logits/rejected": 119.5,
      "logps/chosen": -414.0,
      "logps/rejected": -510.0,
      "loss": 0.1756,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": -0.8203125,
      "rewards/margins": 3.5,
      "rewards/rejected": -4.3125,
      "step": 190
    },
    {
      "epoch": 1.9,
      "grad_norm": 82.38035369049618,
      "learning_rate": 1.0126756596375685e-08,
      "logits/chosen": 100.5,
      "logits/rejected": 108.5,
      "logps/chosen": -384.0,
      "logps/rejected": -462.0,
      "loss": 0.1902,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": -1.2890625,
      "rewards/margins": 3.0,
      "rewards/rejected": -4.28125,
      "step": 200
    },
    {
      "epoch": 1.9,
      "eval_logits/chosen": 87.0,
      "eval_logits/rejected": 89.5,
      "eval_logps/chosen": -388.0,
      "eval_logps/rejected": -476.0,
      "eval_loss": 0.44982096552848816,
      "eval_rewards/accuracies": 0.7393617033958435,
      "eval_rewards/chosen": -1.796875,
      "eval_rewards/margins": 1.71875,
      "eval_rewards/rejected": -3.515625,
      "eval_runtime": 135.9196,
      "eval_samples_per_second": 5.518,
      "eval_steps_per_second": 0.346,
      "step": 200
    },
    {
      "epoch": 1.99,
      "grad_norm": 54.35126708805902,
      "learning_rate": 0.0,
      "logits/chosen": 100.5,
      "logits/rejected": 102.5,
      "logps/chosen": -390.0,
      "logps/rejected": -444.0,
      "loss": 0.2165,
      "rewards/accuracies": 0.9437500238418579,
      "rewards/chosen": -1.109375,
      "rewards/margins": 3.078125,
      "rewards/rejected": -4.1875,
      "step": 210
    },
    {
      "epoch": 1.99,
      "step": 210,
      "total_flos": 0.0,
      "train_loss": 0.4032986215182713,
      "train_runtime": 4950.8357,
      "train_samples_per_second": 2.727,
      "train_steps_per_second": 0.042
    }
  ],
  "logging_steps": 10,
  "max_steps": 210,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}