{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 396,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2,
      "grad_norm": 0.0012223966186866164,
      "learning_rate": 1.8750000000000002e-05,
      "logits/chosen": -22.664844512939453,
      "logits/rejected": -22.80691909790039,
      "logps/chosen": -81.01699829101562,
      "logps/rejected": -101.25294494628906,
      "loss": 0.2072,
      "rewards/accuracies": 0.8846153616905212,
      "rewards/chosen": 2.5555355548858643,
      "rewards/margins": 4.414959907531738,
      "rewards/rejected": -1.859424352645874,
      "step": 26
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8405307855573483e-05,
      "learning_rate": 2.9073033707865168e-05,
      "logits/chosen": -23.12621307373047,
      "logits/rejected": -23.24854278564453,
      "logps/chosen": -43.78964614868164,
      "logps/rejected": -156.3304901123047,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.3214497566223145,
      "rewards/margins": 13.642704010009766,
      "rewards/rejected": -7.321253776550293,
      "step": 52
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5625999367330223e-05,
      "learning_rate": 2.6882022471910113e-05,
      "logits/chosen": -23.210811614990234,
      "logits/rejected": -23.32987403869629,
      "logps/chosen": -41.96815490722656,
      "logps/rejected": -167.406982421875,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.477440357208252,
      "rewards/margins": 14.927824020385742,
      "rewards/rejected": -8.450382232666016,
      "step": 78
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5885076209087856e-05,
      "learning_rate": 2.4691011235955056e-05,
      "logits/chosen": -23.275333404541016,
      "logits/rejected": -23.39052391052246,
      "logps/chosen": -41.763607025146484,
      "logps/rejected": -169.25025939941406,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.4945831298828125,
      "rewards/margins": 15.126973152160645,
      "rewards/rejected": -8.632390975952148,
      "step": 104
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.00013870897237211466,
      "learning_rate": 2.25e-05,
      "logits/chosen": -23.342487335205078,
      "logits/rejected": -23.45945167541504,
      "logps/chosen": -41.83483123779297,
      "logps/rejected": -169.22845458984375,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.479532718658447,
      "rewards/margins": 15.119216918945312,
      "rewards/rejected": -8.63968276977539,
      "step": 130
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5738529327791184e-05,
      "learning_rate": 2.0308988764044947e-05,
      "logits/chosen": -23.253267288208008,
      "logits/rejected": -23.370222091674805,
      "logps/chosen": -41.68398666381836,
      "logps/rejected": -169.04989624023438,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.4891557693481445,
      "rewards/margins": 15.119135856628418,
      "rewards/rejected": -8.629980087280273,
      "step": 156
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.4681028005725238e-05,
      "learning_rate": 1.8117977528089886e-05,
      "logits/chosen": -23.281639099121094,
      "logits/rejected": -23.397907257080078,
      "logps/chosen": -41.55263137817383,
      "logps/rejected": -170.5806427001953,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.5376057624816895,
      "rewards/margins": 15.297250747680664,
      "rewards/rejected": -8.759647369384766,
      "step": 182
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.823231104935985e-05,
      "learning_rate": 1.5926966292134832e-05,
      "logits/chosen": -23.312273025512695,
      "logits/rejected": -23.43006706237793,
      "logps/chosen": -42.09364318847656,
      "logps/rejected": -168.72459411621094,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.430633544921875,
      "rewards/margins": 15.028059959411621,
      "rewards/rejected": -8.597426414489746,
      "step": 208
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.3677333299710881e-05,
      "learning_rate": 1.3735955056179776e-05,
      "logits/chosen": -23.281251907348633,
      "logits/rejected": -23.39859390258789,
      "logps/chosen": -41.60188293457031,
      "logps/rejected": -169.98341369628906,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.554170608520508,
      "rewards/margins": 15.241110801696777,
      "rewards/rejected": -8.686941146850586,
      "step": 234
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.329195401922334e-05,
      "learning_rate": 1.154494382022472e-05,
      "logits/chosen": -23.316593170166016,
      "logits/rejected": -23.435791015625,
      "logps/chosen": -41.78284454345703,
      "logps/rejected": -169.241943359375,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.4690632820129395,
      "rewards/margins": 15.107036590576172,
      "rewards/rejected": -8.63797378540039,
      "step": 260
    },
    {
      "epoch": 2.17,
      "grad_norm": 1.4401819498743862e-05,
      "learning_rate": 9.353932584269662e-06,
      "logits/chosen": -23.297456741333008,
      "logits/rejected": -23.411481857299805,
      "logps/chosen": -41.401039123535156,
      "logps/rejected": -169.64952087402344,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.529332160949707,
      "rewards/margins": 15.224197387695312,
      "rewards/rejected": -8.694866180419922,
      "step": 286
    },
    {
      "epoch": 2.36,
      "grad_norm": 1.4643008398707025e-05,
      "learning_rate": 7.162921348314607e-06,
      "logits/chosen": -23.277320861816406,
      "logits/rejected": -23.393136978149414,
      "logps/chosen": -41.65689468383789,
      "logps/rejected": -171.18663024902344,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.522489070892334,
      "rewards/margins": 15.33745002746582,
      "rewards/rejected": -8.814961433410645,
      "step": 312
    },
    {
      "epoch": 2.56,
      "grad_norm": 0.00012409774353727698,
      "learning_rate": 4.97191011235955e-06,
      "logits/chosen": -23.303037643432617,
      "logits/rejected": -23.417268753051758,
      "logps/chosen": -41.39340591430664,
      "logps/rejected": -170.94407653808594,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.569211483001709,
      "rewards/margins": 15.359162330627441,
      "rewards/rejected": -8.789949417114258,
      "step": 338
    },
    {
      "epoch": 2.76,
      "grad_norm": 1.4201951671566349e-05,
      "learning_rate": 2.7808988764044947e-06,
      "logits/chosen": -23.281291961669922,
      "logits/rejected": -23.399757385253906,
      "logps/chosen": -41.88224792480469,
      "logps/rejected": -169.70294189453125,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.4672112464904785,
      "rewards/margins": 15.137907028198242,
      "rewards/rejected": -8.670695304870605,
      "step": 364
    },
    {
      "epoch": 2.95,
      "grad_norm": 1.3474539628077764e-05,
      "learning_rate": 5.898876404494382e-07,
      "logits/chosen": -23.33159065246582,
      "logits/rejected": -23.45261573791504,
      "logps/chosen": -41.745670318603516,
      "logps/rejected": -169.70147705078125,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.496582984924316,
      "rewards/margins": 15.183600425720215,
      "rewards/rejected": -8.687018394470215,
      "step": 390
    }
  ],
  "logging_steps": 26,
  "max_steps": 396,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}