|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.4,
  "eval_steps": 50,
  "global_step": 450,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.044444444444444446,
      "grad_norm": 49.98692321777344,
      "learning_rate": 5e-07,
      "logits/chosen": -1.6455594301223755,
      "logits/rejected": -1.8609813451766968,
      "logps/chosen": -69.99601745605469,
      "logps/rejected": -28.68402099609375,
      "loss": 0.4811,
      "rewards/accuracies": 0.9200000166893005,
      "rewards/chosen": 0.43002215027809143,
      "rewards/margins": 0.5248908400535583,
      "rewards/rejected": -0.09486865252256393,
      "step": 50
    },
    {
      "epoch": 0.044444444444444446,
      "eval_logits/chosen": -1.683065414428711,
      "eval_logits/rejected": -1.8381606340408325,
      "eval_logps/chosen": -65.2982406616211,
      "eval_logps/rejected": -31.1202335357666,
      "eval_loss": 0.26570644974708557,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": 0.9695585370063782,
      "eval_rewards/margins": 1.199601650238037,
      "eval_rewards/rejected": -0.23004300892353058,
      "eval_runtime": 547.5928,
      "eval_samples_per_second": 0.51,
      "eval_steps_per_second": 0.51,
      "step": 50
    },
    {
      "epoch": 0.08888888888888889,
      "grad_norm": 18.30071449279785,
      "learning_rate": 5e-07,
      "logits/chosen": -1.7492402791976929,
      "logits/rejected": -1.8840913772583008,
      "logps/chosen": -62.02247619628906,
      "logps/rejected": -31.541664123535156,
      "loss": 0.1648,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 1.368556022644043,
      "rewards/margins": 1.8034323453903198,
      "rewards/rejected": -0.4348761737346649,
      "step": 100
    },
    {
      "epoch": 0.08888888888888889,
      "eval_logits/chosen": -1.717586636543274,
      "eval_logits/rejected": -1.840760588645935,
      "eval_logps/chosen": -57.23891067504883,
      "eval_logps/rejected": -35.770687103271484,
      "eval_loss": 0.0847974419593811,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": 1.77549147605896,
      "eval_rewards/margins": 2.4705796241760254,
      "eval_rewards/rejected": -0.6950880885124207,
      "eval_runtime": 547.9305,
      "eval_samples_per_second": 0.509,
      "eval_steps_per_second": 0.509,
      "step": 100
    },
    {
      "epoch": 0.13333333333333333,
      "grad_norm": 6.001527309417725,
      "learning_rate": 5e-07,
      "logits/chosen": -1.753422498703003,
      "logits/rejected": -1.856553316116333,
      "logps/chosen": -55.98763656616211,
      "logps/rejected": -39.17717361450195,
      "loss": 0.0522,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 2.045351266860962,
      "rewards/margins": 3.051741361618042,
      "rewards/rejected": -1.00639009475708,
      "step": 150
    },
    {
      "epoch": 0.13333333333333333,
      "eval_logits/chosen": -1.7338056564331055,
      "eval_logits/rejected": -1.838354229927063,
      "eval_logps/chosen": -51.332515716552734,
      "eval_logps/rejected": -42.16679382324219,
      "eval_loss": 0.026723211631178856,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": 2.366131067276001,
      "eval_rewards/margins": 3.7008297443389893,
      "eval_rewards/rejected": -1.3346984386444092,
      "eval_runtime": 548.2166,
      "eval_samples_per_second": 0.509,
      "eval_steps_per_second": 0.509,
      "step": 150
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 3.3345212936401367,
      "learning_rate": 5e-07,
      "logits/chosen": -1.7408527135849,
      "logits/rejected": -1.8468482494354248,
      "logps/chosen": -51.4506721496582,
      "logps/rejected": -46.23123550415039,
      "loss": 0.0157,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 2.529080867767334,
      "rewards/margins": 4.344725131988525,
      "rewards/rejected": -1.8156436681747437,
      "step": 200
    },
    {
      "epoch": 0.17777777777777778,
      "eval_logits/chosen": -1.744242787361145,
      "eval_logits/rejected": -1.8309202194213867,
      "eval_logps/chosen": -47.80553436279297,
      "eval_logps/rejected": -54.1657600402832,
      "eval_loss": 0.006161259487271309,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": 2.7188286781311035,
      "eval_rewards/margins": 5.253425121307373,
      "eval_rewards/rejected": -2.5345959663391113,
      "eval_runtime": 548.15,
      "eval_samples_per_second": 0.509,
      "eval_steps_per_second": 0.509,
      "step": 200
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 0.14823301136493683,
      "learning_rate": 5e-07,
      "logits/chosen": -1.747748851776123,
      "logits/rejected": -1.8711057901382446,
      "logps/chosen": -48.346378326416016,
      "logps/rejected": -64.187744140625,
      "loss": 0.0026,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 2.810575485229492,
      "rewards/margins": 6.410886764526367,
      "rewards/rejected": -3.600311040878296,
      "step": 250
    },
    {
      "epoch": 0.2222222222222222,
      "eval_logits/chosen": -1.7560131549835205,
      "eval_logits/rejected": -1.8139592409133911,
      "eval_logps/chosen": -46.13553237915039,
      "eval_logps/rejected": -74.509765625,
      "eval_loss": 0.0008142033475451171,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": 2.885828733444214,
      "eval_rewards/margins": 7.454825401306152,
      "eval_rewards/rejected": -4.568996429443359,
      "eval_runtime": 548.1607,
      "eval_samples_per_second": 0.509,
      "eval_steps_per_second": 0.509,
      "step": 250
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 0.07440608739852905,
      "learning_rate": 5e-07,
      "logits/chosen": -1.7455374002456665,
      "logits/rejected": -1.8116660118103027,
      "logps/chosen": -47.130027770996094,
      "logps/rejected": -80.37494659423828,
      "loss": 0.0005,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 2.917734384536743,
      "rewards/margins": 8.058551788330078,
      "rewards/rejected": -5.140817642211914,
      "step": 300
    },
    {
      "epoch": 0.26666666666666666,
      "eval_logits/chosen": -1.7649332284927368,
      "eval_logits/rejected": -1.8052014112472534,
      "eval_logps/chosen": -45.18332290649414,
      "eval_logps/rejected": -84.715576171875,
      "eval_loss": 0.0003367140598129481,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": 2.9810502529144287,
      "eval_rewards/margins": 8.570627212524414,
      "eval_rewards/rejected": -5.589577674865723,
      "eval_runtime": 548.0227,
      "eval_samples_per_second": 0.509,
      "eval_steps_per_second": 0.509,
      "step": 300
    },
    {
      "epoch": 0.3111111111111111,
      "grad_norm": 0.23535361886024475,
      "learning_rate": 5e-07,
      "logits/chosen": -1.7623131275177002,
      "logits/rejected": -1.8129371404647827,
      "logps/chosen": -47.72771072387695,
      "logps/rejected": -87.96331787109375,
      "loss": 0.0003,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 2.999173879623413,
      "rewards/margins": 8.857995986938477,
      "rewards/rejected": -5.858822822570801,
      "step": 350
    },
    {
      "epoch": 0.3111111111111111,
      "eval_logits/chosen": -1.768768310546875,
      "eval_logits/rejected": -1.7995201349258423,
      "eval_logps/chosen": -44.72746276855469,
      "eval_logps/rejected": -90.03182220458984,
      "eval_loss": 0.0002303781802766025,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": 3.0266363620758057,
      "eval_rewards/margins": 9.14783763885498,
      "eval_rewards/rejected": -6.121201992034912,
      "eval_runtime": 538.111,
      "eval_samples_per_second": 0.518,
      "eval_steps_per_second": 0.518,
      "step": 350
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 0.012943393550813198,
      "learning_rate": 5e-07,
      "logits/chosen": -1.814980149269104,
      "logits/rejected": -1.8396068811416626,
      "logps/chosen": -45.2337646484375,
      "logps/rejected": -91.96194458007812,
      "loss": 0.0002,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 3.0036685466766357,
      "rewards/margins": 9.295085906982422,
      "rewards/rejected": -6.291417121887207,
      "step": 400
    },
    {
      "epoch": 0.35555555555555557,
      "eval_logits/chosen": -1.772402048110962,
      "eval_logits/rejected": -1.7961318492889404,
      "eval_logps/chosen": -44.347679138183594,
      "eval_logps/rejected": -93.67013549804688,
      "eval_loss": 0.00018359384557697922,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": 3.0646145343780518,
      "eval_rewards/margins": 9.549647331237793,
      "eval_rewards/rejected": -6.485032558441162,
      "eval_runtime": 537.2714,
      "eval_samples_per_second": 0.519,
      "eval_steps_per_second": 0.519,
      "step": 400
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.03291670233011246,
      "learning_rate": 5e-07,
      "logits/chosen": -1.7613160610198975,
      "logits/rejected": -1.7991280555725098,
      "logps/chosen": -45.389404296875,
      "logps/rejected": -94.11991882324219,
      "loss": 0.0005,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 3.102461814880371,
      "rewards/margins": 9.708904266357422,
      "rewards/rejected": -6.606442451477051,
      "step": 450
    },
    {
      "epoch": 0.4,
      "eval_logits/chosen": -1.7705323696136475,
      "eval_logits/rejected": -1.7885987758636475,
      "eval_logps/chosen": -44.02812194824219,
      "eval_logps/rejected": -96.20520782470703,
      "eval_loss": 0.00015724882541690022,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": 3.0965700149536133,
      "eval_rewards/margins": 9.835110664367676,
      "eval_rewards/rejected": -6.738540172576904,
      "eval_runtime": 538.1537,
      "eval_samples_per_second": 0.518,
      "eval_steps_per_second": 0.518,
      "step": 450
    }
  ],
  "logging_steps": 50,
  "max_steps": 1125,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|
|