|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.26666666666666666, |
|
"eval_steps": 50, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.044444444444444446, |
|
"grad_norm": 49.98692321777344, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -1.6455594301223755, |
|
"logits/rejected": -1.8609813451766968, |
|
"logps/chosen": -69.99601745605469, |
|
"logps/rejected": -28.68402099609375, |
|
"loss": 0.4811, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": 0.43002215027809143, |
|
"rewards/margins": 0.5248908400535583, |
|
"rewards/rejected": -0.09486865252256393, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.044444444444444446, |
|
"eval_logits/chosen": -1.683065414428711, |
|
"eval_logits/rejected": -1.8381606340408325, |
|
"eval_logps/chosen": -65.2982406616211, |
|
"eval_logps/rejected": -31.1202335357666, |
|
"eval_loss": 0.26570644974708557, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.9695585370063782, |
|
"eval_rewards/margins": 1.199601650238037, |
|
"eval_rewards/rejected": -0.23004300892353058, |
|
"eval_runtime": 547.5928, |
|
"eval_samples_per_second": 0.51, |
|
"eval_steps_per_second": 0.51, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08888888888888889, |
|
"grad_norm": 18.30071449279785, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -1.7492402791976929, |
|
"logits/rejected": -1.8840913772583008, |
|
"logps/chosen": -62.02247619628906, |
|
"logps/rejected": -31.541664123535156, |
|
"loss": 0.1648, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.368556022644043, |
|
"rewards/margins": 1.8034323453903198, |
|
"rewards/rejected": -0.4348761737346649, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08888888888888889, |
|
"eval_logits/chosen": -1.717586636543274, |
|
"eval_logits/rejected": -1.840760588645935, |
|
"eval_logps/chosen": -57.23891067504883, |
|
"eval_logps/rejected": -35.770687103271484, |
|
"eval_loss": 0.0847974419593811, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 1.77549147605896, |
|
"eval_rewards/margins": 2.4705796241760254, |
|
"eval_rewards/rejected": -0.6950880885124207, |
|
"eval_runtime": 547.9305, |
|
"eval_samples_per_second": 0.509, |
|
"eval_steps_per_second": 0.509, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13333333333333333, |
|
"grad_norm": 6.001527309417725, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -1.753422498703003, |
|
"logits/rejected": -1.856553316116333, |
|
"logps/chosen": -55.98763656616211, |
|
"logps/rejected": -39.17717361450195, |
|
"loss": 0.0522, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.045351266860962, |
|
"rewards/margins": 3.051741361618042, |
|
"rewards/rejected": -1.00639009475708, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13333333333333333, |
|
"eval_logits/chosen": -1.7338056564331055, |
|
"eval_logits/rejected": -1.838354229927063, |
|
"eval_logps/chosen": -51.332515716552734, |
|
"eval_logps/rejected": -42.16679382324219, |
|
"eval_loss": 0.026723211631178856, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 2.366131067276001, |
|
"eval_rewards/margins": 3.7008297443389893, |
|
"eval_rewards/rejected": -1.3346984386444092, |
|
"eval_runtime": 548.2166, |
|
"eval_samples_per_second": 0.509, |
|
"eval_steps_per_second": 0.509, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"grad_norm": 3.3345212936401367, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -1.7408527135849, |
|
"logits/rejected": -1.8468482494354248, |
|
"logps/chosen": -51.4506721496582, |
|
"logps/rejected": -46.23123550415039, |
|
"loss": 0.0157, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.529080867767334, |
|
"rewards/margins": 4.344725131988525, |
|
"rewards/rejected": -1.8156436681747437, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"eval_logits/chosen": -1.744242787361145, |
|
"eval_logits/rejected": -1.8309202194213867, |
|
"eval_logps/chosen": -47.80553436279297, |
|
"eval_logps/rejected": -54.1657600402832, |
|
"eval_loss": 0.006161259487271309, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 2.7188286781311035, |
|
"eval_rewards/margins": 5.253425121307373, |
|
"eval_rewards/rejected": -2.5345959663391113, |
|
"eval_runtime": 548.15, |
|
"eval_samples_per_second": 0.509, |
|
"eval_steps_per_second": 0.509, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 0.14823301136493683, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -1.747748851776123, |
|
"logits/rejected": -1.8711057901382446, |
|
"logps/chosen": -48.346378326416016, |
|
"logps/rejected": -64.187744140625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.810575485229492, |
|
"rewards/margins": 6.410886764526367, |
|
"rewards/rejected": -3.600311040878296, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"eval_logits/chosen": -1.7560131549835205, |
|
"eval_logits/rejected": -1.8139592409133911, |
|
"eval_logps/chosen": -46.13553237915039, |
|
"eval_logps/rejected": -74.509765625, |
|
"eval_loss": 0.0008142033475451171, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 2.885828733444214, |
|
"eval_rewards/margins": 7.454825401306152, |
|
"eval_rewards/rejected": -4.568996429443359, |
|
"eval_runtime": 548.1607, |
|
"eval_samples_per_second": 0.509, |
|
"eval_steps_per_second": 0.509, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.26666666666666666, |
|
"grad_norm": 0.07440608739852905, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -1.7455374002456665, |
|
"logits/rejected": -1.8116660118103027, |
|
"logps/chosen": -47.130027770996094, |
|
"logps/rejected": -80.37494659423828, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.917734384536743, |
|
"rewards/margins": 8.058551788330078, |
|
"rewards/rejected": -5.140817642211914, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.26666666666666666, |
|
"eval_logits/chosen": -1.7649332284927368, |
|
"eval_logits/rejected": -1.8052014112472534, |
|
"eval_logps/chosen": -45.18332290649414, |
|
"eval_logps/rejected": -84.715576171875, |
|
"eval_loss": 0.0003367140598129481, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 2.9810502529144287, |
|
"eval_rewards/margins": 8.570627212524414, |
|
"eval_rewards/rejected": -5.589577674865723, |
|
"eval_runtime": 548.0227, |
|
"eval_samples_per_second": 0.509, |
|
"eval_steps_per_second": 0.509, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1125, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|