|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9905956112852664, |
|
"eval_steps": 500, |
|
"global_step": 79, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.012539184952978056, |
|
"grad_norm": 9.394233371670921, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -2.9077322483062744, |
|
"logits/rejected": -2.8318910598754883, |
|
"logps/chosen": -351.8885498046875, |
|
"logps/pi_response": -76.32845306396484, |
|
"logps/ref_response": -76.32845306396484, |
|
"logps/rejected": -169.29762268066406, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.12539184952978055, |
|
"grad_norm": 8.360621703019095, |
|
"learning_rate": 4.990217055187362e-07, |
|
"logits/chosen": -2.7861883640289307, |
|
"logits/rejected": -2.7629709243774414, |
|
"logps/chosen": -234.14410400390625, |
|
"logps/pi_response": -70.11377716064453, |
|
"logps/ref_response": -70.02328491210938, |
|
"logps/rejected": -167.95562744140625, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.006910877302289009, |
|
"rewards/margins": 0.0037490064278244972, |
|
"rewards/rejected": 0.0031618711072951555, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2507836990595611, |
|
"grad_norm": 6.69893642100669, |
|
"learning_rate": 4.655786431300069e-07, |
|
"logits/chosen": -2.741486072540283, |
|
"logits/rejected": -2.690483331680298, |
|
"logps/chosen": -245.74948120117188, |
|
"logps/pi_response": -75.77564239501953, |
|
"logps/ref_response": -67.40553283691406, |
|
"logps/rejected": -170.17709350585938, |
|
"loss": 0.6608, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": 0.04696048051118851, |
|
"rewards/margins": 0.0809074118733406, |
|
"rewards/rejected": -0.0339469388127327, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3761755485893417, |
|
"grad_norm": 6.686646881580658, |
|
"learning_rate": 3.9061232191019517e-07, |
|
"logits/chosen": -2.6538798809051514, |
|
"logits/rejected": -2.620079517364502, |
|
"logps/chosen": -233.30227661132812, |
|
"logps/pi_response": -102.32685852050781, |
|
"logps/ref_response": -65.888427734375, |
|
"logps/rejected": -186.7948455810547, |
|
"loss": 0.6253, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.09834689646959305, |
|
"rewards/margins": 0.18449249863624573, |
|
"rewards/rejected": -0.2828393876552582, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5015673981191222, |
|
"grad_norm": 7.858132087597293, |
|
"learning_rate": 2.8856223324132555e-07, |
|
"logits/chosen": -2.6627352237701416, |
|
"logits/rejected": -2.6404871940612793, |
|
"logps/chosen": -254.8513641357422, |
|
"logps/pi_response": -137.86434936523438, |
|
"logps/ref_response": -70.97199249267578, |
|
"logps/rejected": -225.2699737548828, |
|
"loss": 0.5782, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.2209537923336029, |
|
"rewards/margins": 0.34001588821411133, |
|
"rewards/rejected": -0.5609697103500366, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6269592476489029, |
|
"grad_norm": 10.578475411451166, |
|
"learning_rate": 1.7908455541642582e-07, |
|
"logits/chosen": -2.65732479095459, |
|
"logits/rejected": -2.624019145965576, |
|
"logps/chosen": -284.77923583984375, |
|
"logps/pi_response": -154.8039093017578, |
|
"logps/ref_response": -69.12784576416016, |
|
"logps/rejected": -249.51736450195312, |
|
"loss": 0.5471, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.34653979539871216, |
|
"rewards/margins": 0.46229037642478943, |
|
"rewards/rejected": -0.8088302612304688, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7523510971786834, |
|
"grad_norm": 10.61093460315722, |
|
"learning_rate": 8.32661172908373e-08, |
|
"logits/chosen": -2.6534972190856934, |
|
"logits/rejected": -2.6186232566833496, |
|
"logps/chosen": -255.72732543945312, |
|
"logps/pi_response": -160.81385803222656, |
|
"logps/ref_response": -62.94016647338867, |
|
"logps/rejected": -265.20989990234375, |
|
"loss": 0.5272, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4366573691368103, |
|
"rewards/margins": 0.5785155892372131, |
|
"rewards/rejected": -1.0151729583740234, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.877742946708464, |
|
"grad_norm": 10.972518174316015, |
|
"learning_rate": 1.956279997278043e-08, |
|
"logits/chosen": -2.6517553329467773, |
|
"logits/rejected": -2.6147875785827637, |
|
"logps/chosen": -301.74700927734375, |
|
"logps/pi_response": -181.74948120117188, |
|
"logps/ref_response": -70.71024322509766, |
|
"logps/rejected": -276.01605224609375, |
|
"loss": 0.5138, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.4930971562862396, |
|
"rewards/margins": 0.6322487592697144, |
|
"rewards/rejected": -1.1253459453582764, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9905956112852664, |
|
"step": 79, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5845865599716766, |
|
"train_runtime": 3518.9349, |
|
"train_samples_per_second": 5.791, |
|
"train_steps_per_second": 0.022 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 79, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|