{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9905956112852664,
"eval_steps": 500,
"global_step": 79,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012539184952978056,
"grad_norm": 9.394233371670921,
"learning_rate": 6.25e-08,
"logits/chosen": -2.9077322483062744,
"logits/rejected": -2.8318910598754883,
"logps/chosen": -351.8885498046875,
"logps/pi_response": -76.32845306396484,
"logps/ref_response": -76.32845306396484,
"logps/rejected": -169.29762268066406,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.12539184952978055,
"grad_norm": 8.360621703019095,
"learning_rate": 4.990217055187362e-07,
"logits/chosen": -2.7861883640289307,
"logits/rejected": -2.7629709243774414,
"logps/chosen": -234.14410400390625,
"logps/pi_response": -70.11377716064453,
"logps/ref_response": -70.02328491210938,
"logps/rejected": -167.95562744140625,
"loss": 0.6904,
"rewards/accuracies": 0.4861111044883728,
"rewards/chosen": 0.006910877302289009,
"rewards/margins": 0.0037490064278244972,
"rewards/rejected": 0.0031618711072951555,
"step": 10
},
{
"epoch": 0.2507836990595611,
"grad_norm": 6.69893642100669,
"learning_rate": 4.655786431300069e-07,
"logits/chosen": -2.741486072540283,
"logits/rejected": -2.690483331680298,
"logps/chosen": -245.74948120117188,
"logps/pi_response": -75.77564239501953,
"logps/ref_response": -67.40553283691406,
"logps/rejected": -170.17709350585938,
"loss": 0.6608,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": 0.04696048051118851,
"rewards/margins": 0.0809074118733406,
"rewards/rejected": -0.0339469388127327,
"step": 20
},
{
"epoch": 0.3761755485893417,
"grad_norm": 6.686646881580658,
"learning_rate": 3.9061232191019517e-07,
"logits/chosen": -2.6538798809051514,
"logits/rejected": -2.620079517364502,
"logps/chosen": -233.30227661132812,
"logps/pi_response": -102.32685852050781,
"logps/ref_response": -65.888427734375,
"logps/rejected": -186.7948455810547,
"loss": 0.6253,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.09834689646959305,
"rewards/margins": 0.18449249863624573,
"rewards/rejected": -0.2828393876552582,
"step": 30
},
{
"epoch": 0.5015673981191222,
"grad_norm": 7.858132087597293,
"learning_rate": 2.8856223324132555e-07,
"logits/chosen": -2.6627352237701416,
"logits/rejected": -2.6404871940612793,
"logps/chosen": -254.8513641357422,
"logps/pi_response": -137.86434936523438,
"logps/ref_response": -70.97199249267578,
"logps/rejected": -225.2699737548828,
"loss": 0.5782,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.2209537923336029,
"rewards/margins": 0.34001588821411133,
"rewards/rejected": -0.5609697103500366,
"step": 40
},
{
"epoch": 0.6269592476489029,
"grad_norm": 10.578475411451166,
"learning_rate": 1.7908455541642582e-07,
"logits/chosen": -2.65732479095459,
"logits/rejected": -2.624019145965576,
"logps/chosen": -284.77923583984375,
"logps/pi_response": -154.8039093017578,
"logps/ref_response": -69.12784576416016,
"logps/rejected": -249.51736450195312,
"loss": 0.5471,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.34653979539871216,
"rewards/margins": 0.46229037642478943,
"rewards/rejected": -0.8088302612304688,
"step": 50
},
{
"epoch": 0.7523510971786834,
"grad_norm": 10.61093460315722,
"learning_rate": 8.32661172908373e-08,
"logits/chosen": -2.6534972190856934,
"logits/rejected": -2.6186232566833496,
"logps/chosen": -255.72732543945312,
"logps/pi_response": -160.81385803222656,
"logps/ref_response": -62.94016647338867,
"logps/rejected": -265.20989990234375,
"loss": 0.5272,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.4366573691368103,
"rewards/margins": 0.5785155892372131,
"rewards/rejected": -1.0151729583740234,
"step": 60
},
{
"epoch": 0.877742946708464,
"grad_norm": 10.972518174316015,
"learning_rate": 1.956279997278043e-08,
"logits/chosen": -2.6517553329467773,
"logits/rejected": -2.6147875785827637,
"logps/chosen": -301.74700927734375,
"logps/pi_response": -181.74948120117188,
"logps/ref_response": -70.71024322509766,
"logps/rejected": -276.01605224609375,
"loss": 0.5138,
"rewards/accuracies": 0.778124988079071,
"rewards/chosen": -0.4930971562862396,
"rewards/margins": 0.6322487592697144,
"rewards/rejected": -1.1253459453582764,
"step": 70
},
{
"epoch": 0.9905956112852664,
"step": 79,
"total_flos": 0.0,
"train_loss": 0.5845865599716766,
"train_runtime": 3518.9349,
"train_samples_per_second": 5.791,
"train_steps_per_second": 0.022
}
],
"logging_steps": 10,
"max_steps": 79,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}