gshf_iter_2 / trainer_state.json
YYYYYYibo's picture
Model save
8748366 verified
raw
history blame
7.88 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9966329966329966,
"eval_steps": 500,
"global_step": 148,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.3333333333333334e-08,
"logits/chosen": -2.023646593093872,
"logits/rejected": -1.861999750137329,
"logps/chosen": -160.15196228027344,
"logps/rejected": -164.30947875976562,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.07,
"learning_rate": 3.333333333333333e-07,
"logits/chosen": -1.8246960639953613,
"logits/rejected": -1.874166488647461,
"logps/chosen": -186.62855529785156,
"logps/rejected": -191.06869506835938,
"loss": 0.693,
"rewards/accuracies": 0.4513888955116272,
"rewards/chosen": -0.0028373675886541605,
"rewards/margins": -9.477811545366421e-06,
"rewards/rejected": -0.002827889285981655,
"step": 10
},
{
"epoch": 0.13,
"learning_rate": 4.98258427321406e-07,
"logits/chosen": -1.5834639072418213,
"logits/rejected": -1.6468555927276611,
"logps/chosen": -191.36276245117188,
"logps/rejected": -201.68360900878906,
"loss": 0.6936,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.09236270189285278,
"rewards/margins": 0.01027429848909378,
"rewards/rejected": -0.10263700783252716,
"step": 20
},
{
"epoch": 0.2,
"learning_rate": 4.844710954430464e-07,
"logits/chosen": -1.572912335395813,
"logits/rejected": -1.6341121196746826,
"logps/chosen": -189.92250061035156,
"logps/rejected": -188.98448181152344,
"loss": 0.6947,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.10429297387599945,
"rewards/margins": -0.007742973975837231,
"rewards/rejected": -0.09655000269412994,
"step": 30
},
{
"epoch": 0.27,
"learning_rate": 4.576621278295557e-07,
"logits/chosen": -1.6369476318359375,
"logits/rejected": -1.5702846050262451,
"logps/chosen": -203.3102264404297,
"logps/rejected": -203.18093872070312,
"loss": 0.6897,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.03835677355527878,
"rewards/margins": 0.012528707273304462,
"rewards/rejected": -0.05088547617197037,
"step": 40
},
{
"epoch": 0.34,
"learning_rate": 4.193203929064353e-07,
"logits/chosen": -1.3983080387115479,
"logits/rejected": -1.405611276626587,
"logps/chosen": -200.58035278320312,
"logps/rejected": -205.0247802734375,
"loss": 0.6879,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.2730618119239807,
"rewards/margins": 0.03439151123166084,
"rewards/rejected": -0.30745333433151245,
"step": 50
},
{
"epoch": 0.4,
"learning_rate": 3.715752452735703e-07,
"logits/chosen": -1.2504160404205322,
"logits/rejected": -1.3270930051803589,
"logps/chosen": -236.2527313232422,
"logps/rejected": -239.9370574951172,
"loss": 0.6856,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.5804754495620728,
"rewards/margins": 0.015670539811253548,
"rewards/rejected": -0.5961459279060364,
"step": 60
},
{
"epoch": 0.47,
"learning_rate": 3.170782694233712e-07,
"logits/chosen": -1.1783835887908936,
"logits/rejected": -1.141601800918579,
"logps/chosen": -263.32073974609375,
"logps/rejected": -272.93450927734375,
"loss": 0.6956,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.9808699488639832,
"rewards/margins": 0.05901496857404709,
"rewards/rejected": -1.0398849248886108,
"step": 70
},
{
"epoch": 0.54,
"learning_rate": 2.588560207905135e-07,
"logits/chosen": -1.2210887670516968,
"logits/rejected": -1.3275476694107056,
"logps/chosen": -277.7283020019531,
"logps/rejected": -284.2450256347656,
"loss": 0.6953,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.8450358510017395,
"rewards/margins": -0.01733619160950184,
"rewards/rejected": -0.8276995420455933,
"step": 80
},
{
"epoch": 0.61,
"learning_rate": 2.001419423371019e-07,
"logits/chosen": -1.2033240795135498,
"logits/rejected": -1.2934813499450684,
"logps/chosen": -233.0209197998047,
"logps/rejected": -232.87161254882812,
"loss": 0.6984,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.7631824612617493,
"rewards/margins": -0.014957061037421227,
"rewards/rejected": -0.7482253313064575,
"step": 90
},
{
"epoch": 0.67,
"learning_rate": 1.4419679138889375e-07,
"logits/chosen": -1.5676350593566895,
"logits/rejected": -1.5352352857589722,
"logps/chosen": -243.58340454101562,
"logps/rejected": -249.83670043945312,
"loss": 0.6839,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.431363046169281,
"rewards/margins": 0.026667874306440353,
"rewards/rejected": -0.45803093910217285,
"step": 100
},
{
"epoch": 0.74,
"learning_rate": 9.412754953531663e-08,
"logits/chosen": -1.4186036586761475,
"logits/rejected": -1.4321409463882446,
"logps/chosen": -207.4648895263672,
"logps/rejected": -220.63540649414062,
"loss": 0.6783,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.3499959409236908,
"rewards/margins": 0.08647538721561432,
"rewards/rejected": -0.4364713132381439,
"step": 110
},
{
"epoch": 0.81,
"learning_rate": 5.271487265090163e-08,
"logits/chosen": -1.3899421691894531,
"logits/rejected": -1.4946931600570679,
"logps/chosen": -199.7452392578125,
"logps/rejected": -206.2049102783203,
"loss": 0.6857,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.31818634271621704,
"rewards/margins": 0.03093295730650425,
"rewards/rejected": -0.34911927580833435,
"step": 120
},
{
"epoch": 0.88,
"learning_rate": 2.2258663809784888e-08,
"logits/chosen": -1.5326082706451416,
"logits/rejected": -1.4976835250854492,
"logps/chosen": -203.8050079345703,
"logps/rejected": -208.89431762695312,
"loss": 0.6865,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.27058011293411255,
"rewards/margins": 0.03228816017508507,
"rewards/rejected": -0.3028682768344879,
"step": 130
},
{
"epoch": 0.94,
"learning_rate": 4.45034538815614e-09,
"logits/chosen": -1.4072165489196777,
"logits/rejected": -1.4480218887329102,
"logps/chosen": -210.232666015625,
"logps/rejected": -218.79763793945312,
"loss": 0.6827,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.29495373368263245,
"rewards/margins": 0.03974684700369835,
"rewards/rejected": -0.3347005844116211,
"step": 140
},
{
"epoch": 1.0,
"step": 148,
"total_flos": 0.0,
"train_loss": 0.6894009854342487,
"train_runtime": 9025.6123,
"train_samples_per_second": 2.105,
"train_steps_per_second": 0.016
}
],
"logging_steps": 10,
"max_steps": 148,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}