{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9966329966329966,
  "eval_steps": 500,
  "global_step": 148,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.3333333333333334e-08,
      "logits/chosen": -2.023646593093872,
      "logits/rejected": -1.861999750137329,
      "logps/chosen": -160.15196228027344,
      "logps/rejected": -164.30947875976562,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.07,
      "learning_rate": 3.333333333333333e-07,
      "logits/chosen": -1.8246960639953613,
      "logits/rejected": -1.874166488647461,
      "logps/chosen": -186.62855529785156,
      "logps/rejected": -191.06869506835938,
      "loss": 0.693,
      "rewards/accuracies": 0.4513888955116272,
      "rewards/chosen": -0.0028373675886541605,
      "rewards/margins": -9.477811545366421e-06,
      "rewards/rejected": -0.002827889285981655,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.98258427321406e-07,
      "logits/chosen": -1.5834639072418213,
      "logits/rejected": -1.6468555927276611,
      "logps/chosen": -191.36276245117188,
      "logps/rejected": -201.68360900878906,
      "loss": 0.6936,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.09236270189285278,
      "rewards/margins": 0.01027429848909378,
      "rewards/rejected": -0.10263700783252716,
      "step": 20
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.844710954430464e-07,
      "logits/chosen": -1.572912335395813,
      "logits/rejected": -1.6341121196746826,
      "logps/chosen": -189.92250061035156,
      "logps/rejected": -188.98448181152344,
      "loss": 0.6947,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": -0.10429297387599945,
      "rewards/margins": -0.007742973975837231,
      "rewards/rejected": -0.09655000269412994,
      "step": 30
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.576621278295557e-07,
      "logits/chosen": -1.6369476318359375,
      "logits/rejected": -1.5702846050262451,
      "logps/chosen": -203.3102264404297,
      "logps/rejected": -203.18093872070312,
      "loss": 0.6897,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.03835677355527878,
      "rewards/margins": 0.012528707273304462,
      "rewards/rejected": -0.05088547617197037,
      "step": 40
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.193203929064353e-07,
      "logits/chosen": -1.3983080387115479,
      "logits/rejected": -1.405611276626587,
      "logps/chosen": -200.58035278320312,
      "logps/rejected": -205.0247802734375,
      "loss": 0.6879,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": -0.2730618119239807,
      "rewards/margins": 0.03439151123166084,
      "rewards/rejected": -0.30745333433151245,
      "step": 50
    },
    {
      "epoch": 0.4,
      "learning_rate": 3.715752452735703e-07,
      "logits/chosen": -1.2504160404205322,
      "logits/rejected": -1.3270930051803589,
      "logps/chosen": -236.2527313232422,
      "logps/rejected": -239.9370574951172,
      "loss": 0.6856,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": -0.5804754495620728,
      "rewards/margins": 0.015670539811253548,
      "rewards/rejected": -0.5961459279060364,
      "step": 60
    },
    {
      "epoch": 0.47,
      "learning_rate": 3.170782694233712e-07,
      "logits/chosen": -1.1783835887908936,
      "logits/rejected": -1.141601800918579,
      "logps/chosen": -263.32073974609375,
      "logps/rejected": -272.93450927734375,
      "loss": 0.6956,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.9808699488639832,
      "rewards/margins": 0.05901496857404709,
      "rewards/rejected": -1.0398849248886108,
      "step": 70
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.588560207905135e-07,
      "logits/chosen": -1.2210887670516968,
      "logits/rejected": -1.3275476694107056,
      "logps/chosen": -277.7283020019531,
      "logps/rejected": -284.2450256347656,
      "loss": 0.6953,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": -0.8450358510017395,
      "rewards/margins": -0.01733619160950184,
      "rewards/rejected": -0.8276995420455933,
      "step": 80
    },
    {
      "epoch": 0.61,
      "learning_rate": 2.001419423371019e-07,
      "logits/chosen": -1.2033240795135498,
      "logits/rejected": -1.2934813499450684,
      "logps/chosen": -233.0209197998047,
      "logps/rejected": -232.87161254882812,
      "loss": 0.6984,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": -0.7631824612617493,
      "rewards/margins": -0.014957061037421227,
      "rewards/rejected": -0.7482253313064575,
      "step": 90
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.4419679138889375e-07,
      "logits/chosen": -1.5676350593566895,
      "logits/rejected": -1.5352352857589722,
      "logps/chosen": -243.58340454101562,
      "logps/rejected": -249.83670043945312,
      "loss": 0.6839,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.431363046169281,
      "rewards/margins": 0.026667874306440353,
      "rewards/rejected": -0.45803093910217285,
      "step": 100
    },
    {
      "epoch": 0.74,
      "learning_rate": 9.412754953531663e-08,
      "logits/chosen": -1.4186036586761475,
      "logits/rejected": -1.4321409463882446,
      "logps/chosen": -207.4648895263672,
      "logps/rejected": -220.63540649414062,
      "loss": 0.6783,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.3499959409236908,
      "rewards/margins": 0.08647538721561432,
      "rewards/rejected": -0.4364713132381439,
      "step": 110
    },
    {
      "epoch": 0.81,
      "learning_rate": 5.271487265090163e-08,
      "logits/chosen": -1.3899421691894531,
      "logits/rejected": -1.4946931600570679,
      "logps/chosen": -199.7452392578125,
      "logps/rejected": -206.2049102783203,
      "loss": 0.6857,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.31818634271621704,
      "rewards/margins": 0.03093295730650425,
      "rewards/rejected": -0.34911927580833435,
      "step": 120
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.2258663809784888e-08,
      "logits/chosen": -1.5326082706451416,
      "logits/rejected": -1.4976835250854492,
      "logps/chosen": -203.8050079345703,
      "logps/rejected": -208.89431762695312,
      "loss": 0.6865,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.27058011293411255,
      "rewards/margins": 0.03228816017508507,
      "rewards/rejected": -0.3028682768344879,
      "step": 130
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.45034538815614e-09,
      "logits/chosen": -1.4072165489196777,
      "logits/rejected": -1.4480218887329102,
      "logps/chosen": -210.232666015625,
      "logps/rejected": -218.79763793945312,
      "loss": 0.6827,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.29495373368263245,
      "rewards/margins": 0.03974684700369835,
      "rewards/rejected": -0.3347005844116211,
      "step": 140
    },
    {
      "epoch": 1.0,
      "step": 148,
      "total_flos": 0.0,
      "train_loss": 0.6894009854342487,
      "train_runtime": 9025.6123,
      "train_samples_per_second": 2.105,
      "train_steps_per_second": 0.016
    }
  ],
  "logging_steps": 10,
  "max_steps": 148,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}