|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9945, |
|
"eval_steps": 500, |
|
"global_step": 153, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 17.363519218688417, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -2.205641031265259, |
|
"logits/rejected": -2.2929024696350098, |
|
"logps/chosen": -215.50050354003906, |
|
"logps/rejected": -237.99966430664062, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 18.34878510832685, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.382091522216797, |
|
"logits/rejected": -2.295259952545166, |
|
"logps/chosen": -222.111328125, |
|
"logps/rejected": -210.6314697265625, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4615384638309479, |
|
"rewards/chosen": 0.0012403662549331784, |
|
"rewards/margins": 0.0005746442475356162, |
|
"rewards/rejected": 0.0006657222402282059, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 18.337158741008302, |
|
"learning_rate": 4.989490450759331e-07, |
|
"logits/chosen": -2.383176803588867, |
|
"logits/rejected": -2.422689199447632, |
|
"logps/chosen": -212.94821166992188, |
|
"logps/rejected": -232.4334259033203, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.5384615659713745, |
|
"rewards/chosen": -0.08582816272974014, |
|
"rewards/margins": 0.0013778842985630035, |
|
"rewards/rejected": -0.08720605075359344, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 22.542054212888594, |
|
"learning_rate": 4.872270441827174e-07, |
|
"logits/chosen": -2.3914377689361572, |
|
"logits/rejected": -2.3520281314849854, |
|
"logps/chosen": -215.9875030517578, |
|
"logps/rejected": -227.70399475097656, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.5384615659713745, |
|
"rewards/chosen": -0.25440388917922974, |
|
"rewards/margins": 0.03694874048233032, |
|
"rewards/rejected": -0.29135259985923767, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 18.6076942310058, |
|
"learning_rate": 4.6308512113530063e-07, |
|
"logits/chosen": -2.4768216609954834, |
|
"logits/rejected": -2.4233009815216064, |
|
"logps/chosen": -229.41555786132812, |
|
"logps/rejected": -242.26214599609375, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5384615659713745, |
|
"rewards/chosen": -0.19061818718910217, |
|
"rewards/margins": 0.01797662116587162, |
|
"rewards/rejected": -0.20859479904174805, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 15.540618809394221, |
|
"learning_rate": 4.277872161641681e-07, |
|
"logits/chosen": -2.5903377532958984, |
|
"logits/rejected": -2.5592682361602783, |
|
"logps/chosen": -225.72836303710938, |
|
"logps/rejected": -240.36595153808594, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.5307692289352417, |
|
"rewards/chosen": -0.033847782760858536, |
|
"rewards/margins": 0.001607205718755722, |
|
"rewards/rejected": -0.03545498102903366, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 24.02478451571149, |
|
"learning_rate": 3.8318133624280046e-07, |
|
"logits/chosen": -2.6142632961273193, |
|
"logits/rejected": -2.6310534477233887, |
|
"logps/chosen": -223.6808624267578, |
|
"logps/rejected": -255.2255401611328, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.5423076748847961, |
|
"rewards/chosen": -0.10960451513528824, |
|
"rewards/margins": 0.02808019518852234, |
|
"rewards/rejected": -0.13768470287322998, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 20.288409772533704, |
|
"learning_rate": 3.316028034595861e-07, |
|
"logits/chosen": -2.5127460956573486, |
|
"logits/rejected": -2.5037307739257812, |
|
"logps/chosen": -230.75833129882812, |
|
"logps/rejected": -256.0094909667969, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.4961538314819336, |
|
"rewards/chosen": -0.2965443730354309, |
|
"rewards/margins": 0.03154058754444122, |
|
"rewards/rejected": -0.3280849754810333, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 22.680428500041128, |
|
"learning_rate": 2.7575199021178855e-07, |
|
"logits/chosen": -2.5435211658477783, |
|
"logits/rejected": -2.5064070224761963, |
|
"logps/chosen": -251.8400421142578, |
|
"logps/rejected": -273.2138671875, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.5538461804389954, |
|
"rewards/chosen": -0.28813430666923523, |
|
"rewards/margins": 0.06685086339712143, |
|
"rewards/rejected": -0.3549851179122925, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 18.86897478499211, |
|
"learning_rate": 2.1855294234408068e-07, |
|
"logits/chosen": -2.5077600479125977, |
|
"logits/rejected": -2.503957986831665, |
|
"logps/chosen": -251.26556396484375, |
|
"logps/rejected": -242.37310791015625, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.5307692289352417, |
|
"rewards/chosen": -0.24721869826316833, |
|
"rewards/margins": 0.009251880459487438, |
|
"rewards/rejected": -0.25647059082984924, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 20.56846597020704, |
|
"learning_rate": 1.6300029195778453e-07, |
|
"logits/chosen": -2.492692708969116, |
|
"logits/rejected": -2.3162038326263428, |
|
"logps/chosen": -253.9385223388672, |
|
"logps/rejected": -268.18414306640625, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.5538461804389954, |
|
"rewards/chosen": -0.4339679181575775, |
|
"rewards/margins": 0.06273461133241653, |
|
"rewards/rejected": -0.49670252203941345, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 29.852940409061166, |
|
"learning_rate": 1.1200247470632392e-07, |
|
"logits/chosen": -2.409569501876831, |
|
"logits/rejected": -2.3867456912994385, |
|
"logps/chosen": -297.3172607421875, |
|
"logps/rejected": -285.6565246582031, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.4923076927661896, |
|
"rewards/chosen": -0.73952317237854, |
|
"rewards/margins": 0.020068956539034843, |
|
"rewards/rejected": -0.7595921754837036, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 20.239291104683563, |
|
"learning_rate": 6.822945986946385e-08, |
|
"logits/chosen": -2.3316874504089355, |
|
"logits/rejected": -2.2675819396972656, |
|
"logps/chosen": -275.67767333984375, |
|
"logps/rejected": -291.9703674316406, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.5384615659713745, |
|
"rewards/chosen": -0.5500468611717224, |
|
"rewards/margins": 0.06261468678712845, |
|
"rewards/rejected": -0.6126615405082703, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 24.27835409910822, |
|
"learning_rate": 3.397296523427806e-08, |
|
"logits/chosen": -2.4436075687408447, |
|
"logits/rejected": -2.309699296951294, |
|
"logps/chosen": -233.41680908203125, |
|
"logps/rejected": -262.0289611816406, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.5807692408561707, |
|
"rewards/chosen": -0.36947229504585266, |
|
"rewards/margins": 0.0935312956571579, |
|
"rewards/rejected": -0.46300360560417175, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 24.790097756991855, |
|
"learning_rate": 1.1026475173977978e-08, |
|
"logits/chosen": -2.3928282260894775, |
|
"logits/rejected": -2.382559299468994, |
|
"logps/chosen": -244.34410095214844, |
|
"logps/rejected": -248.87876892089844, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.5615384578704834, |
|
"rewards/chosen": -0.27959996461868286, |
|
"rewards/margins": 0.04778864234685898, |
|
"rewards/rejected": -0.32738858461380005, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 24.029709323211232, |
|
"learning_rate": 5.913435276374834e-10, |
|
"logits/chosen": -2.4940154552459717, |
|
"logits/rejected": -2.3921005725860596, |
|
"logps/chosen": -241.1715545654297, |
|
"logps/rejected": -262.944580078125, |
|
"loss": 0.6737, |
|
"rewards/accuracies": 0.6192307472229004, |
|
"rewards/chosen": -0.2436859905719757, |
|
"rewards/margins": 0.09733694791793823, |
|
"rewards/rejected": -0.34102290868759155, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"step": 153, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6839175197034101, |
|
"train_runtime": 39806.5992, |
|
"train_samples_per_second": 0.502, |
|
"train_steps_per_second": 0.004 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 153, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|