|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9945, |
|
"eval_steps": 500, |
|
"global_step": 153, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 28.229060496303497, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": 0.5326807498931885, |
|
"logits/rejected": 0.5883637070655823, |
|
"logps/chosen": -185.19822692871094, |
|
"logps/rejected": -194.60989379882812, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 32.45486691880899, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": 0.33694812655448914, |
|
"logits/rejected": 0.18525859713554382, |
|
"logps/chosen": -228.18931579589844, |
|
"logps/rejected": -250.44186401367188, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.44017094373703003, |
|
"rewards/chosen": -0.009515076875686646, |
|
"rewards/margins": 0.0016028096433728933, |
|
"rewards/rejected": -0.011117885820567608, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 32.74664763693393, |
|
"learning_rate": 4.989490450759331e-07, |
|
"logits/chosen": 0.37088415026664734, |
|
"logits/rejected": 0.4128836989402771, |
|
"logps/chosen": -254.14837646484375, |
|
"logps/rejected": -276.72271728515625, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5384615659713745, |
|
"rewards/chosen": -0.197755366563797, |
|
"rewards/margins": 0.020086202770471573, |
|
"rewards/rejected": -0.21784158051013947, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 38.40726627675541, |
|
"learning_rate": 4.872270441827174e-07, |
|
"logits/chosen": 0.354877769947052, |
|
"logits/rejected": 0.33263731002807617, |
|
"logps/chosen": -253.26522827148438, |
|
"logps/rejected": -263.25042724609375, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.5192307829856873, |
|
"rewards/chosen": -0.3028598725795746, |
|
"rewards/margins": 0.01474391482770443, |
|
"rewards/rejected": -0.3176037669181824, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 38.797592985526, |
|
"learning_rate": 4.6308512113530063e-07, |
|
"logits/chosen": 0.1634213924407959, |
|
"logits/rejected": 0.2352660596370697, |
|
"logps/chosen": -244.1608123779297, |
|
"logps/rejected": -260.97369384765625, |
|
"loss": 0.7013, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0662732720375061, |
|
"rewards/margins": 0.021908778697252274, |
|
"rewards/rejected": -0.08818206936120987, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 35.745848630813434, |
|
"learning_rate": 4.277872161641681e-07, |
|
"logits/chosen": 0.44109058380126953, |
|
"logits/rejected": 0.4526838958263397, |
|
"logps/chosen": -233.4575653076172, |
|
"logps/rejected": -257.31494140625, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5423076748847961, |
|
"rewards/chosen": -0.13867294788360596, |
|
"rewards/margins": 0.03718903288245201, |
|
"rewards/rejected": -0.17586196959018707, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 38.266936092602805, |
|
"learning_rate": 3.8318133624280046e-07, |
|
"logits/chosen": 0.3567802309989929, |
|
"logits/rejected": 0.4483684301376343, |
|
"logps/chosen": -273.6899719238281, |
|
"logps/rejected": -289.3863220214844, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.5769230723381042, |
|
"rewards/chosen": -0.3489604592323303, |
|
"rewards/margins": 0.07096390426158905, |
|
"rewards/rejected": -0.4199243485927582, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 36.58606527761828, |
|
"learning_rate": 3.316028034595861e-07, |
|
"logits/chosen": 0.49843645095825195, |
|
"logits/rejected": 0.5629610419273376, |
|
"logps/chosen": -280.0177917480469, |
|
"logps/rejected": -301.7088317871094, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.5538461804389954, |
|
"rewards/chosen": -0.45351117849349976, |
|
"rewards/margins": 0.03525887802243233, |
|
"rewards/rejected": -0.4887700378894806, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 37.11287003011496, |
|
"learning_rate": 2.7575199021178855e-07, |
|
"logits/chosen": 0.6807990670204163, |
|
"logits/rejected": 0.7718464136123657, |
|
"logps/chosen": -280.74658203125, |
|
"logps/rejected": -311.41009521484375, |
|
"loss": 0.676, |
|
"rewards/accuracies": 0.5923076868057251, |
|
"rewards/chosen": -0.41281428933143616, |
|
"rewards/margins": 0.10995330661535263, |
|
"rewards/rejected": -0.5227676033973694, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 50.701799655354534, |
|
"learning_rate": 2.1855294234408068e-07, |
|
"logits/chosen": 0.6627506613731384, |
|
"logits/rejected": 0.6323168277740479, |
|
"logps/chosen": -278.023681640625, |
|
"logps/rejected": -306.6815185546875, |
|
"loss": 0.6771, |
|
"rewards/accuracies": 0.557692289352417, |
|
"rewards/chosen": -0.4557516276836395, |
|
"rewards/margins": 0.07288946956396103, |
|
"rewards/rejected": -0.5286410450935364, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 29.961533632761956, |
|
"learning_rate": 1.6300029195778453e-07, |
|
"logits/chosen": 0.5369245409965515, |
|
"logits/rejected": 0.5473312735557556, |
|
"logps/chosen": -275.7201843261719, |
|
"logps/rejected": -282.8805236816406, |
|
"loss": 0.7042, |
|
"rewards/accuracies": 0.5115384459495544, |
|
"rewards/chosen": -0.3841624855995178, |
|
"rewards/margins": -0.015378502197563648, |
|
"rewards/rejected": -0.36878401041030884, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 31.11285576879039, |
|
"learning_rate": 1.1200247470632392e-07, |
|
"logits/chosen": 0.31445741653442383, |
|
"logits/rejected": 0.33911341428756714, |
|
"logps/chosen": -258.0011901855469, |
|
"logps/rejected": -270.0567932128906, |
|
"loss": 0.6857, |
|
"rewards/accuracies": 0.5423076748847961, |
|
"rewards/chosen": -0.2574421763420105, |
|
"rewards/margins": 0.022587427869439125, |
|
"rewards/rejected": -0.2800295948982239, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 31.866161116501196, |
|
"learning_rate": 6.822945986946385e-08, |
|
"logits/chosen": 0.6134840250015259, |
|
"logits/rejected": 0.691197395324707, |
|
"logps/chosen": -284.92510986328125, |
|
"logps/rejected": -306.45050048828125, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.5423076748847961, |
|
"rewards/chosen": -0.5774205327033997, |
|
"rewards/margins": 0.037515509873628616, |
|
"rewards/rejected": -0.6149360537528992, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 41.02700179238128, |
|
"learning_rate": 3.397296523427806e-08, |
|
"logits/chosen": 1.0320525169372559, |
|
"logits/rejected": 0.8667150735855103, |
|
"logps/chosen": -293.99853515625, |
|
"logps/rejected": -317.0647888183594, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.5961538553237915, |
|
"rewards/chosen": -0.5775225162506104, |
|
"rewards/margins": 0.07906623929738998, |
|
"rewards/rejected": -0.6565887928009033, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 33.934196556449756, |
|
"learning_rate": 1.1026475173977978e-08, |
|
"logits/chosen": 0.4998157322406769, |
|
"logits/rejected": 0.41973716020584106, |
|
"logps/chosen": -282.9462585449219, |
|
"logps/rejected": -299.1942443847656, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.557692289352417, |
|
"rewards/chosen": -0.4659727215766907, |
|
"rewards/margins": 0.056435175240039825, |
|
"rewards/rejected": -0.5224078297615051, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 38.33373569305899, |
|
"learning_rate": 5.913435276374834e-10, |
|
"logits/chosen": 0.42322245240211487, |
|
"logits/rejected": 0.43874579668045044, |
|
"logps/chosen": -272.926513671875, |
|
"logps/rejected": -309.4190368652344, |
|
"loss": 0.6797, |
|
"rewards/accuracies": 0.5807692408561707, |
|
"rewards/chosen": -0.5053122043609619, |
|
"rewards/margins": 0.1106579527258873, |
|
"rewards/rejected": -0.6159701943397522, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"step": 153, |
|
"total_flos": 0.0, |
|
"train_loss": 0.687320882978003, |
|
"train_runtime": 21824.845, |
|
"train_samples_per_second": 0.916, |
|
"train_steps_per_second": 0.007 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 153, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|