|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9945, |
|
"eval_steps": 500, |
|
"global_step": 153, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 13.312925409718954, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -2.1492395401000977, |
|
"logits/rejected": -2.139173746109009, |
|
"logps/chosen": -189.41439819335938, |
|
"logps/rejected": -184.15049743652344, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 15.630000847331686, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.3999834060668945, |
|
"logits/rejected": -2.346851348876953, |
|
"logps/chosen": -178.99545288085938, |
|
"logps/rejected": -177.0459747314453, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.42307692766189575, |
|
"rewards/chosen": -0.0037847168277949095, |
|
"rewards/margins": 0.0002747862017713487, |
|
"rewards/rejected": -0.004059503320604563, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 14.549393173612225, |
|
"learning_rate": 4.989490450759331e-07, |
|
"logits/chosen": -2.4151172637939453, |
|
"logits/rejected": -2.356534004211426, |
|
"logps/chosen": -179.75003051757812, |
|
"logps/rejected": -179.4581756591797, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.5461538434028625, |
|
"rewards/chosen": -0.055207282304763794, |
|
"rewards/margins": 0.006152572110295296, |
|
"rewards/rejected": -0.06135985627770424, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 22.390116207007786, |
|
"learning_rate": 4.872270441827174e-07, |
|
"logits/chosen": -2.312279224395752, |
|
"logits/rejected": -2.211397886276245, |
|
"logps/chosen": -206.32656860351562, |
|
"logps/rejected": -211.81321716308594, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.557692289352417, |
|
"rewards/chosen": -0.3904457688331604, |
|
"rewards/margins": 0.03509727492928505, |
|
"rewards/rejected": -0.42554304003715515, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 15.933088854619298, |
|
"learning_rate": 4.6308512113530063e-07, |
|
"logits/chosen": -2.2958626747131348, |
|
"logits/rejected": -2.3168814182281494, |
|
"logps/chosen": -236.7042999267578, |
|
"logps/rejected": -244.78851318359375, |
|
"loss": 0.6981, |
|
"rewards/accuracies": 0.5461538434028625, |
|
"rewards/chosen": -0.6312862038612366, |
|
"rewards/margins": 0.015706488862633705, |
|
"rewards/rejected": -0.6469926238059998, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 14.014878007482002, |
|
"learning_rate": 4.277872161641681e-07, |
|
"logits/chosen": -2.368952512741089, |
|
"logits/rejected": -2.4042294025421143, |
|
"logps/chosen": -214.369384765625, |
|
"logps/rejected": -220.7718505859375, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.2600650191307068, |
|
"rewards/margins": 0.018586795777082443, |
|
"rewards/rejected": -0.2786518335342407, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 14.767139513110513, |
|
"learning_rate": 3.8318133624280046e-07, |
|
"logits/chosen": -2.4127275943756104, |
|
"logits/rejected": -2.434305191040039, |
|
"logps/chosen": -217.94210815429688, |
|
"logps/rejected": -227.94302368164062, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5384615659713745, |
|
"rewards/chosen": -0.2718888223171234, |
|
"rewards/margins": 0.015998326241970062, |
|
"rewards/rejected": -0.2878871560096741, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 15.91144067203442, |
|
"learning_rate": 3.316028034595861e-07, |
|
"logits/chosen": -2.264232635498047, |
|
"logits/rejected": -2.299992322921753, |
|
"logps/chosen": -194.38172912597656, |
|
"logps/rejected": -205.9635009765625, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.5884615182876587, |
|
"rewards/chosen": -0.17818714678287506, |
|
"rewards/margins": 0.02437894232571125, |
|
"rewards/rejected": -0.20256608724594116, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 18.074689046967872, |
|
"learning_rate": 2.7575199021178855e-07, |
|
"logits/chosen": -2.299180746078491, |
|
"logits/rejected": -2.182999610900879, |
|
"logps/chosen": -231.85098266601562, |
|
"logps/rejected": -236.9989776611328, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.5730769038200378, |
|
"rewards/chosen": -0.3959502577781677, |
|
"rewards/margins": 0.03195538371801376, |
|
"rewards/rejected": -0.4279056191444397, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 19.634321191048826, |
|
"learning_rate": 2.1855294234408068e-07, |
|
"logits/chosen": -2.232875347137451, |
|
"logits/rejected": -2.2362263202667236, |
|
"logps/chosen": -208.51087951660156, |
|
"logps/rejected": -207.45663452148438, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.5461538434028625, |
|
"rewards/chosen": -0.22500069439411163, |
|
"rewards/margins": 0.003552414011210203, |
|
"rewards/rejected": -0.22855311632156372, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 17.473494481507956, |
|
"learning_rate": 1.6300029195778453e-07, |
|
"logits/chosen": -2.236097812652588, |
|
"logits/rejected": -2.0412774085998535, |
|
"logps/chosen": -213.67514038085938, |
|
"logps/rejected": -206.89111328125, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.5038461685180664, |
|
"rewards/chosen": -0.2356816679239273, |
|
"rewards/margins": 0.003031224012374878, |
|
"rewards/rejected": -0.23871289193630219, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 15.544936822002546, |
|
"learning_rate": 1.1200247470632392e-07, |
|
"logits/chosen": -2.103285789489746, |
|
"logits/rejected": -2.1786677837371826, |
|
"logps/chosen": -224.00047302246094, |
|
"logps/rejected": -220.13726806640625, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.5615384578704834, |
|
"rewards/chosen": -0.3865113854408264, |
|
"rewards/margins": 0.03568296507000923, |
|
"rewards/rejected": -0.42219436168670654, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 17.169881927493602, |
|
"learning_rate": 6.822945986946385e-08, |
|
"logits/chosen": -1.9218517541885376, |
|
"logits/rejected": -2.109549045562744, |
|
"logps/chosen": -220.54318237304688, |
|
"logps/rejected": -231.7896270751953, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.5769230723381042, |
|
"rewards/chosen": -0.4736253619194031, |
|
"rewards/margins": 0.03084597922861576, |
|
"rewards/rejected": -0.5044713020324707, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 17.60589291870986, |
|
"learning_rate": 3.397296523427806e-08, |
|
"logits/chosen": -2.146359920501709, |
|
"logits/rejected": -2.1425552368164062, |
|
"logps/chosen": -221.13165283203125, |
|
"logps/rejected": -225.94419860839844, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.5615384578704834, |
|
"rewards/chosen": -0.4886237382888794, |
|
"rewards/margins": 0.03550608828663826, |
|
"rewards/rejected": -0.5241298675537109, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 18.707751355883822, |
|
"learning_rate": 1.1026475173977978e-08, |
|
"logits/chosen": -2.1278481483459473, |
|
"logits/rejected": -2.0320982933044434, |
|
"logps/chosen": -220.7178192138672, |
|
"logps/rejected": -217.0054931640625, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.5923076868057251, |
|
"rewards/chosen": -0.3798917829990387, |
|
"rewards/margins": 0.05050484091043472, |
|
"rewards/rejected": -0.4303966164588928, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 18.697426009812567, |
|
"learning_rate": 5.913435276374834e-10, |
|
"logits/chosen": -2.186318874359131, |
|
"logits/rejected": -2.1368911266326904, |
|
"logps/chosen": -221.08029174804688, |
|
"logps/rejected": -230.6654052734375, |
|
"loss": 0.6744, |
|
"rewards/accuracies": 0.5961538553237915, |
|
"rewards/chosen": -0.38067081570625305, |
|
"rewards/margins": 0.07161368429660797, |
|
"rewards/rejected": -0.4522845447063446, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"step": 153, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6871888306405809, |
|
"train_runtime": 39835.0539, |
|
"train_samples_per_second": 0.502, |
|
"train_steps_per_second": 0.004 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 153, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|