|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9945, |
|
"eval_steps": 500, |
|
"global_step": 153, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 12.825809580615244, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -2.23366379737854, |
|
"logits/rejected": -2.0032992362976074, |
|
"logps/chosen": -196.23782348632812, |
|
"logps/rejected": -174.6262969970703, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 17.370258695749197, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.343287944793701, |
|
"logits/rejected": -2.2950587272644043, |
|
"logps/chosen": -179.1259002685547, |
|
"logps/rejected": -178.35891723632812, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.4145299196243286, |
|
"rewards/chosen": -0.001734515419229865, |
|
"rewards/margins": 0.001407344127073884, |
|
"rewards/rejected": -0.003141859546303749, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 15.379940853087401, |
|
"learning_rate": 4.989490450759331e-07, |
|
"logits/chosen": -2.3157341480255127, |
|
"logits/rejected": -2.2556896209716797, |
|
"logps/chosen": -182.99658203125, |
|
"logps/rejected": -181.58053588867188, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.4961538314819336, |
|
"rewards/chosen": -0.12090444564819336, |
|
"rewards/margins": 0.01673085428774357, |
|
"rewards/rejected": -0.13763530552387238, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 16.658033176727283, |
|
"learning_rate": 4.872270441827174e-07, |
|
"logits/chosen": -2.240647077560425, |
|
"logits/rejected": -2.139284133911133, |
|
"logps/chosen": -191.006591796875, |
|
"logps/rejected": -186.6674041748047, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4961538314819336, |
|
"rewards/chosen": -0.23701129853725433, |
|
"rewards/margins": -5.6074215535772964e-05, |
|
"rewards/rejected": -0.23695524036884308, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 13.482187340190304, |
|
"learning_rate": 4.6308512113530063e-07, |
|
"logits/chosen": -2.2772746086120605, |
|
"logits/rejected": -2.310314893722534, |
|
"logps/chosen": -174.7858428955078, |
|
"logps/rejected": -184.82554626464844, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.557692289352417, |
|
"rewards/chosen": -0.03568955510854721, |
|
"rewards/margins": 0.0036310250870883465, |
|
"rewards/rejected": -0.03932058438658714, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 17.865521715649642, |
|
"learning_rate": 4.277872161641681e-07, |
|
"logits/chosen": -2.2564620971679688, |
|
"logits/rejected": -2.278386116027832, |
|
"logps/chosen": -192.4917755126953, |
|
"logps/rejected": -193.97947692871094, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.48076921701431274, |
|
"rewards/chosen": -0.042664218693971634, |
|
"rewards/margins": 0.010273917578160763, |
|
"rewards/rejected": -0.05293813720345497, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 22.19136114113986, |
|
"learning_rate": 3.8318133624280046e-07, |
|
"logits/chosen": -2.145270824432373, |
|
"logits/rejected": -2.073192596435547, |
|
"logps/chosen": -224.1717529296875, |
|
"logps/rejected": -225.2917022705078, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5384615659713745, |
|
"rewards/chosen": -0.3118091821670532, |
|
"rewards/margins": 0.0030472425278276205, |
|
"rewards/rejected": -0.3148564398288727, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 15.131244681651243, |
|
"learning_rate": 3.316028034595861e-07, |
|
"logits/chosen": -1.976121187210083, |
|
"logits/rejected": -1.939582347869873, |
|
"logps/chosen": -199.67727661132812, |
|
"logps/rejected": -203.1947021484375, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.5807692408561707, |
|
"rewards/chosen": -0.179255411028862, |
|
"rewards/margins": 0.009526830166578293, |
|
"rewards/rejected": -0.1887822449207306, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 16.3790838983982, |
|
"learning_rate": 2.7575199021178855e-07, |
|
"logits/chosen": -2.0032222270965576, |
|
"logits/rejected": -1.8593822717666626, |
|
"logps/chosen": -202.15196228027344, |
|
"logps/rejected": -215.4062042236328, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.5846154093742371, |
|
"rewards/chosen": -0.20821429789066315, |
|
"rewards/margins": 0.04003766551613808, |
|
"rewards/rejected": -0.24825195968151093, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 20.64519176920963, |
|
"learning_rate": 2.1855294234408068e-07, |
|
"logits/chosen": -1.5563592910766602, |
|
"logits/rejected": -1.8040456771850586, |
|
"logps/chosen": -219.68856811523438, |
|
"logps/rejected": -231.39486694335938, |
|
"loss": 0.6812, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.41656777262687683, |
|
"rewards/margins": 0.060058608651161194, |
|
"rewards/rejected": -0.47662636637687683, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 20.02675570793956, |
|
"learning_rate": 1.6300029195778453e-07, |
|
"logits/chosen": -1.4402265548706055, |
|
"logits/rejected": -1.5615330934524536, |
|
"logps/chosen": -229.6835174560547, |
|
"logps/rejected": -234.88693237304688, |
|
"loss": 0.6791, |
|
"rewards/accuracies": 0.5076923370361328, |
|
"rewards/chosen": -0.511164665222168, |
|
"rewards/margins": 0.028713112697005272, |
|
"rewards/rejected": -0.5398777723312378, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 17.88209150198185, |
|
"learning_rate": 1.1200247470632392e-07, |
|
"logits/chosen": -1.8932424783706665, |
|
"logits/rejected": -1.7129985094070435, |
|
"logps/chosen": -198.45960998535156, |
|
"logps/rejected": -198.94027709960938, |
|
"loss": 0.6797, |
|
"rewards/accuracies": 0.6230769157409668, |
|
"rewards/chosen": -0.1736122965812683, |
|
"rewards/margins": 0.07991237938404083, |
|
"rewards/rejected": -0.25352466106414795, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 18.1137509515123, |
|
"learning_rate": 6.822945986946385e-08, |
|
"logits/chosen": -1.6426665782928467, |
|
"logits/rejected": -1.8510866165161133, |
|
"logps/chosen": -188.033203125, |
|
"logps/rejected": -202.1238250732422, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.5769230723381042, |
|
"rewards/chosen": -0.1463213562965393, |
|
"rewards/margins": 0.04249217361211777, |
|
"rewards/rejected": -0.18881353735923767, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 19.900932973122792, |
|
"learning_rate": 3.397296523427806e-08, |
|
"logits/chosen": -1.7662078142166138, |
|
"logits/rejected": -1.7134820222854614, |
|
"logps/chosen": -197.23963928222656, |
|
"logps/rejected": -202.8521270751953, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.5653846263885498, |
|
"rewards/chosen": -0.2858903706073761, |
|
"rewards/margins": 0.0338900126516819, |
|
"rewards/rejected": -0.3197803497314453, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 19.787769013168933, |
|
"learning_rate": 1.1026475173977978e-08, |
|
"logits/chosen": -1.6619917154312134, |
|
"logits/rejected": -1.7303296327590942, |
|
"logps/chosen": -194.7332305908203, |
|
"logps/rejected": -205.8466796875, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.6230769157409668, |
|
"rewards/chosen": -0.21614637970924377, |
|
"rewards/margins": 0.07540787756443024, |
|
"rewards/rejected": -0.2915542721748352, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 19.04982739845647, |
|
"learning_rate": 5.913435276374834e-10, |
|
"logits/chosen": -1.928228497505188, |
|
"logits/rejected": -1.5072005987167358, |
|
"logps/chosen": -211.20840454101562, |
|
"logps/rejected": -211.61215209960938, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.5884615182876587, |
|
"rewards/chosen": -0.28447794914245605, |
|
"rewards/margins": 0.0762421116232872, |
|
"rewards/rejected": -0.36072006821632385, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"step": 153, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6859961845516379, |
|
"train_runtime": 39900.7608, |
|
"train_samples_per_second": 0.501, |
|
"train_steps_per_second": 0.004 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 153, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|