|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9992122883024814, |
|
"eval_steps": 500, |
|
"global_step": 2538, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.010698237247525, |
|
"learning_rate": 1.968503937007874e-08, |
|
"log_odds_chosen": 0.27912598848342896, |
|
"log_odds_ratio": -0.7284179925918579, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -2.03125, |
|
"logps/rejected": -2.28125, |
|
"loss": 1.5763, |
|
"nll_loss": 1.4375, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.2041015625, |
|
"rewards/margins": 0.023681640625, |
|
"rewards/rejected": -0.2275390625, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 5.135758173057069, |
|
"learning_rate": 3.937007874015748e-08, |
|
"log_odds_chosen": 0.36018067598342896, |
|
"log_odds_ratio": -0.750683605670929, |
|
"logits/chosen": -2.109375, |
|
"logits/rejected": -2.140625, |
|
"logps/chosen": -1.9765625, |
|
"logps/rejected": -2.296875, |
|
"loss": 1.5927, |
|
"nll_loss": 1.5, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.197265625, |
|
"rewards/margins": 0.0322265625, |
|
"rewards/rejected": -0.2294921875, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 6.927967353705024, |
|
"learning_rate": 5.9055118110236216e-08, |
|
"log_odds_chosen": 0.17539063096046448, |
|
"log_odds_ratio": -0.812207043170929, |
|
"logits/chosen": -1.9375, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -2.0625, |
|
"logps/rejected": -2.1875, |
|
"loss": 1.5598, |
|
"nll_loss": 1.5, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.2060546875, |
|
"rewards/margins": 0.013427734375, |
|
"rewards/rejected": -0.2197265625, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 9.514842032339935, |
|
"learning_rate": 7.874015748031496e-08, |
|
"log_odds_chosen": 0.3271545469760895, |
|
"log_odds_ratio": -0.705859363079071, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -1.9921875, |
|
"logps/rejected": -2.296875, |
|
"loss": 1.6148, |
|
"nll_loss": 1.546875, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.19921875, |
|
"rewards/margins": 0.031005859375, |
|
"rewards/rejected": -0.23046875, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.878873460342644, |
|
"learning_rate": 9.84251968503937e-08, |
|
"log_odds_chosen": 0.1710205078125, |
|
"log_odds_ratio": -0.77587890625, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.0, |
|
"logps/chosen": -1.9609375, |
|
"logps/rejected": -2.09375, |
|
"loss": 1.5496, |
|
"nll_loss": 1.484375, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1962890625, |
|
"rewards/margins": 0.0137939453125, |
|
"rewards/rejected": -0.2099609375, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.390918916154321, |
|
"learning_rate": 1.1811023622047243e-07, |
|
"log_odds_chosen": 0.21818237006664276, |
|
"log_odds_ratio": -0.7723633050918579, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -2.0625, |
|
"logps/rejected": -2.25, |
|
"loss": 1.6078, |
|
"nll_loss": 1.5625, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.2060546875, |
|
"rewards/margins": 0.0185546875, |
|
"rewards/rejected": -0.224609375, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 7.289138170054862, |
|
"learning_rate": 1.3779527559055117e-07, |
|
"log_odds_chosen": 0.34990233182907104, |
|
"log_odds_ratio": -0.7339843511581421, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -2.0625, |
|
"logps/rejected": -2.375, |
|
"loss": 1.5634, |
|
"nll_loss": 1.5078125, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.20703125, |
|
"rewards/margins": 0.0311279296875, |
|
"rewards/rejected": -0.23828125, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.612859448964533, |
|
"learning_rate": 1.5748031496062992e-07, |
|
"log_odds_chosen": 0.4515624940395355, |
|
"log_odds_ratio": -0.640917956829071, |
|
"logits/chosen": -1.9609375, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -1.7734375, |
|
"logps/rejected": -2.171875, |
|
"loss": 1.532, |
|
"nll_loss": 1.421875, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.177734375, |
|
"rewards/margins": 0.039794921875, |
|
"rewards/rejected": -0.2177734375, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 8.397857351399276, |
|
"learning_rate": 1.7716535433070863e-07, |
|
"log_odds_chosen": 0.17760619521141052, |
|
"log_odds_ratio": -0.7791992425918579, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -2.0625, |
|
"logps/rejected": -2.21875, |
|
"loss": 1.5648, |
|
"nll_loss": 1.546875, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.2060546875, |
|
"rewards/margins": 0.0166015625, |
|
"rewards/rejected": -0.22265625, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 7.586832559599352, |
|
"learning_rate": 1.968503937007874e-07, |
|
"log_odds_chosen": 0.2771240174770355, |
|
"log_odds_ratio": -0.6792968511581421, |
|
"logits/chosen": -2.140625, |
|
"logits/rejected": -2.171875, |
|
"logps/chosen": -1.890625, |
|
"logps/rejected": -2.125, |
|
"loss": 1.5286, |
|
"nll_loss": 1.484375, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.189453125, |
|
"rewards/margins": 0.02392578125, |
|
"rewards/rejected": -0.212890625, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.07791601254699, |
|
"learning_rate": 2.1653543307086615e-07, |
|
"log_odds_chosen": 0.214080810546875, |
|
"log_odds_ratio": -0.7261718511581421, |
|
"logits/chosen": -2.09375, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -1.890625, |
|
"logps/rejected": -2.0625, |
|
"loss": 1.4891, |
|
"nll_loss": 1.4140625, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.189453125, |
|
"rewards/margins": 0.017333984375, |
|
"rewards/rejected": -0.2060546875, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.712206786453907, |
|
"learning_rate": 2.3622047244094486e-07, |
|
"log_odds_chosen": 0.14680786430835724, |
|
"log_odds_ratio": -0.7562500238418579, |
|
"logits/chosen": -2.078125, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -1.9375, |
|
"logps/rejected": -2.03125, |
|
"loss": 1.4901, |
|
"nll_loss": 1.34375, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.193359375, |
|
"rewards/margins": 0.00994873046875, |
|
"rewards/rejected": -0.2041015625, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 5.959474046553222, |
|
"learning_rate": 2.559055118110236e-07, |
|
"log_odds_chosen": 0.2553772032260895, |
|
"log_odds_ratio": -0.6973632574081421, |
|
"logits/chosen": -2.15625, |
|
"logits/rejected": -2.21875, |
|
"logps/chosen": -1.7890625, |
|
"logps/rejected": -1.9921875, |
|
"loss": 1.497, |
|
"nll_loss": 1.3828125, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.1787109375, |
|
"rewards/margins": 0.0205078125, |
|
"rewards/rejected": -0.19921875, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 5.960730803694658, |
|
"learning_rate": 2.7559055118110235e-07, |
|
"log_odds_chosen": 0.16423340141773224, |
|
"log_odds_ratio": -0.718457043170929, |
|
"logits/chosen": -2.15625, |
|
"logits/rejected": -2.21875, |
|
"logps/chosen": -1.765625, |
|
"logps/rejected": -1.8984375, |
|
"loss": 1.5228, |
|
"nll_loss": 1.453125, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.1767578125, |
|
"rewards/margins": 0.0133056640625, |
|
"rewards/rejected": -0.189453125, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.558212884083645, |
|
"learning_rate": 2.9527559055118104e-07, |
|
"log_odds_chosen": 0.24582520127296448, |
|
"log_odds_ratio": -0.671191394329071, |
|
"logits/chosen": -2.125, |
|
"logits/rejected": -2.1875, |
|
"logps/chosen": -1.546875, |
|
"logps/rejected": -1.7421875, |
|
"loss": 1.4124, |
|
"nll_loss": 1.3125, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.154296875, |
|
"rewards/margins": 0.0191650390625, |
|
"rewards/rejected": -0.173828125, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 3.4831822067708686, |
|
"learning_rate": 3.1496062992125984e-07, |
|
"log_odds_chosen": 0.17824706435203552, |
|
"log_odds_ratio": -0.69384765625, |
|
"logits/chosen": -2.25, |
|
"logits/rejected": -2.28125, |
|
"logps/chosen": -1.671875, |
|
"logps/rejected": -1.8203125, |
|
"loss": 1.4297, |
|
"nll_loss": 1.390625, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.1669921875, |
|
"rewards/margins": 0.0145263671875, |
|
"rewards/rejected": -0.181640625, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.034138231637428, |
|
"learning_rate": 3.346456692913386e-07, |
|
"log_odds_chosen": 0.12167968600988388, |
|
"log_odds_ratio": -0.7186523675918579, |
|
"logits/chosen": -2.1875, |
|
"logits/rejected": -2.296875, |
|
"logps/chosen": -1.5625, |
|
"logps/rejected": -1.6640625, |
|
"loss": 1.3835, |
|
"nll_loss": 1.3203125, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.15625, |
|
"rewards/margins": 0.01007080078125, |
|
"rewards/rejected": -0.166015625, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 4.146506823609489, |
|
"learning_rate": 3.5433070866141727e-07, |
|
"log_odds_chosen": 0.13695068657398224, |
|
"log_odds_ratio": -0.719433605670929, |
|
"logits/chosen": -2.28125, |
|
"logits/rejected": -2.40625, |
|
"logps/chosen": -1.5546875, |
|
"logps/rejected": -1.671875, |
|
"loss": 1.4352, |
|
"nll_loss": 1.3828125, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.1552734375, |
|
"rewards/margins": 0.0115966796875, |
|
"rewards/rejected": -0.1669921875, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.5266117677319087, |
|
"learning_rate": 3.7401574803149606e-07, |
|
"log_odds_chosen": 0.17273560166358948, |
|
"log_odds_ratio": -0.681640625, |
|
"logits/chosen": -2.3125, |
|
"logits/rejected": -2.4375, |
|
"logps/chosen": -1.4765625, |
|
"logps/rejected": -1.625, |
|
"loss": 1.3599, |
|
"nll_loss": 1.3046875, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1474609375, |
|
"rewards/margins": 0.01495361328125, |
|
"rewards/rejected": -0.162109375, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.4327683095111072, |
|
"learning_rate": 3.937007874015748e-07, |
|
"log_odds_chosen": 0.11888428032398224, |
|
"log_odds_ratio": -0.711230456829071, |
|
"logits/chosen": -2.296875, |
|
"logits/rejected": -2.40625, |
|
"logps/chosen": -1.3828125, |
|
"logps/rejected": -1.484375, |
|
"loss": 1.3592, |
|
"nll_loss": 1.28125, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.138671875, |
|
"rewards/margins": 0.0093994140625, |
|
"rewards/rejected": -0.1484375, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.811761812417915, |
|
"learning_rate": 4.133858267716535e-07, |
|
"log_odds_chosen": 0.13620606064796448, |
|
"log_odds_ratio": -0.692089855670929, |
|
"logits/chosen": -2.3125, |
|
"logits/rejected": -2.4375, |
|
"logps/chosen": -1.359375, |
|
"logps/rejected": -1.453125, |
|
"loss": 1.3822, |
|
"nll_loss": 1.2421875, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1357421875, |
|
"rewards/margins": 0.010009765625, |
|
"rewards/rejected": -0.1455078125, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.1783123040584775, |
|
"learning_rate": 4.330708661417323e-07, |
|
"log_odds_chosen": 0.23652343451976776, |
|
"log_odds_ratio": -0.656054675579071, |
|
"logits/chosen": -2.3125, |
|
"logits/rejected": -2.4375, |
|
"logps/chosen": -1.296875, |
|
"logps/rejected": -1.46875, |
|
"loss": 1.3022, |
|
"nll_loss": 1.2265625, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12890625, |
|
"rewards/margins": 0.017578125, |
|
"rewards/rejected": -0.146484375, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.398094645144472, |
|
"learning_rate": 4.52755905511811e-07, |
|
"log_odds_chosen": 0.10042724758386612, |
|
"log_odds_ratio": -0.737500011920929, |
|
"logits/chosen": -2.421875, |
|
"logits/rejected": -2.546875, |
|
"logps/chosen": -1.265625, |
|
"logps/rejected": -1.3359375, |
|
"loss": 1.3118, |
|
"nll_loss": 1.21875, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.126953125, |
|
"rewards/margins": 0.007049560546875, |
|
"rewards/rejected": -0.1337890625, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.0602456337138735, |
|
"learning_rate": 4.7244094488188973e-07, |
|
"log_odds_chosen": 0.05767212063074112, |
|
"log_odds_ratio": -0.7372070550918579, |
|
"logits/chosen": -2.328125, |
|
"logits/rejected": -2.4375, |
|
"logps/chosen": -1.359375, |
|
"logps/rejected": -1.40625, |
|
"loss": 1.3639, |
|
"nll_loss": 1.3359375, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1357421875, |
|
"rewards/margins": 0.0050048828125, |
|
"rewards/rejected": -0.140625, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.1390537608793543, |
|
"learning_rate": 4.921259842519685e-07, |
|
"log_odds_chosen": 0.16054077446460724, |
|
"log_odds_ratio": -0.6732422113418579, |
|
"logits/chosen": -2.34375, |
|
"logits/rejected": -2.40625, |
|
"logps/chosen": -1.2421875, |
|
"logps/rejected": -1.3515625, |
|
"loss": 1.3024, |
|
"nll_loss": 1.234375, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1240234375, |
|
"rewards/margins": 0.01104736328125, |
|
"rewards/rejected": -0.134765625, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.518997318792438, |
|
"learning_rate": 4.999914863146575e-07, |
|
"log_odds_chosen": 0.16718749701976776, |
|
"log_odds_ratio": -0.6884765625, |
|
"logits/chosen": -2.359375, |
|
"logits/rejected": -2.5, |
|
"logps/chosen": -1.234375, |
|
"logps/rejected": -1.3359375, |
|
"loss": 1.3314, |
|
"nll_loss": 1.234375, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12353515625, |
|
"rewards/margins": 0.01019287109375, |
|
"rewards/rejected": -0.1337890625, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.798537540317038, |
|
"learning_rate": 4.999394603374641e-07, |
|
"log_odds_chosen": 0.19011840224266052, |
|
"log_odds_ratio": -0.676562488079071, |
|
"logits/chosen": -2.359375, |
|
"logits/rejected": -2.46875, |
|
"logps/chosen": -1.203125, |
|
"logps/rejected": -1.3515625, |
|
"loss": 1.2872, |
|
"nll_loss": 1.1875, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.12060546875, |
|
"rewards/margins": 0.01422119140625, |
|
"rewards/rejected": -0.134765625, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.8758259093431437, |
|
"learning_rate": 4.99840148039188e-07, |
|
"log_odds_chosen": 0.29682618379592896, |
|
"log_odds_ratio": -0.637890636920929, |
|
"logits/chosen": -2.3125, |
|
"logits/rejected": -2.5, |
|
"logps/chosen": -1.203125, |
|
"logps/rejected": -1.4140625, |
|
"loss": 1.2201, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1201171875, |
|
"rewards/margins": 0.021484375, |
|
"rewards/rejected": -0.1416015625, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.695897803134525, |
|
"learning_rate": 4.996935682088318e-07, |
|
"log_odds_chosen": 0.22941894829273224, |
|
"log_odds_ratio": -0.6490234136581421, |
|
"logits/chosen": -2.359375, |
|
"logits/rejected": -2.46875, |
|
"logps/chosen": -1.21875, |
|
"logps/rejected": -1.375, |
|
"loss": 1.2819, |
|
"nll_loss": 1.1875, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1220703125, |
|
"rewards/margins": 0.015380859375, |
|
"rewards/rejected": -0.1376953125, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 2.8982419601141585, |
|
"learning_rate": 4.994997485779947e-07, |
|
"log_odds_chosen": 0.23259887099266052, |
|
"log_odds_ratio": -0.666796863079071, |
|
"logits/chosen": -2.25, |
|
"logits/rejected": -2.375, |
|
"logps/chosen": -1.1796875, |
|
"logps/rejected": -1.34375, |
|
"loss": 1.2759, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.1181640625, |
|
"rewards/margins": 0.015869140625, |
|
"rewards/rejected": -0.1337890625, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 2.9056748531961585, |
|
"learning_rate": 4.992587258156258e-07, |
|
"log_odds_chosen": 0.17786864936351776, |
|
"log_odds_ratio": -0.6656249761581421, |
|
"logits/chosen": -2.265625, |
|
"logits/rejected": -2.359375, |
|
"logps/chosen": -1.21875, |
|
"logps/rejected": -1.34375, |
|
"loss": 1.2812, |
|
"nll_loss": 1.2109375, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.12158203125, |
|
"rewards/margins": 0.01324462890625, |
|
"rewards/rejected": -0.134765625, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.596709437423632, |
|
"learning_rate": 4.989705455210862e-07, |
|
"log_odds_chosen": 0.21816405653953552, |
|
"log_odds_ratio": -0.662890613079071, |
|
"logits/chosen": -2.3125, |
|
"logits/rejected": -2.421875, |
|
"logps/chosen": -1.1875, |
|
"logps/rejected": -1.3515625, |
|
"loss": 1.2184, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.11865234375, |
|
"rewards/margins": 0.016357421875, |
|
"rewards/rejected": -0.134765625, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.476759669633908, |
|
"learning_rate": 4.986352622155222e-07, |
|
"log_odds_chosen": 0.17100830376148224, |
|
"log_odds_ratio": -0.6903320550918579, |
|
"logits/chosen": -2.359375, |
|
"logits/rejected": -2.46875, |
|
"logps/chosen": -1.1953125, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.2865, |
|
"nll_loss": 1.21875, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.119140625, |
|
"rewards/margins": 0.01129150390625, |
|
"rewards/rejected": -0.130859375, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.628189970289334, |
|
"learning_rate": 4.98252939331551e-07, |
|
"log_odds_chosen": 0.17416992783546448, |
|
"log_odds_ratio": -0.697070300579071, |
|
"logits/chosen": -2.328125, |
|
"logits/rejected": -2.4375, |
|
"logps/chosen": -1.21875, |
|
"logps/rejected": -1.34375, |
|
"loss": 1.2797, |
|
"nll_loss": 1.2109375, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.12158203125, |
|
"rewards/margins": 0.01318359375, |
|
"rewards/rejected": -0.134765625, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.6295659881126943, |
|
"learning_rate": 4.978236492012589e-07, |
|
"log_odds_chosen": 0.02346191368997097, |
|
"log_odds_ratio": -0.755078136920929, |
|
"logits/chosen": -2.421875, |
|
"logits/rejected": -2.421875, |
|
"logps/chosen": -1.2578125, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.3077, |
|
"nll_loss": 1.25, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.1259765625, |
|
"rewards/margins": 0.00180816650390625, |
|
"rewards/rejected": -0.126953125, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.959773704803729, |
|
"learning_rate": 4.973474730425173e-07, |
|
"log_odds_chosen": 0.19099120795726776, |
|
"log_odds_ratio": -0.6749023199081421, |
|
"logits/chosen": -2.25, |
|
"logits/rejected": -2.359375, |
|
"logps/chosen": -1.1875, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.2568, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.11865234375, |
|
"rewards/margins": 0.0125732421875, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.613182586833654, |
|
"learning_rate": 4.968245009436167e-07, |
|
"log_odds_chosen": 0.10064697265625, |
|
"log_odds_ratio": -0.72216796875, |
|
"logits/chosen": -2.375, |
|
"logits/rejected": -2.4375, |
|
"logps/chosen": -1.28125, |
|
"logps/rejected": -1.3671875, |
|
"loss": 1.2944, |
|
"nll_loss": 1.2890625, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.1279296875, |
|
"rewards/margins": 0.00860595703125, |
|
"rewards/rejected": -0.13671875, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.8683189618015126, |
|
"learning_rate": 4.962548318462231e-07, |
|
"log_odds_chosen": 0.19755859673023224, |
|
"log_odds_ratio": -0.6724609136581421, |
|
"logits/chosen": -2.296875, |
|
"logits/rejected": -2.359375, |
|
"logps/chosen": -1.171875, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.2778, |
|
"nll_loss": 1.2265625, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11669921875, |
|
"rewards/margins": 0.01373291015625, |
|
"rewards/rejected": -0.130859375, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.9807597681868305, |
|
"learning_rate": 4.95638573526659e-07, |
|
"log_odds_chosen": 0.14707031846046448, |
|
"log_odds_ratio": -0.7005859613418579, |
|
"logits/chosen": -2.28125, |
|
"logits/rejected": -2.453125, |
|
"logps/chosen": -1.171875, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.2844, |
|
"nll_loss": 1.203125, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.11767578125, |
|
"rewards/margins": 0.0098876953125, |
|
"rewards/rejected": -0.126953125, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.655124275329291, |
|
"learning_rate": 4.949758425755127e-07, |
|
"log_odds_chosen": 0.10791015625, |
|
"log_odds_ratio": -0.7230468988418579, |
|
"logits/chosen": -2.1875, |
|
"logits/rejected": -2.28125, |
|
"logps/chosen": -1.203125, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.2902, |
|
"nll_loss": 1.1875, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.1201171875, |
|
"rewards/margins": 0.00762939453125, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.4371959032830293, |
|
"learning_rate": 4.94266764375581e-07, |
|
"log_odds_chosen": 0.20887450873851776, |
|
"log_odds_ratio": -0.6651366949081421, |
|
"logits/chosen": -2.21875, |
|
"logits/rejected": -2.34375, |
|
"logps/chosen": -1.1328125, |
|
"logps/rejected": -1.2890625, |
|
"loss": 1.2674, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.11376953125, |
|
"rewards/margins": 0.01544189453125, |
|
"rewards/rejected": -0.12890625, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.729848906556158, |
|
"learning_rate": 4.935114730781475e-07, |
|
"log_odds_chosen": 0.27691650390625, |
|
"log_odds_ratio": -0.6527343988418579, |
|
"logits/chosen": -2.140625, |
|
"logits/rejected": -2.296875, |
|
"logps/chosen": -1.1953125, |
|
"logps/rejected": -1.3828125, |
|
"loss": 1.2544, |
|
"nll_loss": 1.1875, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.11962890625, |
|
"rewards/margins": 0.018798828125, |
|
"rewards/rejected": -0.138671875, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.50393555238819, |
|
"learning_rate": 4.927101115776026e-07, |
|
"log_odds_chosen": 0.14921875298023224, |
|
"log_odds_ratio": -0.702343761920929, |
|
"logits/chosen": -2.28125, |
|
"logits/rejected": -2.34375, |
|
"logps/chosen": -1.203125, |
|
"logps/rejected": -1.3203125, |
|
"loss": 1.2471, |
|
"nll_loss": 1.2265625, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.12060546875, |
|
"rewards/margins": 0.01129150390625, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.7483117165130744, |
|
"learning_rate": 4.918628314844088e-07, |
|
"log_odds_chosen": 0.04735717922449112, |
|
"log_odds_ratio": -0.749218761920929, |
|
"logits/chosen": -2.265625, |
|
"logits/rejected": -2.359375, |
|
"logps/chosen": -1.2109375, |
|
"logps/rejected": -1.25, |
|
"loss": 1.2351, |
|
"nll_loss": 1.21875, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.12060546875, |
|
"rewards/margins": 0.0040283203125, |
|
"rewards/rejected": -0.12451171875, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.9881422727710887, |
|
"learning_rate": 4.909697930964179e-07, |
|
"log_odds_chosen": 0.16976317763328552, |
|
"log_odds_ratio": -0.6986328363418579, |
|
"logits/chosen": -2.25, |
|
"logits/rejected": -2.40625, |
|
"logps/chosen": -1.1953125, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.2467, |
|
"nll_loss": 1.2421875, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.11962890625, |
|
"rewards/margins": 0.01153564453125, |
|
"rewards/rejected": -0.130859375, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.6175879243996363, |
|
"learning_rate": 4.900311653685437e-07, |
|
"log_odds_chosen": 0.20297852158546448, |
|
"log_odds_ratio": -0.669140636920929, |
|
"logits/chosen": -2.296875, |
|
"logits/rejected": -2.359375, |
|
"logps/chosen": -1.1640625, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.2102, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1162109375, |
|
"rewards/margins": 0.01513671875, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.2300783745527317, |
|
"learning_rate": 4.890471258807968e-07, |
|
"log_odds_chosen": 0.19609375298023224, |
|
"log_odds_ratio": -0.6773437261581421, |
|
"logits/chosen": -2.1875, |
|
"logits/rejected": -2.265625, |
|
"logps/chosen": -1.1875, |
|
"logps/rejected": -1.296875, |
|
"loss": 1.2387, |
|
"nll_loss": 1.203125, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.11865234375, |
|
"rewards/margins": 0.01116943359375, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.9471805049826094, |
|
"learning_rate": 4.880178608046894e-07, |
|
"log_odds_chosen": 0.14970703423023224, |
|
"log_odds_ratio": -0.7040039300918579, |
|
"logits/chosen": -2.171875, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.25, |
|
"loss": 1.2675, |
|
"nll_loss": 1.2109375, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1142578125, |
|
"rewards/margins": 0.0108642578125, |
|
"rewards/rejected": -0.125, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.7162956655728623, |
|
"learning_rate": 4.869435648680116e-07, |
|
"log_odds_chosen": 0.15129394829273224, |
|
"log_odds_ratio": -0.6802734136581421, |
|
"logits/chosen": -2.1875, |
|
"logits/rejected": -2.28125, |
|
"logps/chosen": -1.1171875, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.1889, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11181640625, |
|
"rewards/margins": 0.01043701171875, |
|
"rewards/rejected": -0.12255859375, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 4.459550453771863, |
|
"learning_rate": 4.858244413179923e-07, |
|
"log_odds_chosen": 0.2993102967739105, |
|
"log_odds_ratio": -0.64111328125, |
|
"logits/chosen": -2.171875, |
|
"logits/rejected": -2.25, |
|
"logps/chosen": -1.1171875, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.2095, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.111328125, |
|
"rewards/margins": 0.0198974609375, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.2619422051015836, |
|
"learning_rate": 4.846607018828449e-07, |
|
"log_odds_chosen": 0.210205078125, |
|
"log_odds_ratio": -0.673632800579071, |
|
"logits/chosen": -2.21875, |
|
"logits/rejected": -2.421875, |
|
"logps/chosen": -1.1875, |
|
"logps/rejected": -1.3203125, |
|
"loss": 1.2653, |
|
"nll_loss": 1.21875, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.119140625, |
|
"rewards/margins": 0.013427734375, |
|
"rewards/rejected": -0.1328125, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.7715133106941576, |
|
"learning_rate": 4.834525667317121e-07, |
|
"log_odds_chosen": 0.22309570014476776, |
|
"log_odds_ratio": -0.66357421875, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.234375, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.2614, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.115234375, |
|
"rewards/margins": 0.0150146484375, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.4609772540284593, |
|
"learning_rate": 4.822002644330101e-07, |
|
"log_odds_chosen": 0.19017334282398224, |
|
"log_odds_ratio": -0.711718738079071, |
|
"logits/chosen": -2.171875, |
|
"logits/rejected": -2.296875, |
|
"logps/chosen": -1.21875, |
|
"logps/rejected": -1.3515625, |
|
"loss": 1.2548, |
|
"nll_loss": 1.2421875, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.12158203125, |
|
"rewards/margins": 0.0137939453125, |
|
"rewards/rejected": -0.1357421875, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.645873831673924, |
|
"learning_rate": 4.809040319111865e-07, |
|
"log_odds_chosen": 0.12646484375, |
|
"log_odds_ratio": -0.713671863079071, |
|
"logits/chosen": -2.09375, |
|
"logits/rejected": -2.25, |
|
"logps/chosen": -1.1484375, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.2402, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.11474609375, |
|
"rewards/margins": 0.0096435546875, |
|
"rewards/rejected": -0.12451171875, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.45398616162251, |
|
"learning_rate": 4.795641144018965e-07, |
|
"log_odds_chosen": 0.09213867038488388, |
|
"log_odds_ratio": -0.74267578125, |
|
"logits/chosen": -2.203125, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -1.2109375, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.2755, |
|
"nll_loss": 1.25, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12109375, |
|
"rewards/margins": 0.006805419921875, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.5512768402172683, |
|
"learning_rate": 4.781807654056053e-07, |
|
"log_odds_chosen": 0.214599609375, |
|
"log_odds_ratio": -0.692675769329071, |
|
"logits/chosen": -2.109375, |
|
"logits/rejected": -2.21875, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.2890625, |
|
"loss": 1.2303, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11279296875, |
|
"rewards/margins": 0.0159912109375, |
|
"rewards/rejected": -0.12890625, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.411346604585139, |
|
"learning_rate": 4.7675424663962933e-07, |
|
"log_odds_chosen": 0.165435791015625, |
|
"log_odds_ratio": -0.700390636920929, |
|
"logits/chosen": -2.140625, |
|
"logits/rejected": -2.21875, |
|
"logps/chosen": -1.2109375, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.2571, |
|
"nll_loss": 1.234375, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.12109375, |
|
"rewards/margins": 0.010498046875, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.48077566767054, |
|
"learning_rate": 4.752848279886212e-07, |
|
"log_odds_chosen": 0.207489013671875, |
|
"log_odds_ratio": -0.66748046875, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.234375, |
|
"logps/chosen": -1.1015625, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.1978, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.10986328125, |
|
"rewards/margins": 0.0145263671875, |
|
"rewards/rejected": -0.12451171875, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.7897141919738786, |
|
"learning_rate": 4.7377278745350984e-07, |
|
"log_odds_chosen": 0.2662353515625, |
|
"log_odds_ratio": -0.663867175579071, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -1.046875, |
|
"logps/rejected": -1.1875, |
|
"loss": 1.2309, |
|
"nll_loss": 1.0859375, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1044921875, |
|
"rewards/margins": 0.0147705078125, |
|
"rewards/rejected": -0.119140625, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.758617809500896, |
|
"learning_rate": 4.7221841109890506e-07, |
|
"log_odds_chosen": 0.22445067763328552, |
|
"log_odds_ratio": -0.695117175579071, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.140625, |
|
"logps/chosen": -1.1015625, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.2281, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.10986328125, |
|
"rewards/margins": 0.0159912109375, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.6514784587831204, |
|
"learning_rate": 4.706219929989771e-07, |
|
"log_odds_chosen": 0.2147216796875, |
|
"log_odds_ratio": -0.660937488079071, |
|
"logits/chosen": -2.078125, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.2094, |
|
"nll_loss": 1.0859375, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.10986328125, |
|
"rewards/margins": 0.0140380859375, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.630207221232529, |
|
"learning_rate": 4.6898383518182007e-07, |
|
"log_odds_chosen": 0.19202271103858948, |
|
"log_odds_ratio": -0.6786133050918579, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.2307, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.1083984375, |
|
"rewards/margins": 0.014404296875, |
|
"rewards/rejected": -0.12255859375, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.9225816829730427, |
|
"learning_rate": 4.67304247572311e-07, |
|
"log_odds_chosen": 0.2799316346645355, |
|
"log_odds_ratio": -0.659960925579071, |
|
"logits/chosen": -2.09375, |
|
"logits/rejected": -2.1875, |
|
"logps/chosen": -1.1171875, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.2257, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.11181640625, |
|
"rewards/margins": 0.018310546875, |
|
"rewards/rejected": -0.130859375, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.331606515139032, |
|
"learning_rate": 4.65583547933475e-07, |
|
"log_odds_chosen": 0.16041259467601776, |
|
"log_odds_ratio": -0.713085949420929, |
|
"logits/chosen": -2.0, |
|
"logits/rejected": -2.140625, |
|
"logps/chosen": -1.1171875, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.21, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.11181640625, |
|
"rewards/margins": 0.01031494140625, |
|
"rewards/rejected": -0.1220703125, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.541498557632385, |
|
"learning_rate": 4.6382206180636705e-07, |
|
"log_odds_chosen": 0.12631836533546448, |
|
"log_odds_ratio": -0.7256835699081421, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.1875, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.25, |
|
"loss": 1.2675, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.1142578125, |
|
"rewards/margins": 0.01019287109375, |
|
"rewards/rejected": -0.12451171875, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.2675127960880586, |
|
"learning_rate": 4.620201224484827e-07, |
|
"log_odds_chosen": 0.2113037109375, |
|
"log_odds_ratio": -0.658496081829071, |
|
"logits/chosen": -2.0, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.209, |
|
"nll_loss": 1.0703125, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.107421875, |
|
"rewards/margins": 0.01470947265625, |
|
"rewards/rejected": -0.12255859375, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.5735852092457248, |
|
"learning_rate": 4.601780707707087e-07, |
|
"log_odds_chosen": 0.25184327363967896, |
|
"log_odds_ratio": -0.679394543170929, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.109375, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.1888, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.11083984375, |
|
"rewards/margins": 0.017333984375, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.678233631526468, |
|
"learning_rate": 4.5829625527282554e-07, |
|
"log_odds_chosen": 0.15609130263328552, |
|
"log_odds_ratio": -0.700488269329071, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.2431, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.11279296875, |
|
"rewards/margins": 0.01080322265625, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.423777152319806, |
|
"learning_rate": 4.5637503197757474e-07, |
|
"log_odds_chosen": 0.089111328125, |
|
"log_odds_ratio": -0.746874988079071, |
|
"logits/chosen": -1.890625, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.2109375, |
|
"loss": 1.1964, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.11376953125, |
|
"rewards/margins": 0.007568359375, |
|
"rewards/rejected": -0.12158203125, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.0765189053391633, |
|
"learning_rate": 4.5441476436330204e-07, |
|
"log_odds_chosen": 0.27679443359375, |
|
"log_odds_ratio": -0.677929699420929, |
|
"logits/chosen": -2.078125, |
|
"logits/rejected": -2.21875, |
|
"logps/chosen": -1.109375, |
|
"logps/rejected": -1.296875, |
|
"loss": 1.2492, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.111328125, |
|
"rewards/margins": 0.018310546875, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.6130205345904334, |
|
"learning_rate": 4.5241582329519105e-07, |
|
"log_odds_chosen": 0.150299072265625, |
|
"log_odds_ratio": -0.7164062261581421, |
|
"logits/chosen": -1.984375, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.2128, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.11279296875, |
|
"rewards/margins": 0.0115966796875, |
|
"rewards/rejected": -0.12451171875, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.710305930916119, |
|
"learning_rate": 4.503785869550984e-07, |
|
"log_odds_chosen": 0.17982177436351776, |
|
"log_odds_ratio": -0.708300769329071, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.1796875, |
|
"logps/rejected": -1.3203125, |
|
"loss": 1.2557, |
|
"nll_loss": 1.2109375, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.1181640625, |
|
"rewards/margins": 0.01446533203125, |
|
"rewards/rejected": -0.1328125, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.784125203819912, |
|
"learning_rate": 4.4830344077000535e-07, |
|
"log_odds_chosen": 0.17173461616039276, |
|
"log_odds_ratio": -0.70556640625, |
|
"logits/chosen": -1.921875, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.1328125, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.2264, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11376953125, |
|
"rewards/margins": 0.012451171875, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.969932216303278, |
|
"learning_rate": 4.461907773390984e-07, |
|
"log_odds_chosen": 0.24876098334789276, |
|
"log_odds_ratio": -0.671191394329071, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.171875, |
|
"logps/chosen": -1.1171875, |
|
"logps/rejected": -1.2890625, |
|
"loss": 1.2521, |
|
"nll_loss": 1.203125, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.11181640625, |
|
"rewards/margins": 0.0169677734375, |
|
"rewards/rejected": -0.12890625, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 2.3389098001594553, |
|
"learning_rate": 4.4404099635949297e-07, |
|
"log_odds_chosen": 0.20144042372703552, |
|
"log_odds_ratio": -0.679394543170929, |
|
"logits/chosen": -1.984375, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.1809, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.012939453125, |
|
"rewards/rejected": -0.12255859375, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 3.0086699300119872, |
|
"learning_rate": 4.418545045506144e-07, |
|
"log_odds_chosen": 0.10061035305261612, |
|
"log_odds_ratio": -0.7476562261581421, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.140625, |
|
"loss": 1.239, |
|
"nll_loss": 1.1015625, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.10693359375, |
|
"rewards/margins": 0.007415771484375, |
|
"rewards/rejected": -0.1142578125, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 2.2320233236738143, |
|
"learning_rate": 4.3963171557725004e-07, |
|
"log_odds_chosen": 0.0516357421875, |
|
"log_odds_ratio": -0.776171863079071, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.1796875, |
|
"loss": 1.2253, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.1123046875, |
|
"rewards/margins": 0.00567626953125, |
|
"rewards/rejected": -0.1181640625, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 2.5006901009181877, |
|
"learning_rate": 4.3737304997128765e-07, |
|
"log_odds_chosen": 0.2787841856479645, |
|
"log_odds_ratio": -0.668652355670929, |
|
"logits/chosen": -1.96875, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.2413, |
|
"nll_loss": 1.203125, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.11279296875, |
|
"rewards/margins": 0.0184326171875, |
|
"rewards/rejected": -0.130859375, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 2.7387563755483, |
|
"learning_rate": 4.350789350521548e-07, |
|
"log_odds_chosen": 0.19570311903953552, |
|
"log_odds_ratio": -0.71337890625, |
|
"logits/chosen": -1.9921875, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.2163, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.10888671875, |
|
"rewards/margins": 0.014892578125, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 2.898794879634056, |
|
"learning_rate": 4.32749804845973e-07, |
|
"log_odds_chosen": 0.16457518935203552, |
|
"log_odds_ratio": -0.7186523675918579, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -1.1484375, |
|
"logps/rejected": -1.25, |
|
"loss": 1.2604, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.115234375, |
|
"rewards/margins": 0.0098876953125, |
|
"rewards/rejected": -0.125, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 2.5762145094524973, |
|
"learning_rate": 4.303861000034449e-07, |
|
"log_odds_chosen": 0.17528076469898224, |
|
"log_odds_ratio": -0.6820312738418579, |
|
"logits/chosen": -1.953125, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.1875, |
|
"loss": 1.1942, |
|
"nll_loss": 1.0625, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.107421875, |
|
"rewards/margins": 0.01123046875, |
|
"rewards/rejected": -0.1181640625, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.865127283376686, |
|
"learning_rate": 4.2798826771648635e-07, |
|
"log_odds_chosen": 0.24028930068016052, |
|
"log_odds_ratio": -0.6766601800918579, |
|
"logits/chosen": -1.9375, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.296875, |
|
"loss": 1.2299, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1123046875, |
|
"rewards/margins": 0.017333984375, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.446066437808379, |
|
"learning_rate": 4.2555676163362205e-07, |
|
"log_odds_chosen": 0.21907348930835724, |
|
"log_odds_ratio": -0.6822265386581421, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.140625, |
|
"logps/chosen": -1.1640625, |
|
"logps/rejected": -1.296875, |
|
"loss": 1.2354, |
|
"nll_loss": 1.21875, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.1162109375, |
|
"rewards/margins": 0.0133056640625, |
|
"rewards/rejected": -0.12890625, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 2.372183587847385, |
|
"learning_rate": 4.230920417741589e-07, |
|
"log_odds_chosen": 0.27910155057907104, |
|
"log_odds_ratio": -0.666308581829071, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.2168, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.10693359375, |
|
"rewards/margins": 0.020263671875, |
|
"rewards/rejected": -0.126953125, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 2.9436571486068623, |
|
"learning_rate": 4.205945744411551e-07, |
|
"log_odds_chosen": 0.12534180283546448, |
|
"log_odds_ratio": -0.73974609375, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.1640625, |
|
"loss": 1.2815, |
|
"nll_loss": 1.203125, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.107421875, |
|
"rewards/margins": 0.00909423828125, |
|
"rewards/rejected": -0.11669921875, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.8833427572143133, |
|
"learning_rate": 4.1806483213319877e-07, |
|
"log_odds_chosen": 0.23845215141773224, |
|
"log_odds_ratio": -0.6595703363418579, |
|
"logits/chosen": -1.859375, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -1.1015625, |
|
"logps/rejected": -1.265625, |
|
"loss": 1.2544, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1103515625, |
|
"rewards/margins": 0.0167236328125, |
|
"rewards/rejected": -0.126953125, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 2.715581746962796, |
|
"learning_rate": 4.155032934550165e-07, |
|
"log_odds_chosen": 0.16794434189796448, |
|
"log_odds_ratio": -0.6874023675918579, |
|
"logits/chosen": -1.9296875, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.1640625, |
|
"loss": 1.1984, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.107421875, |
|
"rewards/margins": 0.00909423828125, |
|
"rewards/rejected": -0.11669921875, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 2.784822109898019, |
|
"learning_rate": 4.129104430269248e-07, |
|
"log_odds_chosen": 0.15845946967601776, |
|
"log_odds_ratio": -0.693359375, |
|
"logits/chosen": -1.9453125, |
|
"logits/rejected": -2.09375, |
|
"logps/chosen": -1.109375, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.2345, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.11083984375, |
|
"rewards/margins": 0.0106201171875, |
|
"rewards/rejected": -0.12158203125, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.5161836223908263, |
|
"learning_rate": 4.102867713931448e-07, |
|
"log_odds_chosen": 0.16597899794578552, |
|
"log_odds_ratio": -0.6788085699081421, |
|
"logits/chosen": -1.9765625, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.1953125, |
|
"loss": 1.2663, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.10986328125, |
|
"rewards/margins": 0.0096435546875, |
|
"rewards/rejected": -0.119140625, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 2.644860041118969, |
|
"learning_rate": 4.0763277492899504e-07, |
|
"log_odds_chosen": 0.23768310248851776, |
|
"log_odds_ratio": -0.6807616949081421, |
|
"logits/chosen": -1.984375, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.1328125, |
|
"logps/rejected": -1.2890625, |
|
"loss": 1.2307, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11328125, |
|
"rewards/margins": 0.0157470703125, |
|
"rewards/rejected": -0.12890625, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 2.7157553266494503, |
|
"learning_rate": 4.049489557469824e-07, |
|
"log_odds_chosen": 0.15152588486671448, |
|
"log_odds_ratio": -0.7015625238418579, |
|
"logits/chosen": -1.7890625, |
|
"logits/rejected": -1.96875, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.1640625, |
|
"loss": 1.168, |
|
"nll_loss": 1.09375, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.10595703125, |
|
"rewards/margins": 0.01025390625, |
|
"rewards/rejected": -0.1162109375, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 2.302289432995534, |
|
"learning_rate": 4.0223582160180623e-07, |
|
"log_odds_chosen": 0.13297119736671448, |
|
"log_odds_ratio": -0.725781261920929, |
|
"logits/chosen": -1.9375, |
|
"logits/rejected": -2.0, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.1737, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.1123046875, |
|
"rewards/margins": 0.00994873046875, |
|
"rewards/rejected": -0.1220703125, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 2.6556730641084543, |
|
"learning_rate": 3.9949388579429614e-07, |
|
"log_odds_chosen": 0.00870361365377903, |
|
"log_odds_ratio": -0.7632812261581421, |
|
"logits/chosen": -1.8828125, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -1.1015625, |
|
"logps/rejected": -1.1015625, |
|
"loss": 1.2113, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.10986328125, |
|
"rewards/margins": -0.0003070831298828125, |
|
"rewards/rejected": -0.10986328125, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 2.3947626659116406, |
|
"learning_rate": 3.967236670742998e-07, |
|
"log_odds_chosen": 0.22456054389476776, |
|
"log_odds_ratio": -0.681347668170929, |
|
"logits/chosen": -2.0, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.1796875, |
|
"logps/rejected": -1.3515625, |
|
"loss": 1.2084, |
|
"nll_loss": 1.203125, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.11767578125, |
|
"rewards/margins": 0.0174560546875, |
|
"rewards/rejected": -0.134765625, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 2.7830252945871896, |
|
"learning_rate": 3.9392568954254023e-07, |
|
"log_odds_chosen": 0.2349853515625, |
|
"log_odds_ratio": -0.6742187738418579, |
|
"logits/chosen": -1.90625, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.2185, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.11376953125, |
|
"rewards/margins": 0.01708984375, |
|
"rewards/rejected": -0.130859375, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 2.4217959208998723, |
|
"learning_rate": 3.9110048255146043e-07, |
|
"log_odds_chosen": 0.16409912705421448, |
|
"log_odds_ratio": -0.693164050579071, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -1.109375, |
|
"logps/rejected": -1.2109375, |
|
"loss": 1.2102, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.1103515625, |
|
"rewards/margins": 0.0107421875, |
|
"rewards/rejected": -0.12158203125, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 2.7469080448706706, |
|
"learning_rate": 3.882485806050748e-07, |
|
"log_odds_chosen": 0.31447142362594604, |
|
"log_odds_ratio": -0.639355480670929, |
|
"logits/chosen": -1.9609375, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.2235, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.10595703125, |
|
"rewards/margins": 0.0194091796875, |
|
"rewards/rejected": -0.125, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 2.596832510754079, |
|
"learning_rate": 3.8537052325784573e-07, |
|
"log_odds_chosen": 0.2929016053676605, |
|
"log_odds_ratio": -0.650585949420929, |
|
"logits/chosen": -1.96875, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -1.1015625, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.1857, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1103515625, |
|
"rewards/margins": 0.0205078125, |
|
"rewards/rejected": -0.130859375, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 2.7575190212441383, |
|
"learning_rate": 3.824668550126046e-07, |
|
"log_odds_chosen": 0.19545897841453552, |
|
"log_odds_ratio": -0.6885741949081421, |
|
"logits/chosen": -1.96875, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.1953125, |
|
"loss": 1.1889, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.10693359375, |
|
"rewards/margins": 0.0123291015625, |
|
"rewards/rejected": -0.119140625, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 3.592994219979355, |
|
"learning_rate": 3.7953812521753643e-07, |
|
"log_odds_chosen": 0.16755370795726776, |
|
"log_odds_ratio": -0.6943359375, |
|
"logits/chosen": -1.921875, |
|
"logits/rejected": -2.0, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.171875, |
|
"loss": 1.1494, |
|
"nll_loss": 1.1171875, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.107421875, |
|
"rewards/margins": 0.0098876953125, |
|
"rewards/rejected": -0.1171875, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.5202564213089405, |
|
"learning_rate": 3.7658488796224885e-07, |
|
"log_odds_chosen": 0.11643066257238388, |
|
"log_odds_ratio": -0.716796875, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -1.1484375, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.2212, |
|
"nll_loss": 1.203125, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.11474609375, |
|
"rewards/margins": 0.00860595703125, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.724799909308137, |
|
"learning_rate": 3.736077019729425e-07, |
|
"log_odds_chosen": 0.302978515625, |
|
"log_odds_ratio": -0.6385742425918579, |
|
"logits/chosen": -1.9296875, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -1.0390625, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.1893, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.10400390625, |
|
"rewards/margins": 0.0205078125, |
|
"rewards/rejected": -0.12451171875, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 2.4835614341515053, |
|
"learning_rate": 3.7060713050670546e-07, |
|
"log_odds_chosen": 0.2666015625, |
|
"log_odds_ratio": -0.6908203363418579, |
|
"logits/chosen": -1.875, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -1.1171875, |
|
"logps/rejected": -1.328125, |
|
"loss": 1.2376, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1123046875, |
|
"rewards/margins": 0.020751953125, |
|
"rewards/rejected": -0.1328125, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 2.709722079150454, |
|
"learning_rate": 3.6758374124494973e-07, |
|
"log_odds_chosen": 0.185791015625, |
|
"log_odds_ratio": -0.6966797113418579, |
|
"logits/chosen": -1.8984375, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.2109375, |
|
"loss": 1.2082, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.1064453125, |
|
"rewards/margins": 0.01458740234375, |
|
"rewards/rejected": -0.12109375, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 2.8331342756102167, |
|
"learning_rate": 3.645381061860113e-07, |
|
"log_odds_chosen": 0.3631835877895355, |
|
"log_odds_ratio": -0.6460937261581421, |
|
"logits/chosen": -1.921875, |
|
"logits/rejected": -1.9765625, |
|
"logps/chosen": -1.0078125, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.1933, |
|
"nll_loss": 1.1015625, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1005859375, |
|
"rewards/margins": 0.025146484375, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 2.6269186805524143, |
|
"learning_rate": 3.61470801536933e-07, |
|
"log_odds_chosen": 0.12788085639476776, |
|
"log_odds_ratio": -0.7337890863418579, |
|
"logits/chosen": -2.0, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.0390625, |
|
"logps/rejected": -1.1328125, |
|
"loss": 1.2153, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.10400390625, |
|
"rewards/margins": 0.00909423828125, |
|
"rewards/rejected": -0.11279296875, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 2.60712425422802, |
|
"learning_rate": 3.583824076044508e-07, |
|
"log_odds_chosen": 0.08272705227136612, |
|
"log_odds_ratio": -0.7518554925918579, |
|
"logits/chosen": -1.890625, |
|
"logits/rejected": -1.984375, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.2114, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.1142578125, |
|
"rewards/margins": 0.0078125, |
|
"rewards/rejected": -0.12255859375, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 2.742344457324174, |
|
"learning_rate": 3.55273508685206e-07, |
|
"log_odds_chosen": 0.11997070163488388, |
|
"log_odds_ratio": -0.7144531011581421, |
|
"logits/chosen": -1.8828125, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.2194, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.11376953125, |
|
"rewards/margins": 0.0084228515625, |
|
"rewards/rejected": -0.1220703125, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 2.677923442608537, |
|
"learning_rate": 3.5214469295520033e-07, |
|
"log_odds_chosen": 0.2944091856479645, |
|
"log_odds_ratio": -0.6474609375, |
|
"logits/chosen": -1.953125, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.296875, |
|
"loss": 1.1926, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.0206298828125, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 2.614103984779814, |
|
"learning_rate": 3.4899655235851903e-07, |
|
"log_odds_chosen": 0.15128174424171448, |
|
"log_odds_ratio": -0.692187488079071, |
|
"logits/chosen": -1.875, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.2353, |
|
"nll_loss": 1.2265625, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.11376953125, |
|
"rewards/margins": 0.01055908203125, |
|
"rewards/rejected": -0.12451171875, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 2.661524044558228, |
|
"learning_rate": 3.458296824953403e-07, |
|
"log_odds_chosen": 0.19251708686351776, |
|
"log_odds_ratio": -0.681445300579071, |
|
"logits/chosen": -1.8125, |
|
"logits/rejected": -1.9609375, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.2002, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1123046875, |
|
"rewards/margins": 0.01312255859375, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 2.479788982713935, |
|
"learning_rate": 3.426446825092525e-07, |
|
"log_odds_chosen": 0.30213624238967896, |
|
"log_odds_ratio": -0.6465820074081421, |
|
"logits/chosen": -1.875, |
|
"logits/rejected": -1.9765625, |
|
"logps/chosen": -1.0546875, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.2165, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.10546875, |
|
"rewards/margins": 0.021728515625, |
|
"rewards/rejected": -0.126953125, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 3.266264486839817, |
|
"learning_rate": 3.3944215497390197e-07, |
|
"log_odds_chosen": 0.12014160305261612, |
|
"log_odds_ratio": -0.707812488079071, |
|
"logits/chosen": -1.875, |
|
"logits/rejected": -1.9375, |
|
"logps/chosen": -1.1015625, |
|
"logps/rejected": -1.1875, |
|
"loss": 1.2284, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.10986328125, |
|
"rewards/margins": 0.00848388671875, |
|
"rewards/rejected": -0.1181640625, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 2.397641031210895, |
|
"learning_rate": 3.362227057789915e-07, |
|
"log_odds_chosen": 0.3463378846645355, |
|
"log_odds_ratio": -0.619335949420929, |
|
"logits/chosen": -1.890625, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.2890625, |
|
"loss": 1.1821, |
|
"nll_loss": 1.1171875, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.10693359375, |
|
"rewards/margins": 0.0220947265625, |
|
"rewards/rejected": -0.12890625, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 2.4645155740078617, |
|
"learning_rate": 3.329869440156512e-07, |
|
"log_odds_chosen": 0.357086181640625, |
|
"log_odds_ratio": -0.63232421875, |
|
"logits/chosen": -1.890625, |
|
"logits/rejected": -1.9609375, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.176, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1064453125, |
|
"rewards/margins": 0.024658203125, |
|
"rewards/rejected": -0.130859375, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 2.8912394279639084, |
|
"learning_rate": 3.297354818612037e-07, |
|
"log_odds_chosen": 0.05325927585363388, |
|
"log_odds_ratio": -0.7728515863418579, |
|
"logits/chosen": -1.9453125, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.125, |
|
"loss": 1.2402, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.10986328125, |
|
"rewards/margins": 0.0030975341796875, |
|
"rewards/rejected": -0.11279296875, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 2.563419103608563, |
|
"learning_rate": 3.264689344633461e-07, |
|
"log_odds_chosen": 0.14066162705421448, |
|
"log_odds_ratio": -0.6943359375, |
|
"logits/chosen": -1.8125, |
|
"logits/rejected": -1.859375, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.1959, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1142578125, |
|
"rewards/margins": 0.0084228515625, |
|
"rewards/rejected": -0.12255859375, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 2.8288693775232643, |
|
"learning_rate": 3.2318791982376923e-07, |
|
"log_odds_chosen": 0.19826659560203552, |
|
"log_odds_ratio": -0.6885741949081421, |
|
"logits/chosen": -1.9375, |
|
"logits/rejected": -2.09375, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.2221, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.10888671875, |
|
"rewards/margins": 0.01275634765625, |
|
"rewards/rejected": -0.12158203125, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 2.9337235954606844, |
|
"learning_rate": 3.198930586812372e-07, |
|
"log_odds_chosen": 0.3016296327114105, |
|
"log_odds_ratio": -0.67626953125, |
|
"logits/chosen": -1.8671875, |
|
"logits/rejected": -2.0, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.1805, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.1083984375, |
|
"rewards/margins": 0.02197265625, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.815544385281363, |
|
"learning_rate": 3.1658497439414935e-07, |
|
"log_odds_chosen": 0.18316039443016052, |
|
"log_odds_ratio": -0.6927734613418579, |
|
"logits/chosen": -1.96875, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.203125, |
|
"loss": 1.2118, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.1083984375, |
|
"rewards/margins": 0.01153564453125, |
|
"rewards/rejected": -0.1201171875, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.806645073099231, |
|
"learning_rate": 3.132642928226061e-07, |
|
"log_odds_chosen": 0.33399659395217896, |
|
"log_odds_ratio": -0.6348632574081421, |
|
"logits/chosen": -1.8828125, |
|
"logits/rejected": -1.9921875, |
|
"logps/chosen": -1.0546875, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.1911, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.10498046875, |
|
"rewards/margins": 0.0223388671875, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 2.4581782453300884, |
|
"learning_rate": 3.0993164221000207e-07, |
|
"log_odds_chosen": 0.215545654296875, |
|
"log_odds_ratio": -0.672070324420929, |
|
"logits/chosen": -1.9140625, |
|
"logits/rejected": -2.0, |
|
"logps/chosen": -1.109375, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.182, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.11083984375, |
|
"rewards/margins": 0.0145263671875, |
|
"rewards/rejected": -0.125, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 5.263613381972474, |
|
"learning_rate": 3.0658765306416794e-07, |
|
"log_odds_chosen": 0.166778564453125, |
|
"log_odds_ratio": -0.6953125, |
|
"logits/chosen": -1.8359375, |
|
"logits/rejected": -1.921875, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.2109375, |
|
"loss": 1.2193, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.10888671875, |
|
"rewards/margins": 0.0118408203125, |
|
"rewards/rejected": -0.12109375, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 2.5240280415155723, |
|
"learning_rate": 3.032329580380838e-07, |
|
"log_odds_chosen": 0.28306883573532104, |
|
"log_odds_ratio": -0.6612304449081421, |
|
"logits/chosen": -1.90625, |
|
"logits/rejected": -1.96875, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.1956, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.01904296875, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 2.743773542575128, |
|
"learning_rate": 2.998681918101871e-07, |
|
"log_odds_chosen": 0.3384033143520355, |
|
"log_odds_ratio": -0.6493164300918579, |
|
"logits/chosen": -1.8828125, |
|
"logits/rejected": -1.9453125, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.328125, |
|
"loss": 1.206, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.107421875, |
|
"rewards/margins": 0.025390625, |
|
"rewards/rejected": -0.1328125, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 2.8074211611598066, |
|
"learning_rate": 2.9649399096429714e-07, |
|
"log_odds_chosen": 0.23601074516773224, |
|
"log_odds_ratio": -0.6533203125, |
|
"logits/chosen": -1.859375, |
|
"logits/rejected": -1.9140625, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.188, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.107421875, |
|
"rewards/margins": 0.015625, |
|
"rewards/rejected": -0.123046875, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.525044784627154, |
|
"learning_rate": 2.931109938691786e-07, |
|
"log_odds_chosen": 0.16881103813648224, |
|
"log_odds_ratio": -0.684277355670929, |
|
"logits/chosen": -1.8515625, |
|
"logits/rejected": -2.0, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.188, |
|
"nll_loss": 1.1171875, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.0128173828125, |
|
"rewards/rejected": -0.1220703125, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 2.668512392567912, |
|
"learning_rate": 2.8971984055776853e-07, |
|
"log_odds_chosen": 0.21584472060203552, |
|
"log_odds_ratio": -0.672558605670929, |
|
"logits/chosen": -1.84375, |
|
"logits/rejected": -1.9765625, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.2336, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.10791015625, |
|
"rewards/margins": 0.01470947265625, |
|
"rewards/rejected": -0.12255859375, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 2.659729033509314, |
|
"learning_rate": 2.863211726060875e-07, |
|
"log_odds_chosen": 0.2547973692417145, |
|
"log_odds_ratio": -0.6659179925918579, |
|
"logits/chosen": -1.96875, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.2367, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1142578125, |
|
"rewards/margins": 0.0167236328125, |
|
"rewards/rejected": -0.130859375, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 2.470961884835421, |
|
"learning_rate": 2.829156330118589e-07, |
|
"log_odds_chosen": 0.24007567763328552, |
|
"log_odds_ratio": -0.65283203125, |
|
"logits/chosen": -1.859375, |
|
"logits/rejected": -1.9609375, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.2008, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.11181640625, |
|
"rewards/margins": 0.0159912109375, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 2.5904466369333026, |
|
"learning_rate": 2.7950386607286e-07, |
|
"log_odds_chosen": 0.28740233182907104, |
|
"log_odds_ratio": -0.6572265625, |
|
"logits/chosen": -1.8671875, |
|
"logits/rejected": -1.9609375, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.2003, |
|
"nll_loss": 1.0859375, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.1064453125, |
|
"rewards/margins": 0.019775390625, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 2.8679276152227726, |
|
"learning_rate": 2.7608651726502607e-07, |
|
"log_odds_chosen": 0.29725342988967896, |
|
"log_odds_ratio": -0.6602538824081421, |
|
"logits/chosen": -1.84375, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.2296, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.1064453125, |
|
"rewards/margins": 0.0213623046875, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 2.5150772854856243, |
|
"learning_rate": 2.7266423312033226e-07, |
|
"log_odds_chosen": 0.2159423828125, |
|
"log_odds_ratio": -0.7059570550918579, |
|
"logits/chosen": -1.9140625, |
|
"logits/rejected": -2.0, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.25, |
|
"loss": 1.185, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.10791015625, |
|
"rewards/margins": 0.017333984375, |
|
"rewards/rejected": -0.125, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 2.737219590030928, |
|
"learning_rate": 2.692376611044757e-07, |
|
"log_odds_chosen": 0.3914794921875, |
|
"log_odds_ratio": -0.640332043170929, |
|
"logits/chosen": -1.796875, |
|
"logits/rejected": -1.8671875, |
|
"logps/chosen": -1.03125, |
|
"logps/rejected": -1.2890625, |
|
"loss": 1.2041, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1025390625, |
|
"rewards/margins": 0.026123046875, |
|
"rewards/rejected": -0.12890625, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 2.7769961907081293, |
|
"learning_rate": 2.6580744949438045e-07, |
|
"log_odds_chosen": 0.08111572265625, |
|
"log_odds_ratio": -0.731249988079071, |
|
"logits/chosen": -1.8828125, |
|
"logits/rejected": -1.9921875, |
|
"logps/chosen": -1.1484375, |
|
"logps/rejected": -1.1875, |
|
"loss": 1.2605, |
|
"nll_loss": 1.203125, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.11474609375, |
|
"rewards/margins": 0.0037078857421875, |
|
"rewards/rejected": -0.11865234375, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 2.9775601305183463, |
|
"learning_rate": 2.6237424725554935e-07, |
|
"log_odds_chosen": 0.3329834043979645, |
|
"log_odds_ratio": -0.635937511920929, |
|
"logits/chosen": -1.8359375, |
|
"logits/rejected": -1.921875, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.296875, |
|
"loss": 1.2152, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1064453125, |
|
"rewards/margins": 0.0233154296875, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 2.279068955006949, |
|
"learning_rate": 2.589387039192858e-07, |
|
"log_odds_chosen": 0.20733642578125, |
|
"log_odds_ratio": -0.667675793170929, |
|
"logits/chosen": -1.859375, |
|
"logits/rejected": -1.9765625, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.2064, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.1123046875, |
|
"rewards/margins": 0.013671875, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 3.0021514828628746, |
|
"learning_rate": 2.555014694598077e-07, |
|
"log_odds_chosen": 0.23118896782398224, |
|
"log_odds_ratio": -0.6884765625, |
|
"logits/chosen": -1.765625, |
|
"logits/rejected": -1.9453125, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.2152, |
|
"nll_loss": 1.09375, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.1083984375, |
|
"rewards/margins": 0.0145263671875, |
|
"rewards/rejected": -0.123046875, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 2.735522050073968, |
|
"learning_rate": 2.5206319417127873e-07, |
|
"log_odds_chosen": 0.3378845155239105, |
|
"log_odds_ratio": -0.632128894329071, |
|
"logits/chosen": -1.7421875, |
|
"logits/rejected": -1.8984375, |
|
"logps/chosen": -1.0234375, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.1638, |
|
"nll_loss": 1.0703125, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.1025390625, |
|
"rewards/margins": 0.0228271484375, |
|
"rewards/rejected": -0.125, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.5736254747923923, |
|
"learning_rate": 2.4862452854477784e-07, |
|
"log_odds_chosen": 0.3209228515625, |
|
"log_odds_ratio": -0.65576171875, |
|
"logits/chosen": -1.734375, |
|
"logits/rejected": -1.8984375, |
|
"logps/chosen": -1.03125, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.166, |
|
"nll_loss": 1.046875, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.103515625, |
|
"rewards/margins": 0.0225830078125, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 2.7098667746876073, |
|
"learning_rate": 2.4518612314523265e-07, |
|
"log_odds_chosen": 0.08408202975988388, |
|
"log_odds_ratio": -0.732421875, |
|
"logits/chosen": -1.84375, |
|
"logits/rejected": -1.953125, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.140625, |
|
"loss": 1.1805, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.10986328125, |
|
"rewards/margins": 0.00457763671875, |
|
"rewards/rejected": -0.1142578125, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 2.7073252776256966, |
|
"learning_rate": 2.4174862848833806e-07, |
|
"log_odds_chosen": 0.20045165717601776, |
|
"log_odds_ratio": -0.67236328125, |
|
"logits/chosen": -1.7578125, |
|
"logits/rejected": -1.8359375, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.1875, |
|
"loss": 1.2051, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.10693359375, |
|
"rewards/margins": 0.0118408203125, |
|
"rewards/rejected": -0.119140625, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 2.7514653552282233, |
|
"learning_rate": 2.3831269491748467e-07, |
|
"log_odds_chosen": 0.22596435248851776, |
|
"log_odds_ratio": -0.708984375, |
|
"logits/chosen": -1.796875, |
|
"logits/rejected": -1.8828125, |
|
"logps/chosen": -1.1171875, |
|
"logps/rejected": -1.2890625, |
|
"loss": 1.217, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.11181640625, |
|
"rewards/margins": 0.017333984375, |
|
"rewards/rejected": -0.12890625, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 2.8823498677475183, |
|
"learning_rate": 2.3487897248071941e-07, |
|
"log_odds_chosen": 0.2939697206020355, |
|
"log_odds_ratio": -0.664257824420929, |
|
"logits/chosen": -1.7890625, |
|
"logits/rejected": -1.9375, |
|
"logps/chosen": -1.046875, |
|
"logps/rejected": -1.25, |
|
"loss": 1.1892, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.10498046875, |
|
"rewards/margins": 0.0205078125, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 2.69332509317782, |
|
"learning_rate": 2.314481108077624e-07, |
|
"log_odds_chosen": 0.1607666015625, |
|
"log_odds_ratio": -0.6968749761581421, |
|
"logits/chosen": -1.8203125, |
|
"logits/rejected": -1.8515625, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.1796875, |
|
"loss": 1.1978, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.107421875, |
|
"rewards/margins": 0.01068115234375, |
|
"rewards/rejected": -0.1181640625, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 2.5989208277674356, |
|
"learning_rate": 2.280207589871026e-07, |
|
"log_odds_chosen": 0.3521362245082855, |
|
"log_odds_ratio": -0.642382800579071, |
|
"logits/chosen": -1.8125, |
|
"logits/rejected": -1.9375, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.3203125, |
|
"loss": 1.1628, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.10791015625, |
|
"rewards/margins": 0.0242919921875, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 2.5631030942900805, |
|
"learning_rate": 2.2459756544319627e-07, |
|
"log_odds_chosen": 0.1890869140625, |
|
"log_odds_ratio": -0.696972668170929, |
|
"logits/chosen": -1.796875, |
|
"logits/rejected": -1.890625, |
|
"logps/chosen": -1.015625, |
|
"logps/rejected": -1.1328125, |
|
"loss": 1.1771, |
|
"nll_loss": 1.0546875, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.10205078125, |
|
"rewards/margins": 0.01129150390625, |
|
"rewards/rejected": -0.11328125, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 2.7548023973263613, |
|
"learning_rate": 2.2117917781379067e-07, |
|
"log_odds_chosen": 0.19255371391773224, |
|
"log_odds_ratio": -0.679394543170929, |
|
"logits/chosen": -1.734375, |
|
"logits/rejected": -1.859375, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.2441, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.0123291015625, |
|
"rewards/rejected": -0.12158203125, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 2.6382486056871177, |
|
"learning_rate": 2.177662428273968e-07, |
|
"log_odds_chosen": 0.23670653998851776, |
|
"log_odds_ratio": -0.67626953125, |
|
"logits/chosen": -1.7578125, |
|
"logits/rejected": -1.8984375, |
|
"logps/chosen": -1.046875, |
|
"logps/rejected": -1.203125, |
|
"loss": 1.1895, |
|
"nll_loss": 1.09375, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.10498046875, |
|
"rewards/margins": 0.0150146484375, |
|
"rewards/rejected": -0.1201171875, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 2.5099170844954317, |
|
"learning_rate": 2.1435940618093414e-07, |
|
"log_odds_chosen": 0.19310303032398224, |
|
"log_odds_ratio": -0.690625011920929, |
|
"logits/chosen": -1.765625, |
|
"logits/rejected": -1.875, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.1881, |
|
"nll_loss": 1.1015625, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.10888671875, |
|
"rewards/margins": 0.0140380859375, |
|
"rewards/rejected": -0.123046875, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 2.7950237991583493, |
|
"learning_rate": 2.1095931241757062e-07, |
|
"log_odds_chosen": 0.2502685487270355, |
|
"log_odds_ratio": -0.680957019329071, |
|
"logits/chosen": -1.7734375, |
|
"logits/rejected": -1.8515625, |
|
"logps/chosen": -1.0390625, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.1906, |
|
"nll_loss": 1.09375, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.10400390625, |
|
"rewards/margins": 0.0185546875, |
|
"rewards/rejected": -0.1220703125, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 2.609790265054367, |
|
"learning_rate": 2.075666048047806e-07, |
|
"log_odds_chosen": 0.15053710341453552, |
|
"log_odds_ratio": -0.698437511920929, |
|
"logits/chosen": -1.7578125, |
|
"logits/rejected": -1.828125, |
|
"logps/chosen": -1.1015625, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.221, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1103515625, |
|
"rewards/margins": 0.01171875, |
|
"rewards/rejected": -0.1220703125, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 2.5467686003601697, |
|
"learning_rate": 2.0418192521264454e-07, |
|
"log_odds_chosen": 0.23857422173023224, |
|
"log_odds_ratio": -0.659863293170929, |
|
"logits/chosen": -1.7734375, |
|
"logits/rejected": -1.8515625, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.2109375, |
|
"loss": 1.1898, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1064453125, |
|
"rewards/margins": 0.01397705078125, |
|
"rewards/rejected": -0.12060546875, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 2.459650956326835, |
|
"learning_rate": 2.0080591399241292e-07, |
|
"log_odds_chosen": 0.23247070610523224, |
|
"log_odds_ratio": -0.6712890863418579, |
|
"logits/chosen": -1.7578125, |
|
"logits/rejected": -1.7734375, |
|
"logps/chosen": -1.109375, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.1708, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.1103515625, |
|
"rewards/margins": 0.014892578125, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 2.47537989067237, |
|
"learning_rate": 1.9743920985535729e-07, |
|
"log_odds_chosen": 0.3998779356479645, |
|
"log_odds_ratio": -0.625781238079071, |
|
"logits/chosen": -1.6484375, |
|
"logits/rejected": -1.796875, |
|
"logps/chosen": -0.98828125, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.1589, |
|
"nll_loss": 1.0390625, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.09912109375, |
|
"rewards/margins": 0.0269775390625, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 2.7703541098291455, |
|
"learning_rate": 1.94082449751932e-07, |
|
"log_odds_chosen": 0.2127685546875, |
|
"log_odds_ratio": -0.6846679449081421, |
|
"logits/chosen": -1.734375, |
|
"logits/rejected": -1.8125, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.25, |
|
"loss": 1.1794, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.10888671875, |
|
"rewards/margins": 0.0157470703125, |
|
"rewards/rejected": -0.125, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 2.885795668675382, |
|
"learning_rate": 1.9073626875126874e-07, |
|
"log_odds_chosen": 0.26057130098342896, |
|
"log_odds_ratio": -0.649707019329071, |
|
"logits/chosen": -1.7734375, |
|
"logits/rejected": -1.7890625, |
|
"logps/chosen": -1.0234375, |
|
"logps/rejected": -1.1875, |
|
"loss": 1.1671, |
|
"nll_loss": 1.1015625, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.1025390625, |
|
"rewards/margins": 0.01611328125, |
|
"rewards/rejected": -0.11865234375, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 2.722691086755302, |
|
"learning_rate": 1.874012999210271e-07, |
|
"log_odds_chosen": 0.19356079399585724, |
|
"log_odds_ratio": -0.694140613079071, |
|
"logits/chosen": -1.859375, |
|
"logits/rejected": -1.8828125, |
|
"logps/chosen": -1.1171875, |
|
"logps/rejected": -1.25, |
|
"loss": 1.1779, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.111328125, |
|
"rewards/margins": 0.01385498046875, |
|
"rewards/rejected": -0.125, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 2.604921389210434, |
|
"learning_rate": 1.8407817420762383e-07, |
|
"log_odds_chosen": 0.26337891817092896, |
|
"log_odds_ratio": -0.6572265625, |
|
"logits/chosen": -1.734375, |
|
"logits/rejected": -1.875, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.184, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.10888671875, |
|
"rewards/margins": 0.01806640625, |
|
"rewards/rejected": -0.126953125, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 2.732739073707677, |
|
"learning_rate": 1.8076752031686343e-07, |
|
"log_odds_chosen": 0.14312133193016052, |
|
"log_odds_ratio": -0.704882800579071, |
|
"logits/chosen": -1.7734375, |
|
"logits/rejected": -1.8671875, |
|
"logps/chosen": -1.0390625, |
|
"logps/rejected": -1.140625, |
|
"loss": 1.188, |
|
"nll_loss": 1.1171875, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.103515625, |
|
"rewards/margins": 0.010498046875, |
|
"rewards/rejected": -0.1142578125, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 2.395290977769547, |
|
"learning_rate": 1.7746996459499254e-07, |
|
"log_odds_chosen": 0.242431640625, |
|
"log_odds_ratio": -0.6644531488418579, |
|
"logits/chosen": -1.65625, |
|
"logits/rejected": -1.84375, |
|
"logps/chosen": -1.0546875, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.1618, |
|
"nll_loss": 1.046875, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.10546875, |
|
"rewards/margins": 0.016357421875, |
|
"rewards/rejected": -0.1220703125, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 2.547454975163186, |
|
"learning_rate": 1.741861309102009e-07, |
|
"log_odds_chosen": 0.26506346464157104, |
|
"log_odds_ratio": -0.6689453125, |
|
"logits/chosen": -1.640625, |
|
"logits/rejected": -1.7890625, |
|
"logps/chosen": -1.0546875, |
|
"logps/rejected": -1.2109375, |
|
"loss": 1.1665, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.10498046875, |
|
"rewards/margins": 0.0159912109375, |
|
"rewards/rejected": -0.12109375, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 2.396584161009059, |
|
"learning_rate": 1.7091664053459088e-07, |
|
"log_odds_chosen": 0.18143311142921448, |
|
"log_odds_ratio": -0.6865234375, |
|
"logits/chosen": -1.71875, |
|
"logits/rejected": -1.8359375, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.1875, |
|
"loss": 1.2118, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.10791015625, |
|
"rewards/margins": 0.01043701171875, |
|
"rewards/rejected": -0.11865234375, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 2.279515658884766, |
|
"learning_rate": 1.6766211202663844e-07, |
|
"log_odds_chosen": 0.05356445163488388, |
|
"log_odds_ratio": -0.739453136920929, |
|
"logits/chosen": -1.6875, |
|
"logits/rejected": -1.7890625, |
|
"logps/chosen": -1.1171875, |
|
"logps/rejected": -1.140625, |
|
"loss": 1.2049, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.111328125, |
|
"rewards/margins": 0.0027008056640625, |
|
"rewards/rejected": -0.1142578125, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 2.4004304534033265, |
|
"learning_rate": 1.6442316111416743e-07, |
|
"log_odds_chosen": 0.26105958223342896, |
|
"log_odds_ratio": -0.6631835699081421, |
|
"logits/chosen": -1.6796875, |
|
"logits/rejected": -1.7421875, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.1816, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.10888671875, |
|
"rewards/margins": 0.0184326171875, |
|
"rewards/rejected": -0.126953125, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 2.763579524745402, |
|
"learning_rate": 1.6120040057785928e-07, |
|
"log_odds_chosen": 0.29625242948532104, |
|
"log_odds_ratio": -0.6499999761581421, |
|
"logits/chosen": -1.8125, |
|
"logits/rejected": -1.875, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.1727, |
|
"nll_loss": 1.09375, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.1083984375, |
|
"rewards/margins": 0.0194091796875, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 2.8036257747935154, |
|
"learning_rate": 1.5799444013532038e-07, |
|
"log_odds_chosen": 0.23708495497703552, |
|
"log_odds_ratio": -0.67431640625, |
|
"logits/chosen": -1.7109375, |
|
"logits/rejected": -1.8203125, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.1238, |
|
"nll_loss": 1.0859375, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.1064453125, |
|
"rewards/margins": 0.01556396484375, |
|
"rewards/rejected": -0.1220703125, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 2.6570430804218566, |
|
"learning_rate": 1.5480588632572885e-07, |
|
"log_odds_chosen": 0.37006837129592896, |
|
"log_odds_ratio": -0.635937511920929, |
|
"logits/chosen": -1.7421875, |
|
"logits/rejected": -1.8046875, |
|
"logps/chosen": -1.0390625, |
|
"logps/rejected": -1.2890625, |
|
"loss": 1.1907, |
|
"nll_loss": 1.078125, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.103515625, |
|
"rewards/margins": 0.025390625, |
|
"rewards/rejected": -0.12890625, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 2.590262402608133, |
|
"learning_rate": 1.516353423950829e-07, |
|
"log_odds_chosen": 0.3837524354457855, |
|
"log_odds_ratio": -0.625, |
|
"logits/chosen": -1.796875, |
|
"logits/rejected": -1.9140625, |
|
"logps/chosen": -1.046875, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.1898, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.1044921875, |
|
"rewards/margins": 0.0235595703125, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 2.568483644438331, |
|
"learning_rate": 1.4848340818207184e-07, |
|
"log_odds_chosen": 0.26896971464157104, |
|
"log_odds_ratio": -0.66455078125, |
|
"logits/chosen": -1.75, |
|
"logits/rejected": -1.859375, |
|
"logps/chosen": -1.046875, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.1799, |
|
"nll_loss": 1.09375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.10498046875, |
|
"rewards/margins": 0.019287109375, |
|
"rewards/rejected": -0.1240234375, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 3.318714021827115, |
|
"learning_rate": 1.453506800045921e-07, |
|
"log_odds_chosen": 0.12944336235523224, |
|
"log_odds_ratio": -0.7064453363418579, |
|
"logits/chosen": -1.75, |
|
"logits/rejected": -1.7890625, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.1796875, |
|
"loss": 1.2096, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1083984375, |
|
"rewards/margins": 0.00970458984375, |
|
"rewards/rejected": -0.1181640625, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 2.7195091239689426, |
|
"learning_rate": 1.422377505469293e-07, |
|
"log_odds_chosen": 0.14760741591453552, |
|
"log_odds_ratio": -0.72802734375, |
|
"logits/chosen": -1.703125, |
|
"logits/rejected": -1.75, |
|
"logps/chosen": -1.1328125, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.2695, |
|
"nll_loss": 1.234375, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.11328125, |
|
"rewards/margins": 0.00823974609375, |
|
"rewards/rejected": -0.12158203125, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 2.4116745200019696, |
|
"learning_rate": 1.3914520874762726e-07, |
|
"log_odds_chosen": 0.2623352110385895, |
|
"log_odds_ratio": -0.6844726800918579, |
|
"logits/chosen": -1.6875, |
|
"logits/rejected": -1.765625, |
|
"logps/chosen": -1.109375, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.2307, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.111328125, |
|
"rewards/margins": 0.0191650390625, |
|
"rewards/rejected": -0.130859375, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 2.6643696734974127, |
|
"learning_rate": 1.3607363968806645e-07, |
|
"log_odds_chosen": 0.3259033262729645, |
|
"log_odds_ratio": -0.623046875, |
|
"logits/chosen": -1.6953125, |
|
"logits/rejected": -1.78125, |
|
"logps/chosen": -1.03125, |
|
"logps/rejected": -1.25, |
|
"loss": 1.2087, |
|
"nll_loss": 1.046875, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.10302734375, |
|
"rewards/margins": 0.022216796875, |
|
"rewards/rejected": -0.125, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 2.734224521952181, |
|
"learning_rate": 1.3302362448177167e-07, |
|
"log_odds_chosen": 0.30589598417282104, |
|
"log_odds_ratio": -0.6283203363418579, |
|
"logits/chosen": -1.703125, |
|
"logits/rejected": -1.796875, |
|
"logps/chosen": -1.03125, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.1853, |
|
"nll_loss": 1.1015625, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.103515625, |
|
"rewards/margins": 0.0198974609375, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 3.213949669653505, |
|
"learning_rate": 1.2999574016447056e-07, |
|
"log_odds_chosen": 0.3102783262729645, |
|
"log_odds_ratio": -0.6421874761581421, |
|
"logits/chosen": -1.6875, |
|
"logits/rejected": -1.8125, |
|
"logps/chosen": -1.046875, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.196, |
|
"nll_loss": 1.0703125, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1044921875, |
|
"rewards/margins": 0.0211181640625, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 2.3847902873067492, |
|
"learning_rate": 1.2699055958492344e-07, |
|
"log_odds_chosen": 0.19971923530101776, |
|
"log_odds_ratio": -0.6748046875, |
|
"logits/chosen": -1.7421875, |
|
"logits/rejected": -1.8203125, |
|
"logps/chosen": -1.046875, |
|
"logps/rejected": -1.171875, |
|
"loss": 1.2064, |
|
"nll_loss": 1.1171875, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1044921875, |
|
"rewards/margins": 0.01287841796875, |
|
"rewards/rejected": -0.11767578125, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 2.5039350155364573, |
|
"learning_rate": 1.2400865129654567e-07, |
|
"log_odds_chosen": 0.27821046113967896, |
|
"log_odds_ratio": -0.652050793170929, |
|
"logits/chosen": -1.6796875, |
|
"logits/rejected": -1.796875, |
|
"logps/chosen": -1.0390625, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.1371, |
|
"nll_loss": 1.0703125, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.10400390625, |
|
"rewards/margins": 0.018310546875, |
|
"rewards/rejected": -0.12255859375, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 2.5879591956281995, |
|
"learning_rate": 1.210505794498422e-07, |
|
"log_odds_chosen": 0.23630371689796448, |
|
"log_odds_ratio": -0.673828125, |
|
"logits/chosen": -1.7109375, |
|
"logits/rejected": -1.7421875, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.1706, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.0164794921875, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 2.752234308496576, |
|
"learning_rate": 1.1811690368567545e-07, |
|
"log_odds_chosen": 0.14584961533546448, |
|
"log_odds_ratio": -0.6947265863418579, |
|
"logits/chosen": -1.7109375, |
|
"logits/rejected": -1.8203125, |
|
"logps/chosen": -1.046875, |
|
"logps/rejected": -1.1328125, |
|
"loss": 1.236, |
|
"nll_loss": 1.1171875, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.1044921875, |
|
"rewards/margins": 0.00872802734375, |
|
"rewards/rejected": -0.11328125, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 2.573963727957766, |
|
"learning_rate": 1.1520817902938618e-07, |
|
"log_odds_chosen": 0.07918091118335724, |
|
"log_odds_ratio": -0.7347656488418579, |
|
"logits/chosen": -1.7109375, |
|
"logits/rejected": -1.78125, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.1875, |
|
"loss": 1.1915, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.11279296875, |
|
"rewards/margins": 0.00604248046875, |
|
"rewards/rejected": -0.11865234375, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 2.7806823611081177, |
|
"learning_rate": 1.1232495578578755e-07, |
|
"log_odds_chosen": 0.15264892578125, |
|
"log_odds_ratio": -0.6976562738418579, |
|
"logits/chosen": -1.75, |
|
"logits/rejected": -1.78125, |
|
"logps/chosen": -1.0390625, |
|
"logps/rejected": -1.1171875, |
|
"loss": 1.1626, |
|
"nll_loss": 1.09375, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.103515625, |
|
"rewards/margins": 0.00823974609375, |
|
"rewards/rejected": -0.11181640625, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 2.436201094808605, |
|
"learning_rate": 1.0946777943505254e-07, |
|
"log_odds_chosen": 0.23690184950828552, |
|
"log_odds_ratio": -0.6917968988418579, |
|
"logits/chosen": -1.7109375, |
|
"logits/rejected": -1.78125, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.1971, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.10595703125, |
|
"rewards/margins": 0.0167236328125, |
|
"rewards/rejected": -0.123046875, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 2.417259454035703, |
|
"learning_rate": 1.0663719052951381e-07, |
|
"log_odds_chosen": 0.19016113877296448, |
|
"log_odds_ratio": -0.699414074420929, |
|
"logits/chosen": -1.6796875, |
|
"logits/rejected": -1.765625, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.1953125, |
|
"loss": 1.1861, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.10888671875, |
|
"rewards/margins": 0.0107421875, |
|
"rewards/rejected": -0.119140625, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 2.340681686492189, |
|
"learning_rate": 1.0383372459139608e-07, |
|
"log_odds_chosen": 0.30018919706344604, |
|
"log_odds_ratio": -0.6387695074081421, |
|
"logits/chosen": -1.78125, |
|
"logits/rejected": -1.921875, |
|
"logps/chosen": -1.0546875, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.1958, |
|
"nll_loss": 1.078125, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.10498046875, |
|
"rewards/margins": 0.0191650390625, |
|
"rewards/rejected": -0.12451171875, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 2.530421254724575, |
|
"learning_rate": 1.0105791201150002e-07, |
|
"log_odds_chosen": 0.3886962831020355, |
|
"log_odds_ratio": -0.620312511920929, |
|
"logits/chosen": -1.6796875, |
|
"logits/rejected": -1.7734375, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.3359375, |
|
"loss": 1.16, |
|
"nll_loss": 1.046875, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.10595703125, |
|
"rewards/margins": 0.027587890625, |
|
"rewards/rejected": -0.1337890625, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 2.6492828085260225, |
|
"learning_rate": 9.831027794885713e-08, |
|
"log_odds_chosen": 0.34185791015625, |
|
"log_odds_ratio": -0.6444336175918579, |
|
"logits/chosen": -1.6484375, |
|
"logits/rejected": -1.671875, |
|
"logps/chosen": -1.0234375, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.1779, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.1025390625, |
|
"rewards/margins": 0.0211181640625, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 2.6971126252475286, |
|
"learning_rate": 9.559134223137424e-08, |
|
"log_odds_chosen": 0.2640136778354645, |
|
"log_odds_ratio": -0.673046886920929, |
|
"logits/chosen": -1.7578125, |
|
"logits/rejected": -1.8046875, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.2186, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.016845703125, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 2.768346463128813, |
|
"learning_rate": 9.290161925748674e-08, |
|
"log_odds_chosen": 0.333740234375, |
|
"log_odds_ratio": -0.6434570550918579, |
|
"logits/chosen": -1.71875, |
|
"logits/rejected": -1.796875, |
|
"logps/chosen": -1.1015625, |
|
"logps/rejected": -1.328125, |
|
"loss": 1.1758, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.1103515625, |
|
"rewards/margins": 0.0224609375, |
|
"rewards/rejected": -0.1328125, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 2.4131576506486168, |
|
"learning_rate": 9.024161789883897e-08, |
|
"log_odds_chosen": 0.13895873725414276, |
|
"log_odds_ratio": -0.695507824420929, |
|
"logits/chosen": -1.625, |
|
"logits/rejected": -1.71875, |
|
"logps/chosen": -1.03125, |
|
"logps/rejected": -1.1171875, |
|
"loss": 1.1368, |
|
"nll_loss": 1.0625, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.10302734375, |
|
"rewards/margins": 0.009033203125, |
|
"rewards/rejected": -0.1123046875, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 2.5654624061461253, |
|
"learning_rate": 8.761184140401023e-08, |
|
"log_odds_chosen": 0.25886231660842896, |
|
"log_odds_ratio": -0.662109375, |
|
"logits/chosen": -1.609375, |
|
"logits/rejected": -1.6875, |
|
"logps/chosen": -1.0390625, |
|
"logps/rejected": -1.203125, |
|
"loss": 1.1906, |
|
"nll_loss": 1.09375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.10400390625, |
|
"rewards/margins": 0.0164794921875, |
|
"rewards/rejected": -0.12060546875, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 2.6681020896294676, |
|
"learning_rate": 8.501278730330463e-08, |
|
"log_odds_chosen": 0.36528319120407104, |
|
"log_odds_ratio": -0.626269519329071, |
|
"logits/chosen": -1.609375, |
|
"logits/rejected": -1.671875, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.3203125, |
|
"loss": 1.1977, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.107421875, |
|
"rewards/margins": 0.02490234375, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 2.7519831354278512, |
|
"learning_rate": 8.244494731462279e-08, |
|
"log_odds_chosen": 0.24447020888328552, |
|
"log_odds_ratio": -0.6788085699081421, |
|
"logits/chosen": -1.59375, |
|
"logits/rejected": -1.703125, |
|
"logps/chosen": -1.03125, |
|
"logps/rejected": -1.1953125, |
|
"loss": 1.164, |
|
"nll_loss": 1.0546875, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.103515625, |
|
"rewards/margins": 0.015869140625, |
|
"rewards/rejected": -0.119140625, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 2.870452087544285, |
|
"learning_rate": 7.990880725043322e-08, |
|
"log_odds_chosen": 0.2567138671875, |
|
"log_odds_ratio": -0.670703113079071, |
|
"logits/chosen": -1.625, |
|
"logits/rejected": -1.7578125, |
|
"logps/chosen": -1.046875, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.1622, |
|
"nll_loss": 1.1171875, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.10498046875, |
|
"rewards/margins": 0.0167236328125, |
|
"rewards/rejected": -0.12158203125, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 2.6968676817822645, |
|
"learning_rate": 7.740484692586074e-08, |
|
"log_odds_chosen": 0.2530761659145355, |
|
"log_odds_ratio": -0.680371105670929, |
|
"logits/chosen": -1.671875, |
|
"logits/rejected": -1.8203125, |
|
"logps/chosen": -1.1796875, |
|
"logps/rejected": -1.3515625, |
|
"loss": 1.2242, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.11767578125, |
|
"rewards/margins": 0.0174560546875, |
|
"rewards/rejected": -0.1357421875, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 2.735947390317322, |
|
"learning_rate": 7.493354006791006e-08, |
|
"log_odds_chosen": 0.24350586533546448, |
|
"log_odds_ratio": -0.682421863079071, |
|
"logits/chosen": -1.6171875, |
|
"logits/rejected": -1.6796875, |
|
"logps/chosen": -1.0546875, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.1973, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.10546875, |
|
"rewards/margins": 0.0169677734375, |
|
"rewards/rejected": -0.12255859375, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 2.4135041559261885, |
|
"learning_rate": 7.249535422584055e-08, |
|
"log_odds_chosen": 0.19566650688648224, |
|
"log_odds_ratio": -0.6849609613418579, |
|
"logits/chosen": -1.8125, |
|
"logits/rejected": -1.84375, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.1875, |
|
"loss": 1.1835, |
|
"nll_loss": 1.1171875, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.10791015625, |
|
"rewards/margins": 0.01129150390625, |
|
"rewards/rejected": -0.119140625, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 2.3578015206878575, |
|
"learning_rate": 7.009075068271031e-08, |
|
"log_odds_chosen": 0.12241820991039276, |
|
"log_odds_ratio": -0.7289062738418579, |
|
"logits/chosen": -1.578125, |
|
"logits/rejected": -1.7578125, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.1796875, |
|
"loss": 1.1747, |
|
"nll_loss": 1.1171875, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.0087890625, |
|
"rewards/rejected": -0.1181640625, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 2.7517093669068933, |
|
"learning_rate": 6.772018436810525e-08, |
|
"log_odds_chosen": 0.34681397676467896, |
|
"log_odds_ratio": -0.616992175579071, |
|
"logits/chosen": -1.7109375, |
|
"logits/rejected": -1.7734375, |
|
"logps/chosen": -1.1015625, |
|
"logps/rejected": -1.34375, |
|
"loss": 1.1863, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.1103515625, |
|
"rewards/margins": 0.0240478515625, |
|
"rewards/rejected": -0.134765625, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 2.758447250920097, |
|
"learning_rate": 6.538410377207082e-08, |
|
"log_odds_chosen": 0.03367309644818306, |
|
"log_odds_ratio": -0.773144543170929, |
|
"logits/chosen": -1.625, |
|
"logits/rejected": -1.7421875, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.15625, |
|
"loss": 1.2376, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.1123046875, |
|
"rewards/margins": 0.0030364990234375, |
|
"rewards/rejected": -0.115234375, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 2.880552308468262, |
|
"learning_rate": 6.308295086026133e-08, |
|
"log_odds_chosen": 0.17825928330421448, |
|
"log_odds_ratio": -0.696972668170929, |
|
"logits/chosen": -1.65625, |
|
"logits/rejected": -1.671875, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.25, |
|
"loss": 1.2262, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1123046875, |
|
"rewards/margins": 0.01220703125, |
|
"rewards/rejected": -0.12451171875, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 2.439317791546686, |
|
"learning_rate": 6.081716099032417e-08, |
|
"log_odds_chosen": 0.3602050840854645, |
|
"log_odds_ratio": -0.6319335699081421, |
|
"logits/chosen": -1.609375, |
|
"logits/rejected": -1.7578125, |
|
"logps/chosen": -0.99609375, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.1792, |
|
"nll_loss": 1.0625, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.099609375, |
|
"rewards/margins": 0.024169921875, |
|
"rewards/rejected": -0.1240234375, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 2.810922238332508, |
|
"learning_rate": 5.858716282953407e-08, |
|
"log_odds_chosen": 0.24152831733226776, |
|
"log_odds_ratio": -0.654492199420929, |
|
"logits/chosen": -1.6328125, |
|
"logits/rejected": -1.75, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.1982, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.0166015625, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 2.5961073589782466, |
|
"learning_rate": 5.639337827369289e-08, |
|
"log_odds_chosen": 0.17608642578125, |
|
"log_odds_ratio": -0.702832043170929, |
|
"logits/chosen": -1.671875, |
|
"logits/rejected": -1.7890625, |
|
"logps/chosen": -1.1171875, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.1879, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.111328125, |
|
"rewards/margins": 0.0111083984375, |
|
"rewards/rejected": -0.12255859375, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 2.3150779833374266, |
|
"learning_rate": 5.4236222367310816e-08, |
|
"log_odds_chosen": 0.29583740234375, |
|
"log_odds_ratio": -0.653124988079071, |
|
"logits/chosen": -1.578125, |
|
"logits/rejected": -1.71875, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.1965, |
|
"nll_loss": 1.0859375, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.10693359375, |
|
"rewards/margins": 0.020263671875, |
|
"rewards/rejected": -0.126953125, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 2.1606096674823068, |
|
"learning_rate": 5.211610322508364e-08, |
|
"log_odds_chosen": 0.15689697861671448, |
|
"log_odds_ratio": -0.70263671875, |
|
"logits/chosen": -1.6875, |
|
"logits/rejected": -1.765625, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.1640625, |
|
"loss": 1.1801, |
|
"nll_loss": 1.09375, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.1064453125, |
|
"rewards/margins": 0.0098876953125, |
|
"rewards/rejected": -0.11669921875, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 2.547538507688686, |
|
"learning_rate": 5.003342195468102e-08, |
|
"log_odds_chosen": 0.221527099609375, |
|
"log_odds_ratio": -0.66357421875, |
|
"logits/chosen": -1.59375, |
|
"logits/rejected": -1.640625, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.203125, |
|
"loss": 1.2133, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.107421875, |
|
"rewards/margins": 0.01263427734375, |
|
"rewards/rejected": -0.11962890625, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 2.8414624562639546, |
|
"learning_rate": 4.798857258086053e-08, |
|
"log_odds_chosen": 0.25762939453125, |
|
"log_odds_ratio": -0.6714843511581421, |
|
"logits/chosen": -1.609375, |
|
"logits/rejected": -1.7109375, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.25, |
|
"loss": 1.2074, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.10595703125, |
|
"rewards/margins": 0.0186767578125, |
|
"rewards/rejected": -0.125, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 3.120660574019042, |
|
"learning_rate": 4.5981941970921646e-08, |
|
"log_odds_chosen": 0.48786622285842896, |
|
"log_odds_ratio": -0.591992199420929, |
|
"logits/chosen": -1.6171875, |
|
"logits/rejected": -1.65625, |
|
"logps/chosen": -1.03125, |
|
"logps/rejected": -1.375, |
|
"loss": 1.1441, |
|
"nll_loss": 1.0390625, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.10302734375, |
|
"rewards/margins": 0.034423828125, |
|
"rewards/rejected": -0.1376953125, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 2.635871013914355, |
|
"learning_rate": 4.4013909761513894e-08, |
|
"log_odds_chosen": 0.2707275450229645, |
|
"log_odds_ratio": -0.649218738079071, |
|
"logits/chosen": -1.71875, |
|
"logits/rejected": -1.7109375, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.265625, |
|
"loss": 1.1887, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.0169677734375, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 3.1511666169638346, |
|
"learning_rate": 4.2084848286813105e-08, |
|
"log_odds_chosen": 0.3526855409145355, |
|
"log_odds_ratio": -0.6600586175918579, |
|
"logits/chosen": -1.5859375, |
|
"logits/rejected": -1.65625, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.34375, |
|
"loss": 1.1851, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.1064453125, |
|
"rewards/margins": 0.0281982421875, |
|
"rewards/rejected": -0.134765625, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 2.7305670197672747, |
|
"learning_rate": 4.0195122508078886e-08, |
|
"log_odds_chosen": 0.27125245332717896, |
|
"log_odds_ratio": -0.65625, |
|
"logits/chosen": -1.5859375, |
|
"logits/rejected": -1.6875, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.1709, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.1064453125, |
|
"rewards/margins": 0.017822265625, |
|
"rewards/rejected": -0.12451171875, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 3.2312418757529726, |
|
"learning_rate": 3.834508994460736e-08, |
|
"log_odds_chosen": 0.23995360732078552, |
|
"log_odds_ratio": -0.654589831829071, |
|
"logits/chosen": -1.578125, |
|
"logits/rejected": -1.640625, |
|
"logps/chosen": -1.03125, |
|
"logps/rejected": -1.1875, |
|
"loss": 1.1604, |
|
"nll_loss": 1.046875, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.103515625, |
|
"rewards/margins": 0.0150146484375, |
|
"rewards/rejected": -0.1181640625, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 2.874032565275268, |
|
"learning_rate": 3.653510060609166e-08, |
|
"log_odds_chosen": 0.13707275688648224, |
|
"log_odds_ratio": -0.7138671875, |
|
"logits/chosen": -1.6796875, |
|
"logits/rejected": -1.765625, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.1484375, |
|
"loss": 1.173, |
|
"nll_loss": 1.078125, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.10595703125, |
|
"rewards/margins": 0.00848388671875, |
|
"rewards/rejected": -0.1142578125, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 2.6604754366861822, |
|
"learning_rate": 3.476549692640316e-08, |
|
"log_odds_chosen": 0.34288328886032104, |
|
"log_odds_ratio": -0.620312511920929, |
|
"logits/chosen": -1.6015625, |
|
"logits/rejected": -1.7109375, |
|
"logps/chosen": -1.0, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.1585, |
|
"nll_loss": 1.078125, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.099609375, |
|
"rewards/margins": 0.0218505859375, |
|
"rewards/rejected": -0.12158203125, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 2.71892900942932, |
|
"learning_rate": 3.3036613698806085e-08, |
|
"log_odds_chosen": 0.21519775688648224, |
|
"log_odds_ratio": -0.691601574420929, |
|
"logits/chosen": -1.640625, |
|
"logits/rejected": -1.734375, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.2288, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.10791015625, |
|
"rewards/margins": 0.01434326171875, |
|
"rewards/rejected": -0.1220703125, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 2.878603239597823, |
|
"learning_rate": 3.134877801261765e-08, |
|
"log_odds_chosen": 0.3372802734375, |
|
"log_odds_ratio": -0.642285168170929, |
|
"logits/chosen": -1.703125, |
|
"logits/rejected": -1.734375, |
|
"logps/chosen": -1.109375, |
|
"logps/rejected": -1.328125, |
|
"loss": 1.2136, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.11083984375, |
|
"rewards/margins": 0.0218505859375, |
|
"rewards/rejected": -0.1328125, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 2.6992535601969085, |
|
"learning_rate": 2.9702309191325492e-08, |
|
"log_odds_chosen": 0.24042968451976776, |
|
"log_odds_ratio": -0.675585925579071, |
|
"logits/chosen": -1.7109375, |
|
"logits/rejected": -1.796875, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.2242, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.10693359375, |
|
"rewards/margins": 0.0159912109375, |
|
"rewards/rejected": -0.123046875, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 2.971420026998493, |
|
"learning_rate": 2.809751873217478e-08, |
|
"log_odds_chosen": 0.32117921113967896, |
|
"log_odds_ratio": -0.6463867425918579, |
|
"logits/chosen": -1.734375, |
|
"logits/rejected": -1.7578125, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.1702, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.10888671875, |
|
"rewards/margins": 0.0218505859375, |
|
"rewards/rejected": -0.130859375, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 2.311036000971507, |
|
"learning_rate": 2.653471024723547e-08, |
|
"log_odds_chosen": 0.43181151151657104, |
|
"log_odds_ratio": -0.5894531011581421, |
|
"logits/chosen": -1.6171875, |
|
"logits/rejected": -1.6875, |
|
"logps/chosen": -1.015625, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.1592, |
|
"nll_loss": 1.09375, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.1015625, |
|
"rewards/margins": 0.0289306640625, |
|
"rewards/rejected": -0.130859375, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 2.5416101230783363, |
|
"learning_rate": 2.501417940596168e-08, |
|
"log_odds_chosen": 0.02521972730755806, |
|
"log_odds_ratio": -0.75, |
|
"logits/chosen": -1.6875, |
|
"logits/rejected": -1.7109375, |
|
"logps/chosen": -1.1015625, |
|
"logps/rejected": -1.109375, |
|
"loss": 1.1748, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.1103515625, |
|
"rewards/margins": 0.000507354736328125, |
|
"rewards/rejected": -0.11083984375, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 2.471954369214634, |
|
"learning_rate": 2.353621387925375e-08, |
|
"log_odds_chosen": 0.3322509825229645, |
|
"log_odds_ratio": -0.6597656011581421, |
|
"logits/chosen": -1.6015625, |
|
"logits/rejected": -1.671875, |
|
"logps/chosen": -1.03125, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.1634, |
|
"nll_loss": 1.078125, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.10302734375, |
|
"rewards/margins": 0.022705078125, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 2.801308375939805, |
|
"learning_rate": 2.2101093285033373e-08, |
|
"log_odds_chosen": 0.3058715760707855, |
|
"log_odds_ratio": -0.6502929925918579, |
|
"logits/chosen": -1.6484375, |
|
"logits/rejected": -1.7265625, |
|
"logps/chosen": -1.046875, |
|
"logps/rejected": -1.25, |
|
"loss": 1.1307, |
|
"nll_loss": 1.03125, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.10498046875, |
|
"rewards/margins": 0.020263671875, |
|
"rewards/rejected": -0.125, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 2.6457419438315233, |
|
"learning_rate": 2.070908913534236e-08, |
|
"log_odds_chosen": 0.24928589165210724, |
|
"log_odds_ratio": -0.6776367425918579, |
|
"logits/chosen": -1.65625, |
|
"logits/rejected": -1.6640625, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.2119, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.10791015625, |
|
"rewards/margins": 0.015869140625, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 2.8172476905649764, |
|
"learning_rate": 1.9360464784975024e-08, |
|
"log_odds_chosen": 0.21148681640625, |
|
"log_odds_ratio": -0.6849609613418579, |
|
"logits/chosen": -1.5625, |
|
"logits/rejected": -1.65625, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.1771, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.107421875, |
|
"rewards/margins": 0.0146484375, |
|
"rewards/rejected": -0.1220703125, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 2.963550785069561, |
|
"learning_rate": 1.8055475381653807e-08, |
|
"log_odds_chosen": 0.27608031034469604, |
|
"log_odds_ratio": -0.6513671875, |
|
"logits/chosen": -1.71875, |
|
"logits/rejected": -1.7421875, |
|
"logps/chosen": -1.046875, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.2014, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.1044921875, |
|
"rewards/margins": 0.018798828125, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 3.1613513759896534, |
|
"learning_rate": 1.679436781775759e-08, |
|
"log_odds_chosen": 0.31138914823532104, |
|
"log_odds_ratio": -0.675976574420929, |
|
"logits/chosen": -1.65625, |
|
"logits/rejected": -1.7421875, |
|
"logps/chosen": -1.109375, |
|
"logps/rejected": -1.3203125, |
|
"loss": 1.1978, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.11083984375, |
|
"rewards/margins": 0.0213623046875, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 2.92153711849868, |
|
"learning_rate": 1.5577380683611807e-08, |
|
"log_odds_chosen": 0.2562316954135895, |
|
"log_odds_ratio": -0.6595703363418579, |
|
"logits/chosen": -1.671875, |
|
"logits/rejected": -1.7265625, |
|
"logps/chosen": -1.0546875, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.198, |
|
"nll_loss": 1.09375, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.10546875, |
|
"rewards/margins": 0.0164794921875, |
|
"rewards/rejected": -0.12158203125, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 3.486180847986093, |
|
"learning_rate": 1.4404744222349358e-08, |
|
"log_odds_chosen": 0.48161619901657104, |
|
"log_odds_ratio": -0.605664074420929, |
|
"logits/chosen": -1.625, |
|
"logits/rejected": -1.6875, |
|
"logps/chosen": -1.03125, |
|
"logps/rejected": -1.359375, |
|
"loss": 1.172, |
|
"nll_loss": 1.078125, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.10302734375, |
|
"rewards/margins": 0.032470703125, |
|
"rewards/rejected": -0.1357421875, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 2.50535325154869, |
|
"learning_rate": 1.3276680286350594e-08, |
|
"log_odds_chosen": 0.31635743379592896, |
|
"log_odds_ratio": -0.641796886920929, |
|
"logits/chosen": -1.6875, |
|
"logits/rejected": -1.8203125, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.204, |
|
"nll_loss": 1.1015625, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.10693359375, |
|
"rewards/margins": 0.0203857421875, |
|
"rewards/rejected": -0.126953125, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 2.6381410134685392, |
|
"learning_rate": 1.2193402295270854e-08, |
|
"log_odds_chosen": 0.2996459901332855, |
|
"log_odds_ratio": -0.6700195074081421, |
|
"logits/chosen": -1.5234375, |
|
"logits/rejected": -1.671875, |
|
"logps/chosen": -1.03125, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.1512, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.10302734375, |
|
"rewards/margins": 0.0205078125, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 2.344444293869932, |
|
"learning_rate": 1.115511519566334e-08, |
|
"log_odds_chosen": 0.3412719666957855, |
|
"log_odds_ratio": -0.657519519329071, |
|
"logits/chosen": -1.6171875, |
|
"logits/rejected": -1.7578125, |
|
"logps/chosen": -0.98828125, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.1698, |
|
"nll_loss": 1.03125, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0986328125, |
|
"rewards/margins": 0.0244140625, |
|
"rewards/rejected": -0.123046875, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 2.277202951820151, |
|
"learning_rate": 1.01620154222051e-08, |
|
"log_odds_chosen": 0.14781494438648224, |
|
"log_odds_ratio": -0.708984375, |
|
"logits/chosen": -1.6171875, |
|
"logits/rejected": -1.6953125, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.203125, |
|
"loss": 1.1759, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11181640625, |
|
"rewards/margins": 0.0079345703125, |
|
"rewards/rejected": -0.1201171875, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 2.5811302552961943, |
|
"learning_rate": 9.214290860533242e-09, |
|
"log_odds_chosen": 0.22308655083179474, |
|
"log_odds_ratio": -0.6734374761581421, |
|
"logits/chosen": -1.6640625, |
|
"logits/rejected": -1.7421875, |
|
"logps/chosen": -1.1015625, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.205, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.1103515625, |
|
"rewards/margins": 0.0133056640625, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 2.4190439831152326, |
|
"learning_rate": 8.312120811698798e-09, |
|
"log_odds_chosen": 0.24127808213233948, |
|
"log_odds_ratio": -0.6958984136581421, |
|
"logits/chosen": -1.6875, |
|
"logits/rejected": -1.7734375, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.1753, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.1064453125, |
|
"rewards/margins": 0.017333984375, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 2.3733902102708897, |
|
"learning_rate": 7.455675958244422e-09, |
|
"log_odds_chosen": 0.2683349549770355, |
|
"log_odds_ratio": -0.660449206829071, |
|
"logits/chosen": -1.6484375, |
|
"logits/rejected": -1.765625, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.1939, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.107421875, |
|
"rewards/margins": 0.0184326171875, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 2.581589417454669, |
|
"learning_rate": 6.64511833191278e-09, |
|
"log_odds_chosen": 0.16912230849266052, |
|
"log_odds_ratio": -0.703125, |
|
"logits/chosen": -1.640625, |
|
"logits/rejected": -1.765625, |
|
"logps/chosen": -1.0546875, |
|
"logps/rejected": -1.1796875, |
|
"loss": 1.1997, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.10595703125, |
|
"rewards/margins": 0.01220703125, |
|
"rewards/rejected": -0.1181640625, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 2.8853418036737297, |
|
"learning_rate": 5.8806012829916985e-09, |
|
"log_odds_chosen": 0.3464111387729645, |
|
"log_odds_ratio": -0.615234375, |
|
"logits/chosen": -1.71875, |
|
"logits/rejected": -1.8125, |
|
"logps/chosen": -1.0078125, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.1569, |
|
"nll_loss": 1.078125, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.10107421875, |
|
"rewards/margins": 0.02294921875, |
|
"rewards/rejected": -0.1240234375, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 2.5807272249096913, |
|
"learning_rate": 5.162269451301576e-09, |
|
"log_odds_chosen": 0.16444091498851776, |
|
"log_odds_ratio": -0.6973632574081421, |
|
"logits/chosen": -1.78125, |
|
"logits/rejected": -1.7890625, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.2109375, |
|
"loss": 1.2152, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.10888671875, |
|
"rewards/margins": 0.01202392578125, |
|
"rewards/rejected": -0.12060546875, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 2.6925182997680515, |
|
"learning_rate": 4.490258738830771e-09, |
|
"log_odds_chosen": 0.2374267578125, |
|
"log_odds_ratio": -0.6766601800918579, |
|
"logits/chosen": -1.640625, |
|
"logits/rejected": -1.6875, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.3203125, |
|
"loss": 1.2008, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.115234375, |
|
"rewards/margins": 0.016357421875, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 3.01082383722649, |
|
"learning_rate": 3.864696284024249e-09, |
|
"log_odds_chosen": 0.38875120878219604, |
|
"log_odds_ratio": -0.6109374761581421, |
|
"logits/chosen": -1.59375, |
|
"logits/rejected": -1.71875, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.3359375, |
|
"loss": 1.1823, |
|
"nll_loss": 1.0859375, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.10595703125, |
|
"rewards/margins": 0.028076171875, |
|
"rewards/rejected": -0.1337890625, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 2.7770288933270755, |
|
"learning_rate": 3.285700437730077e-09, |
|
"log_odds_chosen": 0.35822755098342896, |
|
"log_odds_ratio": -0.6693359613418579, |
|
"logits/chosen": -1.5859375, |
|
"logits/rejected": -1.6796875, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.3515625, |
|
"loss": 1.1699, |
|
"nll_loss": 1.1171875, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1083984375, |
|
"rewards/margins": 0.0264892578125, |
|
"rewards/rejected": -0.134765625, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 2.5907890754339262, |
|
"learning_rate": 2.7533807408084973e-09, |
|
"log_odds_chosen": 0.17686156928539276, |
|
"log_odds_ratio": -0.698535144329071, |
|
"logits/chosen": -1.7421875, |
|
"logits/rejected": -1.8203125, |
|
"logps/chosen": -1.1015625, |
|
"logps/rejected": -1.2109375, |
|
"loss": 1.1861, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.10986328125, |
|
"rewards/margins": 0.01153564453125, |
|
"rewards/rejected": -0.12109375, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 2.4966778392886955, |
|
"learning_rate": 2.2678379034077877e-09, |
|
"log_odds_chosen": 0.2807373106479645, |
|
"log_odds_ratio": -0.6429687738418579, |
|
"logits/chosen": -1.640625, |
|
"logits/rejected": -1.7109375, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.2285, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.10888671875, |
|
"rewards/margins": 0.018310546875, |
|
"rewards/rejected": -0.126953125, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 2.3101513660116466, |
|
"learning_rate": 1.82916378591072e-09, |
|
"log_odds_chosen": 0.35594481229782104, |
|
"log_odds_ratio": -0.6319335699081421, |
|
"logits/chosen": -1.6875, |
|
"logits/rejected": -1.7890625, |
|
"logps/chosen": -1.0546875, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.1772, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.10546875, |
|
"rewards/margins": 0.0225830078125, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 2.550820385872949, |
|
"learning_rate": 1.4374413815555763e-09, |
|
"log_odds_chosen": 0.21019287407398224, |
|
"log_odds_ratio": -0.6913086175918579, |
|
"logits/chosen": -1.6484375, |
|
"logits/rejected": -1.6796875, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.25, |
|
"loss": 1.2096, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.0157470703125, |
|
"rewards/rejected": -0.125, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 2.3418264361304293, |
|
"learning_rate": 1.0927448007343188e-09, |
|
"log_odds_chosen": 0.2827392518520355, |
|
"log_odds_ratio": -0.6519531011581421, |
|
"logits/chosen": -1.671875, |
|
"logits/rejected": -1.796875, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.2057, |
|
"nll_loss": 1.1328125, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.10693359375, |
|
"rewards/margins": 0.0203857421875, |
|
"rewards/rejected": -0.126953125, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 3.0181770379881936, |
|
"learning_rate": 7.951392569717774e-10, |
|
"log_odds_chosen": 0.32861328125, |
|
"log_odds_ratio": -0.641406238079071, |
|
"logits/chosen": -1.71875, |
|
"logits/rejected": -1.7421875, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.296875, |
|
"loss": 1.2017, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.0206298828125, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 2.6598066518808965, |
|
"learning_rate": 5.446810545877423e-10, |
|
"log_odds_chosen": 0.29930418729782104, |
|
"log_odds_ratio": -0.6373046636581421, |
|
"logits/chosen": -1.703125, |
|
"logits/rejected": -1.7890625, |
|
"logps/chosen": -1.0703125, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.1938, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.107421875, |
|
"rewards/margins": 0.021240234375, |
|
"rewards/rejected": -0.12890625, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 2.612456535550154, |
|
"learning_rate": 3.414175780446227e-10, |
|
"log_odds_chosen": 0.27032470703125, |
|
"log_odds_ratio": -0.64501953125, |
|
"logits/chosen": -1.6015625, |
|
"logits/rejected": -1.671875, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.2178, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.10595703125, |
|
"rewards/margins": 0.0167236328125, |
|
"rewards/rejected": -0.12255859375, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 2.5663855569599123, |
|
"learning_rate": 1.8538728298292395e-10, |
|
"log_odds_chosen": 0.25556641817092896, |
|
"log_odds_ratio": -0.6698242425918579, |
|
"logits/chosen": -1.6171875, |
|
"logits/rejected": -1.6640625, |
|
"logps/chosen": -1.0546875, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.198, |
|
"nll_loss": 1.1015625, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.10546875, |
|
"rewards/margins": 0.01806640625, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 2.5581951001300336, |
|
"learning_rate": 7.661968894551174e-11, |
|
"log_odds_chosen": 0.3158630430698395, |
|
"log_odds_ratio": -0.636425793170929, |
|
"logits/chosen": -1.7109375, |
|
"logits/rejected": -1.828125, |
|
"logps/chosen": -1.0234375, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.1801, |
|
"nll_loss": 1.09375, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.1025390625, |
|
"rewards/margins": 0.0205078125, |
|
"rewards/rejected": -0.123046875, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 2.6003805241560958, |
|
"learning_rate": 1.513537379305152e-11, |
|
"log_odds_chosen": 0.20820312201976776, |
|
"log_odds_ratio": -0.664843738079071, |
|
"logits/chosen": -1.671875, |
|
"logits/rejected": -1.734375, |
|
"logps/chosen": -1.109375, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.2092, |
|
"nll_loss": 1.1171875, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.11083984375, |
|
"rewards/margins": 0.0135498046875, |
|
"rewards/rejected": -0.12451171875, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 2538, |
|
"total_flos": 0.0, |
|
"train_loss": 1.2334148878183206, |
|
"train_runtime": 17382.4906, |
|
"train_samples_per_second": 7.009, |
|
"train_steps_per_second": 0.146 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2538, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|