{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9992429977289932,
  "eval_steps": 500,
  "global_step": 165,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "eta": 0.0010000000474974513,
      "grad_norm": 20.876303783345758,
      "learning_rate": 2.941176470588235e-08,
      "logits/chosen": -2.3177952766418457,
      "logits/rejected": -2.3340206146240234,
      "logps/chosen": -185.6923828125,
      "logps/pi_response": -319.5942687988281,
      "logps/ref_response": -319.5942687988281,
      "logps/rejected": -187.8241729736328,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "eta": 0.0010000000474974513,
      "grad_norm": 21.2222058398942,
      "learning_rate": 2.941176470588235e-07,
      "logits/chosen": -2.3264760971069336,
      "logits/rejected": -2.349726676940918,
      "logps/chosen": -202.65338134765625,
      "logps/pi_response": -336.1994934082031,
      "logps/ref_response": -334.70989990234375,
      "logps/rejected": -215.5554962158203,
      "loss": 0.6929,
      "rewards/accuracies": 0.4340277910232544,
      "rewards/chosen": -0.010369324125349522,
      "rewards/margins": -0.00030602168408222497,
      "rewards/rejected": -0.010063301771879196,
      "step": 10
    },
    {
      "epoch": 0.12,
      "eta": 0.0010000000474974513,
      "grad_norm": 26.302559308113064,
      "learning_rate": 4.994932636402031e-07,
      "logits/chosen": -2.1999268531799316,
      "logits/rejected": -2.2739574909210205,
      "logps/chosen": -218.8612823486328,
      "logps/pi_response": -366.29144287109375,
      "logps/ref_response": -332.369140625,
      "logps/rejected": -232.3873291015625,
      "loss": 0.6878,
      "rewards/accuracies": 0.559374988079071,
      "rewards/chosen": -0.22311308979988098,
      "rewards/margins": 0.029964953660964966,
      "rewards/rejected": -0.25307804346084595,
      "step": 20
    },
    {
      "epoch": 0.18,
      "eta": 0.0010000000474974513,
      "grad_norm": 30.864420908439598,
      "learning_rate": 4.905416503522123e-07,
      "logits/chosen": -2.021660804748535,
      "logits/rejected": -2.0993196964263916,
      "logps/chosen": -218.07278442382812,
      "logps/pi_response": -377.771240234375,
      "logps/ref_response": -329.1590881347656,
      "logps/rejected": -238.0624542236328,
      "loss": 0.6867,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.26833224296569824,
      "rewards/margins": 0.06877782940864563,
      "rewards/rejected": -0.33711010217666626,
      "step": 30
    },
    {
      "epoch": 0.24,
      "eta": 0.0010000000474974513,
      "grad_norm": 31.326684541726582,
      "learning_rate": 4.707922373336523e-07,
      "logits/chosen": -1.9741312265396118,
      "logits/rejected": -2.056077718734741,
      "logps/chosen": -245.257080078125,
      "logps/pi_response": -370.0182800292969,
      "logps/ref_response": -330.54022216796875,
      "logps/rejected": -257.7065734863281,
      "loss": 0.6926,
      "rewards/accuracies": 0.5531250238418579,
      "rewards/chosen": -0.3541467487812042,
      "rewards/margins": 0.01397724449634552,
      "rewards/rejected": -0.36812400817871094,
      "step": 40
    },
    {
      "epoch": 0.3,
      "eta": 0.0010000000474974513,
      "grad_norm": 20.607657828492968,
      "learning_rate": 4.4113156629677313e-07,
      "logits/chosen": -2.001324415206909,
      "logits/rejected": -2.078733444213867,
      "logps/chosen": -261.01226806640625,
      "logps/pi_response": -389.60235595703125,
      "logps/ref_response": -332.9416809082031,
      "logps/rejected": -265.627197265625,
      "loss": 0.6907,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -0.4424280524253845,
      "rewards/margins": 0.030498838052153587,
      "rewards/rejected": -0.4729268550872803,
      "step": 50
    },
    {
      "epoch": 0.36,
      "eta": 0.0010000000474974513,
      "grad_norm": 22.30008036950616,
      "learning_rate": 4.0289109058972283e-07,
      "logits/chosen": -1.991970419883728,
      "logits/rejected": -1.952013373374939,
      "logps/chosen": -233.18075561523438,
      "logps/pi_response": -373.1258850097656,
      "logps/ref_response": -330.88116455078125,
      "logps/rejected": -238.22116088867188,
      "loss": 0.6848,
      "rewards/accuracies": 0.5218750238418579,
      "rewards/chosen": -0.2081402838230133,
      "rewards/margins": 0.026575928553938866,
      "rewards/rejected": -0.23471620678901672,
      "step": 60
    },
    {
      "epoch": 0.42,
      "eta": 0.0010000000474974513,
      "grad_norm": 27.064787151807383,
      "learning_rate": 3.577874068920446e-07,
      "logits/chosen": -1.8990647792816162,
      "logits/rejected": -1.8150758743286133,
      "logps/chosen": -249.4824676513672,
      "logps/pi_response": -400.8092346191406,
      "logps/ref_response": -332.44757080078125,
      "logps/rejected": -266.2792053222656,
      "loss": 0.6848,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.4582160413265228,
      "rewards/margins": 0.06068809702992439,
      "rewards/rejected": -0.5189040899276733,
      "step": 70
    },
    {
      "epoch": 0.48,
      "eta": 0.0010000000474974513,
      "grad_norm": 24.274894592001466,
      "learning_rate": 3.078451980100854e-07,
      "logits/chosen": -1.836126685142517,
      "logits/rejected": -1.9199883937835693,
      "logps/chosen": -258.8869934082031,
      "logps/pi_response": -400.2823181152344,
      "logps/ref_response": -320.3209533691406,
      "logps/rejected": -275.9152526855469,
      "loss": 0.686,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": -0.6283607482910156,
      "rewards/margins": 0.07945629954338074,
      "rewards/rejected": -0.707817018032074,
      "step": 80
    },
    {
      "epoch": 0.55,
      "eta": 0.0010000000474974513,
      "grad_norm": 20.666129111777522,
      "learning_rate": 2.553063458334059e-07,
      "logits/chosen": -1.9520018100738525,
      "logits/rejected": -1.8479654788970947,
      "logps/chosen": -227.83200073242188,
      "logps/pi_response": -360.124755859375,
      "logps/ref_response": -310.69232177734375,
      "logps/rejected": -236.7330780029297,
      "loss": 0.6897,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": -0.32836586236953735,
      "rewards/margins": 0.03321167081594467,
      "rewards/rejected": -0.36157751083374023,
      "step": 90
    },
    {
      "epoch": 0.61,
      "eta": 0.0010000000474974513,
      "grad_norm": 22.034281983565204,
      "learning_rate": 2.0252929432814287e-07,
      "logits/chosen": -1.8997596502304077,
      "logits/rejected": -2.0015318393707275,
      "logps/chosen": -244.6271514892578,
      "logps/pi_response": -392.9565734863281,
      "logps/ref_response": -338.5196533203125,
      "logps/rejected": -259.35247802734375,
      "loss": 0.6864,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.3552432060241699,
      "rewards/margins": 0.03130738437175751,
      "rewards/rejected": -0.3865506052970886,
      "step": 100
    },
    {
      "epoch": 0.67,
      "eta": 0.0010000000474974513,
      "grad_norm": 21.117382385719814,
      "learning_rate": 1.5188318011445906e-07,
      "logits/chosen": -1.6617428064346313,
      "logits/rejected": -1.7959445714950562,
      "logps/chosen": -265.4134216308594,
      "logps/pi_response": -426.07366943359375,
      "logps/ref_response": -338.72222900390625,
      "logps/rejected": -281.7206115722656,
      "loss": 0.6857,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.6667092442512512,
      "rewards/margins": 0.049205854535102844,
      "rewards/rejected": -0.7159152030944824,
      "step": 110
    },
    {
      "epoch": 0.73,
      "eta": 0.0010000000474974513,
      "grad_norm": 24.334142813588905,
      "learning_rate": 1.0564148305586295e-07,
      "logits/chosen": -1.6403900384902954,
      "logits/rejected": -1.6945642232894897,
      "logps/chosen": -272.6028747558594,
      "logps/pi_response": -434.99560546875,
      "logps/ref_response": -341.4128112792969,
      "logps/rejected": -284.1662902832031,
      "loss": 0.6843,
      "rewards/accuracies": 0.5218750238418579,
      "rewards/chosen": -0.6931222677230835,
      "rewards/margins": 0.0071820830926299095,
      "rewards/rejected": -0.7003043293952942,
      "step": 120
    },
    {
      "epoch": 0.79,
      "eta": 0.0010000000474974513,
      "grad_norm": 23.99305159279906,
      "learning_rate": 6.587997083462196e-08,
      "logits/chosen": -1.64206063747406,
      "logits/rejected": -1.6930261850357056,
      "logps/chosen": -256.47344970703125,
      "logps/pi_response": -414.23004150390625,
      "logps/ref_response": -325.36041259765625,
      "logps/rejected": -267.9649658203125,
      "loss": 0.6719,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.5717727541923523,
      "rewards/margins": 0.054326076060533524,
      "rewards/rejected": -0.6260988116264343,
      "step": 130
    },
    {
      "epoch": 0.85,
      "eta": 0.0010000000474974513,
      "grad_norm": 28.645522892437054,
      "learning_rate": 3.438351873250492e-08,
      "logits/chosen": -1.672249436378479,
      "logits/rejected": -1.775399923324585,
      "logps/chosen": -246.2197723388672,
      "logps/pi_response": -418.77423095703125,
      "logps/ref_response": -333.43292236328125,
      "logps/rejected": -264.335693359375,
      "loss": 0.6812,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.45865583419799805,
      "rewards/margins": 0.0741017609834671,
      "rewards/rejected": -0.5327576398849487,
      "step": 140
    },
    {
      "epoch": 0.91,
      "eta": 0.0010000000474974513,
      "grad_norm": 22.513812489103497,
      "learning_rate": 1.256598743236703e-08,
      "logits/chosen": -1.620234489440918,
      "logits/rejected": -1.7270009517669678,
      "logps/chosen": -238.0688934326172,
      "logps/pi_response": -416.77471923828125,
      "logps/ref_response": -327.03955078125,
      "logps/rejected": -263.03558349609375,
      "loss": 0.6734,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.5202730298042297,
      "rewards/margins": 0.08573532104492188,
      "rewards/rejected": -0.6060083508491516,
      "step": 150
    },
    {
      "epoch": 0.97,
      "eta": 0.0010000000474974513,
      "grad_norm": 20.44841408463752,
      "learning_rate": 1.406755487774386e-09,
      "logits/chosen": -1.6769297122955322,
      "logits/rejected": -1.6653327941894531,
      "logps/chosen": -257.4883728027344,
      "logps/pi_response": -424.09088134765625,
      "logps/ref_response": -330.8675842285156,
      "logps/rejected": -271.10101318359375,
      "loss": 0.6702,
      "rewards/accuracies": 0.5843750238418579,
      "rewards/chosen": -0.5272750854492188,
      "rewards/margins": 0.07395146042108536,
      "rewards/rejected": -0.6012265086174011,
      "step": 160
    },
    {
      "epoch": 1.0,
      "step": 165,
      "total_flos": 0.0,
      "train_loss": 0.684309244517124,
      "train_runtime": 33858.6959,
      "train_samples_per_second": 0.624,
      "train_steps_per_second": 0.005
    }
  ],
  "logging_steps": 10,
  "max_steps": 165,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}