{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9965977492802931,
  "eval_steps": 50,
  "global_step": 238,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004187385501177702,
      "grad_norm": 0.41322922094683573,
      "learning_rate": 1.0416666666666666e-08,
      "logits/chosen": -8.027767181396484,
      "logits/rejected": -8.085662841796875,
      "logps/chosen": -315.0633544921875,
      "logps/rejected": -339.5360412597656,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.02093692750588851,
      "grad_norm": 0.3749268511204142,
      "learning_rate": 5.208333333333333e-08,
      "logits/chosen": -8.285957336425781,
      "logits/rejected": -8.363251686096191,
      "logps/chosen": -328.44573974609375,
      "logps/rejected": -294.34893798828125,
      "loss": 0.6931,
      "rewards/accuracies": 0.3984375,
      "rewards/chosen": 5.56520426471252e-05,
      "rewards/margins": -1.4014758562552743e-05,
      "rewards/rejected": 6.966680666664615e-05,
      "step": 5
    },
    {
      "epoch": 0.04187385501177702,
      "grad_norm": 0.3783566632626705,
      "learning_rate": 1.0416666666666667e-07,
      "logits/chosen": -7.978540897369385,
      "logits/rejected": -8.10208797454834,
      "logps/chosen": -298.9517822265625,
      "logps/rejected": -284.9400329589844,
      "loss": 0.6931,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": 4.78029360237997e-05,
      "rewards/margins": -1.2775728464475833e-05,
      "rewards/rejected": 6.0578669945243746e-05,
      "step": 10
    },
    {
      "epoch": 0.06281078251766553,
      "grad_norm": 0.365811115381345,
      "learning_rate": 1.5624999999999999e-07,
      "logits/chosen": -8.248689651489258,
      "logits/rejected": -8.808431625366211,
      "logps/chosen": -327.92962646484375,
      "logps/rejected": -271.6505432128906,
      "loss": 0.6931,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": 0.00027168082306161523,
      "rewards/margins": -3.7765556044178084e-05,
      "rewards/rejected": 0.0003094463318120688,
      "step": 15
    },
    {
      "epoch": 0.08374771002355404,
      "grad_norm": 0.38119222418916227,
      "learning_rate": 2.0833333333333333e-07,
      "logits/chosen": -8.283721923828125,
      "logits/rejected": -8.563437461853027,
      "logps/chosen": -270.73004150390625,
      "logps/rejected": -264.69805908203125,
      "loss": 0.6929,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": 0.0011856909841299057,
      "rewards/margins": 0.00045584110193885863,
      "rewards/rejected": 0.0007298499112948775,
      "step": 20
    },
    {
      "epoch": 0.10468463752944256,
      "grad_norm": 0.3717131307404656,
      "learning_rate": 2.604166666666667e-07,
      "logits/chosen": -8.32921028137207,
      "logits/rejected": -8.343989372253418,
      "logps/chosen": -285.73162841796875,
      "logps/rejected": -277.33428955078125,
      "loss": 0.6926,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.0029997099190950394,
      "rewards/margins": 0.001172252232208848,
      "rewards/rejected": 0.0018274573376402259,
      "step": 25
    },
    {
      "epoch": 0.12562156503533106,
      "grad_norm": 0.41761867743254133,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -8.842605590820312,
      "logits/rejected": -8.617189407348633,
      "logps/chosen": -271.97125244140625,
      "logps/rejected": -261.83111572265625,
      "loss": 0.6921,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 0.005933535750955343,
      "rewards/margins": 0.002372791524976492,
      "rewards/rejected": 0.0035607446916401386,
      "step": 30
    },
    {
      "epoch": 0.14655849254121958,
      "grad_norm": 0.3733417083873318,
      "learning_rate": 3.645833333333333e-07,
      "logits/chosen": -8.783945083618164,
      "logits/rejected": -8.695141792297363,
      "logps/chosen": -318.70452880859375,
      "logps/rejected": -294.575927734375,
      "loss": 0.6911,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.007537335157394409,
      "rewards/margins": 0.0031830158550292253,
      "rewards/rejected": 0.0043543195351958275,
      "step": 35
    },
    {
      "epoch": 0.16749542004710807,
      "grad_norm": 0.4685368279017966,
      "learning_rate": 4.1666666666666667e-07,
      "logits/chosen": -8.875042915344238,
      "logits/rejected": -9.194344520568848,
      "logps/chosen": -313.56195068359375,
      "logps/rejected": -266.4806823730469,
      "loss": 0.6898,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 0.005330189131200314,
      "rewards/margins": 0.00792426336556673,
      "rewards/rejected": -0.0025940726045519114,
      "step": 40
    },
    {
      "epoch": 0.1884323475529966,
      "grad_norm": 0.5132211674073757,
      "learning_rate": 4.6874999999999996e-07,
      "logits/chosen": -10.095111846923828,
      "logits/rejected": -10.34981632232666,
      "logps/chosen": -288.3968200683594,
      "logps/rejected": -279.54132080078125,
      "loss": 0.6871,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -0.006939026527106762,
      "rewards/margins": 0.010522229596972466,
      "rewards/rejected": -0.01746125891804695,
      "step": 45
    },
    {
      "epoch": 0.2093692750588851,
      "grad_norm": 0.670419675961168,
      "learning_rate": 4.998633143352315e-07,
      "logits/chosen": -11.052873611450195,
      "logits/rejected": -11.20246696472168,
      "logps/chosen": -314.8858337402344,
      "logps/rejected": -331.8305969238281,
      "loss": 0.6835,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -0.007041523698717356,
      "rewards/margins": 0.02735614776611328,
      "rewards/rejected": -0.0343976691365242,
      "step": 50
    },
    {
      "epoch": 0.2093692750588851,
      "eval_logits/chosen": -11.340270042419434,
      "eval_logits/rejected": -11.638143539428711,
      "eval_logps/chosen": -336.09466552734375,
      "eval_logps/rejected": -328.0053405761719,
      "eval_loss": 0.6815094351768494,
      "eval_rewards/accuracies": 0.656000018119812,
      "eval_rewards/chosen": -0.021773764863610268,
      "eval_rewards/margins": 0.02182256057858467,
      "eval_rewards/rejected": -0.04359632730484009,
      "eval_runtime": 150.9435,
      "eval_samples_per_second": 13.25,
      "eval_steps_per_second": 0.828,
      "step": 50
    },
    {
      "epoch": 0.23030620256477363,
      "grad_norm": 0.7268891633247492,
      "learning_rate": 4.983273165884096e-07,
      "logits/chosen": -11.379460334777832,
      "logits/rejected": -12.12627124786377,
      "logps/chosen": -355.1887512207031,
      "logps/rejected": -343.373046875,
      "loss": 0.6809,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.04076331481337547,
      "rewards/margins": 0.02745387889444828,
      "rewards/rejected": -0.0682171955704689,
      "step": 55
    },
    {
      "epoch": 0.2512431300706621,
      "grad_norm": 1.1651426700711027,
      "learning_rate": 4.950949914687024e-07,
      "logits/chosen": -12.13168716430664,
      "logits/rejected": -12.4894380569458,
      "logps/chosen": -346.7480163574219,
      "logps/rejected": -366.7667236328125,
      "loss": 0.674,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.06704720854759216,
      "rewards/margins": 0.03310731425881386,
      "rewards/rejected": -0.10015451908111572,
      "step": 60
    },
    {
      "epoch": 0.2721800575765506,
      "grad_norm": 1.3796192618478396,
      "learning_rate": 4.901884190342121e-07,
      "logits/chosen": -12.9432373046875,
      "logits/rejected": -13.217244148254395,
      "logps/chosen": -414.9393615722656,
      "logps/rejected": -431.466796875,
      "loss": 0.6675,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.10360779613256454,
      "rewards/margins": 0.05821988731622696,
      "rewards/rejected": -0.16182765364646912,
      "step": 65
    },
    {
      "epoch": 0.29311698508243916,
      "grad_norm": 1.3517064281641342,
      "learning_rate": 4.836411161498652e-07,
      "logits/chosen": -13.74645709991455,
      "logits/rejected": -14.12658977508545,
      "logps/chosen": -480.12762451171875,
      "logps/rejected": -511.24481201171875,
      "loss": 0.6615,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.1500854343175888,
      "rewards/margins": 0.07933008670806885,
      "rewards/rejected": -0.22941550612449646,
      "step": 70
    },
    {
      "epoch": 0.31405391258832765,
      "grad_norm": 2.2837508840523095,
      "learning_rate": 4.754978075332398e-07,
      "logits/chosen": -15.218640327453613,
      "logits/rejected": -15.693450927734375,
      "logps/chosen": -540.690185546875,
      "logps/rejected": -628.9052124023438,
      "loss": 0.6596,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.23948459327220917,
      "rewards/margins": 0.11436040699481964,
      "rewards/rejected": -0.3538450300693512,
      "step": 75
    },
    {
      "epoch": 0.33499084009421615,
      "grad_norm": 1.7535426886006145,
      "learning_rate": 4.6581412023939346e-07,
      "logits/chosen": -16.318897247314453,
      "logits/rejected": -16.302444458007812,
      "logps/chosen": -605.7731323242188,
      "logps/rejected": -758.9752807617188,
      "loss": 0.6472,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.3184322416782379,
      "rewards/margins": 0.16458377242088318,
      "rewards/rejected": -0.4830159544944763,
      "step": 80
    },
    {
      "epoch": 0.3559277676001047,
      "grad_norm": 2.6529186768831545,
      "learning_rate": 4.546562036716731e-07,
      "logits/chosen": -16.481460571289062,
      "logits/rejected": -16.788829803466797,
      "logps/chosen": -638.745849609375,
      "logps/rejected": -767.808837890625,
      "loss": 0.6476,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.3655020594596863,
      "rewards/margins": 0.1500101387500763,
      "rewards/rejected": -0.5155122876167297,
      "step": 85
    },
    {
      "epoch": 0.3768646951059932,
      "grad_norm": 2.72153138496113,
      "learning_rate": 4.4210027771421476e-07,
      "logits/chosen": -17.021060943603516,
      "logits/rejected": -17.08652114868164,
      "logps/chosen": -752.3135375976562,
      "logps/rejected": -926.8680419921875,
      "loss": 0.6236,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.4733971953392029,
      "rewards/margins": 0.16153177618980408,
      "rewards/rejected": -0.6349289417266846,
      "step": 90
    },
    {
      "epoch": 0.39780162261188173,
      "grad_norm": 2.6592008129120215,
      "learning_rate": 4.282321120728493e-07,
      "logits/chosen": -16.358983993530273,
      "logits/rejected": -16.262645721435547,
      "logps/chosen": -745.8214111328125,
      "logps/rejected": -967.6781005859375,
      "loss": 0.635,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.457266241312027,
      "rewards/margins": 0.20888647437095642,
      "rewards/rejected": -0.6661526560783386,
      "step": 95
    },
    {
      "epoch": 0.4187385501177702,
      "grad_norm": 3.1808401845251506,
      "learning_rate": 4.1314644038104213e-07,
      "logits/chosen": -15.009060859680176,
      "logits/rejected": -15.336482048034668,
      "logps/chosen": -740.966064453125,
      "logps/rejected": -862.05712890625,
      "loss": 0.6243,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.45071443915367126,
      "rewards/margins": 0.12646019458770752,
      "rewards/rejected": -0.5771746635437012,
      "step": 100
    },
    {
      "epoch": 0.4187385501177702,
      "eval_logits/chosen": -15.678736686706543,
      "eval_logits/rejected": -15.50975227355957,
      "eval_logps/chosen": -838.1255493164062,
      "eval_logps/rejected": -1037.213623046875,
      "eval_loss": 0.6229372024536133,
      "eval_rewards/accuracies": 0.6600000262260437,
      "eval_rewards/chosen": -0.5238046050071716,
      "eval_rewards/margins": 0.229000061750412,
      "eval_rewards/rejected": -0.752804696559906,
      "eval_runtime": 150.6688,
      "eval_samples_per_second": 13.274,
      "eval_steps_per_second": 0.83,
      "step": 100
    },
    {
      "epoch": 0.4396754776236587,
      "grad_norm": 3.6393194741175043,
      "learning_rate": 3.9694631307311825e-07,
      "logits/chosen": -15.765314102172852,
      "logits/rejected": -15.19013500213623,
      "logps/chosen": -911.4410400390625,
      "logps/rejected": -1072.3721923828125,
      "loss": 0.6298,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.5906545519828796,
      "rewards/margins": 0.2006884515285492,
      "rewards/rejected": -0.7913430333137512,
      "step": 105
    },
    {
      "epoch": 0.46061240512954726,
      "grad_norm": 4.619726408628701,
      "learning_rate": 3.797423934453038e-07,
      "logits/chosen": -15.227154731750488,
      "logits/rejected": -15.2573881149292,
      "logps/chosen": -897.2864379882812,
      "logps/rejected": -1104.17578125,
      "loss": 0.6083,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.6274054050445557,
      "rewards/margins": 0.2075110673904419,
      "rewards/rejected": -0.8349164724349976,
      "step": 110
    },
    {
      "epoch": 0.48154933263543576,
      "grad_norm": 4.936193430576564,
      "learning_rate": 3.6165220171320164e-07,
      "logits/chosen": -15.75273609161377,
      "logits/rejected": -15.582818984985352,
      "logps/chosen": -940.6126098632812,
      "logps/rejected": -1408.8499755859375,
      "loss": 0.6105,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -0.6616717576980591,
      "rewards/margins": 0.451225221157074,
      "rewards/rejected": -1.1128969192504883,
      "step": 115
    },
    {
      "epoch": 0.5024862601413242,
      "grad_norm": 4.0220824325322155,
      "learning_rate": 3.4279931222955516e-07,
      "logits/chosen": -14.798166275024414,
      "logits/rejected": -14.821731567382812,
      "logps/chosen": -762.6677856445312,
      "logps/rejected": -1019.94677734375,
      "loss": 0.5999,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.48045119643211365,
      "rewards/margins": 0.26533186435699463,
      "rewards/rejected": -0.7457829713821411,
      "step": 120
    },
    {
      "epoch": 0.5234231876472127,
      "grad_norm": 5.124454609764516,
      "learning_rate": 3.233125093461162e-07,
      "logits/chosen": -14.470125198364258,
      "logits/rejected": -14.480206489562988,
      "logps/chosen": -922.26708984375,
      "logps/rejected": -1210.3658447265625,
      "loss": 0.6,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.5987042188644409,
      "rewards/margins": 0.3204038441181183,
      "rewards/rejected": -0.9191079139709473,
      "step": 125
    },
    {
      "epoch": 0.5443601151531012,
      "grad_norm": 13.38213845011268,
      "learning_rate": 3.033249076859367e-07,
      "logits/chosen": -14.465197563171387,
      "logits/rejected": -14.281991958618164,
      "logps/chosen": -1182.00732421875,
      "logps/rejected": -1630.92333984375,
      "loss": 0.5923,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.8785597085952759,
      "rewards/margins": 0.4840659201145172,
      "rewards/rejected": -1.3626257181167603,
      "step": 130
    },
    {
      "epoch": 0.5652970426589898,
      "grad_norm": 4.505485837442961,
      "learning_rate": 2.8297304283551724e-07,
      "logits/chosen": -13.903594970703125,
      "logits/rejected": -13.29607105255127,
      "logps/chosen": -865.8912963867188,
      "logps/rejected": -1264.981201171875,
      "loss": 0.6025,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.5570029020309448,
      "rewards/margins": 0.4205314517021179,
      "rewards/rejected": -0.9775344729423523,
      "step": 135
    },
    {
      "epoch": 0.5862339701648783,
      "grad_norm": 7.955445077360379,
      "learning_rate": 2.6239593866830556e-07,
      "logits/chosen": -14.410888671875,
      "logits/rejected": -13.651044845581055,
      "logps/chosen": -876.46044921875,
      "logps/rejected": -1102.4383544921875,
      "loss": 0.5961,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.5745535492897034,
      "rewards/margins": 0.26768410205841064,
      "rewards/rejected": -0.842237651348114,
      "step": 140
    },
    {
      "epoch": 0.6071708976707668,
      "grad_norm": 6.562592794957003,
      "learning_rate": 2.4173415767067293e-07,
      "logits/chosen": -14.869283676147461,
      "logits/rejected": -14.766156196594238,
      "logps/chosen": -1157.8944091796875,
      "logps/rejected": -1581.955810546875,
      "loss": 0.5867,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.866470992565155,
      "rewards/margins": 0.4261693060398102,
      "rewards/rejected": -1.2926403284072876,
      "step": 145
    },
    {
      "epoch": 0.6281078251766553,
      "grad_norm": 5.969306865423692,
      "learning_rate": 2.2112884075760347e-07,
      "logits/chosen": -15.083928108215332,
      "logits/rejected": -14.58642578125,
      "logps/chosen": -949.1544189453125,
      "logps/rejected": -1470.0855712890625,
      "loss": 0.5625,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -0.628490686416626,
      "rewards/margins": 0.5490113496780396,
      "rewards/rejected": -1.177502155303955,
      "step": 150
    },
    {
      "epoch": 0.6281078251766553,
      "eval_logits/chosen": -15.179731369018555,
      "eval_logits/rejected": -14.774624824523926,
      "eval_logps/chosen": -1032.8834228515625,
      "eval_logps/rejected": -1471.7362060546875,
      "eval_loss": 0.5793285965919495,
      "eval_rewards/accuracies": 0.6880000233650208,
      "eval_rewards/chosen": -0.7185624241828918,
      "eval_rewards/margins": 0.4687648415565491,
      "eval_rewards/rejected": -1.187327265739441,
      "eval_runtime": 150.2684,
      "eval_samples_per_second": 13.31,
      "eval_steps_per_second": 0.832,
      "step": 150
    },
    {
      "epoch": 0.6490447526825438,
      "grad_norm": 11.88961718187316,
      "learning_rate": 2.0072074313712993e-07,
      "logits/chosen": -15.340092658996582,
      "logits/rejected": -15.312405586242676,
      "logps/chosen": -1098.570068359375,
      "logps/rejected": -1679.193603515625,
      "loss": 0.5753,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -0.7703229188919067,
      "rewards/margins": 0.6483911275863647,
      "rewards/rejected": -1.418714165687561,
      "step": 155
    },
    {
      "epoch": 0.6699816801884323,
      "grad_norm": 8.601486869561926,
      "learning_rate": 1.806492728095389e-07,
      "logits/chosen": -15.2809476852417,
      "logits/rejected": -14.993128776550293,
      "logps/chosen": -1034.3973388671875,
      "logps/rejected": -1639.748779296875,
      "loss": 0.5623,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.7416967153549194,
      "rewards/margins": 0.634353518486023,
      "rewards/rejected": -1.376050353050232,
      "step": 160
    },
    {
      "epoch": 0.6909186076943209,
      "grad_norm": 9.897007035455301,
      "learning_rate": 1.6105153826937085e-07,
      "logits/chosen": -14.975976943969727,
      "logits/rejected": -14.773465156555176,
      "logps/chosen": -1030.3551025390625,
      "logps/rejected": -1616.5765380859375,
      "loss": 0.5668,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.7377808690071106,
      "rewards/margins": 0.6023792028427124,
      "rewards/rejected": -1.3401601314544678,
      "step": 165
    },
    {
      "epoch": 0.7118555352002094,
      "grad_norm": 10.944418149122821,
      "learning_rate": 1.420614119153768e-07,
      "logits/chosen": -15.038368225097656,
      "logits/rejected": -14.671664237976074,
      "logps/chosen": -1127.2037353515625,
      "logps/rejected": -1648.9619140625,
      "loss": 0.5612,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.8304765820503235,
      "rewards/margins": 0.5383009314537048,
      "rewards/rejected": -1.3687775135040283,
      "step": 170
    },
    {
      "epoch": 0.7327924627060979,
      "grad_norm": 7.272876700396834,
      "learning_rate": 1.2380861556628914e-07,
      "logits/chosen": -14.817059516906738,
      "logits/rejected": -14.438855171203613,
      "logps/chosen": -1002.3743896484375,
      "logps/rejected": -1483.3427734375,
      "loss": 0.545,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.7164356112480164,
      "rewards/margins": 0.5084205865859985,
      "rewards/rejected": -1.2248561382293701,
      "step": 175
    },
    {
      "epoch": 0.7537293902119864,
      "grad_norm": 7.940966891497204,
      "learning_rate": 1.064178343292641e-07,
      "logits/chosen": -14.983675956726074,
      "logits/rejected": -14.386784553527832,
      "logps/chosen": -991.7278442382812,
      "logps/rejected": -1572.2987060546875,
      "loss": 0.5762,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.6495895981788635,
      "rewards/margins": 0.6349653601646423,
      "rewards/rejected": -1.2845550775527954,
      "step": 180
    },
    {
      "epoch": 0.7746663177178749,
      "grad_norm": 8.710562600121326,
      "learning_rate": 9.000786487417084e-08,
      "logits/chosen": -14.231167793273926,
      "logits/rejected": -13.895710945129395,
      "logps/chosen": -935.4019775390625,
      "logps/rejected": -1424.656005859375,
      "loss": 0.5656,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.6355662941932678,
      "rewards/margins": 0.5334969758987427,
      "rewards/rejected": -1.1690632104873657,
      "step": 185
    },
    {
      "epoch": 0.7956032452237635,
      "grad_norm": 6.761392853792158,
      "learning_rate": 7.469080393187785e-08,
      "logits/chosen": -14.6929292678833,
      "logits/rejected": -13.966386795043945,
      "logps/chosen": -1072.4215087890625,
      "logps/rejected": -1634.0445556640625,
      "loss": 0.568,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.7651950120925903,
      "rewards/margins": 0.58966064453125,
      "rewards/rejected": -1.3548556566238403,
      "step": 190
    },
    {
      "epoch": 0.816540172729652,
      "grad_norm": 9.876582631533086,
      "learning_rate": 6.057128255991637e-08,
      "logits/chosen": -14.620927810668945,
      "logits/rejected": -14.377177238464355,
      "logps/chosen": -1004.9943237304688,
      "logps/rejected": -1455.802001953125,
      "loss": 0.5626,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.6936627626419067,
      "rewards/margins": 0.4617386758327484,
      "rewards/rejected": -1.1554014682769775,
      "step": 195
    },
    {
      "epoch": 0.8374771002355405,
      "grad_norm": 8.039290948138406,
      "learning_rate": 4.774575140626316e-08,
      "logits/chosen": -15.10753345489502,
      "logits/rejected": -14.191171646118164,
      "logps/chosen": -837.1350708007812,
      "logps/rejected": -1408.885498046875,
      "loss": 0.5699,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.5800159573554993,
      "rewards/margins": 0.580711841583252,
      "rewards/rejected": -1.1607277393341064,
      "step": 200
    },
    {
      "epoch": 0.8374771002355405,
      "eval_logits/chosen": -14.668434143066406,
      "eval_logits/rejected": -14.186065673828125,
      "eval_logps/chosen": -958.5824584960938,
      "eval_logps/rejected": -1434.33349609375,
      "eval_loss": 0.5646860599517822,
      "eval_rewards/accuracies": 0.6919999718666077,
      "eval_rewards/chosen": -0.6442615985870361,
      "eval_rewards/margins": 0.5056628584861755,
      "eval_rewards/rejected": -1.149924397468567,
      "eval_runtime": 149.7174,
      "eval_samples_per_second": 13.358,
      "eval_steps_per_second": 0.835,
      "step": 200
    },
    {
      "epoch": 0.8584140277414289,
      "grad_norm": 7.7753080838732265,
      "learning_rate": 3.6301821853615216e-08,
      "logits/chosen": -14.531840324401855,
      "logits/rejected": -13.954656600952148,
      "logps/chosen": -979.337890625,
      "logps/rejected": -1347.329833984375,
      "loss": 0.5735,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -0.6474758982658386,
      "rewards/margins": 0.4113900065422058,
      "rewards/rejected": -1.0588659048080444,
      "step": 205
    },
    {
      "epoch": 0.8793509552473174,
      "grad_norm": 8.900926616124899,
      "learning_rate": 2.631766754480913e-08,
      "logits/chosen": -15.085081100463867,
      "logits/rejected": -14.495912551879883,
      "logps/chosen": -993.9776611328125,
      "logps/rejected": -1629.8128662109375,
      "loss": 0.554,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.7016497850418091,
      "rewards/margins": 0.627029299736023,
      "rewards/rejected": -1.328679084777832,
      "step": 210
    },
    {
      "epoch": 0.9002878827532059,
      "grad_norm": 7.108989309187613,
      "learning_rate": 1.786149037757326e-08,
      "logits/chosen": -14.463391304016113,
      "logits/rejected": -14.160985946655273,
      "logps/chosen": -1008.32080078125,
      "logps/rejected": -1559.0267333984375,
      "loss": 0.5445,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.678644061088562,
      "rewards/margins": 0.6104253530502319,
      "rewards/rejected": -1.289069414138794,
      "step": 215
    },
    {
      "epoch": 0.9212248102590945,
      "grad_norm": 9.175335683582457,
      "learning_rate": 1.0991054616410588e-08,
      "logits/chosen": -14.525983810424805,
      "logits/rejected": -14.118609428405762,
      "logps/chosen": -938.26904296875,
      "logps/rejected": -1491.09814453125,
      "loss": 0.5515,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.6404693126678467,
      "rewards/margins": 0.5899735689163208,
      "rewards/rejected": -1.230442762374878,
      "step": 220
    },
    {
      "epoch": 0.942161737764983,
      "grad_norm": 9.106722734484554,
      "learning_rate": 5.753292304100182e-09,
      "logits/chosen": -14.361761093139648,
      "logits/rejected": -14.124635696411133,
      "logps/chosen": -928.7554931640625,
      "logps/rejected": -1761.9857177734375,
      "loss": 0.5522,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.632331371307373,
      "rewards/margins": 0.840011477470398,
      "rewards/rejected": -1.472342848777771,
      "step": 225
    },
    {
      "epoch": 0.9630986652708715,
      "grad_norm": 9.134170824690674,
      "learning_rate": 2.1839826682562014e-09,
      "logits/chosen": -14.802205085754395,
      "logits/rejected": -13.968228340148926,
      "logps/chosen": -993.044921875,
      "logps/rejected": -1571.5948486328125,
      "loss": 0.5697,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.6945916414260864,
      "rewards/margins": 0.6186043620109558,
      "rewards/rejected": -1.3131959438323975,
      "step": 230
    },
    {
      "epoch": 0.98403559277676,
      "grad_norm": 7.825653316640832,
      "learning_rate": 3.075077129238157e-10,
      "logits/chosen": -14.510324478149414,
      "logits/rejected": -14.37182903289795,
      "logps/chosen": -1027.906982421875,
      "logps/rejected": -1584.23828125,
      "loss": 0.5535,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.7289544343948364,
      "rewards/margins": 0.562315821647644,
      "rewards/rejected": -1.29127037525177,
      "step": 235
    },
    {
      "epoch": 0.9965977492802931,
      "step": 238,
      "total_flos": 0.0,
      "train_loss": 0.6157421479706003,
      "train_runtime": 11733.4361,
      "train_samples_per_second": 5.21,
      "train_steps_per_second": 0.02
    }
  ],
  "logging_steps": 5,
  "max_steps": 238,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|