|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9765925925925925, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.047407407407407405, |
|
"grad_norm": 1313.8543701171875, |
|
"learning_rate": 7.8125e-06, |
|
"log_odds_chosen": 1.6327810287475586, |
|
"log_odds_ratio": -11.146058082580566, |
|
"logps/chosen": -21.960407257080078, |
|
"logps/rejected": -23.59285545349121, |
|
"loss": 881.0415, |
|
"nll_loss": 8.637601852416992, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": -10.980203628540039, |
|
"rewards/margins": 0.8162234425544739, |
|
"rewards/rejected": -11.796427726745605, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09481481481481481, |
|
"grad_norm": 958.0737915039062, |
|
"learning_rate": 1.5625e-05, |
|
"log_odds_chosen": 1.0805047750473022, |
|
"log_odds_ratio": -8.258191108703613, |
|
"logps/chosen": -19.551382064819336, |
|
"logps/rejected": -20.631277084350586, |
|
"loss": 757.7502, |
|
"nll_loss": 7.813385009765625, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": -9.775691032409668, |
|
"rewards/margins": 0.5399460792541504, |
|
"rewards/rejected": -10.315638542175293, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 1503.8668212890625, |
|
"learning_rate": 2.34375e-05, |
|
"log_odds_chosen": 4.2659807205200195, |
|
"log_odds_ratio": -6.569916725158691, |
|
"logps/chosen": -17.423053741455078, |
|
"logps/rejected": -21.687484741210938, |
|
"loss": 662.6386, |
|
"nll_loss": 7.951455116271973, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -8.711526870727539, |
|
"rewards/margins": 2.132215976715088, |
|
"rewards/rejected": -10.843742370605469, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.18962962962962962, |
|
"grad_norm": 5612.318359375, |
|
"learning_rate": 3.125e-05, |
|
"log_odds_chosen": 0.5963099598884583, |
|
"log_odds_ratio": -4.979976177215576, |
|
"logps/chosen": -11.640253067016602, |
|
"logps/rejected": -12.238527297973633, |
|
"loss": 452.0445, |
|
"nll_loss": 6.280893802642822, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -5.820126533508301, |
|
"rewards/margins": 0.29913684725761414, |
|
"rewards/rejected": -6.119263648986816, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.23703703703703705, |
|
"grad_norm": 448.8934326171875, |
|
"learning_rate": 3.90625e-05, |
|
"log_odds_chosen": 0.5738601684570312, |
|
"log_odds_ratio": -0.9728918075561523, |
|
"logps/chosen": -2.5826029777526855, |
|
"logps/rejected": -3.132516860961914, |
|
"loss": 97.8864, |
|
"nll_loss": 3.112938404083252, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.2913014888763428, |
|
"rewards/margins": 0.27495700120925903, |
|
"rewards/rejected": -1.566258430480957, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.28444444444444444, |
|
"grad_norm": 193.44644165039062, |
|
"learning_rate": 4.6875e-05, |
|
"log_odds_chosen": 0.14733314514160156, |
|
"log_odds_ratio": -0.8188334703445435, |
|
"logps/chosen": -1.7538366317749023, |
|
"logps/rejected": -1.880671739578247, |
|
"loss": 68.7921, |
|
"nll_loss": 2.9175949096679688, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": -0.8769183158874512, |
|
"rewards/margins": 0.0634174793958664, |
|
"rewards/rejected": -0.9403358697891235, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33185185185185184, |
|
"grad_norm": 169.11569213867188, |
|
"learning_rate": 4.998613757348784e-05, |
|
"log_odds_chosen": 0.1972377598285675, |
|
"log_odds_ratio": -0.7700116038322449, |
|
"logps/chosen": -1.5413159132003784, |
|
"logps/rejected": -1.7179752588272095, |
|
"loss": 61.1452, |
|
"nll_loss": 2.3813323974609375, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -0.7706579566001892, |
|
"rewards/margins": 0.08832962810993195, |
|
"rewards/rejected": -0.8589876294136047, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.37925925925925924, |
|
"grad_norm": 250.76620483398438, |
|
"learning_rate": 4.990147841143462e-05, |
|
"log_odds_chosen": 0.23389403522014618, |
|
"log_odds_ratio": -0.7099635004997253, |
|
"logps/chosen": -1.4198099374771118, |
|
"logps/rejected": -1.616281270980835, |
|
"loss": 56.518, |
|
"nll_loss": 2.3243794441223145, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -0.7099049687385559, |
|
"rewards/margins": 0.09823578596115112, |
|
"rewards/rejected": -0.8081406354904175, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 279.9685363769531, |
|
"learning_rate": 4.97401218720448e-05, |
|
"log_odds_chosen": 0.1506054848432541, |
|
"log_odds_ratio": -0.7535517811775208, |
|
"logps/chosen": -1.3751205205917358, |
|
"logps/rejected": -1.4878621101379395, |
|
"loss": 55.5771, |
|
"nll_loss": 2.230128049850464, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": -0.6875602602958679, |
|
"rewards/margins": 0.056370723992586136, |
|
"rewards/rejected": -0.7439310550689697, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4740740740740741, |
|
"grad_norm": 382.7770080566406, |
|
"learning_rate": 4.9502564938797946e-05, |
|
"log_odds_chosen": 0.19314703345298767, |
|
"log_odds_ratio": -0.7266248464584351, |
|
"logps/chosen": -1.3842805624008179, |
|
"logps/rejected": -1.5405880212783813, |
|
"loss": 55.5381, |
|
"nll_loss": 2.407309055328369, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6921402812004089, |
|
"rewards/margins": 0.07815368473529816, |
|
"rewards/rejected": -0.7702940106391907, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5214814814814814, |
|
"grad_norm": 130.2970733642578, |
|
"learning_rate": 4.918953929490768e-05, |
|
"log_odds_chosen": 0.1594429314136505, |
|
"log_odds_ratio": -0.7218093276023865, |
|
"logps/chosen": -1.2733328342437744, |
|
"logps/rejected": -1.3996423482894897, |
|
"loss": 51.8507, |
|
"nll_loss": 2.147927761077881, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": -0.6366664171218872, |
|
"rewards/margins": 0.06315477192401886, |
|
"rewards/rejected": -0.6998211741447449, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5688888888888889, |
|
"grad_norm": 134.74240112304688, |
|
"learning_rate": 4.88020090697132e-05, |
|
"log_odds_chosen": 0.2484438121318817, |
|
"log_odds_ratio": -0.6789853572845459, |
|
"logps/chosen": -1.2255313396453857, |
|
"logps/rejected": -1.4233750104904175, |
|
"loss": 49.8412, |
|
"nll_loss": 2.2157835960388184, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -0.6127656698226929, |
|
"rewards/margins": 0.09892191737890244, |
|
"rewards/rejected": -0.7116875052452087, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6162962962962963, |
|
"grad_norm": 118.31177520751953, |
|
"learning_rate": 4.834116786912897e-05, |
|
"log_odds_chosen": 0.2591857612133026, |
|
"log_odds_ratio": -0.6711713075637817, |
|
"logps/chosen": -1.2236008644104004, |
|
"logps/rejected": -1.4167249202728271, |
|
"loss": 49.7015, |
|
"nll_loss": 2.0784411430358887, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.6118004322052002, |
|
"rewards/margins": 0.0965619757771492, |
|
"rewards/rejected": -0.7083624601364136, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6637037037037037, |
|
"grad_norm": 102.92163848876953, |
|
"learning_rate": 4.7808435099299045e-05, |
|
"log_odds_chosen": 0.2674064040184021, |
|
"log_odds_ratio": -0.675905168056488, |
|
"logps/chosen": -1.2073343992233276, |
|
"logps/rejected": -1.4260364770889282, |
|
"loss": 49.125, |
|
"nll_loss": 1.9744670391082764, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.6036671996116638, |
|
"rewards/margins": 0.10935105383396149, |
|
"rewards/rejected": -0.7130182385444641, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 305.7223815917969, |
|
"learning_rate": 4.720545159477922e-05, |
|
"log_odds_chosen": 0.28773313760757446, |
|
"log_odds_ratio": -0.6539745926856995, |
|
"logps/chosen": -1.1219004392623901, |
|
"logps/rejected": -1.327695608139038, |
|
"loss": 46.259, |
|
"nll_loss": 1.9466793537139893, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5609502196311951, |
|
"rewards/margins": 0.1028975397348404, |
|
"rewards/rejected": -0.663847804069519, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7585185185185185, |
|
"grad_norm": 444.95172119140625, |
|
"learning_rate": 4.653407456471222e-05, |
|
"log_odds_chosen": 0.2160220444202423, |
|
"log_odds_ratio": -0.6895222663879395, |
|
"logps/chosen": -1.1895593404769897, |
|
"logps/rejected": -1.3464289903640747, |
|
"loss": 48.854, |
|
"nll_loss": 1.8920223712921143, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.5947796702384949, |
|
"rewards/margins": 0.07843481004238129, |
|
"rewards/rejected": -0.6732144951820374, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8059259259259259, |
|
"grad_norm": 134.1718292236328, |
|
"learning_rate": 4.579637187256222e-05, |
|
"log_odds_chosen": 0.31953853368759155, |
|
"log_odds_ratio": -0.650363564491272, |
|
"logps/chosen": -1.1080071926116943, |
|
"logps/rejected": -1.349273681640625, |
|
"loss": 45.7072, |
|
"nll_loss": 1.8154582977294922, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5540035963058472, |
|
"rewards/margins": 0.12063322216272354, |
|
"rewards/rejected": -0.6746368408203125, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 288.4284362792969, |
|
"learning_rate": 4.499461566702685e-05, |
|
"log_odds_chosen": 0.21705381572246552, |
|
"log_odds_ratio": -0.6870957016944885, |
|
"logps/chosen": -1.1290249824523926, |
|
"logps/rejected": -1.2838109731674194, |
|
"loss": 46.8203, |
|
"nll_loss": 1.9802055358886719, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.5645124912261963, |
|
"rewards/margins": 0.07739301770925522, |
|
"rewards/rejected": -0.6419054865837097, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9007407407407407, |
|
"grad_norm": 234.74221801757812, |
|
"learning_rate": 4.413127538374411e-05, |
|
"log_odds_chosen": 0.23379310965538025, |
|
"log_odds_ratio": -0.6703908443450928, |
|
"logps/chosen": -1.0899484157562256, |
|
"logps/rejected": -1.263106346130371, |
|
"loss": 45.3973, |
|
"nll_loss": 1.8875010013580322, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.5449742078781128, |
|
"rewards/margins": 0.08657898008823395, |
|
"rewards/rejected": -0.6315531730651855, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 112.56975555419922, |
|
"learning_rate": 4.320901013934887e-05, |
|
"log_odds_chosen": 0.16518335044384003, |
|
"log_odds_ratio": -0.7109084129333496, |
|
"logps/chosen": -1.1009365320205688, |
|
"logps/rejected": -1.238239049911499, |
|
"loss": 46.0418, |
|
"nll_loss": 1.8534952402114868, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": -0.5504682660102844, |
|
"rewards/margins": 0.06865125149488449, |
|
"rewards/rejected": -0.6191195249557495, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 136.2017059326172, |
|
"learning_rate": 4.223066054130568e-05, |
|
"log_odds_chosen": 0.23913511633872986, |
|
"log_odds_ratio": -0.6585836410522461, |
|
"logps/chosen": -1.0759801864624023, |
|
"logps/rejected": -1.2537977695465088, |
|
"loss": 44.7828, |
|
"nll_loss": 1.795069694519043, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5379900932312012, |
|
"rewards/margins": 0.08890879154205322, |
|
"rewards/rejected": -0.6268988847732544, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.037925925925926, |
|
"grad_norm": 67.5390625, |
|
"learning_rate": 4.1199239938743797e-05, |
|
"log_odds_chosen": 0.6602018475532532, |
|
"log_odds_ratio": -0.5334572196006775, |
|
"logps/chosen": -0.9451561570167542, |
|
"logps/rejected": -1.404151439666748, |
|
"loss": 35.0306, |
|
"nll_loss": 1.7614768743515015, |
|
"rewards/accuracies": 0.7307692170143127, |
|
"rewards/chosen": -0.4725780785083771, |
|
"rewards/margins": 0.22949755191802979, |
|
"rewards/rejected": -0.702075719833374, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0853333333333333, |
|
"grad_norm": 89.68997192382812, |
|
"learning_rate": 4.0117925141242174e-05, |
|
"log_odds_chosen": 0.8264390230178833, |
|
"log_odds_ratio": -0.46329426765441895, |
|
"logps/chosen": -0.9025434255599976, |
|
"logps/rejected": -1.4511505365371704, |
|
"loss": 37.0855, |
|
"nll_loss": 1.7627713680267334, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4512717127799988, |
|
"rewards/margins": 0.27430346608161926, |
|
"rewards/rejected": -0.7255752682685852, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.1327407407407408, |
|
"grad_norm": 85.83277130126953, |
|
"learning_rate": 3.899004663415084e-05, |
|
"log_odds_chosen": 0.888095498085022, |
|
"log_odds_ratio": -0.45123091340065, |
|
"logps/chosen": -0.8698997497558594, |
|
"logps/rejected": -1.456084966659546, |
|
"loss": 35.8665, |
|
"nll_loss": 2.0131936073303223, |
|
"rewards/accuracies": 0.7906249761581421, |
|
"rewards/chosen": -0.4349498748779297, |
|
"rewards/margins": 0.2930925786495209, |
|
"rewards/rejected": -0.728042483329773, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.1801481481481482, |
|
"grad_norm": 75.02178955078125, |
|
"learning_rate": 3.781907832058587e-05, |
|
"log_odds_chosen": 0.8515494465827942, |
|
"log_odds_ratio": -0.46829432249069214, |
|
"logps/chosen": -0.8703139424324036, |
|
"logps/rejected": -1.4456019401550293, |
|
"loss": 36.0252, |
|
"nll_loss": 1.8564857244491577, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.4351569712162018, |
|
"rewards/margins": 0.28764399886131287, |
|
"rewards/rejected": -0.7228009700775146, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.2275555555555555, |
|
"grad_norm": 93.19149017333984, |
|
"learning_rate": 3.660862682169282e-05, |
|
"log_odds_chosen": 0.7404316663742065, |
|
"log_odds_ratio": -0.49976396560668945, |
|
"logps/chosen": -0.8461529016494751, |
|
"logps/rejected": -1.305490255355835, |
|
"loss": 35.7775, |
|
"nll_loss": 1.7167637348175049, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.42307645082473755, |
|
"rewards/margins": 0.22966866195201874, |
|
"rewards/rejected": -0.6527451276779175, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.274962962962963, |
|
"grad_norm": 79.68480682373047, |
|
"learning_rate": 3.5362420368134356e-05, |
|
"log_odds_chosen": 0.8560611009597778, |
|
"log_odds_ratio": -0.4482923150062561, |
|
"logps/chosen": -0.8573166728019714, |
|
"logps/rejected": -1.4259978532791138, |
|
"loss": 35.4271, |
|
"nll_loss": 1.7739051580429077, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.4286583364009857, |
|
"rewards/margins": 0.2843405604362488, |
|
"rewards/rejected": -0.7129989266395569, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.3223703703703704, |
|
"grad_norm": 97.05864715576172, |
|
"learning_rate": 3.408429731701635e-05, |
|
"log_odds_chosen": 0.7707556486129761, |
|
"log_odds_ratio": -0.5003089308738708, |
|
"logps/chosen": -0.8969907760620117, |
|
"logps/rejected": -1.404831886291504, |
|
"loss": 37.3019, |
|
"nll_loss": 1.8181276321411133, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.44849538803100586, |
|
"rewards/margins": 0.2539205849170685, |
|
"rewards/rejected": -0.702415943145752, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.3697777777777778, |
|
"grad_norm": 94.64373016357422, |
|
"learning_rate": 3.2778194329621104e-05, |
|
"log_odds_chosen": 0.9273589253425598, |
|
"log_odds_ratio": -0.4503125548362732, |
|
"logps/chosen": -0.8693191409111023, |
|
"logps/rejected": -1.50619375705719, |
|
"loss": 35.7555, |
|
"nll_loss": 1.7783292531967163, |
|
"rewards/accuracies": 0.7906249761581421, |
|
"rewards/chosen": -0.43465957045555115, |
|
"rewards/margins": 0.31843727827072144, |
|
"rewards/rejected": -0.753096878528595, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.417185185185185, |
|
"grad_norm": 112.79119110107422, |
|
"learning_rate": 3.144813424636031e-05, |
|
"log_odds_chosen": 0.733902096748352, |
|
"log_odds_ratio": -0.49021005630493164, |
|
"logps/chosen": -0.8194792866706848, |
|
"logps/rejected": -1.2824211120605469, |
|
"loss": 34.8116, |
|
"nll_loss": 1.736271619796753, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.4097396433353424, |
|
"rewards/margins": 0.23147086799144745, |
|
"rewards/rejected": -0.6412105560302734, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4645925925925927, |
|
"grad_norm": 120.8626480102539, |
|
"learning_rate": 3.0098213696293542e-05, |
|
"log_odds_chosen": 0.8592801094055176, |
|
"log_odds_ratio": -0.46693143248558044, |
|
"logps/chosen": -0.8628988265991211, |
|
"logps/rejected": -1.428763508796692, |
|
"loss": 35.8416, |
|
"nll_loss": 1.7393659353256226, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.43144941329956055, |
|
"rewards/margins": 0.2829323410987854, |
|
"rewards/rejected": -0.714381754398346, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.512, |
|
"grad_norm": 128.55426025390625, |
|
"learning_rate": 2.8732590479375165e-05, |
|
"log_odds_chosen": 0.7666479349136353, |
|
"log_odds_ratio": -0.48797711730003357, |
|
"logps/chosen": -0.8693684339523315, |
|
"logps/rejected": -1.35294771194458, |
|
"loss": 36.3933, |
|
"nll_loss": 1.7232725620269775, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.43468421697616577, |
|
"rewards/margins": 0.2417895793914795, |
|
"rewards/rejected": -0.67647385597229, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.5594074074074074, |
|
"grad_norm": 123.65845489501953, |
|
"learning_rate": 2.7355470760292956e-05, |
|
"log_odds_chosen": 0.9002155065536499, |
|
"log_odds_ratio": -0.4618147909641266, |
|
"logps/chosen": -0.85200035572052, |
|
"logps/rejected": -1.435723066329956, |
|
"loss": 35.4562, |
|
"nll_loss": 1.7452001571655273, |
|
"rewards/accuracies": 0.7906249761581421, |
|
"rewards/chosen": -0.42600017786026, |
|
"rewards/margins": 0.291861355304718, |
|
"rewards/rejected": -0.717861533164978, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.6068148148148147, |
|
"grad_norm": 77.17584228515625, |
|
"learning_rate": 2.597109611334169e-05, |
|
"log_odds_chosen": 0.8772485852241516, |
|
"log_odds_ratio": -0.4688163697719574, |
|
"logps/chosen": -0.8404110074043274, |
|
"logps/rejected": -1.4133893251419067, |
|
"loss": 35.1622, |
|
"nll_loss": 1.7097526788711548, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4202055037021637, |
|
"rewards/margins": 0.28648921847343445, |
|
"rewards/rejected": -0.7066946625709534, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.6542222222222223, |
|
"grad_norm": 91.94951629638672, |
|
"learning_rate": 2.458373045823404e-05, |
|
"log_odds_chosen": 0.859915554523468, |
|
"log_odds_ratio": -0.4548751711845398, |
|
"logps/chosen": -0.8396957516670227, |
|
"logps/rejected": -1.4033467769622803, |
|
"loss": 34.974, |
|
"nll_loss": 1.7587263584136963, |
|
"rewards/accuracies": 0.7906249761581421, |
|
"rewards/chosen": -0.41984787583351135, |
|
"rewards/margins": 0.2818255126476288, |
|
"rewards/rejected": -0.7016733884811401, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.7016296296296296, |
|
"grad_norm": 75.54816436767578, |
|
"learning_rate": 2.3197646927086697e-05, |
|
"log_odds_chosen": 0.7710874080657959, |
|
"log_odds_ratio": -0.4820574223995209, |
|
"logps/chosen": -0.8513079881668091, |
|
"logps/rejected": -1.341399073600769, |
|
"loss": 35.7398, |
|
"nll_loss": 1.7565553188323975, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.42565399408340454, |
|
"rewards/margins": 0.24504557251930237, |
|
"rewards/rejected": -0.6706995368003845, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.749037037037037, |
|
"grad_norm": 60.633155822753906, |
|
"learning_rate": 2.1817114703032176e-05, |
|
"log_odds_chosen": 0.9204598665237427, |
|
"log_odds_ratio": -0.4521242678165436, |
|
"logps/chosen": -0.8442584276199341, |
|
"logps/rejected": -1.4724090099334717, |
|
"loss": 35.0333, |
|
"nll_loss": 1.7535591125488281, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.42212921380996704, |
|
"rewards/margins": 0.3140752613544464, |
|
"rewards/rejected": -0.7362045049667358, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.7964444444444445, |
|
"grad_norm": 58.7163200378418, |
|
"learning_rate": 2.0446385870993467e-05, |
|
"log_odds_chosen": 0.6730726361274719, |
|
"log_odds_ratio": -0.5413838624954224, |
|
"logps/chosen": -0.9189823865890503, |
|
"logps/rejected": -1.3468341827392578, |
|
"loss": 38.5132, |
|
"nll_loss": 1.7065455913543701, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.45949119329452515, |
|
"rewards/margins": 0.21392583847045898, |
|
"rewards/rejected": -0.6734170913696289, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.8438518518518519, |
|
"grad_norm": 60.383541107177734, |
|
"learning_rate": 1.9089682321121834e-05, |
|
"log_odds_chosen": 0.9475343823432922, |
|
"log_odds_ratio": -0.4484768509864807, |
|
"logps/chosen": -0.8385717272758484, |
|
"logps/rejected": -1.4635182619094849, |
|
"loss": 34.8245, |
|
"nll_loss": 1.713822603225708, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -0.4192858636379242, |
|
"rewards/margins": 0.31247326731681824, |
|
"rewards/rejected": -0.7317591309547424, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.8912592592592592, |
|
"grad_norm": 66.11405944824219, |
|
"learning_rate": 1.775118274523545e-05, |
|
"log_odds_chosen": 0.8329303860664368, |
|
"log_odds_ratio": -0.49859505891799927, |
|
"logps/chosen": -0.893582820892334, |
|
"logps/rejected": -1.451395034790039, |
|
"loss": 37.0308, |
|
"nll_loss": 1.732862114906311, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -0.446791410446167, |
|
"rewards/margins": 0.27890610694885254, |
|
"rewards/rejected": -0.7256975173950195, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.9386666666666668, |
|
"grad_norm": 54.821868896484375, |
|
"learning_rate": 1.643500976631037e-05, |
|
"log_odds_chosen": 0.6610826849937439, |
|
"log_odds_ratio": -0.5171926617622375, |
|
"logps/chosen": -0.8425942659378052, |
|
"logps/rejected": -1.258576512336731, |
|
"loss": 35.923, |
|
"nll_loss": 1.6366369724273682, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.4212971329689026, |
|
"rewards/margins": 0.20799115300178528, |
|
"rewards/rejected": -0.6292882561683655, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.986074074074074, |
|
"grad_norm": 410.4480895996094, |
|
"learning_rate": 1.514521724066537e-05, |
|
"log_odds_chosen": 0.773653507232666, |
|
"log_odds_ratio": -0.4857940673828125, |
|
"logps/chosen": -0.8226664662361145, |
|
"logps/rejected": -1.3210365772247314, |
|
"loss": 34.8322, |
|
"nll_loss": 1.650650978088379, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.41133323311805725, |
|
"rewards/margins": 0.24918513000011444, |
|
"rewards/rejected": -0.6605182886123657, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0284444444444443, |
|
"grad_norm": 81.6287841796875, |
|
"learning_rate": 1.3885777771950348e-05, |
|
"log_odds_chosen": 1.237162470817566, |
|
"log_odds_ratio": -0.37088167667388916, |
|
"logps/chosen": -0.7396840453147888, |
|
"logps/rejected": -1.494255542755127, |
|
"loss": 27.6202, |
|
"nll_loss": 1.6762900352478027, |
|
"rewards/accuracies": 0.8531468510627747, |
|
"rewards/chosen": -0.3698420226573944, |
|
"rewards/margins": 0.3772856593132019, |
|
"rewards/rejected": -0.7471277713775635, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.075851851851852, |
|
"grad_norm": 63.87202453613281, |
|
"learning_rate": 1.2660570475395683e-05, |
|
"log_odds_chosen": 1.8257486820220947, |
|
"log_odds_ratio": -0.26709312200546265, |
|
"logps/chosen": -0.6293801665306091, |
|
"logps/rejected": -1.7724393606185913, |
|
"loss": 25.9158, |
|
"nll_loss": 1.6067278385162354, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.31469008326530457, |
|
"rewards/margins": 0.5715296268463135, |
|
"rewards/rejected": -0.8862196803092957, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.1232592592592594, |
|
"grad_norm": 63.67515563964844, |
|
"learning_rate": 1.1473369030008974e-05, |
|
"log_odds_chosen": 1.9475319385528564, |
|
"log_odds_ratio": -0.22811241447925568, |
|
"logps/chosen": -0.6124777793884277, |
|
"logps/rejected": -1.8340566158294678, |
|
"loss": 24.8892, |
|
"nll_loss": 1.6936416625976562, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.30623888969421387, |
|
"rewards/margins": 0.61078941822052, |
|
"rewards/rejected": -0.9170283079147339, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.1706666666666665, |
|
"grad_norm": 73.4637222290039, |
|
"learning_rate": 1.0327830055518842e-05, |
|
"log_odds_chosen": 1.9068591594696045, |
|
"log_odds_ratio": -0.23920920491218567, |
|
"logps/chosen": -0.6302188634872437, |
|
"logps/rejected": -1.859368085861206, |
|
"loss": 25.4943, |
|
"nll_loss": 1.6373430490493774, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/chosen": -0.3151094317436218, |
|
"rewards/margins": 0.6145747900009155, |
|
"rewards/rejected": -0.929684042930603, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.218074074074074, |
|
"grad_norm": 94.13182067871094, |
|
"learning_rate": 9.227481849865235e-06, |
|
"log_odds_chosen": 1.905515432357788, |
|
"log_odds_ratio": -0.2525004744529724, |
|
"logps/chosen": -0.6512196063995361, |
|
"logps/rejected": -1.868032455444336, |
|
"loss": 26.2984, |
|
"nll_loss": 1.6863908767700195, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -0.32560980319976807, |
|
"rewards/margins": 0.6084063649177551, |
|
"rewards/rejected": -0.934016227722168, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.2654814814814817, |
|
"grad_norm": 61.78620147705078, |
|
"learning_rate": 8.175713521924978e-06, |
|
"log_odds_chosen": 1.8288015127182007, |
|
"log_odds_ratio": -0.2607673108577728, |
|
"logps/chosen": -0.6412376165390015, |
|
"logps/rejected": -1.772962212562561, |
|
"loss": 26.2439, |
|
"nll_loss": 1.679369568824768, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.32061880826950073, |
|
"rewards/margins": 0.565862238407135, |
|
"rewards/rejected": -0.8864811062812805, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.3128888888888888, |
|
"grad_norm": 72.63899993896484, |
|
"learning_rate": 7.1757645529443665e-06, |
|
"log_odds_chosen": 1.9564971923828125, |
|
"log_odds_ratio": -0.222591370344162, |
|
"logps/chosen": -0.6008509397506714, |
|
"logps/rejected": -1.823952078819275, |
|
"loss": 24.3832, |
|
"nll_loss": 1.634280800819397, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.3004254698753357, |
|
"rewards/margins": 0.6115506291389465, |
|
"rewards/rejected": -0.9119760394096375, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.3602962962962963, |
|
"grad_norm": 59.229347229003906, |
|
"learning_rate": 6.230714818829733e-06, |
|
"log_odds_chosen": 2.0835893154144287, |
|
"log_odds_ratio": -0.2164476215839386, |
|
"logps/chosen": -0.6128490567207336, |
|
"logps/rejected": -1.9331867694854736, |
|
"loss": 24.5665, |
|
"nll_loss": 1.6675183773040771, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.3064245283603668, |
|
"rewards/margins": 0.6601688861846924, |
|
"rewards/rejected": -0.9665933847427368, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.407703703703704, |
|
"grad_norm": 62.527137756347656, |
|
"learning_rate": 5.343475104027743e-06, |
|
"log_odds_chosen": 2.1743245124816895, |
|
"log_odds_ratio": -0.21075662970542908, |
|
"logps/chosen": -0.5725008249282837, |
|
"logps/rejected": -1.979318380355835, |
|
"loss": 23.177, |
|
"nll_loss": 1.6942886114120483, |
|
"rewards/accuracies": 0.9593750238418579, |
|
"rewards/chosen": -0.28625041246414185, |
|
"rewards/margins": 0.7034087777137756, |
|
"rewards/rejected": -0.9896591901779175, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.455111111111111, |
|
"grad_norm": 83.86973571777344, |
|
"learning_rate": 4.516778136213037e-06, |
|
"log_odds_chosen": 2.1156704425811768, |
|
"log_odds_ratio": -0.22012558579444885, |
|
"logps/chosen": -0.6010316610336304, |
|
"logps/rejected": -1.9568220376968384, |
|
"loss": 24.2239, |
|
"nll_loss": 1.6713542938232422, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.3005158305168152, |
|
"rewards/margins": 0.677895188331604, |
|
"rewards/rejected": -0.9784110188484192, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.5025185185185186, |
|
"grad_norm": 78.56597900390625, |
|
"learning_rate": 3.7531701693965554e-06, |
|
"log_odds_chosen": 1.9914929866790771, |
|
"log_odds_ratio": -0.2501711845397949, |
|
"logps/chosen": -0.6486467123031616, |
|
"logps/rejected": -1.941457748413086, |
|
"loss": 26.055, |
|
"nll_loss": 1.6230090856552124, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.3243233561515808, |
|
"rewards/margins": 0.6464055776596069, |
|
"rewards/rejected": -0.970728874206543, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.549925925925926, |
|
"grad_norm": 63.49893569946289, |
|
"learning_rate": 3.055003141378948e-06, |
|
"log_odds_chosen": 2.094613552093506, |
|
"log_odds_ratio": -0.2102334052324295, |
|
"logps/chosen": -0.5994306802749634, |
|
"logps/rejected": -1.9197509288787842, |
|
"loss": 24.16, |
|
"nll_loss": 1.6677443981170654, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.2997153401374817, |
|
"rewards/margins": 0.6601601839065552, |
|
"rewards/rejected": -0.9598754644393921, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.5973333333333333, |
|
"grad_norm": 79.37230682373047, |
|
"learning_rate": 2.424427429704365e-06, |
|
"log_odds_chosen": 2.1252670288085938, |
|
"log_odds_ratio": -0.2258034646511078, |
|
"logps/chosen": -0.638454794883728, |
|
"logps/rejected": -2.0011186599731445, |
|
"loss": 25.4827, |
|
"nll_loss": 1.6783549785614014, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.319227397441864, |
|
"rewards/margins": 0.6813319325447083, |
|
"rewards/rejected": -1.0005593299865723, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.644740740740741, |
|
"grad_norm": 66.57933807373047, |
|
"learning_rate": 1.8633852284264508e-06, |
|
"log_odds_chosen": 2.1056222915649414, |
|
"log_odds_ratio": -0.2241026908159256, |
|
"logps/chosen": -0.5848366022109985, |
|
"logps/rejected": -1.9145119190216064, |
|
"loss": 23.795, |
|
"nll_loss": 1.665006399154663, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.29241830110549927, |
|
"rewards/margins": 0.664837658405304, |
|
"rewards/rejected": -0.9572559595108032, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.6921481481481484, |
|
"grad_norm": 72.5078353881836, |
|
"learning_rate": 1.3736045660864034e-06, |
|
"log_odds_chosen": 2.1603641510009766, |
|
"log_odds_ratio": -0.212470144033432, |
|
"logps/chosen": -0.6109951138496399, |
|
"logps/rejected": -2.0010976791381836, |
|
"loss": 24.4558, |
|
"nll_loss": 1.7145074605941772, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.30549755692481995, |
|
"rewards/margins": 0.695051372051239, |
|
"rewards/rejected": -1.0005488395690918, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.7395555555555555, |
|
"grad_norm": 66.63565063476562, |
|
"learning_rate": 9.565939833279192e-07, |
|
"log_odds_chosen": 2.2116315364837646, |
|
"log_odds_ratio": -0.216557115316391, |
|
"logps/chosen": -0.6101894378662109, |
|
"logps/rejected": -2.0354068279266357, |
|
"loss": 24.4504, |
|
"nll_loss": 1.7398754358291626, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/chosen": -0.30509471893310547, |
|
"rewards/margins": 0.7126085758209229, |
|
"rewards/rejected": -1.0177034139633179, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.786962962962963, |
|
"grad_norm": 72.43724060058594, |
|
"learning_rate": 6.136378865420872e-07, |
|
"log_odds_chosen": 2.0436155796051025, |
|
"log_odds_ratio": -0.25939661264419556, |
|
"logps/chosen": -0.6374012231826782, |
|
"logps/rejected": -1.9478752613067627, |
|
"loss": 25.9435, |
|
"nll_loss": 1.6763683557510376, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": -0.3187006115913391, |
|
"rewards/margins": 0.6552368998527527, |
|
"rewards/rejected": -0.9739376306533813, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.83437037037037, |
|
"grad_norm": 63.837345123291016, |
|
"learning_rate": 3.45792591853214e-07, |
|
"log_odds_chosen": 2.2019195556640625, |
|
"log_odds_ratio": -0.22682932019233704, |
|
"logps/chosen": -0.6202256679534912, |
|
"logps/rejected": -2.0658164024353027, |
|
"loss": 24.8148, |
|
"nll_loss": 1.7285759449005127, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": -0.3101128339767456, |
|
"rewards/margins": 0.7227953672409058, |
|
"rewards/rejected": -1.0329082012176514, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.8817777777777778, |
|
"grad_norm": 70.43053436279297, |
|
"learning_rate": 1.538830716302092e-07, |
|
"log_odds_chosen": 2.2171826362609863, |
|
"log_odds_ratio": -0.2098480463027954, |
|
"logps/chosen": -0.5961582064628601, |
|
"logps/rejected": -2.024376392364502, |
|
"loss": 23.9484, |
|
"nll_loss": 1.663637399673462, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/chosen": -0.29807910323143005, |
|
"rewards/margins": 0.7141090631484985, |
|
"rewards/rejected": -1.012188196182251, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.9291851851851853, |
|
"grad_norm": 95.00191497802734, |
|
"learning_rate": 3.8500413544415025e-08, |
|
"log_odds_chosen": 2.249077081680298, |
|
"log_odds_ratio": -0.1899929940700531, |
|
"logps/chosen": -0.6121601462364197, |
|
"logps/rejected": -2.043703317642212, |
|
"loss": 24.1381, |
|
"nll_loss": 1.6788402795791626, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.30608007311820984, |
|
"rewards/margins": 0.7157715559005737, |
|
"rewards/rejected": -1.021851658821106, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.9765925925925925, |
|
"grad_norm": 54.38016891479492, |
|
"learning_rate": 0.0, |
|
"log_odds_chosen": 2.183474063873291, |
|
"log_odds_ratio": -0.20934459567070007, |
|
"logps/chosen": -0.6058934926986694, |
|
"logps/rejected": -2.00223445892334, |
|
"loss": 24.3016, |
|
"nll_loss": 1.6638948917388916, |
|
"rewards/accuracies": 0.971875011920929, |
|
"rewards/chosen": -0.3029467463493347, |
|
"rewards/margins": 0.69817054271698, |
|
"rewards/rejected": -1.00111722946167, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.9765925925925925, |
|
"step": 315, |
|
"total_flos": 0.0, |
|
"train_loss": 78.5789802187965, |
|
"train_runtime": 9244.4601, |
|
"train_samples_per_second": 2.191, |
|
"train_steps_per_second": 0.034 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|