|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 50, |
|
"global_step": 201, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07462686567164178, |
|
"grad_norm": 56.774636606182945, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.7173850536346436, |
|
"logits/rejected": -2.71553373336792, |
|
"logps/chosen": -263.896728515625, |
|
"logps/rejected": -227.8684539794922, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": 0.018741395324468613, |
|
"rewards/margins": 0.007884879596531391, |
|
"rewards/rejected": 0.010856516659259796, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.14925373134328357, |
|
"grad_norm": 49.171395793260366, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.658963680267334, |
|
"logits/rejected": -2.6707260608673096, |
|
"logps/chosen": -239.9593963623047, |
|
"logps/rejected": -201.82704162597656, |
|
"loss": 0.6347, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.49142637848854065, |
|
"rewards/margins": 0.10814571380615234, |
|
"rewards/rejected": 0.3832806348800659, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.22388059701492538, |
|
"grad_norm": 49.521893521620925, |
|
"learning_rate": 9.983100718730718e-07, |
|
"logits/chosen": -2.462752103805542, |
|
"logits/rejected": -2.4498093128204346, |
|
"logps/chosen": -223.9767608642578, |
|
"logps/rejected": -186.4107208251953, |
|
"loss": 0.6435, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 1.3751007318496704, |
|
"rewards/margins": 0.8284648656845093, |
|
"rewards/rejected": 0.5466357469558716, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.29850746268656714, |
|
"grad_norm": 39.064368139363026, |
|
"learning_rate": 9.932517109205849e-07, |
|
"logits/chosen": -2.3084564208984375, |
|
"logits/rejected": -2.284285306930542, |
|
"logps/chosen": -216.29672241210938, |
|
"logps/rejected": -210.03366088867188, |
|
"loss": 0.6022, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 1.3789255619049072, |
|
"rewards/margins": 1.200923204421997, |
|
"rewards/rejected": 0.17800235748291016, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.373134328358209, |
|
"grad_norm": 42.02666971304695, |
|
"learning_rate": 9.848591102083375e-07, |
|
"logits/chosen": -2.1679234504699707, |
|
"logits/rejected": -2.1585092544555664, |
|
"logps/chosen": -248.0310821533203, |
|
"logps/rejected": -202.24365234375, |
|
"loss": 0.598, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.483551025390625, |
|
"rewards/margins": 1.0866239070892334, |
|
"rewards/rejected": 0.39692699909210205, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.44776119402985076, |
|
"grad_norm": 35.54152957556429, |
|
"learning_rate": 9.731890013043367e-07, |
|
"logits/chosen": -2.1620185375213623, |
|
"logits/rejected": -2.134643793106079, |
|
"logps/chosen": -258.4416809082031, |
|
"logps/rejected": -218.0922393798828, |
|
"loss": 0.5785, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 1.5798461437225342, |
|
"rewards/margins": 1.5195716619491577, |
|
"rewards/rejected": 0.06027444079518318, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5223880597014925, |
|
"grad_norm": 39.252296599241106, |
|
"learning_rate": 9.583202707897073e-07, |
|
"logits/chosen": -2.246034622192383, |
|
"logits/rejected": -2.1999077796936035, |
|
"logps/chosen": -247.757568359375, |
|
"logps/rejected": -200.7145538330078, |
|
"loss": 0.5553, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.9202949404716492, |
|
"rewards/margins": 1.2205396890640259, |
|
"rewards/rejected": -0.3002447187900543, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 40.409666420169685, |
|
"learning_rate": 9.403534270080829e-07, |
|
"logits/chosen": -2.2683088779449463, |
|
"logits/rejected": -2.2772603034973145, |
|
"logps/chosen": -232.68746948242188, |
|
"logps/rejected": -204.03921508789062, |
|
"loss": 0.5634, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.9635717272758484, |
|
"rewards/margins": 0.9926932454109192, |
|
"rewards/rejected": -0.029121670871973038, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6716417910447762, |
|
"grad_norm": 35.68185280636666, |
|
"learning_rate": 9.19409920658098e-07, |
|
"logits/chosen": -2.325417995452881, |
|
"logits/rejected": -2.3191263675689697, |
|
"logps/chosen": -246.6074676513672, |
|
"logps/rejected": -206.07778930664062, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 1.4123908281326294, |
|
"rewards/margins": 1.4990627765655518, |
|
"rewards/rejected": -0.08667198568582535, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.746268656716418, |
|
"grad_norm": 37.69485379762672, |
|
"learning_rate": 8.956313238215823e-07, |
|
"logits/chosen": -2.412565231323242, |
|
"logits/rejected": -2.4015040397644043, |
|
"logps/chosen": -253.4149169921875, |
|
"logps/rejected": -191.1724090576172, |
|
"loss": 0.6039, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.2345142364501953, |
|
"rewards/margins": 1.3397375345230103, |
|
"rewards/rejected": -0.10522329807281494, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.746268656716418, |
|
"eval_logits/chosen": -2.448784351348877, |
|
"eval_logits/rejected": -2.429978609085083, |
|
"eval_logps/chosen": -262.4934387207031, |
|
"eval_logps/rejected": -201.04635620117188, |
|
"eval_loss": 0.5474794507026672, |
|
"eval_rewards/accuracies": 0.8166666626930237, |
|
"eval_rewards/chosen": 1.3230173587799072, |
|
"eval_rewards/margins": 1.7253921031951904, |
|
"eval_rewards/rejected": -0.402374804019928, |
|
"eval_runtime": 126.4937, |
|
"eval_samples_per_second": 15.021, |
|
"eval_steps_per_second": 0.237, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8208955223880597, |
|
"grad_norm": 33.62557977902233, |
|
"learning_rate": 8.691783729769873e-07, |
|
"logits/chosen": -2.448288679122925, |
|
"logits/rejected": -2.420079231262207, |
|
"logps/chosen": -246.814453125, |
|
"logps/rejected": -222.81369018554688, |
|
"loss": 0.5075, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 1.3512744903564453, |
|
"rewards/margins": 1.5960042476654053, |
|
"rewards/rejected": -0.24473004043102264, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.8955223880597015, |
|
"grad_norm": 34.40548669609605, |
|
"learning_rate": 8.402298824670029e-07, |
|
"logits/chosen": -2.4062578678131104, |
|
"logits/rejected": -2.3980088233947754, |
|
"logps/chosen": -244.09963989257812, |
|
"logps/rejected": -220.0731658935547, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.0276352167129517, |
|
"rewards/margins": 1.7561748027801514, |
|
"rewards/rejected": -0.7285395860671997, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9701492537313433, |
|
"grad_norm": 28.164310082130665, |
|
"learning_rate": 8.089815357650089e-07, |
|
"logits/chosen": -2.3062520027160645, |
|
"logits/rejected": -2.2625319957733154, |
|
"logps/chosen": -252.2916717529297, |
|
"logps/rejected": -205.3076629638672, |
|
"loss": 0.5397, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 1.2947864532470703, |
|
"rewards/margins": 1.5517289638519287, |
|
"rewards/rejected": -0.2569425106048584, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.044776119402985, |
|
"grad_norm": 21.112407246277925, |
|
"learning_rate": 7.756445627110522e-07, |
|
"logits/chosen": -2.2231955528259277, |
|
"logits/rejected": -2.1976494789123535, |
|
"logps/chosen": -235.33688354492188, |
|
"logps/rejected": -231.05859375, |
|
"loss": 0.3658, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 1.7080755233764648, |
|
"rewards/margins": 2.355454921722412, |
|
"rewards/rejected": -0.6473790407180786, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.1194029850746268, |
|
"grad_norm": 19.878898492793304, |
|
"learning_rate": 7.404443116588547e-07, |
|
"logits/chosen": -2.1846470832824707, |
|
"logits/rejected": -2.1705875396728516, |
|
"logps/chosen": -260.013427734375, |
|
"logps/rejected": -221.21255493164062, |
|
"loss": 0.2501, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.7602205276489258, |
|
"rewards/margins": 2.9061784744262695, |
|
"rewards/rejected": -1.1459578275680542, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.1940298507462686, |
|
"grad_norm": 18.98334057762975, |
|
"learning_rate": 7.036187261857288e-07, |
|
"logits/chosen": -2.139054775238037, |
|
"logits/rejected": -2.1150176525115967, |
|
"logps/chosen": -220.98129272460938, |
|
"logps/rejected": -219.1953125, |
|
"loss": 0.2198, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.633134126663208, |
|
"rewards/margins": 3.2258033752441406, |
|
"rewards/rejected": -1.592669129371643, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.2686567164179103, |
|
"grad_norm": 19.761118649470582, |
|
"learning_rate": 6.654167366624008e-07, |
|
"logits/chosen": -2.1024246215820312, |
|
"logits/rejected": -2.0848536491394043, |
|
"logps/chosen": -234.4004669189453, |
|
"logps/rejected": -226.5931396484375, |
|
"loss": 0.2434, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.7910864353179932, |
|
"rewards/margins": 3.391458511352539, |
|
"rewards/rejected": -1.600372314453125, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.3432835820895521, |
|
"grad_norm": 18.92386433541345, |
|
"learning_rate": 6.260965775552713e-07, |
|
"logits/chosen": -2.071793794631958, |
|
"logits/rejected": -1.9989004135131836, |
|
"logps/chosen": -224.1923065185547, |
|
"logps/rejected": -204.32492065429688, |
|
"loss": 0.2496, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 2.095130681991577, |
|
"rewards/margins": 3.073395013809204, |
|
"rewards/rejected": -0.9782641530036926, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.417910447761194, |
|
"grad_norm": 22.257616479363563, |
|
"learning_rate": 5.859240418356614e-07, |
|
"logits/chosen": -2.0160460472106934, |
|
"logits/rejected": -1.9418752193450928, |
|
"logps/chosen": -225.38796997070312, |
|
"logps/rejected": -223.72238159179688, |
|
"loss": 0.2431, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.8588430881500244, |
|
"rewards/margins": 3.6276745796203613, |
|
"rewards/rejected": -1.7688310146331787, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.4925373134328357, |
|
"grad_norm": 21.00323840319488, |
|
"learning_rate": 5.451706842957421e-07, |
|
"logits/chosen": -1.9949266910552979, |
|
"logits/rejected": -1.9681007862091064, |
|
"logps/chosen": -226.308837890625, |
|
"logps/rejected": -214.67807006835938, |
|
"loss": 0.2811, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.7654300928115845, |
|
"rewards/margins": 3.4513607025146484, |
|
"rewards/rejected": -1.6859302520751953, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.4925373134328357, |
|
"eval_logits/chosen": -2.004354476928711, |
|
"eval_logits/rejected": -1.935070276260376, |
|
"eval_logps/chosen": -261.9532775878906, |
|
"eval_logps/rejected": -210.36463928222656, |
|
"eval_loss": 0.5120986700057983, |
|
"eval_rewards/accuracies": 0.8541666865348816, |
|
"eval_rewards/chosen": 1.3770320415496826, |
|
"eval_rewards/margins": 2.7112340927124023, |
|
"eval_rewards/rejected": -1.3342019319534302, |
|
"eval_runtime": 126.4659, |
|
"eval_samples_per_second": 15.024, |
|
"eval_steps_per_second": 0.237, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.5671641791044775, |
|
"grad_norm": 22.397179602439476, |
|
"learning_rate": 5.041119859162068e-07, |
|
"logits/chosen": -2.0054097175598145, |
|
"logits/rejected": -1.9702081680297852, |
|
"logps/chosen": -249.3375701904297, |
|
"logps/rejected": -218.004638671875, |
|
"loss": 0.2519, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.2315516471862793, |
|
"rewards/margins": 3.9727096557617188, |
|
"rewards/rejected": -1.74115788936615, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.6417910447761193, |
|
"grad_norm": 20.094359300465378, |
|
"learning_rate": 4.630254916940423e-07, |
|
"logits/chosen": -1.9229028224945068, |
|
"logits/rejected": -1.9023786783218384, |
|
"logps/chosen": -224.10360717773438, |
|
"logps/rejected": -198.1254119873047, |
|
"loss": 0.2646, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.7623440027236938, |
|
"rewards/margins": 3.4057388305664062, |
|
"rewards/rejected": -1.6433947086334229, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.716417910447761, |
|
"grad_norm": 21.24809202102134, |
|
"learning_rate": 4.2218893451814e-07, |
|
"logits/chosen": -1.9529317617416382, |
|
"logits/rejected": -1.9107424020767212, |
|
"logps/chosen": -237.7430877685547, |
|
"logps/rejected": -229.7592315673828, |
|
"loss": 0.2487, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 1.7564747333526611, |
|
"rewards/margins": 3.5565059185028076, |
|
"rewards/rejected": -1.800031304359436, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.7910447761194028, |
|
"grad_norm": 20.483863352494325, |
|
"learning_rate": 3.8187835777481375e-07, |
|
"logits/chosen": -2.03513240814209, |
|
"logits/rejected": -2.012155055999756, |
|
"logps/chosen": -255.811279296875, |
|
"logps/rejected": -215.11550903320312, |
|
"loss": 0.2907, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.8352388143539429, |
|
"rewards/margins": 3.5237317085266113, |
|
"rewards/rejected": -1.688493013381958, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.8656716417910446, |
|
"grad_norm": 21.267003087310385, |
|
"learning_rate": 3.423662493738687e-07, |
|
"logits/chosen": -2.090149402618408, |
|
"logits/rejected": -2.037410259246826, |
|
"logps/chosen": -224.0563201904297, |
|
"logps/rejected": -232.9271697998047, |
|
"loss": 0.2537, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.5230239629745483, |
|
"rewards/margins": 3.965744733810425, |
|
"rewards/rejected": -2.442720651626587, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.9402985074626866, |
|
"grad_norm": 19.665426167854065, |
|
"learning_rate": 3.039196998086687e-07, |
|
"logits/chosen": -2.119109630584717, |
|
"logits/rejected": -2.0918455123901367, |
|
"logps/chosen": -218.8955078125, |
|
"logps/rejected": -214.98388671875, |
|
"loss": 0.2703, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.6914072036743164, |
|
"rewards/margins": 3.2345032691955566, |
|
"rewards/rejected": -1.5430960655212402, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.014925373134328, |
|
"grad_norm": 15.333779299159936, |
|
"learning_rate": 2.667985967011878e-07, |
|
"logits/chosen": -2.1006662845611572, |
|
"logits/rejected": -2.1327574253082275, |
|
"logps/chosen": -227.55078125, |
|
"logps/rejected": -216.8292236328125, |
|
"loss": 0.2636, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.8686821460723877, |
|
"rewards/margins": 3.199751853942871, |
|
"rewards/rejected": -1.3310701847076416, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.08955223880597, |
|
"grad_norm": 21.411151288029885, |
|
"learning_rate": 2.3125386803640183e-07, |
|
"logits/chosen": -2.1000330448150635, |
|
"logits/rejected": -2.096583127975464, |
|
"logps/chosen": -225.46200561523438, |
|
"logps/rejected": -206.5730438232422, |
|
"loss": 0.1728, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.2989490032196045, |
|
"rewards/margins": 3.712939739227295, |
|
"rewards/rejected": -1.4139907360076904, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.1641791044776117, |
|
"grad_norm": 13.508008476385893, |
|
"learning_rate": 1.9752578596124952e-07, |
|
"logits/chosen": -2.0918757915496826, |
|
"logits/rejected": -2.084589958190918, |
|
"logps/chosen": -228.3970489501953, |
|
"logps/rejected": -231.37820434570312, |
|
"loss": 0.1454, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 2.3735086917877197, |
|
"rewards/margins": 4.1814985275268555, |
|
"rewards/rejected": -1.807989478111267, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.2388059701492535, |
|
"grad_norm": 17.667435726292307, |
|
"learning_rate": 1.6584234261399532e-07, |
|
"logits/chosen": -2.0861001014709473, |
|
"logits/rejected": -2.1118366718292236, |
|
"logps/chosen": -228.27438354492188, |
|
"logps/rejected": -210.40005493164062, |
|
"loss": 0.1518, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 2.1165130138397217, |
|
"rewards/margins": 3.9735045433044434, |
|
"rewards/rejected": -1.8569917678833008, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.2388059701492535, |
|
"eval_logits/chosen": -2.098158121109009, |
|
"eval_logits/rejected": -2.039686918258667, |
|
"eval_logps/chosen": -260.8434143066406, |
|
"eval_logps/rejected": -209.58831787109375, |
|
"eval_loss": 0.5043797492980957, |
|
"eval_rewards/accuracies": 0.8541666865348816, |
|
"eval_rewards/chosen": 1.4880186319351196, |
|
"eval_rewards/margins": 2.744588613510132, |
|
"eval_rewards/rejected": -1.2565699815750122, |
|
"eval_runtime": 126.9044, |
|
"eval_samples_per_second": 14.972, |
|
"eval_steps_per_second": 0.236, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.3134328358208958, |
|
"grad_norm": 14.584825419510635, |
|
"learning_rate": 1.3641770896292082e-07, |
|
"logits/chosen": -2.0857396125793457, |
|
"logits/rejected": -2.0918917655944824, |
|
"logps/chosen": -248.5997772216797, |
|
"logps/rejected": -237.12857055664062, |
|
"loss": 0.1395, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.5311789512634277, |
|
"rewards/margins": 4.292832851409912, |
|
"rewards/rejected": -1.7616539001464844, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.388059701492537, |
|
"grad_norm": 14.909309745253823, |
|
"learning_rate": 1.0945078707215221e-07, |
|
"logits/chosen": -2.121391534805298, |
|
"logits/rejected": -2.058969497680664, |
|
"logps/chosen": -240.8168487548828, |
|
"logps/rejected": -242.3682861328125, |
|
"loss": 0.1418, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 2.222982406616211, |
|
"rewards/margins": 4.225098609924316, |
|
"rewards/rejected": -2.0021157264709473, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.4626865671641793, |
|
"grad_norm": 21.84821531200421, |
|
"learning_rate": 8.512386558088919e-08, |
|
"logits/chosen": -2.121065616607666, |
|
"logits/rejected": -2.1130306720733643, |
|
"logps/chosen": -261.7530517578125, |
|
"logps/rejected": -241.3323974609375, |
|
"loss": 0.154, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 2.2334556579589844, |
|
"rewards/margins": 4.155540943145752, |
|
"rewards/rejected": -1.9220850467681885, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.5373134328358207, |
|
"grad_norm": 21.341295540033837, |
|
"learning_rate": 6.360138748461013e-08, |
|
"logits/chosen": -2.1645936965942383, |
|
"logits/rejected": -2.127746105194092, |
|
"logps/chosen": -251.10836791992188, |
|
"logps/rejected": -253.2220916748047, |
|
"loss": 0.1596, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.1738197803497314, |
|
"rewards/margins": 4.305379390716553, |
|
"rewards/rejected": -2.1315596103668213, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.611940298507463, |
|
"grad_norm": 18.659914383245795, |
|
"learning_rate": 4.5028838547699346e-08, |
|
"logits/chosen": -2.1111814975738525, |
|
"logits/rejected": -2.076622486114502, |
|
"logps/chosen": -222.6644287109375, |
|
"logps/rejected": -234.6880340576172, |
|
"loss": 0.1698, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.7883933782577515, |
|
"rewards/margins": 4.462481498718262, |
|
"rewards/rejected": -2.6740882396698, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.6865671641791042, |
|
"grad_norm": 20.784990616419144, |
|
"learning_rate": 2.9531763861505964e-08, |
|
"logits/chosen": -2.122183322906494, |
|
"logits/rejected": -2.038510799407959, |
|
"logps/chosen": -221.64071655273438, |
|
"logps/rejected": -208.3445281982422, |
|
"loss": 0.1637, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.397716999053955, |
|
"rewards/margins": 3.8158226013183594, |
|
"rewards/rejected": -2.4181056022644043, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.7611940298507465, |
|
"grad_norm": 13.240442052448362, |
|
"learning_rate": 1.7214919195619125e-08, |
|
"logits/chosen": -2.1085686683654785, |
|
"logits/rejected": -2.0911266803741455, |
|
"logps/chosen": -237.5169677734375, |
|
"logps/rejected": -227.32540893554688, |
|
"loss": 0.1418, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.997253179550171, |
|
"rewards/margins": 4.263888835906982, |
|
"rewards/rejected": -2.2666361331939697, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.835820895522388, |
|
"grad_norm": 19.599663846818626, |
|
"learning_rate": 8.161562878982398e-09, |
|
"logits/chosen": -2.0720620155334473, |
|
"logits/rejected": -2.038602113723755, |
|
"logps/chosen": -232.0093994140625, |
|
"logps/rejected": -199.23355102539062, |
|
"loss": 0.1596, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.6587365865707397, |
|
"rewards/margins": 3.9811177253723145, |
|
"rewards/rejected": -2.322380781173706, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.91044776119403, |
|
"grad_norm": 13.085510048513191, |
|
"learning_rate": 2.432892997526026e-09, |
|
"logits/chosen": -2.1225340366363525, |
|
"logits/rejected": -2.0646426677703857, |
|
"logps/chosen": -248.36441040039062, |
|
"logps/rejected": -275.37506103515625, |
|
"loss": 0.1348, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.9732534885406494, |
|
"rewards/margins": 4.583985328674316, |
|
"rewards/rejected": -2.610731601715088, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.9850746268656714, |
|
"grad_norm": 13.592938607933718, |
|
"learning_rate": 6.763371270035457e-11, |
|
"logits/chosen": -2.10709810256958, |
|
"logits/rejected": -2.0659236907958984, |
|
"logps/chosen": -248.839599609375, |
|
"logps/rejected": -237.7239532470703, |
|
"loss": 0.1381, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.9815118312835693, |
|
"rewards/margins": 4.483563423156738, |
|
"rewards/rejected": -2.502051591873169, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.9850746268656714, |
|
"eval_logits/chosen": -2.107107400894165, |
|
"eval_logits/rejected": -2.0472443103790283, |
|
"eval_logps/chosen": -264.30224609375, |
|
"eval_logps/rejected": -214.93907165527344, |
|
"eval_loss": 0.5009533762931824, |
|
"eval_rewards/accuracies": 0.8583333492279053, |
|
"eval_rewards/chosen": 1.1421351432800293, |
|
"eval_rewards/margins": 2.933783769607544, |
|
"eval_rewards/rejected": -1.791648507118225, |
|
"eval_runtime": 126.3434, |
|
"eval_samples_per_second": 15.038, |
|
"eval_steps_per_second": 0.237, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 201, |
|
"total_flos": 2369906314051584.0, |
|
"train_loss": 0.33028975388600457, |
|
"train_runtime": 7391.1638, |
|
"train_samples_per_second": 6.939, |
|
"train_steps_per_second": 0.027 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 201, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2369906314051584.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|