|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 504, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02976190476190476, |
|
"grad_norm": 1776.2471923828125, |
|
"learning_rate": 2.5000000000000004e-07, |
|
"log_odds_chosen": -0.3503778874874115, |
|
"log_odds_ratio": -1.0639952421188354, |
|
"logits/chosen": 125.49534606933594, |
|
"logits/rejected": 180.3563232421875, |
|
"logps/chosen": -15.494331359863281, |
|
"logps/rejected": -15.143954277038574, |
|
"loss": 14.9629, |
|
"nll_loss": 15.042287826538086, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.7747165560722351, |
|
"rewards/margins": -0.01751876249909401, |
|
"rewards/rejected": -0.7571978569030762, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.05952380952380952, |
|
"grad_norm": 1193.9168701171875, |
|
"learning_rate": 5.000000000000001e-07, |
|
"log_odds_chosen": 0.06253216415643692, |
|
"log_odds_ratio": -0.8492839932441711, |
|
"logits/chosen": 210.31631469726562, |
|
"logits/rejected": 245.1850128173828, |
|
"logps/chosen": -12.538459777832031, |
|
"logps/rejected": -12.600992202758789, |
|
"loss": 12.6111, |
|
"nll_loss": 12.388693809509277, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.6269229650497437, |
|
"rewards/margins": 0.0031266347505152225, |
|
"rewards/rejected": -0.6300495862960815, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08928571428571429, |
|
"grad_norm": 723.246826171875, |
|
"learning_rate": 7.5e-07, |
|
"log_odds_chosen": -0.26854413747787476, |
|
"log_odds_ratio": -0.9210551977157593, |
|
"logits/chosen": 300.58953857421875, |
|
"logits/rejected": 289.635009765625, |
|
"logps/chosen": -8.324029922485352, |
|
"logps/rejected": -8.055707931518555, |
|
"loss": 8.2823, |
|
"nll_loss": 8.378087997436523, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.41620150208473206, |
|
"rewards/margins": -0.013416108675301075, |
|
"rewards/rejected": -0.40278539061546326, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.11904761904761904, |
|
"grad_norm": 216.30667114257812, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"log_odds_chosen": -0.09513016790151596, |
|
"log_odds_ratio": -0.9796999096870422, |
|
"logits/chosen": 255.70156860351562, |
|
"logits/rejected": 235.23721313476562, |
|
"logps/chosen": -5.404101848602295, |
|
"logps/rejected": -5.310047149658203, |
|
"loss": 5.4458, |
|
"nll_loss": 5.430812835693359, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.2702050805091858, |
|
"rewards/margins": -0.004702714271843433, |
|
"rewards/rejected": -0.265502393245697, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1488095238095238, |
|
"grad_norm": 164.33297729492188, |
|
"learning_rate": 1.25e-06, |
|
"log_odds_chosen": 0.25551921129226685, |
|
"log_odds_ratio": -0.6588834524154663, |
|
"logits/chosen": 274.9960632324219, |
|
"logits/rejected": 302.8502502441406, |
|
"logps/chosen": -3.1612606048583984, |
|
"logps/rejected": -3.390745162963867, |
|
"loss": 3.5011, |
|
"nll_loss": 3.502755641937256, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.15806300938129425, |
|
"rewards/margins": 0.01147423591464758, |
|
"rewards/rejected": -0.1695372611284256, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.17857142857142858, |
|
"grad_norm": 78.93386840820312, |
|
"learning_rate": 1.5e-06, |
|
"log_odds_chosen": 0.24210360646247864, |
|
"log_odds_ratio": -0.6352943181991577, |
|
"logits/chosen": 290.36334228515625, |
|
"logits/rejected": 385.3616638183594, |
|
"logps/chosen": -2.418250799179077, |
|
"logps/rejected": -2.630479097366333, |
|
"loss": 2.5553, |
|
"nll_loss": 2.509606122970581, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.12091253697872162, |
|
"rewards/margins": 0.01061142049729824, |
|
"rewards/rejected": -0.1315239518880844, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.20833333333333334, |
|
"grad_norm": 42.19651412963867, |
|
"learning_rate": 1.75e-06, |
|
"log_odds_chosen": 0.4038185477256775, |
|
"log_odds_ratio": -0.5650765299797058, |
|
"logits/chosen": 365.90753173828125, |
|
"logits/rejected": 389.90338134765625, |
|
"logps/chosen": -1.5057640075683594, |
|
"logps/rejected": -1.8460853099822998, |
|
"loss": 2.129, |
|
"nll_loss": 1.8491789102554321, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.07528821378946304, |
|
"rewards/margins": 0.01701606810092926, |
|
"rewards/rejected": -0.0923042744398117, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.23809523809523808, |
|
"grad_norm": 35.26750183105469, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"log_odds_chosen": 0.15938052535057068, |
|
"log_odds_ratio": -0.6910892724990845, |
|
"logits/chosen": 331.43438720703125, |
|
"logits/rejected": 372.8791198730469, |
|
"logps/chosen": -1.6345170736312866, |
|
"logps/rejected": -1.7539236545562744, |
|
"loss": 1.935, |
|
"nll_loss": 1.968824028968811, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.08172585070133209, |
|
"rewards/margins": 0.005970318801701069, |
|
"rewards/rejected": -0.08769617974758148, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.26785714285714285, |
|
"grad_norm": 87.45353698730469, |
|
"learning_rate": 2.25e-06, |
|
"log_odds_chosen": -0.15498922765254974, |
|
"log_odds_ratio": -0.8517929911613464, |
|
"logits/chosen": 386.3527526855469, |
|
"logits/rejected": 371.41851806640625, |
|
"logps/chosen": -1.4574557542800903, |
|
"logps/rejected": -1.3248956203460693, |
|
"loss": 1.8765, |
|
"nll_loss": 1.7655121088027954, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.07287278026342392, |
|
"rewards/margins": -0.006628001574426889, |
|
"rewards/rejected": -0.06624479591846466, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2976190476190476, |
|
"grad_norm": 41.48765563964844, |
|
"learning_rate": 2.5e-06, |
|
"log_odds_chosen": 0.043729472905397415, |
|
"log_odds_ratio": -0.7361106872558594, |
|
"logits/chosen": 437.00823974609375, |
|
"logits/rejected": 443.38232421875, |
|
"logps/chosen": -1.658603310585022, |
|
"logps/rejected": -1.6873445510864258, |
|
"loss": 1.8933, |
|
"nll_loss": 2.108198642730713, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08293016999959946, |
|
"rewards/margins": 0.0014370663557201624, |
|
"rewards/rejected": -0.08436723798513412, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3273809523809524, |
|
"grad_norm": 51.39496994018555, |
|
"learning_rate": 2.7500000000000004e-06, |
|
"log_odds_chosen": 0.10381323099136353, |
|
"log_odds_ratio": -0.7126073241233826, |
|
"logits/chosen": 365.14544677734375, |
|
"logits/rejected": 364.1676330566406, |
|
"logps/chosen": -1.4783101081848145, |
|
"logps/rejected": -1.550569772720337, |
|
"loss": 1.8734, |
|
"nll_loss": 1.8976672887802124, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.073915496468544, |
|
"rewards/margins": 0.003612985834479332, |
|
"rewards/rejected": -0.07752849161624908, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 47.07830810546875, |
|
"learning_rate": 3e-06, |
|
"log_odds_chosen": 0.20662184059619904, |
|
"log_odds_ratio": -0.6513880491256714, |
|
"logits/chosen": 386.7112731933594, |
|
"logits/rejected": 356.2147216796875, |
|
"logps/chosen": -1.1965901851654053, |
|
"logps/rejected": -1.3431804180145264, |
|
"loss": 1.6989, |
|
"nll_loss": 1.9430646896362305, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.05982951074838638, |
|
"rewards/margins": 0.007329514715820551, |
|
"rewards/rejected": -0.0671590268611908, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3869047619047619, |
|
"grad_norm": 116.96641540527344, |
|
"learning_rate": 3.2500000000000002e-06, |
|
"log_odds_chosen": 0.029183167964220047, |
|
"log_odds_ratio": -0.7434911727905273, |
|
"logits/chosen": 385.2690734863281, |
|
"logits/rejected": 389.1334228515625, |
|
"logps/chosen": -1.3286149501800537, |
|
"logps/rejected": -1.3333098888397217, |
|
"loss": 1.636, |
|
"nll_loss": 1.6027374267578125, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.06643076241016388, |
|
"rewards/margins": 0.00023474842600990087, |
|
"rewards/rejected": -0.06666550040245056, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 40.99021530151367, |
|
"learning_rate": 3.5e-06, |
|
"log_odds_chosen": 0.045442335307598114, |
|
"log_odds_ratio": -0.7358086705207825, |
|
"logits/chosen": 395.22906494140625, |
|
"logits/rejected": 390.57305908203125, |
|
"logps/chosen": -1.7554155588150024, |
|
"logps/rejected": -1.765885591506958, |
|
"loss": 1.6688, |
|
"nll_loss": 1.9699609279632568, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08777078241109848, |
|
"rewards/margins": 0.0005234999698586762, |
|
"rewards/rejected": -0.08829428255558014, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.44642857142857145, |
|
"grad_norm": 67.57577514648438, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"log_odds_chosen": 0.24833233654499054, |
|
"log_odds_ratio": -0.6203548908233643, |
|
"logits/chosen": 386.412109375, |
|
"logits/rejected": 376.1787414550781, |
|
"logps/chosen": -1.1873780488967896, |
|
"logps/rejected": -1.3402230739593506, |
|
"loss": 1.5454, |
|
"nll_loss": 1.4503333568572998, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.059368908405303955, |
|
"rewards/margins": 0.0076422421261668205, |
|
"rewards/rejected": -0.06701115518808365, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 29.741676330566406, |
|
"learning_rate": 4.000000000000001e-06, |
|
"log_odds_chosen": 0.29179519414901733, |
|
"log_odds_ratio": -0.6332293748855591, |
|
"logits/chosen": 389.09295654296875, |
|
"logits/rejected": 405.5395202636719, |
|
"logps/chosen": -1.2367178201675415, |
|
"logps/rejected": -1.3924906253814697, |
|
"loss": 1.5966, |
|
"nll_loss": 1.5792012214660645, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.061835892498493195, |
|
"rewards/margins": 0.007788646034896374, |
|
"rewards/rejected": -0.06962453573942184, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5059523809523809, |
|
"grad_norm": 53.26546096801758, |
|
"learning_rate": 4.25e-06, |
|
"log_odds_chosen": -0.2370494306087494, |
|
"log_odds_ratio": -0.9606464505195618, |
|
"logits/chosen": 405.4431457519531, |
|
"logits/rejected": 386.801025390625, |
|
"logps/chosen": -1.5545951128005981, |
|
"logps/rejected": -1.2866960763931274, |
|
"loss": 1.6757, |
|
"nll_loss": 1.9027442932128906, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.07772975414991379, |
|
"rewards/margins": -0.013394953683018684, |
|
"rewards/rejected": -0.06433480232954025, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.5357142857142857, |
|
"grad_norm": 28.4445743560791, |
|
"learning_rate": 4.5e-06, |
|
"log_odds_chosen": 0.7336848378181458, |
|
"log_odds_ratio": -0.5354053378105164, |
|
"logits/chosen": 416.51580810546875, |
|
"logits/rejected": 425.767578125, |
|
"logps/chosen": -1.2461140155792236, |
|
"logps/rejected": -1.842694878578186, |
|
"loss": 1.4576, |
|
"nll_loss": 1.5203516483306885, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.06230570003390312, |
|
"rewards/margins": 0.02982904389500618, |
|
"rewards/rejected": -0.0921347513794899, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5654761904761905, |
|
"grad_norm": 46.642581939697266, |
|
"learning_rate": 4.75e-06, |
|
"log_odds_chosen": 0.25576168298721313, |
|
"log_odds_ratio": -0.6427351832389832, |
|
"logits/chosen": 347.84832763671875, |
|
"logits/rejected": 380.0849609375, |
|
"logps/chosen": -1.119447946548462, |
|
"logps/rejected": -1.234407663345337, |
|
"loss": 1.523, |
|
"nll_loss": 1.489450454711914, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.05597240477800369, |
|
"rewards/margins": 0.005747987423092127, |
|
"rewards/rejected": -0.06172039359807968, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.5952380952380952, |
|
"grad_norm": 43.77986526489258, |
|
"learning_rate": 5e-06, |
|
"log_odds_chosen": 0.12196314334869385, |
|
"log_odds_ratio": -0.7031902074813843, |
|
"logits/chosen": 417.86639404296875, |
|
"logits/rejected": 436.10858154296875, |
|
"logps/chosen": -1.2873280048370361, |
|
"logps/rejected": -1.3734104633331299, |
|
"loss": 1.5418, |
|
"nll_loss": 1.5198986530303955, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0643664002418518, |
|
"rewards/margins": 0.004304117523133755, |
|
"rewards/rejected": -0.06867052614688873, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 32.244384765625, |
|
"learning_rate": 4.8795003647426654e-06, |
|
"log_odds_chosen": 0.27784407138824463, |
|
"log_odds_ratio": -0.6329095959663391, |
|
"logits/chosen": 412.528564453125, |
|
"logits/rejected": 394.52130126953125, |
|
"logps/chosen": -1.0501761436462402, |
|
"logps/rejected": -1.182625412940979, |
|
"loss": 1.5085, |
|
"nll_loss": 1.6539300680160522, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.05250881239771843, |
|
"rewards/margins": 0.006622466258704662, |
|
"rewards/rejected": -0.059131283313035965, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.6547619047619048, |
|
"grad_norm": 69.666259765625, |
|
"learning_rate": 4.767312946227961e-06, |
|
"log_odds_chosen": 0.30888259410858154, |
|
"log_odds_ratio": -0.5859929919242859, |
|
"logits/chosen": 375.71612548828125, |
|
"logits/rejected": 356.3163146972656, |
|
"logps/chosen": -1.0360513925552368, |
|
"logps/rejected": -1.2427626848220825, |
|
"loss": 1.5381, |
|
"nll_loss": 1.5933644771575928, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.05180257558822632, |
|
"rewards/margins": 0.01033556554466486, |
|
"rewards/rejected": -0.06213812902569771, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6845238095238095, |
|
"grad_norm": 34.408199310302734, |
|
"learning_rate": 4.662524041201569e-06, |
|
"log_odds_chosen": 0.2710966467857361, |
|
"log_odds_ratio": -0.6335155367851257, |
|
"logits/chosen": 389.82244873046875, |
|
"logits/rejected": 437.4444274902344, |
|
"logps/chosen": -0.9292726516723633, |
|
"logps/rejected": -1.0246301889419556, |
|
"loss": 1.4878, |
|
"nll_loss": 1.3399275541305542, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04646363481879234, |
|
"rewards/margins": 0.0047678761184215546, |
|
"rewards/rejected": -0.0512315109372139, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 41.90970230102539, |
|
"learning_rate": 4.564354645876385e-06, |
|
"log_odds_chosen": 0.2518194615840912, |
|
"log_odds_ratio": -0.6617739796638489, |
|
"logits/chosen": 393.5774841308594, |
|
"logits/rejected": 395.57147216796875, |
|
"logps/chosen": -1.0596468448638916, |
|
"logps/rejected": -1.1526801586151123, |
|
"loss": 1.5378, |
|
"nll_loss": 1.5159982442855835, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.05298234894871712, |
|
"rewards/margins": 0.004651663359254599, |
|
"rewards/rejected": -0.05763401836156845, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7440476190476191, |
|
"grad_norm": 30.15009880065918, |
|
"learning_rate": 4.47213595499958e-06, |
|
"log_odds_chosen": -0.14496295154094696, |
|
"log_odds_ratio": -0.9012987017631531, |
|
"logits/chosen": 378.92242431640625, |
|
"logits/rejected": 388.03662109375, |
|
"logps/chosen": -1.3249809741973877, |
|
"logps/rejected": -1.1592341661453247, |
|
"loss": 1.4803, |
|
"nll_loss": 1.3684661388397217, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0662490501999855, |
|
"rewards/margins": -0.00828734040260315, |
|
"rewards/rejected": -0.05796170234680176, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.7738095238095238, |
|
"grad_norm": 24.6285457611084, |
|
"learning_rate": 4.385290096535147e-06, |
|
"log_odds_chosen": 0.3137187361717224, |
|
"log_odds_ratio": -0.6695318818092346, |
|
"logits/chosen": 401.2060546875, |
|
"logits/rejected": 372.0254821777344, |
|
"logps/chosen": -1.065645456314087, |
|
"logps/rejected": -1.1312780380249023, |
|
"loss": 1.5122, |
|
"nll_loss": 1.4193568229675293, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.053282272070646286, |
|
"rewards/margins": 0.003281626384705305, |
|
"rewards/rejected": -0.05656389519572258, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8035714285714286, |
|
"grad_norm": 22.474136352539062, |
|
"learning_rate": 4.303314829119352e-06, |
|
"log_odds_chosen": 0.08262600004673004, |
|
"log_odds_ratio": -0.7314961552619934, |
|
"logits/chosen": 373.8929138183594, |
|
"logits/rejected": 405.0035705566406, |
|
"logps/chosen": -1.1277202367782593, |
|
"logps/rejected": -1.2345101833343506, |
|
"loss": 1.5189, |
|
"nll_loss": 1.4631812572479248, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.056386012583971024, |
|
"rewards/margins": 0.005339500494301319, |
|
"rewards/rejected": -0.06172550842165947, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 23.9536190032959, |
|
"learning_rate": 4.2257712736425835e-06, |
|
"log_odds_chosen": -0.12259285151958466, |
|
"log_odds_ratio": -0.7952130436897278, |
|
"logits/chosen": 394.89117431640625, |
|
"logits/rejected": 361.0680847167969, |
|
"logps/chosen": -1.0518170595169067, |
|
"logps/rejected": -0.986822247505188, |
|
"loss": 1.5033, |
|
"nll_loss": 1.4649903774261475, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.052590854465961456, |
|
"rewards/margins": -0.003249742556363344, |
|
"rewards/rejected": -0.0493411123752594, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8630952380952381, |
|
"grad_norm": 22.85877227783203, |
|
"learning_rate": 4.1522739926869985e-06, |
|
"log_odds_chosen": -0.07754195481538773, |
|
"log_odds_ratio": -0.7574380040168762, |
|
"logits/chosen": 365.57147216796875, |
|
"logits/rejected": 380.41046142578125, |
|
"logps/chosen": -1.0989421606063843, |
|
"logps/rejected": -1.0417240858078003, |
|
"loss": 1.4892, |
|
"nll_loss": 1.3651247024536133, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.05494711548089981, |
|
"rewards/margins": -0.0028609037399291992, |
|
"rewards/rejected": -0.052086204290390015, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.8928571428571429, |
|
"grad_norm": 28.741422653198242, |
|
"learning_rate": 4.082482904638631e-06, |
|
"log_odds_chosen": 0.4838029742240906, |
|
"log_odds_ratio": -0.5155819654464722, |
|
"logits/chosen": 390.33367919921875, |
|
"logits/rejected": 395.64117431640625, |
|
"logps/chosen": -1.0308912992477417, |
|
"logps/rejected": -1.3601925373077393, |
|
"loss": 1.4181, |
|
"nll_loss": 1.4082247018814087, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.051544565707445145, |
|
"rewards/margins": 0.016465062275528908, |
|
"rewards/rejected": -0.0680096298456192, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9226190476190477, |
|
"grad_norm": 29.028520584106445, |
|
"learning_rate": 4.016096644512495e-06, |
|
"log_odds_chosen": 0.07582991570234299, |
|
"log_odds_ratio": -0.7352056503295898, |
|
"logits/chosen": 384.6861572265625, |
|
"logits/rejected": 382.9585876464844, |
|
"logps/chosen": -1.1874030828475952, |
|
"logps/rejected": -1.2361079454421997, |
|
"loss": 1.4165, |
|
"nll_loss": 1.365952730178833, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.05937015265226364, |
|
"rewards/margins": 0.0024352427572011948, |
|
"rewards/rejected": -0.06180540472269058, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 47.11198043823242, |
|
"learning_rate": 3.952847075210474e-06, |
|
"log_odds_chosen": 0.17520497739315033, |
|
"log_odds_ratio": -0.691135048866272, |
|
"logits/chosen": 389.0434265136719, |
|
"logits/rejected": 431.94989013671875, |
|
"logps/chosen": -0.9387935400009155, |
|
"logps/rejected": -1.0681307315826416, |
|
"loss": 1.3893, |
|
"nll_loss": 1.2998160123825073, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.046939678490161896, |
|
"rewards/margins": 0.006466855760663748, |
|
"rewards/rejected": -0.05340652912855148, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9821428571428571, |
|
"grad_norm": 16.736385345458984, |
|
"learning_rate": 3.892494720807615e-06, |
|
"log_odds_chosen": -0.01084871869534254, |
|
"log_odds_ratio": -0.759717583656311, |
|
"logits/chosen": 427.4103088378906, |
|
"logits/rejected": 431.9371032714844, |
|
"logps/chosen": -1.1341661214828491, |
|
"logps/rejected": -1.1380488872528076, |
|
"loss": 1.4218, |
|
"nll_loss": 1.4594279527664185, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.05670831352472305, |
|
"rewards/margins": 0.00019413381232880056, |
|
"rewards/rejected": -0.0569024458527565, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_log_odds_chosen": 0.23235350847244263, |
|
"eval_log_odds_ratio": -0.6711738109588623, |
|
"eval_logits/chosen": 326.8028564453125, |
|
"eval_logits/rejected": 273.7525634765625, |
|
"eval_logps/chosen": -1.007071614265442, |
|
"eval_logps/rejected": -1.159067988395691, |
|
"eval_loss": 1.4487849473953247, |
|
"eval_nll_loss": 1.455283284187317, |
|
"eval_rewards/accuracies": 0.5571428537368774, |
|
"eval_rewards/chosen": -0.050353582948446274, |
|
"eval_rewards/margins": 0.007599818520247936, |
|
"eval_rewards/rejected": -0.057953398674726486, |
|
"eval_runtime": 201.1698, |
|
"eval_samples_per_second": 2.749, |
|
"eval_steps_per_second": 0.348, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.0119047619047619, |
|
"grad_norm": 19.479097366333008, |
|
"learning_rate": 3.834824944236852e-06, |
|
"log_odds_chosen": 0.4537879526615143, |
|
"log_odds_ratio": -0.5628765225410461, |
|
"logits/chosen": 367.5221252441406, |
|
"logits/rejected": 402.8450622558594, |
|
"logps/chosen": -0.841064453125, |
|
"logps/rejected": -1.091392159461975, |
|
"loss": 1.2782, |
|
"nll_loss": 1.1047321557998657, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0420532263815403, |
|
"rewards/margins": 0.012516376562416553, |
|
"rewards/rejected": -0.054569609463214874, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0416666666666667, |
|
"grad_norm": 18.193614959716797, |
|
"learning_rate": 3.7796447300922724e-06, |
|
"log_odds_chosen": 0.6258941292762756, |
|
"log_odds_ratio": -0.5433114171028137, |
|
"logits/chosen": 404.79443359375, |
|
"logits/rejected": 432.61151123046875, |
|
"logps/chosen": -0.7016412615776062, |
|
"logps/rejected": -1.0624569654464722, |
|
"loss": 1.05, |
|
"nll_loss": 1.2699940204620361, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.03508206456899643, |
|
"rewards/margins": 0.018040789291262627, |
|
"rewards/rejected": -0.053122855722904205, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.0714285714285714, |
|
"grad_norm": 15.39194393157959, |
|
"learning_rate": 3.72677996249965e-06, |
|
"log_odds_chosen": 0.7378727197647095, |
|
"log_odds_ratio": -0.47085660696029663, |
|
"logits/chosen": 343.8995056152344, |
|
"logits/rejected": 338.2045593261719, |
|
"logps/chosen": -0.8047459721565247, |
|
"logps/rejected": -1.2078239917755127, |
|
"loss": 1.1048, |
|
"nll_loss": 1.2225919961929321, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.04023730009794235, |
|
"rewards/margins": 0.020153898745775223, |
|
"rewards/rejected": -0.060391198843717575, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.1011904761904763, |
|
"grad_norm": 20.72310447692871, |
|
"learning_rate": 3.6760731104690393e-06, |
|
"log_odds_chosen": 1.557734727859497, |
|
"log_odds_ratio": -0.26492685079574585, |
|
"logits/chosen": 435.18865966796875, |
|
"logits/rejected": 362.96868896484375, |
|
"logps/chosen": -0.6113921403884888, |
|
"logps/rejected": -1.4927384853363037, |
|
"loss": 0.9894, |
|
"nll_loss": 1.086829423904419, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.03056960739195347, |
|
"rewards/margins": 0.044067323207855225, |
|
"rewards/rejected": -0.07463693618774414, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.130952380952381, |
|
"grad_norm": 16.229610443115234, |
|
"learning_rate": 3.6273812505500587e-06, |
|
"log_odds_chosen": 0.5972079038619995, |
|
"log_odds_ratio": -0.5253661870956421, |
|
"logits/chosen": 386.9327087402344, |
|
"logits/rejected": 405.801513671875, |
|
"logps/chosen": -0.7007042169570923, |
|
"logps/rejected": -1.028989553451538, |
|
"loss": 1.1004, |
|
"nll_loss": 1.0136523246765137, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03503521531820297, |
|
"rewards/margins": 0.01641426980495453, |
|
"rewards/rejected": -0.0514494851231575, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.1607142857142858, |
|
"grad_norm": 21.483884811401367, |
|
"learning_rate": 3.5805743701971648e-06, |
|
"log_odds_chosen": 0.9149399995803833, |
|
"log_odds_ratio": -0.4483817219734192, |
|
"logits/chosen": 371.6880187988281, |
|
"logits/rejected": 403.46356201171875, |
|
"logps/chosen": -0.7223442196846008, |
|
"logps/rejected": -1.1754904985427856, |
|
"loss": 1.0787, |
|
"nll_loss": 1.0912402868270874, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03611721470952034, |
|
"rewards/margins": 0.022657308727502823, |
|
"rewards/rejected": -0.05877452343702316, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.1904761904761905, |
|
"grad_norm": 19.23330307006836, |
|
"learning_rate": 3.5355339059327378e-06, |
|
"log_odds_chosen": 1.305513620376587, |
|
"log_odds_ratio": -0.32568031549453735, |
|
"logits/chosen": 431.67156982421875, |
|
"logits/rejected": 391.12017822265625, |
|
"logps/chosen": -0.4990530014038086, |
|
"logps/rejected": -1.1961959600448608, |
|
"loss": 1.0519, |
|
"nll_loss": 0.9312452077865601, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.02495265007019043, |
|
"rewards/margins": 0.03485715016722679, |
|
"rewards/rejected": -0.05980980396270752, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2202380952380953, |
|
"grad_norm": 18.404104232788086, |
|
"learning_rate": 3.4921514788478916e-06, |
|
"log_odds_chosen": 1.2872587442398071, |
|
"log_odds_ratio": -0.3010835349559784, |
|
"logits/chosen": 335.2537841796875, |
|
"logits/rejected": 387.68621826171875, |
|
"logps/chosen": -0.6490974426269531, |
|
"logps/rejected": -1.4184123277664185, |
|
"loss": 1.0519, |
|
"nll_loss": 1.0444796085357666, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.032454878091812134, |
|
"rewards/margins": 0.03846573829650879, |
|
"rewards/rejected": -0.07092060893774033, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 13.957087516784668, |
|
"learning_rate": 3.450327796711771e-06, |
|
"log_odds_chosen": 1.1957916021347046, |
|
"log_odds_ratio": -0.36826610565185547, |
|
"logits/chosen": 365.1176452636719, |
|
"logits/rejected": 363.3384094238281, |
|
"logps/chosen": -0.6256347298622131, |
|
"logps/rejected": -1.242398738861084, |
|
"loss": 0.9696, |
|
"nll_loss": 1.0164039134979248, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.031281743198633194, |
|
"rewards/margins": 0.03083820268511772, |
|
"rewards/rejected": -0.06211994215846062, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.2797619047619047, |
|
"grad_norm": 25.830501556396484, |
|
"learning_rate": 3.409971697352368e-06, |
|
"log_odds_chosen": 1.1722948551177979, |
|
"log_odds_ratio": -0.3577140271663666, |
|
"logits/chosen": 410.3714904785156, |
|
"logits/rejected": 416.93621826171875, |
|
"logps/chosen": -0.7465067505836487, |
|
"logps/rejected": -1.4228246212005615, |
|
"loss": 1.0589, |
|
"nll_loss": 1.059061050415039, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.037325333803892136, |
|
"rewards/margins": 0.0338158942759037, |
|
"rewards/rejected": -0.07114122807979584, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.3095238095238095, |
|
"grad_norm": 15.7648344039917, |
|
"learning_rate": 3.3709993123162106e-06, |
|
"log_odds_chosen": 0.9515246152877808, |
|
"log_odds_ratio": -0.4343441128730774, |
|
"logits/chosen": 352.2547607421875, |
|
"logits/rejected": 370.08245849609375, |
|
"logps/chosen": -0.6594604253768921, |
|
"logps/rejected": -1.1094262599945068, |
|
"loss": 1.0138, |
|
"nll_loss": 0.9946004748344421, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.032973017543554306, |
|
"rewards/margins": 0.022498302161693573, |
|
"rewards/rejected": -0.05547132343053818, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.3392857142857144, |
|
"grad_norm": 21.596298217773438, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"log_odds_chosen": 0.4564495086669922, |
|
"log_odds_ratio": -0.5201828479766846, |
|
"logits/chosen": 387.99542236328125, |
|
"logits/rejected": 365.8666076660156, |
|
"logps/chosen": -1.059605360031128, |
|
"logps/rejected": -1.3452465534210205, |
|
"loss": 1.0353, |
|
"nll_loss": 1.2666301727294922, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.05298026651144028, |
|
"rewards/margins": 0.014282059855759144, |
|
"rewards/rejected": -0.06726232916116714, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.369047619047619, |
|
"grad_norm": 16.963186264038086, |
|
"learning_rate": 3.296902366978936e-06, |
|
"log_odds_chosen": 1.108178973197937, |
|
"log_odds_ratio": -0.3512019217014313, |
|
"logits/chosen": 355.7499084472656, |
|
"logits/rejected": 366.8659362792969, |
|
"logps/chosen": -0.6812053322792053, |
|
"logps/rejected": -1.3409600257873535, |
|
"loss": 1.0135, |
|
"nll_loss": 0.873543381690979, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.034060269594192505, |
|
"rewards/margins": 0.03298773616552353, |
|
"rewards/rejected": -0.06704800575971603, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.3988095238095237, |
|
"grad_norm": 22.207305908203125, |
|
"learning_rate": 3.2616403652672114e-06, |
|
"log_odds_chosen": 1.4102222919464111, |
|
"log_odds_ratio": -0.3304731547832489, |
|
"logits/chosen": 382.0793151855469, |
|
"logits/rejected": 389.06683349609375, |
|
"logps/chosen": -0.5401080846786499, |
|
"logps/rejected": -1.397993803024292, |
|
"loss": 1.0737, |
|
"nll_loss": 0.9223471879959106, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.027005404233932495, |
|
"rewards/margins": 0.04289429262280464, |
|
"rewards/rejected": -0.06989969313144684, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 16.992250442504883, |
|
"learning_rate": 3.2274861218395142e-06, |
|
"log_odds_chosen": 0.807520866394043, |
|
"log_odds_ratio": -0.4530153274536133, |
|
"logits/chosen": 414.99261474609375, |
|
"logits/rejected": 423.21392822265625, |
|
"logps/chosen": -0.7907418012619019, |
|
"logps/rejected": -1.2757242918014526, |
|
"loss": 1.0285, |
|
"nll_loss": 1.0256245136260986, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.03953709080815315, |
|
"rewards/margins": 0.0242491252720356, |
|
"rewards/rejected": -0.06378621608018875, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.4583333333333333, |
|
"grad_norm": 17.553632736206055, |
|
"learning_rate": 3.1943828249997e-06, |
|
"log_odds_chosen": 0.7604994177818298, |
|
"log_odds_ratio": -0.42883071303367615, |
|
"logits/chosen": 386.42657470703125, |
|
"logits/rejected": 363.54931640625, |
|
"logps/chosen": -0.6057090163230896, |
|
"logps/rejected": -1.0264109373092651, |
|
"loss": 1.0502, |
|
"nll_loss": 1.1268291473388672, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.03028545156121254, |
|
"rewards/margins": 0.021035097539424896, |
|
"rewards/rejected": -0.05132054537534714, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.4880952380952381, |
|
"grad_norm": 22.123960494995117, |
|
"learning_rate": 3.1622776601683796e-06, |
|
"log_odds_chosen": 1.0444424152374268, |
|
"log_odds_ratio": -0.38615161180496216, |
|
"logits/chosen": 369.87274169921875, |
|
"logits/rejected": 389.29583740234375, |
|
"logps/chosen": -0.7160728573799133, |
|
"logps/rejected": -1.2788223028182983, |
|
"loss": 0.9816, |
|
"nll_loss": 0.8135589361190796, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.035803645849227905, |
|
"rewards/margins": 0.02813747525215149, |
|
"rewards/rejected": -0.0639411136507988, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.5178571428571428, |
|
"grad_norm": 26.341325759887695, |
|
"learning_rate": 3.131121455425748e-06, |
|
"log_odds_chosen": 1.0527693033218384, |
|
"log_odds_ratio": -0.35840553045272827, |
|
"logits/chosen": 404.608154296875, |
|
"logits/rejected": 410.81854248046875, |
|
"logps/chosen": -0.6503961682319641, |
|
"logps/rejected": -1.250218152999878, |
|
"loss": 1.0367, |
|
"nll_loss": 0.9888055920600891, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.032519809901714325, |
|
"rewards/margins": 0.029991086572408676, |
|
"rewards/rejected": -0.0625109076499939, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.5476190476190477, |
|
"grad_norm": 25.059885025024414, |
|
"learning_rate": 3.1008683647302113e-06, |
|
"log_odds_chosen": 1.0121057033538818, |
|
"log_odds_ratio": -0.4098740220069885, |
|
"logits/chosen": 375.2867126464844, |
|
"logits/rejected": 426.01507568359375, |
|
"logps/chosen": -0.8014397621154785, |
|
"logps/rejected": -1.4673702716827393, |
|
"loss": 1.0023, |
|
"nll_loss": 1.092798113822937, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0400719977915287, |
|
"rewards/margins": 0.03329651802778244, |
|
"rewards/rejected": -0.07336851954460144, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.5773809523809523, |
|
"grad_norm": 17.973594665527344, |
|
"learning_rate": 3.0714755841697565e-06, |
|
"log_odds_chosen": 0.830208957195282, |
|
"log_odds_ratio": -0.5377876162528992, |
|
"logits/chosen": 421.629150390625, |
|
"logits/rejected": 414.95703125, |
|
"logps/chosen": -0.8159686923027039, |
|
"logps/rejected": -1.314146876335144, |
|
"loss": 1.0967, |
|
"nll_loss": 1.1257387399673462, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.040798433125019073, |
|
"rewards/margins": 0.024908915162086487, |
|
"rewards/rejected": -0.06570734083652496, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.6071428571428572, |
|
"grad_norm": 17.566476821899414, |
|
"learning_rate": 3.0429030972509227e-06, |
|
"log_odds_chosen": 0.7149374485015869, |
|
"log_odds_ratio": -0.45594334602355957, |
|
"logits/chosen": 354.78692626953125, |
|
"logits/rejected": 364.1693115234375, |
|
"logps/chosen": -0.836654543876648, |
|
"logps/rejected": -1.2243211269378662, |
|
"loss": 1.0429, |
|
"nll_loss": 1.1530247926712036, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.041832733899354935, |
|
"rewards/margins": 0.019383331760764122, |
|
"rewards/rejected": -0.06121605634689331, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.6369047619047619, |
|
"grad_norm": 16.325292587280273, |
|
"learning_rate": 3.0151134457776365e-06, |
|
"log_odds_chosen": 0.7199637293815613, |
|
"log_odds_ratio": -0.434516578912735, |
|
"logits/chosen": 322.65130615234375, |
|
"logits/rejected": 338.8601989746094, |
|
"logps/chosen": -0.5899932384490967, |
|
"logps/rejected": -0.9299219250679016, |
|
"loss": 1.076, |
|
"nll_loss": 0.9019113779067993, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.029499661177396774, |
|
"rewards/margins": 0.016996433958411217, |
|
"rewards/rejected": -0.04649610072374344, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 14.822625160217285, |
|
"learning_rate": 2.988071523335984e-06, |
|
"log_odds_chosen": 0.9259305000305176, |
|
"log_odds_ratio": -0.5428971648216248, |
|
"logits/chosen": 395.42230224609375, |
|
"logits/rejected": 360.65234375, |
|
"logps/chosen": -0.5504172444343567, |
|
"logps/rejected": -1.1162601709365845, |
|
"loss": 1.0232, |
|
"nll_loss": 0.8841646909713745, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.027520865201950073, |
|
"rewards/margins": 0.02829214558005333, |
|
"rewards/rejected": -0.0558130145072937, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.6964285714285714, |
|
"grad_norm": 13.360857009887695, |
|
"learning_rate": 2.961744388795462e-06, |
|
"log_odds_chosen": 0.7002909183502197, |
|
"log_odds_ratio": -0.46074408292770386, |
|
"logits/chosen": 344.73712158203125, |
|
"logits/rejected": 373.6070556640625, |
|
"logps/chosen": -0.6551269292831421, |
|
"logps/rejected": -1.0137001276016235, |
|
"loss": 0.9926, |
|
"nll_loss": 0.9025999307632446, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.032756343483924866, |
|
"rewards/margins": 0.017928656190633774, |
|
"rewards/rejected": -0.05068499967455864, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.7261904761904763, |
|
"grad_norm": 12.45747184753418, |
|
"learning_rate": 2.9361010975735177e-06, |
|
"log_odds_chosen": 1.1189637184143066, |
|
"log_odds_ratio": -0.3354729413986206, |
|
"logits/chosen": 367.6058349609375, |
|
"logits/rejected": 436.7281188964844, |
|
"logps/chosen": -0.7302691340446472, |
|
"logps/rejected": -1.3499362468719482, |
|
"loss": 1.0165, |
|
"nll_loss": 0.9672770500183105, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.03651345893740654, |
|
"rewards/margins": 0.03098336234688759, |
|
"rewards/rejected": -0.06749682128429413, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.755952380952381, |
|
"grad_norm": 20.8387508392334, |
|
"learning_rate": 2.9111125486979104e-06, |
|
"log_odds_chosen": 0.8264445066452026, |
|
"log_odds_ratio": -0.45267271995544434, |
|
"logits/chosen": 380.7887268066406, |
|
"logits/rejected": 392.38018798828125, |
|
"logps/chosen": -0.7333472967147827, |
|
"logps/rejected": -1.1829124689102173, |
|
"loss": 1.0473, |
|
"nll_loss": 1.2072994709014893, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.036667369306087494, |
|
"rewards/margins": 0.0224782545119524, |
|
"rewards/rejected": -0.059145621955394745, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 14.306224822998047, |
|
"learning_rate": 2.8867513459481293e-06, |
|
"log_odds_chosen": 1.0914583206176758, |
|
"log_odds_ratio": -0.3539409041404724, |
|
"logits/chosen": 376.03033447265625, |
|
"logits/rejected": 378.3951416015625, |
|
"logps/chosen": -0.58739173412323, |
|
"logps/rejected": -1.1421259641647339, |
|
"loss": 0.975, |
|
"nll_loss": 0.8565952181816101, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.02936958707869053, |
|
"rewards/margins": 0.027736714109778404, |
|
"rewards/rejected": -0.05710630491375923, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.8154761904761905, |
|
"grad_norm": 15.119916915893555, |
|
"learning_rate": 2.862991671569341e-06, |
|
"log_odds_chosen": 0.3856947720050812, |
|
"log_odds_ratio": -0.5676103830337524, |
|
"logits/chosen": 432.51239013671875, |
|
"logits/rejected": 413.9574279785156, |
|
"logps/chosen": -1.001999855041504, |
|
"logps/rejected": -1.1941574811935425, |
|
"loss": 1.026, |
|
"nll_loss": 1.176692247390747, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.050099991261959076, |
|
"rewards/margins": 0.009607886895537376, |
|
"rewards/rejected": -0.0597078800201416, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.8452380952380953, |
|
"grad_norm": 13.858372688293457, |
|
"learning_rate": 2.839809171235324e-06, |
|
"log_odds_chosen": 0.9851268529891968, |
|
"log_odds_ratio": -0.46969637274742126, |
|
"logits/chosen": 347.86224365234375, |
|
"logits/rejected": 354.42236328125, |
|
"logps/chosen": -0.7486366629600525, |
|
"logps/rejected": -1.323521375656128, |
|
"loss": 1.0778, |
|
"nll_loss": 1.036675214767456, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.037431832402944565, |
|
"rewards/margins": 0.028744244948029518, |
|
"rewards/rejected": -0.06617607176303864, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 17.435771942138672, |
|
"learning_rate": 2.817180849095055e-06, |
|
"log_odds_chosen": 0.7195825576782227, |
|
"log_odds_ratio": -0.5751198530197144, |
|
"logits/chosen": 372.2769470214844, |
|
"logits/rejected": 380.11322021484375, |
|
"logps/chosen": -0.9793826937675476, |
|
"logps/rejected": -1.4906439781188965, |
|
"loss": 1.0867, |
|
"nll_loss": 1.2049812078475952, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.04896913468837738, |
|
"rewards/margins": 0.025563066825270653, |
|
"rewards/rejected": -0.07453219592571259, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.9047619047619047, |
|
"grad_norm": 13.72044563293457, |
|
"learning_rate": 2.7950849718747376e-06, |
|
"log_odds_chosen": 1.036712884902954, |
|
"log_odds_ratio": -0.36927738785743713, |
|
"logits/chosen": 385.5801086425781, |
|
"logits/rejected": 423.3285217285156, |
|
"logps/chosen": -0.7053465247154236, |
|
"logps/rejected": -1.304882526397705, |
|
"loss": 0.9866, |
|
"nll_loss": 0.9042797088623047, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.03526733070611954, |
|
"rewards/margins": 0.029976800084114075, |
|
"rewards/rejected": -0.06524413079023361, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.9345238095238095, |
|
"grad_norm": 17.074472427368164, |
|
"learning_rate": 2.773500981126146e-06, |
|
"log_odds_chosen": 1.224524736404419, |
|
"log_odds_ratio": -0.35412582755088806, |
|
"logits/chosen": 397.96649169921875, |
|
"logits/rejected": 423.78094482421875, |
|
"logps/chosen": -0.6596122980117798, |
|
"logps/rejected": -1.2944166660308838, |
|
"loss": 0.9893, |
|
"nll_loss": 0.9460498690605164, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.03298061341047287, |
|
"rewards/margins": 0.0317402258515358, |
|
"rewards/rejected": -0.06472083181142807, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.9642857142857144, |
|
"grad_norm": 23.940942764282227, |
|
"learning_rate": 2.752409412815902e-06, |
|
"log_odds_chosen": 1.0460882186889648, |
|
"log_odds_ratio": -0.416635125875473, |
|
"logits/chosen": 391.8972473144531, |
|
"logits/rejected": 401.4590148925781, |
|
"logps/chosen": -0.7760294675827026, |
|
"logps/rejected": -1.4733283519744873, |
|
"loss": 1.0094, |
|
"nll_loss": 0.9179704785346985, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.038801468908786774, |
|
"rewards/margins": 0.034864943474531174, |
|
"rewards/rejected": -0.07366641610860825, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.994047619047619, |
|
"grad_norm": 18.142581939697266, |
|
"learning_rate": 2.7317918235407652e-06, |
|
"log_odds_chosen": 0.5594000816345215, |
|
"log_odds_ratio": -0.544743537902832, |
|
"logits/chosen": 377.28729248046875, |
|
"logits/rejected": 394.92413330078125, |
|
"logps/chosen": -0.9491284489631653, |
|
"logps/rejected": -1.2867456674575806, |
|
"loss": 1.0804, |
|
"nll_loss": 1.2649915218353271, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.047456420958042145, |
|
"rewards/margins": 0.016880858689546585, |
|
"rewards/rejected": -0.06433728337287903, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_log_odds_chosen": 0.21522486209869385, |
|
"eval_log_odds_ratio": -0.6897445917129517, |
|
"eval_logits/chosen": 330.50665283203125, |
|
"eval_logits/rejected": 278.2473449707031, |
|
"eval_logps/chosen": -1.022003173828125, |
|
"eval_logps/rejected": -1.1829901933670044, |
|
"eval_loss": 1.4225263595581055, |
|
"eval_nll_loss": 1.4082757234573364, |
|
"eval_rewards/accuracies": 0.5142857432365417, |
|
"eval_rewards/chosen": -0.05110016465187073, |
|
"eval_rewards/margins": 0.008049344643950462, |
|
"eval_rewards/rejected": -0.05914951115846634, |
|
"eval_runtime": 201.3887, |
|
"eval_samples_per_second": 2.746, |
|
"eval_steps_per_second": 0.348, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 2.0238095238095237, |
|
"grad_norm": 25.43450927734375, |
|
"learning_rate": 2.711630722733202e-06, |
|
"log_odds_chosen": 1.8818715810775757, |
|
"log_odds_ratio": -0.2647668421268463, |
|
"logits/chosen": 383.9913330078125, |
|
"logits/rejected": 380.02801513671875, |
|
"logps/chosen": -0.47935551404953003, |
|
"logps/rejected": -1.454525351524353, |
|
"loss": 0.6307, |
|
"nll_loss": 0.6181924343109131, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.023967772722244263, |
|
"rewards/margins": 0.04875849559903145, |
|
"rewards/rejected": -0.07272626459598541, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.0535714285714284, |
|
"grad_norm": 16.127397537231445, |
|
"learning_rate": 2.691909510290828e-06, |
|
"log_odds_chosen": 2.698786497116089, |
|
"log_odds_ratio": -0.14153851568698883, |
|
"logits/chosen": 388.04534912109375, |
|
"logits/rejected": 355.0823669433594, |
|
"logps/chosen": -0.29280781745910645, |
|
"logps/rejected": -1.475531816482544, |
|
"loss": 0.5566, |
|
"nll_loss": 0.6942065954208374, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.014640390872955322, |
|
"rewards/margins": 0.059136200696229935, |
|
"rewards/rejected": -0.07377658784389496, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.0833333333333335, |
|
"grad_norm": 13.852057456970215, |
|
"learning_rate": 2.6726124191242444e-06, |
|
"log_odds_chosen": 2.8243956565856934, |
|
"log_odds_ratio": -0.09990663826465607, |
|
"logits/chosen": 354.74615478515625, |
|
"logits/rejected": 387.60528564453125, |
|
"logps/chosen": -0.37845298647880554, |
|
"logps/rejected": -2.111022472381592, |
|
"loss": 0.5438, |
|
"nll_loss": 0.5637595057487488, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.018922649323940277, |
|
"rewards/margins": 0.08662847429513931, |
|
"rewards/rejected": -0.10555113852024078, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.113095238095238, |
|
"grad_norm": 16.1456241607666, |
|
"learning_rate": 2.6537244621713765e-06, |
|
"log_odds_chosen": 2.1223108768463135, |
|
"log_odds_ratio": -0.14685922861099243, |
|
"logits/chosen": 359.0018005371094, |
|
"logits/rejected": 385.0552062988281, |
|
"logps/chosen": -0.425741970539093, |
|
"logps/rejected": -1.6288859844207764, |
|
"loss": 0.5366, |
|
"nll_loss": 0.5897840261459351, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0212871003895998, |
|
"rewards/margins": 0.060157209634780884, |
|
"rewards/rejected": -0.08144429326057434, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.142857142857143, |
|
"grad_norm": 11.441978454589844, |
|
"learning_rate": 2.6352313834736496e-06, |
|
"log_odds_chosen": 2.8245463371276855, |
|
"log_odds_ratio": -0.12314938008785248, |
|
"logits/chosen": 369.9977722167969, |
|
"logits/rejected": 407.8392333984375, |
|
"logps/chosen": -0.3003098964691162, |
|
"logps/rejected": -1.5362733602523804, |
|
"loss": 0.5329, |
|
"nll_loss": 0.5958473086357117, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.015015493147075176, |
|
"rewards/margins": 0.06179817393422127, |
|
"rewards/rejected": -0.07681366801261902, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.1726190476190474, |
|
"grad_norm": 12.86784839630127, |
|
"learning_rate": 2.6171196129510684e-06, |
|
"log_odds_chosen": 2.503121852874756, |
|
"log_odds_ratio": -0.1363641321659088, |
|
"logits/chosen": 334.23468017578125, |
|
"logits/rejected": 311.822509765625, |
|
"logps/chosen": -0.3001948595046997, |
|
"logps/rejected": -1.4599236249923706, |
|
"loss": 0.5156, |
|
"nll_loss": 0.5041288137435913, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0150097431614995, |
|
"rewards/margins": 0.05798644572496414, |
|
"rewards/rejected": -0.07299618422985077, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 2.2023809523809526, |
|
"grad_norm": 13.525810241699219, |
|
"learning_rate": 2.599376224550182e-06, |
|
"log_odds_chosen": 1.8203538656234741, |
|
"log_odds_ratio": -0.19545204937458038, |
|
"logits/chosen": 313.6293029785156, |
|
"logits/rejected": 326.6126403808594, |
|
"logps/chosen": -0.4296380877494812, |
|
"logps/rejected": -1.4019193649291992, |
|
"loss": 0.5492, |
|
"nll_loss": 0.6865583062171936, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.02148190513253212, |
|
"rewards/margins": 0.04861406981945038, |
|
"rewards/rejected": -0.0700959712266922, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.232142857142857, |
|
"grad_norm": 14.674184799194336, |
|
"learning_rate": 2.5819888974716113e-06, |
|
"log_odds_chosen": 1.8546769618988037, |
|
"log_odds_ratio": -0.22947852313518524, |
|
"logits/chosen": 377.0060119628906, |
|
"logits/rejected": 399.48590087890625, |
|
"logps/chosen": -0.3884517252445221, |
|
"logps/rejected": -1.292641520500183, |
|
"loss": 0.5785, |
|
"nll_loss": 0.5873233079910278, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.019422587007284164, |
|
"rewards/margins": 0.045209504663944244, |
|
"rewards/rejected": -0.06463208049535751, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.261904761904762, |
|
"grad_norm": 11.674169540405273, |
|
"learning_rate": 2.564945880212886e-06, |
|
"log_odds_chosen": 2.2116830348968506, |
|
"log_odds_ratio": -0.12257333844900131, |
|
"logits/chosen": 367.1352233886719, |
|
"logits/rejected": 336.3498840332031, |
|
"logps/chosen": -0.3219259977340698, |
|
"logps/rejected": -1.350303292274475, |
|
"loss": 0.5297, |
|
"nll_loss": 0.5648508071899414, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01609629951417446, |
|
"rewards/margins": 0.051418863236904144, |
|
"rewards/rejected": -0.06751517206430435, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.2916666666666665, |
|
"grad_norm": 13.154436111450195, |
|
"learning_rate": 2.5482359571881276e-06, |
|
"log_odds_chosen": 2.5731985569000244, |
|
"log_odds_ratio": -0.1179199069738388, |
|
"logits/chosen": 340.62567138671875, |
|
"logits/rejected": 364.66888427734375, |
|
"logps/chosen": -0.27915579080581665, |
|
"logps/rejected": -1.4784090518951416, |
|
"loss": 0.5102, |
|
"nll_loss": 0.49411922693252563, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.013957786373794079, |
|
"rewards/margins": 0.059962667524814606, |
|
"rewards/rejected": -0.07392045855522156, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.3214285714285716, |
|
"grad_norm": 11.973630905151367, |
|
"learning_rate": 2.5318484177091667e-06, |
|
"log_odds_chosen": 2.2964727878570557, |
|
"log_odds_ratio": -0.13063772022724152, |
|
"logits/chosen": 359.48345947265625, |
|
"logits/rejected": 400.7521667480469, |
|
"logps/chosen": -0.3307945430278778, |
|
"logps/rejected": -1.4601058959960938, |
|
"loss": 0.5685, |
|
"nll_loss": 0.5173304677009583, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01653972640633583, |
|
"rewards/margins": 0.056465573608875275, |
|
"rewards/rejected": -0.0730053037405014, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.3511904761904763, |
|
"grad_norm": 10.832448959350586, |
|
"learning_rate": 2.515773027133138e-06, |
|
"log_odds_chosen": 2.685337543487549, |
|
"log_odds_ratio": -0.10510516166687012, |
|
"logits/chosen": 360.8955383300781, |
|
"logits/rejected": 359.0787048339844, |
|
"logps/chosen": -0.2764904201030731, |
|
"logps/rejected": -1.409620761871338, |
|
"loss": 0.511, |
|
"nll_loss": 0.500057578086853, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.013824522495269775, |
|
"rewards/margins": 0.05665650963783264, |
|
"rewards/rejected": -0.07048103958368301, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.380952380952381, |
|
"grad_norm": 12.639362335205078, |
|
"learning_rate": 2.5e-06, |
|
"log_odds_chosen": 2.6170766353607178, |
|
"log_odds_ratio": -0.1315409243106842, |
|
"logits/chosen": 342.3817138671875, |
|
"logits/rejected": 396.52410888671875, |
|
"logps/chosen": -0.328637957572937, |
|
"logps/rejected": -1.6732807159423828, |
|
"loss": 0.527, |
|
"nll_loss": 0.5181549787521362, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.01643189787864685, |
|
"rewards/margins": 0.067232146859169, |
|
"rewards/rejected": -0.08366404473781586, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.4107142857142856, |
|
"grad_norm": 14.651439666748047, |
|
"learning_rate": 2.484519974999767e-06, |
|
"log_odds_chosen": 2.2201273441314697, |
|
"log_odds_ratio": -0.16681890189647675, |
|
"logits/chosen": 424.72918701171875, |
|
"logits/rejected": 371.58966064453125, |
|
"logps/chosen": -0.30684176087379456, |
|
"logps/rejected": -1.3665101528167725, |
|
"loss": 0.5749, |
|
"nll_loss": 0.5082268714904785, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.015342088416218758, |
|
"rewards/margins": 0.05298342555761337, |
|
"rewards/rejected": -0.06832550466060638, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.4404761904761907, |
|
"grad_norm": 11.325146675109863, |
|
"learning_rate": 2.4693239916239746e-06, |
|
"log_odds_chosen": 2.499159336090088, |
|
"log_odds_ratio": -0.21561208367347717, |
|
"logits/chosen": 356.08258056640625, |
|
"logits/rejected": 390.337890625, |
|
"logps/chosen": -0.372717022895813, |
|
"logps/rejected": -1.7423985004425049, |
|
"loss": 0.5384, |
|
"nll_loss": 0.5256937742233276, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.01863585226237774, |
|
"rewards/margins": 0.06848406046628952, |
|
"rewards/rejected": -0.087119922041893, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.4702380952380953, |
|
"grad_norm": 12.71360969543457, |
|
"learning_rate": 2.4544034683690802e-06, |
|
"log_odds_chosen": 2.8460097312927246, |
|
"log_odds_ratio": -0.09599516540765762, |
|
"logits/chosen": 386.35302734375, |
|
"logits/rejected": 372.95367431640625, |
|
"logps/chosen": -0.26167118549346924, |
|
"logps/rejected": -1.7513694763183594, |
|
"loss": 0.551, |
|
"nll_loss": 0.4728317856788635, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.013083559460937977, |
|
"rewards/margins": 0.0744849145412445, |
|
"rewards/rejected": -0.08756847679615021, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 19.943899154663086, |
|
"learning_rate": 2.4397501823713327e-06, |
|
"log_odds_chosen": 2.2042346000671387, |
|
"log_odds_ratio": -0.17198964953422546, |
|
"logits/chosen": 370.604736328125, |
|
"logits/rejected": 320.15875244140625, |
|
"logps/chosen": -0.30728116631507874, |
|
"logps/rejected": -1.3875327110290527, |
|
"loss": 0.5247, |
|
"nll_loss": 0.6666563153266907, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.015364060178399086, |
|
"rewards/margins": 0.05401257798075676, |
|
"rewards/rejected": -0.0693766325712204, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.5297619047619047, |
|
"grad_norm": 12.133928298950195, |
|
"learning_rate": 2.4253562503633297e-06, |
|
"log_odds_chosen": 2.90407133102417, |
|
"log_odds_ratio": -0.06831072270870209, |
|
"logits/chosen": 342.95733642578125, |
|
"logits/rejected": 367.10894775390625, |
|
"logps/chosen": -0.3182448446750641, |
|
"logps/rejected": -1.8485863208770752, |
|
"loss": 0.507, |
|
"nll_loss": 0.6053184270858765, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.015912240371108055, |
|
"rewards/margins": 0.07651706039905548, |
|
"rewards/rejected": -0.09242931008338928, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.5595238095238093, |
|
"grad_norm": 12.26125717163086, |
|
"learning_rate": 2.411214110852061e-06, |
|
"log_odds_chosen": 2.55293607711792, |
|
"log_odds_ratio": -0.10798110067844391, |
|
"logits/chosen": 391.63848876953125, |
|
"logits/rejected": 404.0960388183594, |
|
"logps/chosen": -0.27310800552368164, |
|
"logps/rejected": -1.4932405948638916, |
|
"loss": 0.5084, |
|
"nll_loss": 0.4452734887599945, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.013655401766300201, |
|
"rewards/margins": 0.06100662797689438, |
|
"rewards/rejected": -0.07466202974319458, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.5892857142857144, |
|
"grad_norm": 13.624907493591309, |
|
"learning_rate": 2.3973165074269213e-06, |
|
"log_odds_chosen": 2.3622078895568848, |
|
"log_odds_ratio": -0.2018093764781952, |
|
"logits/chosen": 356.6548767089844, |
|
"logits/rejected": 360.92474365234375, |
|
"logps/chosen": -0.36742302775382996, |
|
"logps/rejected": -1.6279608011245728, |
|
"loss": 0.5587, |
|
"nll_loss": 0.5465327501296997, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.018371151760220528, |
|
"rewards/margins": 0.06302689015865326, |
|
"rewards/rejected": -0.08139804750680923, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 2.619047619047619, |
|
"grad_norm": 13.956061363220215, |
|
"learning_rate": 2.3836564731139807e-06, |
|
"log_odds_chosen": 2.7665021419525146, |
|
"log_odds_ratio": -0.10336035490036011, |
|
"logits/chosen": 352.8879089355469, |
|
"logits/rejected": 368.2629699707031, |
|
"logps/chosen": -0.2728215754032135, |
|
"logps/rejected": -1.5103857517242432, |
|
"loss": 0.5462, |
|
"nll_loss": 0.46932536363601685, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01364107709378004, |
|
"rewards/margins": 0.06187821552157402, |
|
"rewards/rejected": -0.07551928609609604, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.6488095238095237, |
|
"grad_norm": 11.78285026550293, |
|
"learning_rate": 2.3702273156998867e-06, |
|
"log_odds_chosen": 2.3770580291748047, |
|
"log_odds_ratio": -0.116610586643219, |
|
"logits/chosen": 337.458740234375, |
|
"logits/rejected": 374.8058776855469, |
|
"logps/chosen": -0.3450239896774292, |
|
"logps/rejected": -1.6122970581054688, |
|
"loss": 0.5447, |
|
"nll_loss": 0.5509533882141113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01725119911134243, |
|
"rewards/margins": 0.06336364895105362, |
|
"rewards/rejected": -0.0806148499250412, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 2.678571428571429, |
|
"grad_norm": 14.717430114746094, |
|
"learning_rate": 2.357022603955159e-06, |
|
"log_odds_chosen": 2.9325308799743652, |
|
"log_odds_ratio": -0.08914806693792343, |
|
"logits/chosen": 375.6985778808594, |
|
"logits/rejected": 367.75787353515625, |
|
"logps/chosen": -0.35901179909706116, |
|
"logps/rejected": -2.006959915161133, |
|
"loss": 0.5752, |
|
"nll_loss": 0.49780726432800293, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01795058883726597, |
|
"rewards/margins": 0.08239741623401642, |
|
"rewards/rejected": -0.10034799575805664, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.7083333333333335, |
|
"grad_norm": 18.606168746948242, |
|
"learning_rate": 2.3440361546924774e-06, |
|
"log_odds_chosen": 2.5973100662231445, |
|
"log_odds_ratio": -0.09789351373910904, |
|
"logits/chosen": 371.6011962890625, |
|
"logits/rejected": 356.3915710449219, |
|
"logps/chosen": -0.27857938408851624, |
|
"logps/rejected": -1.5046110153198242, |
|
"loss": 0.5909, |
|
"nll_loss": 0.5146271586418152, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.013928967528045177, |
|
"rewards/margins": 0.061301589012145996, |
|
"rewards/rejected": -0.07523055374622345, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 2.738095238095238, |
|
"grad_norm": 14.296780586242676, |
|
"learning_rate": 2.3312620206007847e-06, |
|
"log_odds_chosen": 2.4838976860046387, |
|
"log_odds_ratio": -0.13478006422519684, |
|
"logits/chosen": 412.72332763671875, |
|
"logits/rejected": 433.41070556640625, |
|
"logps/chosen": -0.3284691870212555, |
|
"logps/rejected": -1.5628798007965088, |
|
"loss": 0.5718, |
|
"nll_loss": 0.5497859716415405, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.016423460096120834, |
|
"rewards/margins": 0.061720531433820724, |
|
"rewards/rejected": -0.07814399152994156, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.767857142857143, |
|
"grad_norm": 12.61571216583252, |
|
"learning_rate": 2.3186944788008413e-06, |
|
"log_odds_chosen": 2.7270541191101074, |
|
"log_odds_ratio": -0.11491680145263672, |
|
"logits/chosen": 401.2771301269531, |
|
"logits/rejected": 386.7737121582031, |
|
"logps/chosen": -0.25855037569999695, |
|
"logps/rejected": -1.5313479900360107, |
|
"loss": 0.5771, |
|
"nll_loss": 0.6066937446594238, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.012927519157528877, |
|
"rewards/margins": 0.06363988667726517, |
|
"rewards/rejected": -0.0765674039721489, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 2.7976190476190474, |
|
"grad_norm": 11.338821411132812, |
|
"learning_rate": 2.3063280200722128e-06, |
|
"log_odds_chosen": 2.1205978393554688, |
|
"log_odds_ratio": -0.20822449028491974, |
|
"logits/chosen": 357.4671630859375, |
|
"logits/rejected": 335.1904296875, |
|
"logps/chosen": -0.36784037947654724, |
|
"logps/rejected": -1.465673804283142, |
|
"loss": 0.5175, |
|
"nll_loss": 0.5160819888114929, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.018392018973827362, |
|
"rewards/margins": 0.0548916831612587, |
|
"rewards/rejected": -0.07328370213508606, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.8273809523809526, |
|
"grad_norm": 11.929247856140137, |
|
"learning_rate": 2.2941573387056174e-06, |
|
"log_odds_chosen": 2.6770148277282715, |
|
"log_odds_ratio": -0.0899493470788002, |
|
"logits/chosen": 325.32476806640625, |
|
"logits/rejected": 420.31280517578125, |
|
"logps/chosen": -0.3526560664176941, |
|
"logps/rejected": -1.848586082458496, |
|
"loss": 0.5393, |
|
"nll_loss": 0.48061132431030273, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.017632806673645973, |
|
"rewards/margins": 0.07479649782180786, |
|
"rewards/rejected": -0.09242929518222809, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 13.174921035766602, |
|
"learning_rate": 2.2821773229381924e-06, |
|
"log_odds_chosen": 2.5945560932159424, |
|
"log_odds_ratio": -0.09449335187673569, |
|
"logits/chosen": 369.5697326660156, |
|
"logits/rejected": 390.010009765625, |
|
"logps/chosen": -0.26720184087753296, |
|
"logps/rejected": -1.5164011716842651, |
|
"loss": 0.4922, |
|
"nll_loss": 0.5011499524116516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.013360092416405678, |
|
"rewards/margins": 0.06245996803045273, |
|
"rewards/rejected": -0.07582006603479385, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.886904761904762, |
|
"grad_norm": 17.229890823364258, |
|
"learning_rate": 2.270383045932499e-06, |
|
"log_odds_chosen": 2.310241222381592, |
|
"log_odds_ratio": -0.1323787122964859, |
|
"logits/chosen": 352.09368896484375, |
|
"logits/rejected": 390.89129638671875, |
|
"logps/chosen": -0.41382861137390137, |
|
"logps/rejected": -1.641650915145874, |
|
"loss": 0.5059, |
|
"nll_loss": 0.5348228216171265, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.020691432058811188, |
|
"rewards/margins": 0.06139112263917923, |
|
"rewards/rejected": -0.08208255469799042, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 2.9166666666666665, |
|
"grad_norm": 11.300827026367188, |
|
"learning_rate": 2.2587697572631284e-06, |
|
"log_odds_chosen": 1.7844845056533813, |
|
"log_odds_ratio": -0.32421550154685974, |
|
"logits/chosen": 398.19256591796875, |
|
"logits/rejected": 312.98248291015625, |
|
"logps/chosen": -0.5920530557632446, |
|
"logps/rejected": -1.449924349784851, |
|
"loss": 0.6026, |
|
"nll_loss": 0.5389891862869263, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.0296026524156332, |
|
"rewards/margins": 0.04289356991648674, |
|
"rewards/rejected": -0.07249622046947479, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.946428571428571, |
|
"grad_norm": 13.277922630310059, |
|
"learning_rate": 2.2473328748774737e-06, |
|
"log_odds_chosen": 2.5535902976989746, |
|
"log_odds_ratio": -0.12692758440971375, |
|
"logits/chosen": 344.2357177734375, |
|
"logits/rejected": 398.01348876953125, |
|
"logps/chosen": -0.27592435479164124, |
|
"logps/rejected": -1.5041601657867432, |
|
"loss": 0.5166, |
|
"nll_loss": 0.4667043685913086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.013796217739582062, |
|
"rewards/margins": 0.06141179800033569, |
|
"rewards/rejected": -0.07520802319049835, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.9761904761904763, |
|
"grad_norm": 10.682828903198242, |
|
"learning_rate": 2.23606797749979e-06, |
|
"log_odds_chosen": 2.8015060424804688, |
|
"log_odds_ratio": -0.12307514250278473, |
|
"logits/chosen": 391.4792785644531, |
|
"logits/rejected": 362.94403076171875, |
|
"logps/chosen": -0.2695181965827942, |
|
"logps/rejected": -1.6667454242706299, |
|
"loss": 0.5651, |
|
"nll_loss": 0.40066272020339966, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01347590796649456, |
|
"rewards/margins": 0.06986136734485626, |
|
"rewards/rejected": -0.08333728462457657, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_log_odds_chosen": 0.3315908908843994, |
|
"eval_log_odds_ratio": -0.6744564771652222, |
|
"eval_logits/chosen": 304.2648620605469, |
|
"eval_logits/rejected": 249.09341430664062, |
|
"eval_logps/chosen": -1.1561349630355835, |
|
"eval_logps/rejected": -1.3795461654663086, |
|
"eval_loss": 1.6016675233840942, |
|
"eval_nll_loss": 1.5642986297607422, |
|
"eval_rewards/accuracies": 0.5714285969734192, |
|
"eval_rewards/chosen": -0.057806748896837234, |
|
"eval_rewards/margins": 0.011170565150678158, |
|
"eval_rewards/rejected": -0.06897731870412827, |
|
"eval_runtime": 201.3868, |
|
"eval_samples_per_second": 2.746, |
|
"eval_steps_per_second": 0.348, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 504, |
|
"total_flos": 0.0, |
|
"train_loss": 1.4336541661667446, |
|
"train_runtime": 14833.1355, |
|
"train_samples_per_second": 1.085, |
|
"train_steps_per_second": 0.034 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 504, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|