|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 50, |
|
"global_step": 436, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022935779816513763, |
|
"grad_norm": 5.786441925419282, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits/chosen": -2.619342088699341, |
|
"logits/rejected": -2.5526421070098877, |
|
"logps/chosen": -265.45428466796875, |
|
"logps/rejected": -236.1463165283203, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -8.460578828817233e-05, |
|
"rewards/margins": 7.87067401688546e-05, |
|
"rewards/rejected": -0.000163312564836815, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.045871559633027525, |
|
"grad_norm": 5.491351697550114, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits/chosen": -2.6576342582702637, |
|
"logits/rejected": -2.5759005546569824, |
|
"logps/chosen": -298.7987060546875, |
|
"logps/rejected": -274.3077392578125, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.00013454208965413272, |
|
"rewards/margins": 0.0018433767836540937, |
|
"rewards/rejected": -0.0019779191352427006, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06880733944954129, |
|
"grad_norm": 5.277174601748908, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits/chosen": -2.6759681701660156, |
|
"logits/rejected": -2.602142333984375, |
|
"logps/chosen": -290.32366943359375, |
|
"logps/rejected": -234.38919067382812, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.010090610012412071, |
|
"rewards/margins": 0.014163595624268055, |
|
"rewards/rejected": -0.004072986543178558, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09174311926605505, |
|
"grad_norm": 5.753580388451781, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -2.660831928253174, |
|
"logits/rejected": -2.61124324798584, |
|
"logps/chosen": -280.9427490234375, |
|
"logps/rejected": -267.7388000488281, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.042264439165592194, |
|
"rewards/margins": 0.04172234237194061, |
|
"rewards/rejected": 0.0005421031382866204, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11467889908256881, |
|
"grad_norm": 6.220985202592042, |
|
"learning_rate": 4.997110275491701e-07, |
|
"logits/chosen": -2.61970591545105, |
|
"logits/rejected": -2.6113502979278564, |
|
"logps/chosen": -294.2338562011719, |
|
"logps/rejected": -305.00823974609375, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.020173203200101852, |
|
"rewards/margins": 0.07841379195451736, |
|
"rewards/rejected": -0.058240581303834915, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11467889908256881, |
|
"eval_logits/chosen": -2.5731215476989746, |
|
"eval_logits/rejected": -2.4923324584960938, |
|
"eval_logps/chosen": -285.4681701660156, |
|
"eval_logps/rejected": -260.73919677734375, |
|
"eval_loss": 0.6512896418571472, |
|
"eval_rewards/accuracies": 0.693965494632721, |
|
"eval_rewards/chosen": -0.003780907019972801, |
|
"eval_rewards/margins": 0.13838669657707214, |
|
"eval_rewards/rejected": -0.1421675980091095, |
|
"eval_runtime": 91.4355, |
|
"eval_samples_per_second": 19.883, |
|
"eval_steps_per_second": 0.317, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13761467889908258, |
|
"grad_norm": 8.996643288143726, |
|
"learning_rate": 4.979475034558115e-07, |
|
"logits/chosen": -2.5592479705810547, |
|
"logits/rejected": -2.5012314319610596, |
|
"logps/chosen": -296.82501220703125, |
|
"logps/rejected": -279.3923645019531, |
|
"loss": 0.6437, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.07340322434902191, |
|
"rewards/margins": 0.14495481550693512, |
|
"rewards/rejected": -0.21835803985595703, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16055045871559634, |
|
"grad_norm": 9.499312526523301, |
|
"learning_rate": 4.945923025551788e-07, |
|
"logits/chosen": -2.4377551078796387, |
|
"logits/rejected": -2.392364740371704, |
|
"logps/chosen": -343.14935302734375, |
|
"logps/rejected": -305.904541015625, |
|
"loss": 0.6225, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.15578363835811615, |
|
"rewards/margins": 0.2891853451728821, |
|
"rewards/rejected": -0.4449689984321594, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1834862385321101, |
|
"grad_norm": 13.383949209241152, |
|
"learning_rate": 4.896669632591651e-07, |
|
"logits/chosen": -2.324429512023926, |
|
"logits/rejected": -2.2132680416107178, |
|
"logps/chosen": -312.23431396484375, |
|
"logps/rejected": -314.443115234375, |
|
"loss": 0.6094, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.2638443112373352, |
|
"rewards/margins": 0.35402628779411316, |
|
"rewards/rejected": -0.617870569229126, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.20642201834862386, |
|
"grad_norm": 13.193135435969568, |
|
"learning_rate": 4.832031033425662e-07, |
|
"logits/chosen": -1.3427231311798096, |
|
"logits/rejected": -1.2150843143463135, |
|
"logps/chosen": -337.8877258300781, |
|
"logps/rejected": -351.61114501953125, |
|
"loss": 0.588, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5029428601264954, |
|
"rewards/margins": 0.5451563596725464, |
|
"rewards/rejected": -1.048099160194397, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22935779816513763, |
|
"grad_norm": 18.776758250311076, |
|
"learning_rate": 4.752422169756047e-07, |
|
"logits/chosen": -0.1327299326658249, |
|
"logits/rejected": 0.009161601774394512, |
|
"logps/chosen": -333.9382019042969, |
|
"logps/rejected": -386.19952392578125, |
|
"loss": 0.5699, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6151348352432251, |
|
"rewards/margins": 0.5275757312774658, |
|
"rewards/rejected": -1.142710566520691, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22935779816513763, |
|
"eval_logits/chosen": 0.3393348455429077, |
|
"eval_logits/rejected": 0.8771037459373474, |
|
"eval_logps/chosen": -373.8545227050781, |
|
"eval_logps/rejected": -407.7341613769531, |
|
"eval_loss": 0.5577893257141113, |
|
"eval_rewards/accuracies": 0.6853448152542114, |
|
"eval_rewards/chosen": -0.8876444697380066, |
|
"eval_rewards/margins": 0.7244728207588196, |
|
"eval_rewards/rejected": -1.6121174097061157, |
|
"eval_runtime": 92.82, |
|
"eval_samples_per_second": 19.586, |
|
"eval_steps_per_second": 0.312, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25229357798165136, |
|
"grad_norm": 16.828561137860333, |
|
"learning_rate": 4.658354083558188e-07, |
|
"logits/chosen": 0.10855080932378769, |
|
"logits/rejected": 0.6684142351150513, |
|
"logps/chosen": -363.28369140625, |
|
"logps/rejected": -392.91302490234375, |
|
"loss": 0.5591, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9809554815292358, |
|
"rewards/margins": 0.6108429431915283, |
|
"rewards/rejected": -1.5917984247207642, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27522935779816515, |
|
"grad_norm": 19.631970765133257, |
|
"learning_rate": 4.550430636492389e-07, |
|
"logits/chosen": 0.46126747131347656, |
|
"logits/rejected": 0.8139599561691284, |
|
"logps/chosen": -382.4955139160156, |
|
"logps/rejected": -427.80206298828125, |
|
"loss": 0.5619, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.0667617321014404, |
|
"rewards/margins": 0.6966198682785034, |
|
"rewards/rejected": -1.7633816003799438, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2981651376146789, |
|
"grad_norm": 23.999804178074818, |
|
"learning_rate": 4.429344633468004e-07, |
|
"logits/chosen": 1.1685855388641357, |
|
"logits/rejected": 1.665592908859253, |
|
"logps/chosen": -352.095458984375, |
|
"logps/rejected": -419.9580078125, |
|
"loss": 0.5499, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9591536521911621, |
|
"rewards/margins": 0.8143006563186646, |
|
"rewards/rejected": -1.7734540700912476, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3211009174311927, |
|
"grad_norm": 18.64421738880492, |
|
"learning_rate": 4.2958733752443187e-07, |
|
"logits/chosen": 1.145119547843933, |
|
"logits/rejected": 1.878694772720337, |
|
"logps/chosen": -350.3140563964844, |
|
"logps/rejected": -365.4109802246094, |
|
"loss": 0.5568, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7412891387939453, |
|
"rewards/margins": 0.6698554754257202, |
|
"rewards/rejected": -1.411144733428955, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3440366972477064, |
|
"grad_norm": 20.200927048362935, |
|
"learning_rate": 4.150873668617898e-07, |
|
"logits/chosen": 1.7342513799667358, |
|
"logits/rejected": 2.4978702068328857, |
|
"logps/chosen": -387.82183837890625, |
|
"logps/rejected": -444.968505859375, |
|
"loss": 0.5416, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.2127825021743774, |
|
"rewards/margins": 0.9050270915031433, |
|
"rewards/rejected": -2.117809772491455, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3440366972477064, |
|
"eval_logits/chosen": 1.2442059516906738, |
|
"eval_logits/rejected": 2.1977624893188477, |
|
"eval_logps/chosen": -399.5646667480469, |
|
"eval_logps/rejected": -458.08807373046875, |
|
"eval_loss": 0.5319975018501282, |
|
"eval_rewards/accuracies": 0.7025862336158752, |
|
"eval_rewards/chosen": -1.1447453498840332, |
|
"eval_rewards/margins": 0.9709104895591736, |
|
"eval_rewards/rejected": -2.1156558990478516, |
|
"eval_runtime": 91.9947, |
|
"eval_samples_per_second": 19.762, |
|
"eval_steps_per_second": 0.315, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"grad_norm": 21.063284567870923, |
|
"learning_rate": 3.9952763262280397e-07, |
|
"logits/chosen": 1.3700188398361206, |
|
"logits/rejected": 1.9712657928466797, |
|
"logps/chosen": -422.37750244140625, |
|
"logps/rejected": -497.3843688964844, |
|
"loss": 0.5292, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.2582637071609497, |
|
"rewards/margins": 0.8558940887451172, |
|
"rewards/rejected": -2.1141579151153564, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38990825688073394, |
|
"grad_norm": 20.8460851273499, |
|
"learning_rate": 3.8300801912883414e-07, |
|
"logits/chosen": 1.7427918910980225, |
|
"logits/rejected": 2.409679412841797, |
|
"logps/chosen": -401.26861572265625, |
|
"logps/rejected": -495.7734375, |
|
"loss": 0.5061, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2966772317886353, |
|
"rewards/margins": 0.9603677988052368, |
|
"rewards/rejected": -2.257045269012451, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.41284403669724773, |
|
"grad_norm": 17.79571920632354, |
|
"learning_rate": 3.6563457256020884e-07, |
|
"logits/chosen": 1.8351173400878906, |
|
"logits/rejected": 2.8393807411193848, |
|
"logps/chosen": -433.7701110839844, |
|
"logps/rejected": -456.1665954589844, |
|
"loss": 0.5391, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.3041222095489502, |
|
"rewards/margins": 0.8725945353507996, |
|
"rewards/rejected": -2.1767165660858154, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.43577981651376146, |
|
"grad_norm": 20.925215758529603, |
|
"learning_rate": 3.475188202022617e-07, |
|
"logits/chosen": 2.0596728324890137, |
|
"logits/rejected": 2.587247371673584, |
|
"logps/chosen": -375.49212646484375, |
|
"logps/rejected": -489.371826171875, |
|
"loss": 0.517, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.1737760305404663, |
|
"rewards/margins": 1.0764976739883423, |
|
"rewards/rejected": -2.2502734661102295, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"grad_norm": 21.33737350989529, |
|
"learning_rate": 3.287770545059052e-07, |
|
"logits/chosen": 2.203148365020752, |
|
"logits/rejected": 2.9923298358917236, |
|
"logps/chosen": -424.408203125, |
|
"logps/rejected": -482.44879150390625, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4612659215927124, |
|
"rewards/margins": 0.9246621131896973, |
|
"rewards/rejected": -2.38592791557312, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"eval_logits/chosen": 1.3448154926300049, |
|
"eval_logits/rejected": 2.4483416080474854, |
|
"eval_logps/chosen": -404.1459655761719, |
|
"eval_logps/rejected": -469.58026123046875, |
|
"eval_loss": 0.5122287273406982, |
|
"eval_rewards/accuracies": 0.7284482717514038, |
|
"eval_rewards/chosen": -1.1905584335327148, |
|
"eval_rewards/margins": 1.0400197505950928, |
|
"eval_rewards/rejected": -2.2305781841278076, |
|
"eval_runtime": 92.2657, |
|
"eval_samples_per_second": 19.704, |
|
"eval_steps_per_second": 0.314, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.481651376146789, |
|
"grad_norm": 22.48419176653969, |
|
"learning_rate": 3.0952958655864954e-07, |
|
"logits/chosen": 1.9083038568496704, |
|
"logits/rejected": 2.511636734008789, |
|
"logps/chosen": -405.66839599609375, |
|
"logps/rejected": -489.11663818359375, |
|
"loss": 0.5124, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3236396312713623, |
|
"rewards/margins": 1.0241048336029053, |
|
"rewards/rejected": -2.3477444648742676, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5045871559633027, |
|
"grad_norm": 24.286637463373633, |
|
"learning_rate": 2.898999737583448e-07, |
|
"logits/chosen": 1.8827180862426758, |
|
"logits/rejected": 2.9742465019226074, |
|
"logps/chosen": -462.0406799316406, |
|
"logps/rejected": -545.8687133789062, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.3936035633087158, |
|
"rewards/margins": 1.215381383895874, |
|
"rewards/rejected": -2.6089847087860107, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5275229357798165, |
|
"grad_norm": 19.94988074398488, |
|
"learning_rate": 2.7001422664752333e-07, |
|
"logits/chosen": 1.1973925828933716, |
|
"logits/rejected": 1.7874739170074463, |
|
"logps/chosen": -377.791748046875, |
|
"logps/rejected": -472.1722717285156, |
|
"loss": 0.5198, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0984822511672974, |
|
"rewards/margins": 0.9670408368110657, |
|
"rewards/rejected": -2.0655229091644287, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5504587155963303, |
|
"grad_norm": 24.247680553138093, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": 1.5466783046722412, |
|
"logits/rejected": 2.6804802417755127, |
|
"logps/chosen": -449.4345703125, |
|
"logps/rejected": -522.0684814453125, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6190277338027954, |
|
"rewards/margins": 0.9881566762924194, |
|
"rewards/rejected": -2.6071841716766357, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.573394495412844, |
|
"grad_norm": 18.449169539289652, |
|
"learning_rate": 2.2998577335247667e-07, |
|
"logits/chosen": 1.7379165887832642, |
|
"logits/rejected": 2.9272639751434326, |
|
"logps/chosen": -468.9725646972656, |
|
"logps/rejected": -532.5216674804688, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6517877578735352, |
|
"rewards/margins": 1.06899094581604, |
|
"rewards/rejected": -2.720778226852417, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.573394495412844, |
|
"eval_logits/chosen": 1.3214832544326782, |
|
"eval_logits/rejected": 2.5336461067199707, |
|
"eval_logps/chosen": -429.1148681640625, |
|
"eval_logps/rejected": -502.6709289550781, |
|
"eval_loss": 0.5028622150421143, |
|
"eval_rewards/accuracies": 0.7284482717514038, |
|
"eval_rewards/chosen": -1.440247893333435, |
|
"eval_rewards/margins": 1.1212375164031982, |
|
"eval_rewards/rejected": -2.561485528945923, |
|
"eval_runtime": 92.4677, |
|
"eval_samples_per_second": 19.661, |
|
"eval_steps_per_second": 0.314, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5963302752293578, |
|
"grad_norm": 18.218334579311854, |
|
"learning_rate": 2.1010002624165524e-07, |
|
"logits/chosen": 1.2481517791748047, |
|
"logits/rejected": 2.1310501098632812, |
|
"logps/chosen": -418.47222900390625, |
|
"logps/rejected": -541.3790283203125, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4775768518447876, |
|
"rewards/margins": 1.1922760009765625, |
|
"rewards/rejected": -2.6698527336120605, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6192660550458715, |
|
"grad_norm": 18.269676157352414, |
|
"learning_rate": 1.9047041344135043e-07, |
|
"logits/chosen": 1.8889141082763672, |
|
"logits/rejected": 2.4701380729675293, |
|
"logps/chosen": -426.7981872558594, |
|
"logps/rejected": -536.7744750976562, |
|
"loss": 0.504, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.607358694076538, |
|
"rewards/margins": 1.241914987564087, |
|
"rewards/rejected": -2.849273920059204, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6422018348623854, |
|
"grad_norm": 21.937738574869034, |
|
"learning_rate": 1.7122294549409482e-07, |
|
"logits/chosen": 2.0935568809509277, |
|
"logits/rejected": 3.0940475463867188, |
|
"logps/chosen": -444.2198181152344, |
|
"logps/rejected": -559.611328125, |
|
"loss": 0.5276, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7122100591659546, |
|
"rewards/margins": 1.2459454536437988, |
|
"rewards/rejected": -2.958155393600464, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6651376146788991, |
|
"grad_norm": 23.622077882281282, |
|
"learning_rate": 1.524811797977383e-07, |
|
"logits/chosen": 2.260361433029175, |
|
"logits/rejected": 3.3843066692352295, |
|
"logps/chosen": -451.55938720703125, |
|
"logps/rejected": -546.6029052734375, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.6382853984832764, |
|
"rewards/margins": 1.288576364517212, |
|
"rewards/rejected": -2.9268617630004883, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6880733944954128, |
|
"grad_norm": 20.079829542188833, |
|
"learning_rate": 1.3436542743979125e-07, |
|
"logits/chosen": 1.4699538946151733, |
|
"logits/rejected": 2.1643927097320557, |
|
"logps/chosen": -453.72039794921875, |
|
"logps/rejected": -500.624267578125, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.4777113199234009, |
|
"rewards/margins": 0.939932644367218, |
|
"rewards/rejected": -2.4176440238952637, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6880733944954128, |
|
"eval_logits/chosen": 1.3894954919815063, |
|
"eval_logits/rejected": 2.688563346862793, |
|
"eval_logps/chosen": -433.8905944824219, |
|
"eval_logps/rejected": -514.7540283203125, |
|
"eval_loss": 0.4985021948814392, |
|
"eval_rewards/accuracies": 0.7370689511299133, |
|
"eval_rewards/chosen": -1.488004446029663, |
|
"eval_rewards/margins": 1.1943109035491943, |
|
"eval_rewards/rejected": -2.6823153495788574, |
|
"eval_runtime": 91.7036, |
|
"eval_samples_per_second": 19.825, |
|
"eval_steps_per_second": 0.316, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7110091743119266, |
|
"grad_norm": 20.41756706068905, |
|
"learning_rate": 1.1699198087116588e-07, |
|
"logits/chosen": 1.4253332614898682, |
|
"logits/rejected": 2.419015407562256, |
|
"logps/chosen": -438.324462890625, |
|
"logps/rejected": -531.0131225585938, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.603592872619629, |
|
"rewards/margins": 1.0420410633087158, |
|
"rewards/rejected": -2.6456339359283447, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"grad_norm": 23.710557812034125, |
|
"learning_rate": 1.00472367377196e-07, |
|
"logits/chosen": 1.8713343143463135, |
|
"logits/rejected": 3.118149757385254, |
|
"logps/chosen": -430.84912109375, |
|
"logps/rejected": -534.192626953125, |
|
"loss": 0.5078, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.5661717653274536, |
|
"rewards/margins": 1.4308522939682007, |
|
"rewards/rejected": -2.9970240592956543, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7568807339449541, |
|
"grad_norm": 22.190813187925453, |
|
"learning_rate": 8.49126331382102e-08, |
|
"logits/chosen": 2.05544376373291, |
|
"logits/rejected": 2.8256397247314453, |
|
"logps/chosen": -446.35931396484375, |
|
"logps/rejected": -508.9071350097656, |
|
"loss": 0.5035, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.7183713912963867, |
|
"rewards/margins": 0.8996985554695129, |
|
"rewards/rejected": -2.618070125579834, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7798165137614679, |
|
"grad_norm": 26.678335434261424, |
|
"learning_rate": 7.041266247556812e-08, |
|
"logits/chosen": 1.8211781978607178, |
|
"logits/rejected": 2.5711045265197754, |
|
"logps/chosen": -450.910400390625, |
|
"logps/rejected": -517.4847412109375, |
|
"loss": 0.501, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6291780471801758, |
|
"rewards/margins": 0.9458298683166504, |
|
"rewards/rejected": -2.575007677078247, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8027522935779816, |
|
"grad_norm": 22.960680980618804, |
|
"learning_rate": 5.706553665319955e-08, |
|
"logits/chosen": 1.8633334636688232, |
|
"logits/rejected": 3.0288431644439697, |
|
"logps/chosen": -437.11260986328125, |
|
"logps/rejected": -511.2594299316406, |
|
"loss": 0.5137, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6262149810791016, |
|
"rewards/margins": 1.1097644567489624, |
|
"rewards/rejected": -2.7359795570373535, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8027522935779816, |
|
"eval_logits/chosen": 1.8716328144073486, |
|
"eval_logits/rejected": 3.1580371856689453, |
|
"eval_logps/chosen": -446.3657531738281, |
|
"eval_logps/rejected": -532.5296020507812, |
|
"eval_loss": 0.49309831857681274, |
|
"eval_rewards/accuracies": 0.732758641242981, |
|
"eval_rewards/chosen": -1.612756371498108, |
|
"eval_rewards/margins": 1.247314453125, |
|
"eval_rewards/rejected": -2.8600711822509766, |
|
"eval_runtime": 91.4764, |
|
"eval_samples_per_second": 19.874, |
|
"eval_steps_per_second": 0.317, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8256880733944955, |
|
"grad_norm": 25.170197026630024, |
|
"learning_rate": 4.4956936350761005e-08, |
|
"logits/chosen": 1.9063358306884766, |
|
"logits/rejected": 2.6921870708465576, |
|
"logps/chosen": -398.64984130859375, |
|
"logps/rejected": -524.7725830078125, |
|
"loss": 0.4898, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4632974863052368, |
|
"rewards/margins": 1.1990567445755005, |
|
"rewards/rejected": -2.662353992462158, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8486238532110092, |
|
"grad_norm": 23.25403059611051, |
|
"learning_rate": 3.416459164418123e-08, |
|
"logits/chosen": 1.4586713314056396, |
|
"logits/rejected": 2.6852428913116455, |
|
"logps/chosen": -454.4266052246094, |
|
"logps/rejected": -542.4071044921875, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5484213829040527, |
|
"rewards/margins": 1.2499048709869385, |
|
"rewards/rejected": -2.798326015472412, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8715596330275229, |
|
"grad_norm": 28.54168436834153, |
|
"learning_rate": 2.475778302439524e-08, |
|
"logits/chosen": 1.7404067516326904, |
|
"logits/rejected": 3.0292108058929443, |
|
"logps/chosen": -451.39849853515625, |
|
"logps/rejected": -544.4757080078125, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5502045154571533, |
|
"rewards/margins": 1.3179900646209717, |
|
"rewards/rejected": -2.868194818496704, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8944954128440367, |
|
"grad_norm": 25.43427497983774, |
|
"learning_rate": 1.6796896657433805e-08, |
|
"logits/chosen": 2.222611904144287, |
|
"logits/rejected": 3.231661558151245, |
|
"logps/chosen": -413.8394470214844, |
|
"logps/rejected": -501.4020080566406, |
|
"loss": 0.515, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6188195943832397, |
|
"rewards/margins": 1.1021382808685303, |
|
"rewards/rejected": -2.7209575176239014, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"grad_norm": 24.959131634596684, |
|
"learning_rate": 1.0333036740834855e-08, |
|
"logits/chosen": 2.535111427307129, |
|
"logits/rejected": 3.2772762775421143, |
|
"logps/chosen": -382.0926208496094, |
|
"logps/rejected": -499.1607971191406, |
|
"loss": 0.5033, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.5753300189971924, |
|
"rewards/margins": 1.1241223812103271, |
|
"rewards/rejected": -2.6994521617889404, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"eval_logits/chosen": 2.0773062705993652, |
|
"eval_logits/rejected": 3.358436346054077, |
|
"eval_logps/chosen": -449.9159851074219, |
|
"eval_logps/rejected": -541.3424072265625, |
|
"eval_loss": 0.49311476945877075, |
|
"eval_rewards/accuracies": 0.7413793206214905, |
|
"eval_rewards/chosen": -1.6482590436935425, |
|
"eval_rewards/margins": 1.299940824508667, |
|
"eval_rewards/rejected": -2.948199987411499, |
|
"eval_runtime": 92.5987, |
|
"eval_samples_per_second": 19.633, |
|
"eval_steps_per_second": 0.313, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9403669724770642, |
|
"grad_norm": 28.32448783876109, |
|
"learning_rate": 5.4076974448211685e-09, |
|
"logits/chosen": 2.5014076232910156, |
|
"logits/rejected": 3.729045867919922, |
|
"logps/chosen": -446.04974365234375, |
|
"logps/rejected": -546.2440185546875, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.7509788274765015, |
|
"rewards/margins": 1.3309944868087769, |
|
"rewards/rejected": -3.0819735527038574, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.963302752293578, |
|
"grad_norm": 31.377213722949094, |
|
"learning_rate": 2.052496544188487e-09, |
|
"logits/chosen": 2.54590106010437, |
|
"logits/rejected": 3.5291149616241455, |
|
"logps/chosen": -431.8235778808594, |
|
"logps/rejected": -553.3231201171875, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.723846197128296, |
|
"rewards/margins": 1.350610613822937, |
|
"rewards/rejected": -3.0744569301605225, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9862385321100917, |
|
"grad_norm": 25.27983521226701, |
|
"learning_rate": 2.889724508297886e-10, |
|
"logits/chosen": 2.104177713394165, |
|
"logits/rejected": 3.586791515350342, |
|
"logps/chosen": -467.50860595703125, |
|
"logps/rejected": -525.298828125, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.64333176612854, |
|
"rewards/margins": 1.1692912578582764, |
|
"rewards/rejected": -2.8126227855682373, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 436, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5464360101507344, |
|
"train_runtime": 11333.4438, |
|
"train_samples_per_second": 4.92, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 436, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|