|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984301412872841, |
|
"eval_steps": 500, |
|
"global_step": 318, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.5625e-08, |
|
"logits/chosen": -2.8509469032287598, |
|
"logits/rejected": -2.833181858062744, |
|
"logps/chosen": -133.73171997070312, |
|
"logps/pi_response": -47.9057502746582, |
|
"logps/ref_response": -47.9057502746582, |
|
"logps/rejected": -197.21273803710938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -2.82759690284729, |
|
"logits/rejected": -2.798935890197754, |
|
"logps/chosen": -253.7941131591797, |
|
"logps/pi_response": -71.65193939208984, |
|
"logps/ref_response": -71.66093444824219, |
|
"logps/rejected": -215.510009765625, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4166666567325592, |
|
"rewards/chosen": 2.8001426471746527e-05, |
|
"rewards/margins": -0.00033620299655012786, |
|
"rewards/rejected": 0.0003642044321168214, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.7570412158966064, |
|
"logits/rejected": -2.760845422744751, |
|
"logps/chosen": -217.1673126220703, |
|
"logps/pi_response": -74.27923583984375, |
|
"logps/ref_response": -74.31416320800781, |
|
"logps/rejected": -202.97946166992188, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.005049519240856171, |
|
"rewards/margins": 0.0024197015445679426, |
|
"rewards/rejected": 0.0026298172306269407, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -2.7637505531311035, |
|
"logits/rejected": -2.7554142475128174, |
|
"logps/chosen": -234.3155059814453, |
|
"logps/pi_response": -68.43399047851562, |
|
"logps/ref_response": -68.40460205078125, |
|
"logps/rejected": -203.23374938964844, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.028325339779257774, |
|
"rewards/margins": 0.01702897995710373, |
|
"rewards/rejected": 0.01129635889083147, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990353313429303e-07, |
|
"logits/chosen": -2.8049890995025635, |
|
"logits/rejected": -2.780224084854126, |
|
"logps/chosen": -207.7467041015625, |
|
"logps/pi_response": -68.51658630371094, |
|
"logps/ref_response": -64.04513549804688, |
|
"logps/rejected": -203.04983520507812, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.04143567755818367, |
|
"rewards/margins": 0.02963915839791298, |
|
"rewards/rejected": 0.011796516366302967, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.951291206355559e-07, |
|
"logits/chosen": -2.7561471462249756, |
|
"logits/rejected": -2.7407054901123047, |
|
"logps/chosen": -256.8748474121094, |
|
"logps/pi_response": -92.87406921386719, |
|
"logps/ref_response": -74.82122039794922, |
|
"logps/rejected": -236.1123504638672, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0026867527049034834, |
|
"rewards/margins": 0.06110120937228203, |
|
"rewards/rejected": -0.05841444805264473, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882681251368548e-07, |
|
"logits/chosen": -2.679486036300659, |
|
"logits/rejected": -2.681763172149658, |
|
"logps/chosen": -266.70977783203125, |
|
"logps/pi_response": -106.77949523925781, |
|
"logps/ref_response": -76.94175720214844, |
|
"logps/rejected": -241.783447265625, |
|
"loss": 0.6503, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.05337841436266899, |
|
"rewards/margins": 0.11435987800359726, |
|
"rewards/rejected": -0.16773828864097595, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.785350472409791e-07, |
|
"logits/chosen": -2.6562085151672363, |
|
"logits/rejected": -2.638962984085083, |
|
"logps/chosen": -262.48358154296875, |
|
"logps/pi_response": -129.61695861816406, |
|
"logps/ref_response": -82.19517517089844, |
|
"logps/rejected": -264.07568359375, |
|
"loss": 0.6444, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.20274746417999268, |
|
"rewards/margins": 0.09253005683422089, |
|
"rewards/rejected": -0.29527753591537476, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6604720940421207e-07, |
|
"logits/chosen": -2.6726343631744385, |
|
"logits/rejected": -2.667086601257324, |
|
"logps/chosen": -273.891845703125, |
|
"logps/pi_response": -138.45387268066406, |
|
"logps/ref_response": -72.91886901855469, |
|
"logps/rejected": -269.2601318359375, |
|
"loss": 0.6287, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.2682049870491028, |
|
"rewards/margins": 0.1169251948595047, |
|
"rewards/rejected": -0.3851301670074463, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5095513994085974e-07, |
|
"logits/chosen": -2.535644292831421, |
|
"logits/rejected": -2.532330274581909, |
|
"logps/chosen": -239.97012329101562, |
|
"logps/pi_response": -127.95100402832031, |
|
"logps/ref_response": -69.45870208740234, |
|
"logps/rejected": -257.403564453125, |
|
"loss": 0.6109, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.14788806438446045, |
|
"rewards/margins": 0.2313682734966278, |
|
"rewards/rejected": -0.37925636768341064, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3344075855595097e-07, |
|
"logits/chosen": -2.5136990547180176, |
|
"logits/rejected": -2.509000062942505, |
|
"logps/chosen": -240.0645294189453, |
|
"logps/pi_response": -125.2841796875, |
|
"logps/ref_response": -70.416259765625, |
|
"logps/rejected": -254.03012084960938, |
|
"loss": 0.607, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2124701738357544, |
|
"rewards/margins": 0.27277401089668274, |
|
"rewards/rejected": -0.4852442145347595, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.137151834863213e-07, |
|
"logits/chosen": -2.480569362640381, |
|
"logits/rejected": -2.443432331085205, |
|
"logps/chosen": -275.77630615234375, |
|
"logps/pi_response": -136.15298461914062, |
|
"logps/ref_response": -71.45120239257812, |
|
"logps/rejected": -269.72015380859375, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.2312408983707428, |
|
"rewards/margins": 0.23492033779621124, |
|
"rewards/rejected": -0.46616125106811523, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.920161866827889e-07, |
|
"logits/chosen": -2.439021348953247, |
|
"logits/rejected": -2.4340755939483643, |
|
"logps/chosen": -268.33770751953125, |
|
"logps/pi_response": -167.921630859375, |
|
"logps/ref_response": -77.01984405517578, |
|
"logps/rejected": -322.13433837890625, |
|
"loss": 0.5893, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.35008108615875244, |
|
"rewards/margins": 0.42321157455444336, |
|
"rewards/rejected": -0.7732926607131958, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6860532770864005e-07, |
|
"logits/chosen": -2.347311496734619, |
|
"logits/rejected": -2.335073947906494, |
|
"logps/chosen": -315.92718505859375, |
|
"logps/pi_response": -209.3319091796875, |
|
"logps/ref_response": -67.93054962158203, |
|
"logps/rejected": -312.83465576171875, |
|
"loss": 0.5915, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7186356782913208, |
|
"rewards/margins": 0.3434630036354065, |
|
"rewards/rejected": -1.062098741531372, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4376480090239047e-07, |
|
"logits/chosen": -2.2413113117218018, |
|
"logits/rejected": -2.211487293243408, |
|
"logps/chosen": -371.6416931152344, |
|
"logps/pi_response": -265.18096923828125, |
|
"logps/ref_response": -74.56938171386719, |
|
"logps/rejected": -376.59466552734375, |
|
"loss": 0.6044, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1778886318206787, |
|
"rewards/margins": 0.32092222571372986, |
|
"rewards/rejected": -1.4988110065460205, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1779403380910425e-07, |
|
"logits/chosen": -2.240055561065674, |
|
"logits/rejected": -2.1979031562805176, |
|
"logps/chosen": -307.1637878417969, |
|
"logps/pi_response": -187.8131866455078, |
|
"logps/ref_response": -69.4150390625, |
|
"logps/rejected": -316.88873291015625, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5614252090454102, |
|
"rewards/margins": 0.41102123260498047, |
|
"rewards/rejected": -0.9724465608596802, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.910060778827554e-07, |
|
"logits/chosen": -2.2194037437438965, |
|
"logits/rejected": -2.2016549110412598, |
|
"logps/chosen": -308.1712646484375, |
|
"logps/pi_response": -226.4488983154297, |
|
"logps/ref_response": -81.69718933105469, |
|
"logps/rejected": -343.4349365234375, |
|
"loss": 0.5699, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6651563048362732, |
|
"rewards/margins": 0.4220353662967682, |
|
"rewards/rejected": -1.0871917009353638, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6372383496608186e-07, |
|
"logits/chosen": -2.233020305633545, |
|
"logits/rejected": -2.1675045490264893, |
|
"logps/chosen": -348.2539978027344, |
|
"logps/pi_response": -231.01455688476562, |
|
"logps/ref_response": -75.20973205566406, |
|
"logps/rejected": -334.9202880859375, |
|
"loss": 0.6045, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6902152895927429, |
|
"rewards/margins": 0.4886610507965088, |
|
"rewards/rejected": -1.1788761615753174, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3627616503391812e-07, |
|
"logits/chosen": -2.137673854827881, |
|
"logits/rejected": -2.0975818634033203, |
|
"logps/chosen": -286.08172607421875, |
|
"logps/pi_response": -202.77284240722656, |
|
"logps/ref_response": -60.3970947265625, |
|
"logps/rejected": -297.22442626953125, |
|
"loss": 0.5776, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6755498051643372, |
|
"rewards/margins": 0.44268402457237244, |
|
"rewards/rejected": -1.1182337999343872, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.089939221172446e-07, |
|
"logits/chosen": -2.1633880138397217, |
|
"logits/rejected": -2.1441142559051514, |
|
"logps/chosen": -301.385498046875, |
|
"logps/pi_response": -249.9862060546875, |
|
"logps/ref_response": -72.61909484863281, |
|
"logps/rejected": -376.3998718261719, |
|
"loss": 0.5583, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.8089518547058105, |
|
"rewards/margins": 0.643667995929718, |
|
"rewards/rejected": -1.4526197910308838, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8220596619089573e-07, |
|
"logits/chosen": -2.21587872505188, |
|
"logits/rejected": -2.1797242164611816, |
|
"logps/chosen": -383.5548400878906, |
|
"logps/pi_response": -243.202880859375, |
|
"logps/ref_response": -76.12086486816406, |
|
"logps/rejected": -376.36138916015625, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7832227349281311, |
|
"rewards/margins": 0.4175417423248291, |
|
"rewards/rejected": -1.2007642984390259, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.562351990976095e-07, |
|
"logits/chosen": -2.150066614151001, |
|
"logits/rejected": -2.121753692626953, |
|
"logps/chosen": -293.81488037109375, |
|
"logps/pi_response": -219.0130157470703, |
|
"logps/ref_response": -72.12835693359375, |
|
"logps/rejected": -308.10919189453125, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8007175326347351, |
|
"rewards/margins": 0.48071402311325073, |
|
"rewards/rejected": -1.2814315557479858, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3139467229135998e-07, |
|
"logits/chosen": -2.182744026184082, |
|
"logits/rejected": -2.1633565425872803, |
|
"logps/chosen": -320.0791931152344, |
|
"logps/pi_response": -229.01052856445312, |
|
"logps/ref_response": -76.02903747558594, |
|
"logps/rejected": -319.9397277832031, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7191491723060608, |
|
"rewards/margins": 0.45336204767227173, |
|
"rewards/rejected": -1.172511339187622, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0798381331721107e-07, |
|
"logits/chosen": -2.094500780105591, |
|
"logits/rejected": -2.094238758087158, |
|
"logps/chosen": -340.3441467285156, |
|
"logps/pi_response": -242.52029418945312, |
|
"logps/ref_response": -78.22608184814453, |
|
"logps/rejected": -359.96136474609375, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8624930381774902, |
|
"rewards/margins": 0.3955257534980774, |
|
"rewards/rejected": -1.2580187320709229, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.628481651367875e-08, |
|
"logits/chosen": -2.1416382789611816, |
|
"logits/rejected": -2.107877254486084, |
|
"logps/chosen": -334.34051513671875, |
|
"logps/pi_response": -253.45559692382812, |
|
"logps/ref_response": -71.44368743896484, |
|
"logps/rejected": -343.25750732421875, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9990061521530151, |
|
"rewards/margins": 0.39658617973327637, |
|
"rewards/rejected": -1.3955923318862915, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.655924144404906e-08, |
|
"logits/chosen": -2.109405279159546, |
|
"logits/rejected": -2.070265769958496, |
|
"logps/chosen": -366.1980285644531, |
|
"logps/pi_response": -274.1239318847656, |
|
"logps/ref_response": -80.24897003173828, |
|
"logps/rejected": -391.06976318359375, |
|
"loss": 0.5633, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0960283279418945, |
|
"rewards/margins": 0.4408624768257141, |
|
"rewards/rejected": -1.5368907451629639, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.904486005914027e-08, |
|
"logits/chosen": -2.1373915672302246, |
|
"logits/rejected": -2.050778865814209, |
|
"logps/chosen": -357.5392150878906, |
|
"logps/pi_response": -278.73681640625, |
|
"logps/ref_response": -74.540283203125, |
|
"logps/rejected": -366.91839599609375, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.0771440267562866, |
|
"rewards/margins": 0.7008964419364929, |
|
"rewards/rejected": -1.7780405282974243, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3952790595787986e-08, |
|
"logits/chosen": -2.031721353530884, |
|
"logits/rejected": -2.014037609100342, |
|
"logps/chosen": -339.314453125, |
|
"logps/pi_response": -293.8365783691406, |
|
"logps/ref_response": -78.09291076660156, |
|
"logps/rejected": -406.5647888183594, |
|
"loss": 0.5568, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1859939098358154, |
|
"rewards/margins": 0.4465731084346771, |
|
"rewards/rejected": -1.6325668096542358, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1464952759020856e-08, |
|
"logits/chosen": -2.1228859424591064, |
|
"logits/rejected": -2.0986340045928955, |
|
"logps/chosen": -329.19244384765625, |
|
"logps/pi_response": -264.7164306640625, |
|
"logps/ref_response": -76.1480712890625, |
|
"logps/rejected": -405.26287841796875, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.001992106437683, |
|
"rewards/margins": 0.5500288009643555, |
|
"rewards/rejected": -1.552020788192749, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1731874863145142e-08, |
|
"logits/chosen": -2.1019539833068848, |
|
"logits/rejected": -2.051776885986328, |
|
"logps/chosen": -354.9820861816406, |
|
"logps/pi_response": -268.553466796875, |
|
"logps/ref_response": -72.30916595458984, |
|
"logps/rejected": -364.39324951171875, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.99445641040802, |
|
"rewards/margins": 0.649712324142456, |
|
"rewards/rejected": -1.6441688537597656, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.8708793644441086e-09, |
|
"logits/chosen": -2.1944591999053955, |
|
"logits/rejected": -2.160240650177002, |
|
"logps/chosen": -352.7145080566406, |
|
"logps/pi_response": -258.2195129394531, |
|
"logps/ref_response": -73.66411590576172, |
|
"logps/rejected": -385.5911865234375, |
|
"loss": 0.5459, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9718688726425171, |
|
"rewards/margins": 0.5499030351638794, |
|
"rewards/rejected": -1.521772027015686, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.64668657069706e-10, |
|
"logits/chosen": -2.138836622238159, |
|
"logits/rejected": -2.0898382663726807, |
|
"logps/chosen": -320.83782958984375, |
|
"logps/pi_response": -251.64108276367188, |
|
"logps/ref_response": -73.73819732666016, |
|
"logps/rejected": -360.6897277832031, |
|
"loss": 0.5544, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9648464322090149, |
|
"rewards/margins": 0.5148458480834961, |
|
"rewards/rejected": -1.4796922206878662, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 318, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5994115610542537, |
|
"train_runtime": 8245.2497, |
|
"train_samples_per_second": 4.943, |
|
"train_steps_per_second": 0.039 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 318, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|