{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984,
  "eval_steps": 100,
  "global_step": 312,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.5625e-08,
      "logits/chosen": -2.6282742023468018,
      "logits/rejected": -2.6322691440582275,
      "logps/chosen": -400.73358154296875,
      "logps/rejected": -385.47198486328125,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.5624999999999999e-07,
      "logits/chosen": -2.60478138923645,
      "logits/rejected": -2.5875940322875977,
      "logps/chosen": -383.8570556640625,
      "logps/rejected": -403.23974609375,
      "loss": 0.693,
      "rewards/accuracies": 0.4305555522441864,
      "rewards/chosen": 0.0001632043713470921,
      "rewards/margins": -6.321006367215887e-05,
      "rewards/rejected": 0.0002264144568471238,
      "step": 10
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -2.618523359298706,
      "logits/rejected": -2.6062684059143066,
      "logps/chosen": -400.2876892089844,
      "logps/rejected": -425.55078125,
      "loss": 0.693,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.000641118735074997,
      "rewards/margins": -7.16630820534192e-05,
      "rewards/rejected": 0.0007127817953005433,
      "step": 20
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.6874999999999996e-07,
      "logits/chosen": -2.552297830581665,
      "logits/rejected": -2.536328077316284,
      "logps/chosen": -371.4658508300781,
      "logps/rejected": -386.770751953125,
      "loss": 0.6926,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": 0.004528197459876537,
      "rewards/margins": 0.001459120074287057,
      "rewards/rejected": 0.0030690771527588367,
      "step": 30
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.989935734988097e-07,
      "logits/chosen": -2.591301441192627,
      "logits/rejected": -2.6019530296325684,
      "logps/chosen": -371.7442321777344,
      "logps/rejected": -381.6260986328125,
      "loss": 0.6914,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": 0.0025075082667171955,
      "rewards/margins": 0.0020256205461919308,
      "rewards/rejected": 0.00048188763321377337,
      "step": 40
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.949188496058089e-07,
      "logits/chosen": -2.6773521900177,
      "logits/rejected": -2.6721460819244385,
      "logps/chosen": -387.83477783203125,
      "logps/rejected": -402.6687927246094,
      "loss": 0.688,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": 0.011017683893442154,
      "rewards/margins": 0.013887738808989525,
      "rewards/rejected": -0.002870055614039302,
      "step": 50
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.877641290737883e-07,
      "logits/chosen": -2.7053120136260986,
      "logits/rejected": -2.7033491134643555,
      "logps/chosen": -391.2000427246094,
      "logps/rejected": -417.63751220703125,
      "loss": 0.6902,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.0010398384183645248,
      "rewards/margins": 0.009658296592533588,
      "rewards/rejected": -0.008618457242846489,
      "step": 60
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.776193866647039e-07,
      "logits/chosen": -2.7314000129699707,
      "logits/rejected": -2.742297649383545,
      "logps/chosen": -418.1868591308594,
      "logps/rejected": -433.50653076171875,
      "loss": 0.6843,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.015087150037288666,
      "rewards/margins": 0.02047603204846382,
      "rewards/rejected": -0.03556318208575249,
      "step": 70
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.646121984004665e-07,
      "logits/chosen": -2.7759556770324707,
      "logits/rejected": -2.7733349800109863,
      "logps/chosen": -398.1673583984375,
      "logps/rejected": -409.0989685058594,
      "loss": 0.6819,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.03768505901098251,
      "rewards/margins": 0.025433484464883804,
      "rewards/rejected": -0.06311853975057602,
      "step": 80
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.489061372204452e-07,
      "logits/chosen": -2.828214645385742,
      "logits/rejected": -2.831789255142212,
      "logps/chosen": -423.423828125,
      "logps/rejected": -433.41729736328125,
      "loss": 0.6797,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.053007822483778,
      "rewards/margins": 0.03302832692861557,
      "rewards/rejected": -0.08603614568710327,
      "step": 90
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.3069871595684787e-07,
      "logits/chosen": -2.896029233932495,
      "logits/rejected": -2.893444538116455,
      "logps/chosen": -397.0558166503906,
      "logps/rejected": -403.83636474609375,
      "loss": 0.6786,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.04668121784925461,
      "rewards/margins": 0.03920525312423706,
      "rewards/rejected": -0.08588646352291107,
      "step": 100
    },
    {
      "epoch": 0.32,
      "eval_logits/chosen": -2.885509967803955,
      "eval_logits/rejected": -2.878624677658081,
      "eval_logps/chosen": -386.8027038574219,
      "eval_logps/rejected": -400.90277099609375,
      "eval_loss": 0.6868489980697632,
      "eval_rewards/accuracies": 0.5546875,
      "eval_rewards/chosen": -0.0868735983967781,
      "eval_rewards/margins": 0.014578516595065594,
      "eval_rewards/rejected": -0.10145211219787598,
      "eval_runtime": 137.4322,
      "eval_samples_per_second": 7.276,
      "eval_steps_per_second": 0.233,
      "step": 100
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.10218903496256e-07,
      "logits/chosen": -2.863513469696045,
      "logits/rejected": -2.8547675609588623,
      "logps/chosen": -425.29986572265625,
      "logps/rejected": -426.5914611816406,
      "loss": 0.6811,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.09437432140111923,
      "rewards/margins": 0.027984386309981346,
      "rewards/rejected": -0.12235872447490692,
      "step": 110
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.877242453630256e-07,
      "logits/chosen": -2.824404001235962,
      "logits/rejected": -2.8097667694091797,
      "logps/chosen": -416.0079650878906,
      "logps/rejected": -433.6048889160156,
      "loss": 0.6758,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.09664727002382278,
      "rewards/margins": 0.040948256850242615,
      "rewards/rejected": -0.137595534324646,
      "step": 120
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.634976249348867e-07,
      "logits/chosen": -2.8867764472961426,
      "logits/rejected": -2.891484498977661,
      "logps/chosen": -395.841552734375,
      "logps/rejected": -422.6394958496094,
      "loss": 0.6765,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.11936166137456894,
      "rewards/margins": 0.03277095407247543,
      "rewards/rejected": -0.15213260054588318,
      "step": 130
    },
    {
      "epoch": 0.45,
      "learning_rate": 3.378437060203357e-07,
      "logits/chosen": -2.8144431114196777,
      "logits/rejected": -2.7991340160369873,
      "logps/chosen": -436.4762268066406,
      "logps/rejected": -477.21624755859375,
      "loss": 0.6759,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.15145839750766754,
      "rewards/margins": 0.03745008260011673,
      "rewards/rejected": -0.18890848755836487,
      "step": 140
    },
    {
      "epoch": 0.48,
      "learning_rate": 3.110851015344735e-07,
      "logits/chosen": -2.962568759918213,
      "logits/rejected": -2.957019090652466,
      "logps/chosen": -405.751708984375,
      "logps/rejected": -443.404296875,
      "loss": 0.6712,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.12555965781211853,
      "rewards/margins": 0.05261915922164917,
      "rewards/rejected": -0.1781788021326065,
      "step": 150
    },
    {
      "epoch": 0.51,
      "learning_rate": 2.8355831645441387e-07,
      "logits/chosen": -2.9129347801208496,
      "logits/rejected": -2.9059901237487793,
      "logps/chosen": -415.2442321777344,
      "logps/rejected": -437.210693359375,
      "loss": 0.6699,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.17596140503883362,
      "rewards/margins": 0.05255354568362236,
      "rewards/rejected": -0.22851495444774628,
      "step": 160
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.5560951607395126e-07,
      "logits/chosen": -2.997249126434326,
      "logits/rejected": -3.0022037029266357,
      "logps/chosen": -408.9958801269531,
      "logps/rejected": -433.72076416015625,
      "loss": 0.6712,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.16367743909358978,
      "rewards/margins": 0.07268272340297699,
      "rewards/rejected": -0.23636016249656677,
      "step": 170
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.2759017277414164e-07,
      "logits/chosen": -3.01900315284729,
      "logits/rejected": -3.011901378631592,
      "logps/chosen": -409.57940673828125,
      "logps/rejected": -429.5187072753906,
      "loss": 0.6709,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.11230169236660004,
      "rewards/margins": 0.061531912535429,
      "rewards/rejected": -0.17383362352848053,
      "step": 180
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.998526460541818e-07,
      "logits/chosen": -3.0185275077819824,
      "logits/rejected": -2.9947714805603027,
      "logps/chosen": -423.85589599609375,
      "logps/rejected": -451.7948303222656,
      "loss": 0.6638,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.16302022337913513,
      "rewards/margins": 0.05766066908836365,
      "rewards/rejected": -0.22068090736865997,
      "step": 190
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.7274575140626315e-07,
      "logits/chosen": -3.038890838623047,
      "logits/rejected": -3.028480052947998,
      "logps/chosen": -393.0187072753906,
      "logps/rejected": -410.3246154785156,
      "loss": 0.6615,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.19790220260620117,
      "rewards/margins": 0.04545364901423454,
      "rewards/rejected": -0.24335582554340363,
      "step": 200
    },
    {
      "epoch": 0.64,
      "eval_logits/chosen": -3.067202568054199,
      "eval_logits/rejected": -3.0606539249420166,
      "eval_logps/chosen": -396.62066650390625,
      "eval_logps/rejected": -412.20208740234375,
      "eval_loss": 0.6828470826148987,
      "eval_rewards/accuracies": 0.59375,
      "eval_rewards/chosen": -0.1850530207157135,
      "eval_rewards/margins": 0.029392333701252937,
      "eval_rewards/rejected": -0.2144453525543213,
      "eval_runtime": 136.2085,
      "eval_samples_per_second": 7.342,
      "eval_steps_per_second": 0.235,
      "step": 200
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.4661037375836987e-07,
      "logits/chosen": -3.0557608604431152,
      "logits/rejected": -3.044783353805542,
      "logps/chosen": -411.9776916503906,
      "logps/rejected": -446.8324279785156,
      "loss": 0.6619,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.1823655068874359,
      "rewards/margins": 0.09753072261810303,
      "rewards/rejected": -0.27989625930786133,
      "step": 210
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.2177518064852348e-07,
      "logits/chosen": -3.064290761947632,
      "logits/rejected": -3.0677173137664795,
      "logps/chosen": -410.56231689453125,
      "logps/rejected": -445.7730407714844,
      "loss": 0.6571,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.161437526345253,
      "rewards/margins": 0.10264303535223007,
      "rewards/rejected": -0.26408058404922485,
      "step": 220
    },
    {
      "epoch": 0.74,
      "learning_rate": 9.855248903979505e-08,
      "logits/chosen": -3.098358392715454,
      "logits/rejected": -3.089136838912964,
      "logps/chosen": -410.40423583984375,
      "logps/rejected": -428.3938903808594,
      "loss": 0.6646,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.13886065781116486,
      "rewards/margins": 0.08097346127033234,
      "rewards/rejected": -0.2198341190814972,
      "step": 230
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.723433775328384e-08,
      "logits/chosen": -3.098928928375244,
      "logits/rejected": -3.0982117652893066,
      "logps/chosen": -399.87091064453125,
      "logps/rejected": -431.13653564453125,
      "loss": 0.6548,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -0.18533360958099365,
      "rewards/margins": 0.10331207513809204,
      "rewards/rejected": -0.2886456847190857,
      "step": 240
    },
    {
      "epoch": 0.8,
      "learning_rate": 5.808881491049722e-08,
      "logits/chosen": -3.0935721397399902,
      "logits/rejected": -3.0877461433410645,
      "logps/chosen": -394.9942321777344,
      "logps/rejected": -434.58526611328125,
      "loss": 0.6504,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.19627241790294647,
      "rewards/margins": 0.14824633300304413,
      "rewards/rejected": -0.3445187509059906,
      "step": 250
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.1356686569674335e-08,
      "logits/chosen": -3.061063051223755,
      "logits/rejected": -3.0640881061553955,
      "logps/chosen": -450.4789123535156,
      "logps/rejected": -484.6214904785156,
      "loss": 0.6588,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -0.2441820651292801,
      "rewards/margins": 0.12522391974925995,
      "rewards/rejected": -0.36940592527389526,
      "step": 260
    },
    {
      "epoch": 0.86,
      "learning_rate": 2.724836895290805e-08,
      "logits/chosen": -3.136080265045166,
      "logits/rejected": -3.135943651199341,
      "logps/chosen": -392.79010009765625,
      "logps/rejected": -411.1092224121094,
      "loss": 0.6535,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.17221376299858093,
      "rewards/margins": 0.06714353710412979,
      "rewards/rejected": -0.23935727775096893,
      "step": 270
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.5941282340065697e-08,
      "logits/chosen": -3.0997986793518066,
      "logits/rejected": -3.0961849689483643,
      "logps/chosen": -427.6429138183594,
      "logps/rejected": -467.4734802246094,
      "loss": 0.6493,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -0.2281384915113449,
      "rewards/margins": 0.09002666175365448,
      "rewards/rejected": -0.318165123462677,
      "step": 280
    },
    {
      "epoch": 0.93,
      "learning_rate": 7.577619905828281e-09,
      "logits/chosen": -3.1325037479400635,
      "logits/rejected": -3.138279438018799,
      "logps/chosen": -394.94561767578125,
      "logps/rejected": -414.731201171875,
      "loss": 0.6651,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.2411809265613556,
      "rewards/margins": 0.07669158279895782,
      "rewards/rejected": -0.31787246465682983,
      "step": 290
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.2625595580163247e-09,
      "logits/chosen": -3.1289730072021484,
      "logits/rejected": -3.099738597869873,
      "logps/chosen": -440.1055603027344,
      "logps/rejected": -485.18463134765625,
      "loss": 0.6539,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.1983877271413803,
      "rewards/margins": 0.13716521859169006,
      "rewards/rejected": -0.3355529308319092,
      "step": 300
    },
    {
      "epoch": 0.96,
      "eval_logits/chosen": -3.170891523361206,
      "eval_logits/rejected": -3.164473295211792,
      "eval_logps/chosen": -401.33953857421875,
      "eval_logps/rejected": -417.6891784667969,
      "eval_loss": 0.6821330785751343,
      "eval_rewards/accuracies": 0.60546875,
      "eval_rewards/chosen": -0.23224163055419922,
      "eval_rewards/margins": 0.03707445412874222,
      "eval_rewards/rejected": -0.2693161070346832,
      "eval_runtime": 136.3294,
      "eval_samples_per_second": 7.335,
      "eval_steps_per_second": 0.235,
      "step": 300
    },
    {
      "epoch": 0.99,
      "learning_rate": 6.294126437336733e-11,
      "logits/chosen": -3.1333985328674316,
      "logits/rejected": -3.1103413105010986,
      "logps/chosen": -421.52532958984375,
      "logps/rejected": -455.3125,
      "loss": 0.6618,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.2057611495256424,
      "rewards/margins": 0.10267385095357895,
      "rewards/rejected": -0.30843502283096313,
      "step": 310
    },
    {
      "epoch": 1.0,
      "step": 312,
      "total_flos": 0.0,
      "train_loss": 0.6714850996549313,
      "train_runtime": 5456.9379,
      "train_samples_per_second": 3.664,
      "train_steps_per_second": 0.057
    }
  ],
  "logging_steps": 10,
  "max_steps": 312,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}