{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.2648831203231574, |
|
"eval_steps": 200, |
|
"global_step": 1500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.652509652509653e-06, |
|
"logits/chosen": -3.11246657371521, |
|
"logits/rejected": -3.086373805999756, |
|
"logps/chosen": -113.73238372802734, |
|
"logps/rejected": -109.32698822021484, |
|
"loss": 0.721, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.2993558943271637, |
|
"rewards/margins": -0.0277109295129776, |
|
"rewards/rejected": -0.2716449797153473, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9305019305019306e-05, |
|
"logits/chosen": -3.110931873321533, |
|
"logits/rejected": -3.1171531677246094, |
|
"logps/chosen": -122.4663314819336, |
|
"logps/rejected": -113.23054504394531, |
|
"loss": 0.7161, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.2805718183517456, |
|
"rewards/margins": -0.03167964145541191, |
|
"rewards/rejected": -0.2488921880722046, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.895752895752896e-05, |
|
"logits/chosen": -3.139052629470825, |
|
"logits/rejected": -3.1156527996063232, |
|
"logps/chosen": -126.01689147949219, |
|
"logps/rejected": -100.77046203613281, |
|
"loss": 0.7224, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.3138067126274109, |
|
"rewards/margins": -0.038806475698947906, |
|
"rewards/rejected": -0.2750001847743988, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.764478764478765e-05, |
|
"logits/chosen": -3.155150890350342, |
|
"logits/rejected": -3.1715970039367676, |
|
"logps/chosen": -133.27737426757812, |
|
"logps/rejected": -118.9439926147461, |
|
"loss": 0.713, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.2480003386735916, |
|
"rewards/margins": -0.023857835680246353, |
|
"rewards/rejected": -0.22414250671863556, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.72972972972973e-05, |
|
"logits/chosen": -3.1669116020202637, |
|
"logits/rejected": -3.1525278091430664, |
|
"logps/chosen": -123.1195297241211, |
|
"logps/rejected": -128.38714599609375, |
|
"loss": 0.6781, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.19161827862262726, |
|
"rewards/margins": 0.04476013034582138, |
|
"rewards/rejected": -0.23637838661670685, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.694980694980695e-05, |
|
"logits/chosen": -3.1426501274108887, |
|
"logits/rejected": -3.132570266723633, |
|
"logps/chosen": -120.64261627197266, |
|
"logps/rejected": -113.0268783569336, |
|
"loss": 0.7107, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.3047412037849426, |
|
"rewards/margins": -0.014080168679356575, |
|
"rewards/rejected": -0.2906610071659088, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.660231660231661e-05, |
|
"logits/chosen": -3.170804500579834, |
|
"logits/rejected": -3.164586305618286, |
|
"logps/chosen": -116.2149887084961, |
|
"logps/rejected": -126.68898010253906, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.33613839745521545, |
|
"rewards/margins": 0.028235793113708496, |
|
"rewards/rejected": -0.36437422037124634, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.625482625482626e-05, |
|
"logits/chosen": -3.17895245552063, |
|
"logits/rejected": -3.1590020656585693, |
|
"logps/chosen": -126.33811950683594, |
|
"logps/rejected": -103.02183532714844, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4042009711265564, |
|
"rewards/margins": 0.19598612189292908, |
|
"rewards/rejected": -0.6001870632171631, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.59073359073359e-05, |
|
"logits/chosen": -3.180785894393921, |
|
"logits/rejected": -3.1576857566833496, |
|
"logps/chosen": -129.47866821289062, |
|
"logps/rejected": -126.99539947509766, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.8047823905944824, |
|
"rewards/margins": 0.08432246744632721, |
|
"rewards/rejected": -0.8891048431396484, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.555984555984557e-05, |
|
"logits/chosen": -3.139349937438965, |
|
"logits/rejected": -3.114441394805908, |
|
"logps/chosen": -139.87002563476562, |
|
"logps/rejected": -135.06802368164062, |
|
"loss": 0.6326, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2678108215332031, |
|
"rewards/margins": 0.2349836528301239, |
|
"rewards/rejected": -1.5027945041656494, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00010424710424710426, |
|
"logits/chosen": -3.155695676803589, |
|
"logits/rejected": -3.1192359924316406, |
|
"logps/chosen": -126.76655578613281, |
|
"logps/rejected": -119.91800689697266, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.1118037700653076, |
|
"rewards/margins": 0.12902173399925232, |
|
"rewards/rejected": -1.2408255338668823, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001138996138996139, |
|
"logits/chosen": -3.203996181488037, |
|
"logits/rejected": -3.181511402130127, |
|
"logps/chosen": -121.38505554199219, |
|
"logps/rejected": -123.63691711425781, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.83983314037323, |
|
"rewards/margins": 0.14224112033843994, |
|
"rewards/rejected": -0.9820743799209595, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00012355212355212355, |
|
"logits/chosen": -3.199700355529785, |
|
"logits/rejected": -3.142176628112793, |
|
"logps/chosen": -144.78311157226562, |
|
"logps/rejected": -131.46128845214844, |
|
"loss": 0.7421, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.7906621694564819, |
|
"rewards/margins": 0.08599194139242172, |
|
"rewards/rejected": -0.8766541481018066, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00013223938223938227, |
|
"logits/chosen": -3.11432147026062, |
|
"logits/rejected": -3.0819637775421143, |
|
"logps/chosen": -131.96109008789062, |
|
"logps/rejected": -118.2151107788086, |
|
"loss": 0.7438, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.9345771074295044, |
|
"rewards/margins": 0.03206203132867813, |
|
"rewards/rejected": -0.9666391611099243, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00014189189189189188, |
|
"logits/chosen": -3.103154182434082, |
|
"logits/rejected": -3.0087850093841553, |
|
"logps/chosen": -111.38960266113281, |
|
"logps/rejected": -109.3032455444336, |
|
"loss": 0.6658, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4318141043186188, |
|
"rewards/margins": 0.15236088633537292, |
|
"rewards/rejected": -0.5841749906539917, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00015154440154440155, |
|
"logits/chosen": -3.065882444381714, |
|
"logits/rejected": -3.014258623123169, |
|
"logps/chosen": -115.94306945800781, |
|
"logps/rejected": -129.7348175048828, |
|
"loss": 0.7302, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6290556192398071, |
|
"rewards/margins": 0.06416401267051697, |
|
"rewards/rejected": -0.6932196021080017, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001611969111969112, |
|
"logits/chosen": -2.8696858882904053, |
|
"logits/rejected": -2.820652723312378, |
|
"logps/chosen": -121.01307678222656, |
|
"logps/rejected": -122.356201171875, |
|
"loss": 0.7221, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.7266199588775635, |
|
"rewards/margins": 0.14146149158477783, |
|
"rewards/rejected": -0.8680814504623413, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00017084942084942084, |
|
"logits/chosen": -2.89375638961792, |
|
"logits/rejected": -2.8223800659179688, |
|
"logps/chosen": -131.37777709960938, |
|
"logps/rejected": -125.69004821777344, |
|
"loss": 0.583, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9062817692756653, |
|
"rewards/margins": 0.5299333333969116, |
|
"rewards/rejected": -1.4362150430679321, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001805019305019305, |
|
"logits/chosen": -2.8086953163146973, |
|
"logits/rejected": -2.8810436725616455, |
|
"logps/chosen": -115.71038818359375, |
|
"logps/rejected": -133.2216339111328, |
|
"loss": 0.7593, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.390483021736145, |
|
"rewards/margins": 0.12736426293849945, |
|
"rewards/rejected": -1.5178472995758057, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019015444015444015, |
|
"logits/chosen": -3.0167882442474365, |
|
"logits/rejected": -2.996938943862915, |
|
"logps/chosen": -118.67408752441406, |
|
"logps/rejected": -106.15169525146484, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.7122364640235901, |
|
"rewards/margins": 0.25746825337409973, |
|
"rewards/rejected": -0.9697047472000122, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_logits/chosen": -3.1546952724456787, |
|
"eval_logits/rejected": -3.1338424682617188, |
|
"eval_logps/chosen": -127.55575561523438, |
|
"eval_logps/rejected": -128.7761993408203, |
|
"eval_loss": 0.743442177772522, |
|
"eval_rewards/accuracies": 0.5858798623085022, |
|
"eval_rewards/chosen": -1.0754988193511963, |
|
"eval_rewards/margins": 0.3047899305820465, |
|
"eval_rewards/rejected": -1.3802887201309204, |
|
"eval_runtime": 1335.2681, |
|
"eval_samples_per_second": 0.711, |
|
"eval_steps_per_second": 0.711, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001998069498069498, |
|
"logits/chosen": -3.2106406688690186, |
|
"logits/rejected": -3.1746304035186768, |
|
"logps/chosen": -131.38128662109375, |
|
"logps/rejected": -121.36234283447266, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.005927324295044, |
|
"rewards/margins": 0.5132917165756226, |
|
"rewards/rejected": -1.5192190408706665, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00020945945945945947, |
|
"logits/chosen": -3.2851333618164062, |
|
"logits/rejected": -3.267256259918213, |
|
"logps/chosen": -120.11392974853516, |
|
"logps/rejected": -117.89964294433594, |
|
"loss": 0.6965, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.7865055203437805, |
|
"rewards/margins": 0.19706687331199646, |
|
"rewards/rejected": -0.9835723638534546, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002191119691119691, |
|
"logits/chosen": -3.408318281173706, |
|
"logits/rejected": -3.351940870285034, |
|
"logps/chosen": -107.76014709472656, |
|
"logps/rejected": -107.02482604980469, |
|
"loss": 0.7385, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.41882553696632385, |
|
"rewards/margins": 0.01567765511572361, |
|
"rewards/rejected": -0.4345032274723053, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00022876447876447875, |
|
"logits/chosen": -3.155478000640869, |
|
"logits/rejected": -3.135077953338623, |
|
"logps/chosen": -131.94711303710938, |
|
"logps/rejected": -112.65836334228516, |
|
"loss": 0.7328, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.6660552024841309, |
|
"rewards/margins": 0.10303208976984024, |
|
"rewards/rejected": -0.7690872550010681, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00023841698841698842, |
|
"logits/chosen": -3.290839433670044, |
|
"logits/rejected": -3.2837767601013184, |
|
"logps/chosen": -132.83676147460938, |
|
"logps/rejected": -119.4383773803711, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.9368747472763062, |
|
"rewards/margins": 0.25725504755973816, |
|
"rewards/rejected": -1.1941298246383667, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002480694980694981, |
|
"logits/chosen": -3.3188934326171875, |
|
"logits/rejected": -3.361811876296997, |
|
"logps/chosen": -130.7174835205078, |
|
"logps/rejected": -147.4563751220703, |
|
"loss": 0.7069, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.9200389981269836, |
|
"rewards/margins": 0.4393937587738037, |
|
"rewards/rejected": -1.3594326972961426, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002567567567567567, |
|
"logits/chosen": -3.249516010284424, |
|
"logits/rejected": -3.2256407737731934, |
|
"logps/chosen": -108.8515853881836, |
|
"logps/rejected": -132.24813842773438, |
|
"loss": 0.6983, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.42829591035842896, |
|
"rewards/margins": 0.3723019063472748, |
|
"rewards/rejected": -0.8005977869033813, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002635135135135135, |
|
"logits/chosen": -3.0320539474487305, |
|
"logits/rejected": -2.982640027999878, |
|
"logps/chosen": -175.3157501220703, |
|
"logps/rejected": -154.48963928222656, |
|
"loss": 1.4131, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.973146915435791, |
|
"rewards/margins": -0.0525052547454834, |
|
"rewards/rejected": -2.9206414222717285, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00027316602316602317, |
|
"logits/chosen": -2.9082584381103516, |
|
"logits/rejected": -2.8451313972473145, |
|
"logps/chosen": -129.400146484375, |
|
"logps/rejected": -118.70863342285156, |
|
"loss": 0.9237, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -1.8987582921981812, |
|
"rewards/margins": 0.024892251938581467, |
|
"rewards/rejected": -1.9236505031585693, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00028281853281853284, |
|
"logits/chosen": -3.066049337387085, |
|
"logits/rejected": -2.9455220699310303, |
|
"logps/chosen": -131.84317016601562, |
|
"logps/rejected": -121.24568176269531, |
|
"loss": 0.6998, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.1568044424057007, |
|
"rewards/margins": 0.3450776934623718, |
|
"rewards/rejected": -1.5018823146820068, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002924710424710425, |
|
"logits/chosen": -2.8079309463500977, |
|
"logits/rejected": -2.773160457611084, |
|
"logps/chosen": -169.48828125, |
|
"logps/rejected": -148.60374450683594, |
|
"loss": 2.3505, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -5.812082290649414, |
|
"rewards/margins": -0.6856533288955688, |
|
"rewards/rejected": -5.126428127288818, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0003021235521235521, |
|
"logits/chosen": -2.6579480171203613, |
|
"logits/rejected": -2.6659698486328125, |
|
"logps/chosen": -141.78701782226562, |
|
"logps/rejected": -160.35110473632812, |
|
"loss": 0.8982, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -3.301175594329834, |
|
"rewards/margins": 0.01409349124878645, |
|
"rewards/rejected": -3.3152689933776855, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0003117760617760618, |
|
"logits/chosen": -2.8775956630706787, |
|
"logits/rejected": -2.8255507946014404, |
|
"logps/chosen": -150.46177673339844, |
|
"logps/rejected": -144.97073364257812, |
|
"loss": 0.9436, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -3.29494047164917, |
|
"rewards/margins": 0.0633418932557106, |
|
"rewards/rejected": -3.3582825660705566, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0003204633204633205, |
|
"logits/chosen": -2.8166918754577637, |
|
"logits/rejected": -2.818556308746338, |
|
"logps/chosen": -171.34437561035156, |
|
"logps/rejected": -172.5870361328125, |
|
"loss": 1.0895, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -4.722414016723633, |
|
"rewards/margins": 0.23760518431663513, |
|
"rewards/rejected": -4.960019111633301, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00033011583011583015, |
|
"logits/chosen": -2.9005284309387207, |
|
"logits/rejected": -2.905046224594116, |
|
"logps/chosen": -179.5493621826172, |
|
"logps/rejected": -189.4880828857422, |
|
"loss": 1.4019, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -4.879184722900391, |
|
"rewards/margins": 0.2637065649032593, |
|
"rewards/rejected": -5.142890930175781, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00033976833976833977, |
|
"logits/chosen": -2.4955551624298096, |
|
"logits/rejected": -2.5379605293273926, |
|
"logps/chosen": -142.00270080566406, |
|
"logps/rejected": -147.41220092773438, |
|
"loss": 0.7586, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -3.0069820880889893, |
|
"rewards/margins": 0.49537187814712524, |
|
"rewards/rejected": -3.502354383468628, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00034942084942084944, |
|
"logits/chosen": -2.451601266860962, |
|
"logits/rejected": -2.4406824111938477, |
|
"logps/chosen": -177.74746704101562, |
|
"logps/rejected": -181.7180633544922, |
|
"loss": 1.6042, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -6.1398024559021, |
|
"rewards/margins": -0.07387089729309082, |
|
"rewards/rejected": -6.065931797027588, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0003590733590733591, |
|
"logits/chosen": -2.4471051692962646, |
|
"logits/rejected": -2.409393548965454, |
|
"logps/chosen": -182.06051635742188, |
|
"logps/rejected": -163.97035217285156, |
|
"loss": 2.4314, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -6.733994483947754, |
|
"rewards/margins": -1.0469824075698853, |
|
"rewards/rejected": -5.687012672424316, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0003687258687258687, |
|
"logits/chosen": -2.8171439170837402, |
|
"logits/rejected": -2.7251698970794678, |
|
"logps/chosen": -160.04652404785156, |
|
"logps/rejected": -142.2588348388672, |
|
"loss": 1.0093, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -3.551713466644287, |
|
"rewards/margins": 0.07490300387144089, |
|
"rewards/rejected": -3.6266167163848877, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0003783783783783784, |
|
"logits/chosen": -2.8222527503967285, |
|
"logits/rejected": -2.8754923343658447, |
|
"logps/chosen": -138.70736694335938, |
|
"logps/rejected": -142.30128479003906, |
|
"loss": 0.945, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -3.170539140701294, |
|
"rewards/margins": 0.011356920003890991, |
|
"rewards/rejected": -3.1818957328796387, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/chosen": -2.9538896083831787, |
|
"eval_logits/rejected": -2.8971762657165527, |
|
"eval_logps/chosen": -152.2897491455078, |
|
"eval_logps/rejected": -150.13941955566406, |
|
"eval_loss": 1.0823436975479126, |
|
"eval_rewards/accuracies": 0.5100105404853821, |
|
"eval_rewards/chosen": -3.548898458480835, |
|
"eval_rewards/margins": -0.032288454473018646, |
|
"eval_rewards/rejected": -3.5166099071502686, |
|
"eval_runtime": 1347.2142, |
|
"eval_samples_per_second": 0.704, |
|
"eval_steps_per_second": 0.704, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00038803088803088807, |
|
"logits/chosen": -2.8151559829711914, |
|
"logits/rejected": -2.8057456016540527, |
|
"logps/chosen": -145.69772338867188, |
|
"logps/rejected": -156.96774291992188, |
|
"loss": 0.8857, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -3.8467044830322266, |
|
"rewards/margins": 0.3242764472961426, |
|
"rewards/rejected": -4.170981407165527, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0003976833976833977, |
|
"logits/chosen": -2.7274651527404785, |
|
"logits/rejected": -2.713927745819092, |
|
"logps/chosen": -164.06692504882812, |
|
"logps/rejected": -149.8723907470703, |
|
"loss": 0.9745, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -3.5636115074157715, |
|
"rewards/margins": 0.0345739908516407, |
|
"rewards/rejected": -3.5981857776641846, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00040733590733590735, |
|
"logits/chosen": -2.8782758712768555, |
|
"logits/rejected": -2.8031697273254395, |
|
"logps/chosen": -165.91473388671875, |
|
"logps/rejected": -147.03627014160156, |
|
"loss": 0.8281, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -4.2417216300964355, |
|
"rewards/margins": 0.32142549753189087, |
|
"rewards/rejected": -4.563147068023682, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000416988416988417, |
|
"logits/chosen": -2.623711585998535, |
|
"logits/rejected": -2.622528314590454, |
|
"logps/chosen": -149.8426513671875, |
|
"logps/rejected": -159.93692016601562, |
|
"loss": 0.9961, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -4.433084487915039, |
|
"rewards/margins": 0.28495556116104126, |
|
"rewards/rejected": -4.7180399894714355, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00042664092664092664, |
|
"logits/chosen": -2.580371618270874, |
|
"logits/rejected": -2.5741231441497803, |
|
"logps/chosen": -160.84347534179688, |
|
"logps/rejected": -141.23475646972656, |
|
"loss": 1.2914, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -4.076364994049072, |
|
"rewards/margins": -0.23183032870292664, |
|
"rewards/rejected": -3.8445351123809814, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004362934362934363, |
|
"logits/chosen": -2.889563798904419, |
|
"logits/rejected": -2.8342082500457764, |
|
"logps/chosen": -186.77017211914062, |
|
"logps/rejected": -168.42330932617188, |
|
"loss": 1.1317, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -4.674792289733887, |
|
"rewards/margins": -0.265504390001297, |
|
"rewards/rejected": -4.409287929534912, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000445945945945946, |
|
"logits/chosen": -2.3731606006622314, |
|
"logits/rejected": -2.344404697418213, |
|
"logps/chosen": -172.8909454345703, |
|
"logps/rejected": -175.9696502685547, |
|
"loss": 0.9674, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -5.584943771362305, |
|
"rewards/margins": 0.14772634208202362, |
|
"rewards/rejected": -5.732670783996582, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004555984555984556, |
|
"logits/chosen": -2.3436319828033447, |
|
"logits/rejected": -2.301845073699951, |
|
"logps/chosen": -173.07313537597656, |
|
"logps/rejected": -169.7339630126953, |
|
"loss": 1.2097, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -5.504385471343994, |
|
"rewards/margins": 0.23244301974773407, |
|
"rewards/rejected": -5.736828804016113, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00046525096525096526, |
|
"logits/chosen": -2.6778111457824707, |
|
"logits/rejected": -2.5807526111602783, |
|
"logps/chosen": -166.180419921875, |
|
"logps/rejected": -168.2677001953125, |
|
"loss": 1.2799, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -4.63643217086792, |
|
"rewards/margins": 0.5320998430252075, |
|
"rewards/rejected": -5.168532371520996, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00047490347490347493, |
|
"logits/chosen": -3.083743095397949, |
|
"logits/rejected": -3.080765962600708, |
|
"logps/chosen": -193.96841430664062, |
|
"logps/rejected": -182.3042755126953, |
|
"loss": 1.6246, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -6.152979850769043, |
|
"rewards/margins": -0.20669928193092346, |
|
"rewards/rejected": -5.946280479431152, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004777992277992278, |
|
"logits/chosen": -2.692495584487915, |
|
"logits/rejected": -2.6421055793762207, |
|
"logps/chosen": -172.01806640625, |
|
"logps/rejected": -165.9178466796875, |
|
"loss": 1.2994, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -4.898122787475586, |
|
"rewards/margins": 0.04086846113204956, |
|
"rewards/rejected": -4.938991546630859, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004874517374517375, |
|
"logits/chosen": -2.7154297828674316, |
|
"logits/rejected": -2.5865087509155273, |
|
"logps/chosen": -134.430419921875, |
|
"logps/rejected": -144.77151489257812, |
|
"loss": 1.0245, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -3.9665591716766357, |
|
"rewards/margins": 0.17584654688835144, |
|
"rewards/rejected": -4.1424055099487305, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004961389961389962, |
|
"logits/chosen": -2.863107442855835, |
|
"logits/rejected": -2.8306002616882324, |
|
"logps/chosen": -269.8907165527344, |
|
"logps/rejected": -273.8556823730469, |
|
"loss": 3.3909, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -13.715133666992188, |
|
"rewards/margins": 0.3205181956291199, |
|
"rewards/rejected": -14.035652160644531, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004999979503849796, |
|
"logits/chosen": -3.0216221809387207, |
|
"logits/rejected": -3.014930248260498, |
|
"logps/chosen": -194.19422912597656, |
|
"logps/rejected": -201.08251953125, |
|
"loss": 2.3037, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.459778785705566, |
|
"rewards/margins": -0.22785942256450653, |
|
"rewards/rejected": -7.231919288635254, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004999854250815602, |
|
"logits/chosen": -2.967331647872925, |
|
"logits/rejected": -2.933845043182373, |
|
"logps/chosen": -215.81240844726562, |
|
"logps/rejected": -204.9075927734375, |
|
"loss": 2.1232, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -7.889649868011475, |
|
"rewards/margins": 0.17769476771354675, |
|
"rewards/rejected": -8.067344665527344, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00049996151371953, |
|
"logits/chosen": -3.052489757537842, |
|
"logits/rejected": -3.0332350730895996, |
|
"logps/chosen": -181.4810791015625, |
|
"logps/rejected": -172.9596405029297, |
|
"loss": 1.3367, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -5.431277275085449, |
|
"rewards/margins": 0.03274815157055855, |
|
"rewards/rejected": -5.464025020599365, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004999262173879769, |
|
"logits/chosen": -3.040531635284424, |
|
"logits/rejected": -3.037515163421631, |
|
"logps/chosen": -179.90150451660156, |
|
"logps/rejected": -185.0644989013672, |
|
"loss": 1.4866, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -6.061680316925049, |
|
"rewards/margins": 0.6980069279670715, |
|
"rewards/rejected": -6.759686470031738, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004998795376945392, |
|
"logits/chosen": -3.021232843399048, |
|
"logits/rejected": -2.9936585426330566, |
|
"logps/chosen": -175.13389587402344, |
|
"logps/rejected": -156.2028350830078, |
|
"loss": 1.7235, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -5.4631123542785645, |
|
"rewards/margins": -0.5610149502754211, |
|
"rewards/rejected": -4.902098178863525, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004998214767653319, |
|
"logits/chosen": -2.944594621658325, |
|
"logits/rejected": -3.0027194023132324, |
|
"logps/chosen": -189.1737823486328, |
|
"logps/rejected": -175.7019500732422, |
|
"loss": 1.9699, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -6.499063014984131, |
|
"rewards/margins": -0.3209795355796814, |
|
"rewards/rejected": -6.178082466125488, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004997520372448494, |
|
"logits/chosen": -2.8185458183288574, |
|
"logits/rejected": -2.798320770263672, |
|
"logps/chosen": -262.44500732421875, |
|
"logps/rejected": -245.83889770507812, |
|
"loss": 3.8268, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -12.636558532714844, |
|
"rewards/margins": -0.5035432577133179, |
|
"rewards/rejected": -12.133015632629395, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_logits/chosen": -2.9935925006866455, |
|
"eval_logits/rejected": -2.961137533187866, |
|
"eval_logps/chosen": -184.5051727294922, |
|
"eval_logps/rejected": -181.61184692382812, |
|
"eval_loss": 1.3598365783691406, |
|
"eval_rewards/accuracies": 0.5193312168121338, |
|
"eval_rewards/chosen": -6.594781875610352, |
|
"eval_rewards/margins": 0.12169010192155838, |
|
"eval_rewards/rejected": -6.7164716720581055, |
|
"eval_runtime": 1314.8357, |
|
"eval_samples_per_second": 0.728, |
|
"eval_steps_per_second": 0.728, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004996712222958462, |
|
"logits/chosen": -2.9863028526306152, |
|
"logits/rejected": -2.841834306716919, |
|
"logps/chosen": -234.49893188476562, |
|
"logps/rejected": -215.51123046875, |
|
"loss": 3.4529, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -11.542932510375977, |
|
"rewards/margins": -1.085506796836853, |
|
"rewards/rejected": -10.457425117492676, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004995790355991916, |
|
"logits/chosen": -2.9887356758117676, |
|
"logits/rejected": -2.887108564376831, |
|
"logps/chosen": -202.21853637695312, |
|
"logps/rejected": -198.64749145507812, |
|
"loss": 1.6552, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -7.725579261779785, |
|
"rewards/margins": 0.2432982176542282, |
|
"rewards/rejected": -7.96887731552124, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004994754813537031, |
|
"logits/chosen": -3.116293430328369, |
|
"logits/rejected": -3.126661539077759, |
|
"logps/chosen": -201.38604736328125, |
|
"logps/rejected": -192.84515380859375, |
|
"loss": 1.9923, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -8.627758979797363, |
|
"rewards/margins": -0.4443356990814209, |
|
"rewards/rejected": -8.183423042297363, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000499372567166064, |
|
"logits/chosen": -3.480961561203003, |
|
"logits/rejected": -3.476128339767456, |
|
"logps/chosen": -226.2942657470703, |
|
"logps/rejected": -198.20462036132812, |
|
"loss": 3.5697, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -10.370187759399414, |
|
"rewards/margins": -1.4912974834442139, |
|
"rewards/rejected": -8.878890037536621, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004992474279997049, |
|
"logits/chosen": -3.380039930343628, |
|
"logits/rejected": -3.3540236949920654, |
|
"logps/chosen": -183.65228271484375, |
|
"logps/rejected": -192.43350219726562, |
|
"loss": 2.0561, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -6.808587551116943, |
|
"rewards/margins": 1.2175410985946655, |
|
"rewards/rejected": -8.026129722595215, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004991109363882065, |
|
"logits/chosen": -2.5853641033172607, |
|
"logits/rejected": -2.6103484630584717, |
|
"logps/chosen": -464.68310546875, |
|
"logps/rejected": -447.2250061035156, |
|
"loss": 11.4738, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -34.34156036376953, |
|
"rewards/margins": -1.95975661277771, |
|
"rewards/rejected": -32.381797790527344, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004989630985483375, |
|
"logits/chosen": -2.7174885272979736, |
|
"logits/rejected": -2.707397937774658, |
|
"logps/chosen": -447.7940979003906, |
|
"logps/rejected": -407.99493408203125, |
|
"loss": 12.9625, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -32.60967254638672, |
|
"rewards/margins": -3.206895112991333, |
|
"rewards/rejected": -29.40277671813965, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004988203490218075, |
|
"logits/chosen": -2.946742296218872, |
|
"logits/rejected": -2.8993031978607178, |
|
"logps/chosen": -445.21142578125, |
|
"logps/rejected": -432.021240234375, |
|
"loss": 10.7741, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -32.348819732666016, |
|
"rewards/margins": -1.2561819553375244, |
|
"rewards/rejected": -31.092632293701172, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004986509723258511, |
|
"logits/chosen": -3.1020426750183105, |
|
"logits/rejected": -3.133068561553955, |
|
"logps/chosen": -427.81756591796875, |
|
"logps/rejected": -416.2047424316406, |
|
"loss": 11.4242, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -31.230037689208984, |
|
"rewards/margins": -0.655289351940155, |
|
"rewards/rejected": -30.57474708557129, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004984702703514565, |
|
"logits/chosen": -3.0160446166992188, |
|
"logits/rejected": -3.0138049125671387, |
|
"logps/chosen": -433.4644470214844, |
|
"logps/rejected": -405.3623046875, |
|
"loss": 10.8165, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -30.836090087890625, |
|
"rewards/margins": -2.514590263366699, |
|
"rewards/rejected": -28.321496963500977, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004982782513290365, |
|
"logits/chosen": -3.1978919506073, |
|
"logits/rejected": -3.197380542755127, |
|
"logps/chosen": -422.511962890625, |
|
"logps/rejected": -402.30938720703125, |
|
"loss": 12.8189, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -30.785781860351562, |
|
"rewards/margins": -2.3963069915771484, |
|
"rewards/rejected": -28.389474868774414, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004980749240044603, |
|
"logits/chosen": -3.1342532634735107, |
|
"logits/rejected": -3.1338047981262207, |
|
"logps/chosen": -403.13494873046875, |
|
"logps/rejected": -357.790771484375, |
|
"loss": 11.5675, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -28.892202377319336, |
|
"rewards/margins": -3.723827362060547, |
|
"rewards/rejected": -25.168371200561523, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004978602976386554, |
|
"logits/chosen": -3.0739312171936035, |
|
"logits/rejected": -3.0738184452056885, |
|
"logps/chosen": -381.2265625, |
|
"logps/rejected": -378.4680480957031, |
|
"loss": 11.5225, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -27.621994018554688, |
|
"rewards/margins": -0.9019744992256165, |
|
"rewards/rejected": -26.720022201538086, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004976343820071849, |
|
"logits/chosen": -3.166983127593994, |
|
"logits/rejected": -3.1671650409698486, |
|
"logps/chosen": -408.42071533203125, |
|
"logps/rejected": -387.2364196777344, |
|
"loss": 13.9818, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -29.39678382873535, |
|
"rewards/margins": -2.448943614959717, |
|
"rewards/rejected": -26.94784164428711, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004973971873998035, |
|
"logits/chosen": -3.0561656951904297, |
|
"logits/rejected": -3.0557007789611816, |
|
"logps/chosen": -417.0025329589844, |
|
"logps/rejected": -349.56463623046875, |
|
"loss": 12.3073, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -29.997058868408203, |
|
"rewards/margins": -5.650521278381348, |
|
"rewards/rejected": -24.346534729003906, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004971487246199875, |
|
"logits/chosen": -3.0265376567840576, |
|
"logits/rejected": -3.0265283584594727, |
|
"logps/chosen": -434.55419921875, |
|
"logps/rejected": -381.22808837890625, |
|
"loss": 12.0398, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -30.918407440185547, |
|
"rewards/margins": -4.258307456970215, |
|
"rewards/rejected": -26.66009521484375, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.000496889004984444, |
|
"logits/chosen": -2.8932366371154785, |
|
"logits/rejected": -2.895204544067383, |
|
"logps/chosen": -396.3167419433594, |
|
"logps/rejected": -428.53839111328125, |
|
"loss": 9.4104, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -28.4284610748291, |
|
"rewards/margins": 1.9928890466690063, |
|
"rewards/rejected": -30.42134666442871, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004966180403225946, |
|
"logits/chosen": -2.895068407058716, |
|
"logits/rejected": -2.894937753677368, |
|
"logps/chosen": -395.245849609375, |
|
"logps/rejected": -378.4429626464844, |
|
"loss": 10.2846, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -28.237285614013672, |
|
"rewards/margins": -1.503316879272461, |
|
"rewards/rejected": -26.733972549438477, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004963358429760368, |
|
"logits/chosen": -2.551323652267456, |
|
"logits/rejected": -2.5523290634155273, |
|
"logps/chosen": -477.17327880859375, |
|
"logps/rejected": -427.474853515625, |
|
"loss": 12.1626, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -35.30048370361328, |
|
"rewards/margins": -4.13530158996582, |
|
"rewards/rejected": -31.165185928344727, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004960424257979822, |
|
"logits/chosen": -2.7914838790893555, |
|
"logits/rejected": -2.790367841720581, |
|
"logps/chosen": -478.9364318847656, |
|
"logps/rejected": -461.02655029296875, |
|
"loss": 10.3404, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -35.25132369995117, |
|
"rewards/margins": -1.6512939929962158, |
|
"rewards/rejected": -33.60003662109375, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_logits/chosen": -3.0016679763793945, |
|
"eval_logits/rejected": -3.0014870166778564, |
|
"eval_logps/chosen": -441.87054443359375, |
|
"eval_logps/rejected": -399.2597961425781, |
|
"eval_loss": 11.33322811126709, |
|
"eval_rewards/accuracies": 0.4555903971195221, |
|
"eval_rewards/chosen": -32.331321716308594, |
|
"eval_rewards/margins": -3.850048303604126, |
|
"eval_rewards/rejected": -28.481277465820312, |
|
"eval_runtime": 1312.1317, |
|
"eval_samples_per_second": 0.729, |
|
"eval_steps_per_second": 0.729, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004957378021526705, |
|
"logits/chosen": -2.9228155612945557, |
|
"logits/rejected": -2.925412654876709, |
|
"logps/chosen": -481.1300354003906, |
|
"logps/rejected": -465.1952209472656, |
|
"loss": 11.3707, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -35.0019416809082, |
|
"rewards/margins": -1.9142730236053467, |
|
"rewards/rejected": -33.08766555786133, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004954219859147614, |
|
"logits/chosen": -3.0219039916992188, |
|
"logits/rejected": -3.0174221992492676, |
|
"logps/chosen": -364.8722839355469, |
|
"logps/rejected": -295.5256652832031, |
|
"loss": 12.3607, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -25.810903549194336, |
|
"rewards/margins": -5.978564262390137, |
|
"rewards/rejected": -19.832340240478516, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004950949914687023, |
|
"logits/chosen": -3.118417263031006, |
|
"logits/rejected": -3.1218113899230957, |
|
"logps/chosen": -469.4906311035156, |
|
"logps/rejected": -412.5103454589844, |
|
"loss": 11.9496, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -33.402565002441406, |
|
"rewards/margins": -4.156603813171387, |
|
"rewards/rejected": -29.245960235595703, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004947568337080732, |
|
"logits/chosen": -3.0231635570526123, |
|
"logits/rejected": -3.0243794918060303, |
|
"logps/chosen": -382.8542785644531, |
|
"logps/rejected": -346.9595031738281, |
|
"loss": 9.7701, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -27.092443466186523, |
|
"rewards/margins": -3.154633045196533, |
|
"rewards/rejected": -23.93781089782715, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004944075280349084, |
|
"logits/chosen": -3.034963846206665, |
|
"logits/rejected": -3.0339653491973877, |
|
"logps/chosen": -385.3253173828125, |
|
"logps/rejected": -367.23638916015625, |
|
"loss": 9.2328, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -27.111583709716797, |
|
"rewards/margins": -1.6716159582138062, |
|
"rewards/rejected": -25.43996810913086, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004940470903589948, |
|
"logits/chosen": -3.1586310863494873, |
|
"logits/rejected": -3.128281831741333, |
|
"logps/chosen": -487.345703125, |
|
"logps/rejected": -389.9637756347656, |
|
"loss": 12.7587, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -34.73926544189453, |
|
"rewards/margins": -6.996462821960449, |
|
"rewards/rejected": -27.7428035736084, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004936755370971475, |
|
"logits/chosen": -2.9109790325164795, |
|
"logits/rejected": -2.888288974761963, |
|
"logps/chosen": -463.9207458496094, |
|
"logps/rejected": -376.2655334472656, |
|
"loss": 13.8618, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -34.083106994628906, |
|
"rewards/margins": -7.244679927825928, |
|
"rewards/rejected": -26.838430404663086, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004932928851724621, |
|
"logits/chosen": -2.8432798385620117, |
|
"logits/rejected": -2.8495278358459473, |
|
"logps/chosen": -365.90679931640625, |
|
"logps/rejected": -361.8638000488281, |
|
"loss": 8.7379, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -26.055978775024414, |
|
"rewards/margins": -0.3134794235229492, |
|
"rewards/rejected": -25.742502212524414, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004928991520135436, |
|
"logits/chosen": -2.6536898612976074, |
|
"logits/rejected": -2.6361289024353027, |
|
"logps/chosen": -519.7141723632812, |
|
"logps/rejected": -389.46575927734375, |
|
"loss": 15.3084, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -37.92496871948242, |
|
"rewards/margins": -10.107343673706055, |
|
"rewards/rejected": -27.817623138427734, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004924943555537128, |
|
"logits/chosen": -3.1115312576293945, |
|
"logits/rejected": -3.0791449546813965, |
|
"logps/chosen": -469.66351318359375, |
|
"logps/rejected": -401.14508056640625, |
|
"loss": 13.3952, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -33.486183166503906, |
|
"rewards/margins": -5.449090480804443, |
|
"rewards/rejected": -28.037090301513672, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004920785142301893, |
|
"logits/chosen": -2.454453945159912, |
|
"logits/rejected": -2.4585988521575928, |
|
"logps/chosen": -463.24517822265625, |
|
"logps/rejected": -402.23773193359375, |
|
"loss": 13.7904, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -34.2692985534668, |
|
"rewards/margins": -5.860762596130371, |
|
"rewards/rejected": -28.40853500366211, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004916516469832524, |
|
"logits/chosen": -2.8716561794281006, |
|
"logits/rejected": -2.8634109497070312, |
|
"logps/chosen": -347.35198974609375, |
|
"logps/rejected": -340.46942138671875, |
|
"loss": 7.8634, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -24.902542114257812, |
|
"rewards/margins": -1.0167404413223267, |
|
"rewards/rejected": -23.885799407958984, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004912137732553772, |
|
"logits/chosen": -3.2272415161132812, |
|
"logits/rejected": -3.2235121726989746, |
|
"logps/chosen": -450.973876953125, |
|
"logps/rejected": -462.0083923339844, |
|
"loss": 9.2953, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -32.52794647216797, |
|
"rewards/margins": 0.38686689734458923, |
|
"rewards/rejected": -32.91481399536133, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004907649129903504, |
|
"logits/chosen": -2.647204875946045, |
|
"logits/rejected": -2.6482150554656982, |
|
"logps/chosen": -373.737060546875, |
|
"logps/rejected": -383.5912780761719, |
|
"loss": 7.0717, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -26.5350284576416, |
|
"rewards/margins": 0.7499195337295532, |
|
"rewards/rejected": -27.284948348999023, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004903050866323608, |
|
"logits/chosen": -3.079465627670288, |
|
"logits/rejected": -3.0794615745544434, |
|
"logps/chosen": -395.6184997558594, |
|
"logps/rejected": -406.3051452636719, |
|
"loss": 9.1411, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -29.57635498046875, |
|
"rewards/margins": 0.8788874745368958, |
|
"rewards/rejected": -30.45524024963379, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000489834315125069, |
|
"logits/chosen": -3.1981568336486816, |
|
"logits/rejected": -3.1923341751098633, |
|
"logps/chosen": -453.5596618652344, |
|
"logps/rejected": -425.4774475097656, |
|
"loss": 11.2943, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -32.61115646362305, |
|
"rewards/margins": -2.69136118888855, |
|
"rewards/rejected": -29.9197998046875, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004893526199106531, |
|
"logits/chosen": -2.876206874847412, |
|
"logits/rejected": -2.881593942642212, |
|
"logps/chosen": -433.71636962890625, |
|
"logps/rejected": -391.15692138671875, |
|
"loss": 10.9992, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -31.739410400390625, |
|
"rewards/margins": -3.6809983253479004, |
|
"rewards/rejected": -28.058406829833984, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004888600229288316, |
|
"logits/chosen": -2.865589141845703, |
|
"logits/rejected": -2.8664183616638184, |
|
"logps/chosen": -359.43023681640625, |
|
"logps/rejected": -321.70599365234375, |
|
"loss": 7.9915, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -25.906265258789062, |
|
"rewards/margins": -3.289003849029541, |
|
"rewards/rejected": -22.617259979248047, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004883565466158652, |
|
"logits/chosen": -2.8116517066955566, |
|
"logits/rejected": -2.782489776611328, |
|
"logps/chosen": -494.1553649902344, |
|
"logps/rejected": -430.222412109375, |
|
"loss": 13.6529, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -36.09266662597656, |
|
"rewards/margins": -5.284867286682129, |
|
"rewards/rejected": -30.807796478271484, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004878422139035341, |
|
"logits/chosen": -2.4114489555358887, |
|
"logits/rejected": -2.377622604370117, |
|
"logps/chosen": -482.96856689453125, |
|
"logps/rejected": -437.3125, |
|
"loss": 11.137, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -34.935813903808594, |
|
"rewards/margins": -3.0001707077026367, |
|
"rewards/rejected": -31.93564224243164, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_logits/chosen": -2.2959094047546387, |
|
"eval_logits/rejected": -2.2838947772979736, |
|
"eval_logps/chosen": -438.8805236816406, |
|
"eval_logps/rejected": -399.5718688964844, |
|
"eval_loss": 10.402800559997559, |
|
"eval_rewards/accuracies": 0.45036572217941284, |
|
"eval_rewards/chosen": -32.032310485839844, |
|
"eval_rewards/margins": -3.519833564758301, |
|
"eval_rewards/rejected": -28.51247787475586, |
|
"eval_runtime": 1313.6418, |
|
"eval_samples_per_second": 0.729, |
|
"eval_steps_per_second": 0.729, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004917027842051741, |
|
"logits/chosen": -2.3743691444396973, |
|
"logits/rejected": -2.3645715713500977, |
|
"logps/chosen": -467.918701171875, |
|
"logps/rejected": -392.2208557128906, |
|
"loss": 13.2044, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -34.59047317504883, |
|
"rewards/margins": -6.761924743652344, |
|
"rewards/rejected": -27.82854652404785, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004913043488808868, |
|
"logits/chosen": -2.2389774322509766, |
|
"logits/rejected": -2.229212760925293, |
|
"logps/chosen": -486.40362548828125, |
|
"logps/rejected": -460.124267578125, |
|
"loss": 12.247, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -36.01793670654297, |
|
"rewards/margins": -2.4814937114715576, |
|
"rewards/rejected": -33.53643798828125, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004909379125837757, |
|
"logits/chosen": -2.3661270141601562, |
|
"logits/rejected": -2.35339093208313, |
|
"logps/chosen": -450.0957946777344, |
|
"logps/rejected": -419.64453125, |
|
"loss": 9.5754, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -32.51788330078125, |
|
"rewards/margins": -2.8007171154022217, |
|
"rewards/rejected": -29.717166900634766, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.000490689498823928, |
|
"logits/chosen": -2.502885341644287, |
|
"logits/rejected": -2.475445032119751, |
|
"logps/chosen": -544.1683349609375, |
|
"logps/rejected": -447.0438537597656, |
|
"loss": 14.0802, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -40.440181732177734, |
|
"rewards/margins": -7.666708946228027, |
|
"rewards/rejected": -32.773475646972656, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004903107023416835, |
|
"logits/chosen": -2.5913939476013184, |
|
"logits/rejected": -2.5680718421936035, |
|
"logps/chosen": -486.11907958984375, |
|
"logps/rejected": -368.26251220703125, |
|
"loss": 13.0584, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -35.45328140258789, |
|
"rewards/margins": -9.003652572631836, |
|
"rewards/rejected": -26.449630737304688, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004898811381307269, |
|
"logits/chosen": -2.5621819496154785, |
|
"logits/rejected": -2.5630691051483154, |
|
"logps/chosen": -415.2149353027344, |
|
"logps/rejected": -372.393798828125, |
|
"loss": 10.9069, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -30.74907875061035, |
|
"rewards/margins": -3.5560336112976074, |
|
"rewards/rejected": -27.193042755126953, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004894424536834149, |
|
"logits/chosen": -2.6120645999908447, |
|
"logits/rejected": -2.604825019836426, |
|
"logps/chosen": -513.4226684570312, |
|
"logps/rejected": -459.19647216796875, |
|
"loss": 11.3672, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -39.00333023071289, |
|
"rewards/margins": -4.482884883880615, |
|
"rewards/rejected": -34.520442962646484, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000488994665678449, |
|
"logits/chosen": -2.880282163619995, |
|
"logits/rejected": -2.8795719146728516, |
|
"logps/chosen": -424.77874755859375, |
|
"logps/rejected": -367.4000244140625, |
|
"loss": 11.0388, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -31.353382110595703, |
|
"rewards/margins": -5.197685718536377, |
|
"rewards/rejected": -26.15569496154785, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004885377911406459, |
|
"logits/chosen": -2.947252035140991, |
|
"logits/rejected": -2.940441370010376, |
|
"logps/chosen": -442.61651611328125, |
|
"logps/rejected": -374.4497985839844, |
|
"loss": 11.9975, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -31.82815170288086, |
|
"rewards/margins": -5.211056709289551, |
|
"rewards/rejected": -26.617095947265625, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00048807184744029076, |
|
"logits/chosen": -2.9204514026641846, |
|
"logits/rejected": -2.922818660736084, |
|
"logps/chosen": -416.2978515625, |
|
"logps/rejected": -395.44378662109375, |
|
"loss": 9.0951, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -30.16245460510254, |
|
"rewards/margins": -1.954272985458374, |
|
"rewards/rejected": -28.208179473876953, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00048759685229247675, |
|
"logits/chosen": -2.950378179550171, |
|
"logits/rejected": -2.9522385597229004, |
|
"logps/chosen": -456.4190368652344, |
|
"logps/rejected": -417.6458435058594, |
|
"loss": 10.6616, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -33.50844192504883, |
|
"rewards/margins": -3.696442127227783, |
|
"rewards/rejected": -29.811996459960938, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0004872103512563103, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 24.7455, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00048716163259071837, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 41.8228, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.000487112823756431, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 110.7265, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.000487112823756431, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 245.3322, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.000487112823756431, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1041.9771, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004870639247720053, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 437.3346, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004870639247720053, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 123.7268, |
|
"rewards/accuracies": 0.22499999403953552, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004870639247720053, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 315.4085, |
|
"rewards/accuracies": 0.23749999701976776, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004870639247720053, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 52542.0625, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": NaN, |
|
"eval_logits/rejected": NaN, |
|
"eval_logps/chosen": NaN, |
|
"eval_logps/rejected": NaN, |
|
"eval_loss": NaN, |
|
"eval_rewards/accuracies": 0.2244604378938675, |
|
"eval_rewards/chosen": NaN, |
|
"eval_rewards/margins": NaN, |
|
"eval_rewards/rejected": NaN, |
|
"eval_runtime": 988.4543, |
|
"eval_samples_per_second": 0.703, |
|
"eval_steps_per_second": 0.703, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 197.8537, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 4813.8465, |
|
"rewards/accuracies": 0.13750000298023224, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 2048.3082, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 143.0693, |
|
"rewards/accuracies": 0.16249999403953552, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 16874.5531, |
|
"rewards/accuracies": 0.21250000596046448, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 2509.9437, |
|
"rewards/accuracies": 0.17499999701976776, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 9635.2938, |
|
"rewards/accuracies": 0.1875, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 3857.9113, |
|
"rewards/accuracies": 0.13750000298023224, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 4572.7609, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 606.0347, |
|
"rewards/accuracies": 0.15000000596046448, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1759.0598, |
|
"rewards/accuracies": 0.17499999701976776, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 14436.2953, |
|
"rewards/accuracies": 0.21250000596046448, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 386264.375, |
|
"rewards/accuracies": 0.0625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 8430.657, |
|
"rewards/accuracies": 0.1875, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 6939.275, |
|
"rewards/accuracies": 0.13750000298023224, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 4295.0949, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 17283.3672, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 24895.7469, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 45136.4094, |
|
"rewards/accuracies": 0.17499999701976776, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 165.0189, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_logits/chosen": NaN, |
|
"eval_logits/rejected": NaN, |
|
"eval_logps/chosen": NaN, |
|
"eval_logps/rejected": NaN, |
|
"eval_loss": NaN, |
|
"eval_rewards/accuracies": 0.17553956806659698, |
|
"eval_rewards/chosen": NaN, |
|
"eval_rewards/margins": NaN, |
|
"eval_rewards/rejected": NaN, |
|
"eval_runtime": 987.1061, |
|
"eval_samples_per_second": 0.704, |
|
"eval_steps_per_second": 0.704, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 8688.7203, |
|
"rewards/accuracies": 0.22499999403953552, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 9484.9836, |
|
"rewards/accuracies": 0.1875, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 9308.6922, |
|
"rewards/accuracies": 0.17499999701976776, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 2905.9373, |
|
"rewards/accuracies": 0.22499999403953552, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 4738.7867, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 424.0728, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 2181.5133, |
|
"rewards/accuracies": 0.13750000298023224, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 23597.5219, |
|
"rewards/accuracies": 0.16249999403953552, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1259.9596, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004870149356560326, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 7475.4719, |
|
"rewards/accuracies": 0.21250000596046448, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 5662, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|