mistral7b_dpo_en / trainer_state.json
Krisbiantoro's picture
Upload folder using huggingface_hub
ea87049
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.2648831203231574,
"eval_steps": 200,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 9.652509652509653e-06,
"logits/chosen": -3.11246657371521,
"logits/rejected": -3.086373805999756,
"logps/chosen": -113.73238372802734,
"logps/rejected": -109.32698822021484,
"loss": 0.721,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.2993558943271637,
"rewards/margins": -0.0277109295129776,
"rewards/rejected": -0.2716449797153473,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 1.9305019305019306e-05,
"logits/chosen": -3.110931873321533,
"logits/rejected": -3.1171531677246094,
"logps/chosen": -122.4663314819336,
"logps/rejected": -113.23054504394531,
"loss": 0.7161,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.2805718183517456,
"rewards/margins": -0.03167964145541191,
"rewards/rejected": -0.2488921880722046,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 2.895752895752896e-05,
"logits/chosen": -3.139052629470825,
"logits/rejected": -3.1156527996063232,
"logps/chosen": -126.01689147949219,
"logps/rejected": -100.77046203613281,
"loss": 0.7224,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.3138067126274109,
"rewards/margins": -0.038806475698947906,
"rewards/rejected": -0.2750001847743988,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 3.764478764478765e-05,
"logits/chosen": -3.155150890350342,
"logits/rejected": -3.1715970039367676,
"logps/chosen": -133.27737426757812,
"logps/rejected": -118.9439926147461,
"loss": 0.713,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.2480003386735916,
"rewards/margins": -0.023857835680246353,
"rewards/rejected": -0.22414250671863556,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 4.72972972972973e-05,
"logits/chosen": -3.1669116020202637,
"logits/rejected": -3.1525278091430664,
"logps/chosen": -123.1195297241211,
"logps/rejected": -128.38714599609375,
"loss": 0.6781,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.19161827862262726,
"rewards/margins": 0.04476013034582138,
"rewards/rejected": -0.23637838661670685,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 5.694980694980695e-05,
"logits/chosen": -3.1426501274108887,
"logits/rejected": -3.132570266723633,
"logps/chosen": -120.64261627197266,
"logps/rejected": -113.0268783569336,
"loss": 0.7107,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.3047412037849426,
"rewards/margins": -0.014080168679356575,
"rewards/rejected": -0.2906610071659088,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 6.660231660231661e-05,
"logits/chosen": -3.170804500579834,
"logits/rejected": -3.164586305618286,
"logps/chosen": -116.2149887084961,
"logps/rejected": -126.68898010253906,
"loss": 0.6885,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.33613839745521545,
"rewards/margins": 0.028235793113708496,
"rewards/rejected": -0.36437422037124634,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 7.625482625482626e-05,
"logits/chosen": -3.17895245552063,
"logits/rejected": -3.1590020656585693,
"logps/chosen": -126.33811950683594,
"logps/rejected": -103.02183532714844,
"loss": 0.6215,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.4042009711265564,
"rewards/margins": 0.19598612189292908,
"rewards/rejected": -0.6001870632171631,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 8.59073359073359e-05,
"logits/chosen": -3.180785894393921,
"logits/rejected": -3.1576857566833496,
"logps/chosen": -129.47866821289062,
"logps/rejected": -126.99539947509766,
"loss": 0.6889,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.8047823905944824,
"rewards/margins": 0.08432246744632721,
"rewards/rejected": -0.8891048431396484,
"step": 90
},
{
"epoch": 0.02,
"learning_rate": 9.555984555984557e-05,
"logits/chosen": -3.139349937438965,
"logits/rejected": -3.114441394805908,
"logps/chosen": -139.87002563476562,
"logps/rejected": -135.06802368164062,
"loss": 0.6326,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -1.2678108215332031,
"rewards/margins": 0.2349836528301239,
"rewards/rejected": -1.5027945041656494,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 0.00010424710424710426,
"logits/chosen": -3.155695676803589,
"logits/rejected": -3.1192359924316406,
"logps/chosen": -126.76655578613281,
"logps/rejected": -119.91800689697266,
"loss": 0.6925,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -1.1118037700653076,
"rewards/margins": 0.12902173399925232,
"rewards/rejected": -1.2408255338668823,
"step": 110
},
{
"epoch": 0.02,
"learning_rate": 0.0001138996138996139,
"logits/chosen": -3.203996181488037,
"logits/rejected": -3.181511402130127,
"logps/chosen": -121.38505554199219,
"logps/rejected": -123.63691711425781,
"loss": 0.6905,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.83983314037323,
"rewards/margins": 0.14224112033843994,
"rewards/rejected": -0.9820743799209595,
"step": 120
},
{
"epoch": 0.03,
"learning_rate": 0.00012355212355212355,
"logits/chosen": -3.199700355529785,
"logits/rejected": -3.142176628112793,
"logps/chosen": -144.78311157226562,
"logps/rejected": -131.46128845214844,
"loss": 0.7421,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.7906621694564819,
"rewards/margins": 0.08599194139242172,
"rewards/rejected": -0.8766541481018066,
"step": 130
},
{
"epoch": 0.03,
"learning_rate": 0.00013223938223938227,
"logits/chosen": -3.11432147026062,
"logits/rejected": -3.0819637775421143,
"logps/chosen": -131.96109008789062,
"logps/rejected": -118.2151107788086,
"loss": 0.7438,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.9345771074295044,
"rewards/margins": 0.03206203132867813,
"rewards/rejected": -0.9666391611099243,
"step": 140
},
{
"epoch": 0.03,
"learning_rate": 0.00014189189189189188,
"logits/chosen": -3.103154182434082,
"logits/rejected": -3.0087850093841553,
"logps/chosen": -111.38960266113281,
"logps/rejected": -109.3032455444336,
"loss": 0.6658,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.4318141043186188,
"rewards/margins": 0.15236088633537292,
"rewards/rejected": -0.5841749906539917,
"step": 150
},
{
"epoch": 0.03,
"learning_rate": 0.00015154440154440155,
"logits/chosen": -3.065882444381714,
"logits/rejected": -3.014258623123169,
"logps/chosen": -115.94306945800781,
"logps/rejected": -129.7348175048828,
"loss": 0.7302,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.6290556192398071,
"rewards/margins": 0.06416401267051697,
"rewards/rejected": -0.6932196021080017,
"step": 160
},
{
"epoch": 0.03,
"learning_rate": 0.0001611969111969112,
"logits/chosen": -2.8696858882904053,
"logits/rejected": -2.820652723312378,
"logps/chosen": -121.01307678222656,
"logps/rejected": -122.356201171875,
"loss": 0.7221,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.7266199588775635,
"rewards/margins": 0.14146149158477783,
"rewards/rejected": -0.8680814504623413,
"step": 170
},
{
"epoch": 0.03,
"learning_rate": 0.00017084942084942084,
"logits/chosen": -2.89375638961792,
"logits/rejected": -2.8223800659179688,
"logps/chosen": -131.37777709960938,
"logps/rejected": -125.69004821777344,
"loss": 0.583,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.9062817692756653,
"rewards/margins": 0.5299333333969116,
"rewards/rejected": -1.4362150430679321,
"step": 180
},
{
"epoch": 0.04,
"learning_rate": 0.0001805019305019305,
"logits/chosen": -2.8086953163146973,
"logits/rejected": -2.8810436725616455,
"logps/chosen": -115.71038818359375,
"logps/rejected": -133.2216339111328,
"loss": 0.7593,
"rewards/accuracies": 0.5625,
"rewards/chosen": -1.390483021736145,
"rewards/margins": 0.12736426293849945,
"rewards/rejected": -1.5178472995758057,
"step": 190
},
{
"epoch": 0.04,
"learning_rate": 0.00019015444015444015,
"logits/chosen": -3.0167882442474365,
"logits/rejected": -2.996938943862915,
"logps/chosen": -118.67408752441406,
"logps/rejected": -106.15169525146484,
"loss": 0.672,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.7122364640235901,
"rewards/margins": 0.25746825337409973,
"rewards/rejected": -0.9697047472000122,
"step": 200
},
{
"epoch": 0.04,
"eval_logits/chosen": -3.1546952724456787,
"eval_logits/rejected": -3.1338424682617188,
"eval_logps/chosen": -127.55575561523438,
"eval_logps/rejected": -128.7761993408203,
"eval_loss": 0.743442177772522,
"eval_rewards/accuracies": 0.5858798623085022,
"eval_rewards/chosen": -1.0754988193511963,
"eval_rewards/margins": 0.3047899305820465,
"eval_rewards/rejected": -1.3802887201309204,
"eval_runtime": 1335.2681,
"eval_samples_per_second": 0.711,
"eval_steps_per_second": 0.711,
"step": 200
},
{
"epoch": 0.04,
"learning_rate": 0.0001998069498069498,
"logits/chosen": -3.2106406688690186,
"logits/rejected": -3.1746304035186768,
"logps/chosen": -131.38128662109375,
"logps/rejected": -121.36234283447266,
"loss": 0.664,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -1.005927324295044,
"rewards/margins": 0.5132917165756226,
"rewards/rejected": -1.5192190408706665,
"step": 210
},
{
"epoch": 0.04,
"learning_rate": 0.00020945945945945947,
"logits/chosen": -3.2851333618164062,
"logits/rejected": -3.267256259918213,
"logps/chosen": -120.11392974853516,
"logps/rejected": -117.89964294433594,
"loss": 0.6965,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.7865055203437805,
"rewards/margins": 0.19706687331199646,
"rewards/rejected": -0.9835723638534546,
"step": 220
},
{
"epoch": 0.04,
"learning_rate": 0.0002191119691119691,
"logits/chosen": -3.408318281173706,
"logits/rejected": -3.351940870285034,
"logps/chosen": -107.76014709472656,
"logps/rejected": -107.02482604980469,
"loss": 0.7385,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.41882553696632385,
"rewards/margins": 0.01567765511572361,
"rewards/rejected": -0.4345032274723053,
"step": 230
},
{
"epoch": 0.05,
"learning_rate": 0.00022876447876447875,
"logits/chosen": -3.155478000640869,
"logits/rejected": -3.135077953338623,
"logps/chosen": -131.94711303710938,
"logps/rejected": -112.65836334228516,
"loss": 0.7328,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.6660552024841309,
"rewards/margins": 0.10303208976984024,
"rewards/rejected": -0.7690872550010681,
"step": 240
},
{
"epoch": 0.05,
"learning_rate": 0.00023841698841698842,
"logits/chosen": -3.290839433670044,
"logits/rejected": -3.2837767601013184,
"logps/chosen": -132.83676147460938,
"logps/rejected": -119.4383773803711,
"loss": 0.678,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.9368747472763062,
"rewards/margins": 0.25725504755973816,
"rewards/rejected": -1.1941298246383667,
"step": 250
},
{
"epoch": 0.05,
"learning_rate": 0.0002480694980694981,
"logits/chosen": -3.3188934326171875,
"logits/rejected": -3.361811876296997,
"logps/chosen": -130.7174835205078,
"logps/rejected": -147.4563751220703,
"loss": 0.7069,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.9200389981269836,
"rewards/margins": 0.4393937587738037,
"rewards/rejected": -1.3594326972961426,
"step": 260
},
{
"epoch": 0.05,
"learning_rate": 0.0002567567567567567,
"logits/chosen": -3.249516010284424,
"logits/rejected": -3.2256407737731934,
"logps/chosen": -108.8515853881836,
"logps/rejected": -132.24813842773438,
"loss": 0.6983,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.42829591035842896,
"rewards/margins": 0.3723019063472748,
"rewards/rejected": -0.8005977869033813,
"step": 270
},
{
"epoch": 0.05,
"learning_rate": 0.0002635135135135135,
"logits/chosen": -3.0320539474487305,
"logits/rejected": -2.982640027999878,
"logps/chosen": -175.3157501220703,
"logps/rejected": -154.48963928222656,
"loss": 1.4131,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -2.973146915435791,
"rewards/margins": -0.0525052547454834,
"rewards/rejected": -2.9206414222717285,
"step": 280
},
{
"epoch": 0.06,
"learning_rate": 0.00027316602316602317,
"logits/chosen": -2.9082584381103516,
"logits/rejected": -2.8451313972473145,
"logps/chosen": -129.400146484375,
"logps/rejected": -118.70863342285156,
"loss": 0.9237,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -1.8987582921981812,
"rewards/margins": 0.024892251938581467,
"rewards/rejected": -1.9236505031585693,
"step": 290
},
{
"epoch": 0.06,
"learning_rate": 0.00028281853281853284,
"logits/chosen": -3.066049337387085,
"logits/rejected": -2.9455220699310303,
"logps/chosen": -131.84317016601562,
"logps/rejected": -121.24568176269531,
"loss": 0.6998,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -1.1568044424057007,
"rewards/margins": 0.3450776934623718,
"rewards/rejected": -1.5018823146820068,
"step": 300
},
{
"epoch": 0.06,
"learning_rate": 0.0002924710424710425,
"logits/chosen": -2.8079309463500977,
"logits/rejected": -2.773160457611084,
"logps/chosen": -169.48828125,
"logps/rejected": -148.60374450683594,
"loss": 2.3505,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -5.812082290649414,
"rewards/margins": -0.6856533288955688,
"rewards/rejected": -5.126428127288818,
"step": 310
},
{
"epoch": 0.06,
"learning_rate": 0.0003021235521235521,
"logits/chosen": -2.6579480171203613,
"logits/rejected": -2.6659698486328125,
"logps/chosen": -141.78701782226562,
"logps/rejected": -160.35110473632812,
"loss": 0.8982,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -3.301175594329834,
"rewards/margins": 0.01409349124878645,
"rewards/rejected": -3.3152689933776855,
"step": 320
},
{
"epoch": 0.06,
"learning_rate": 0.0003117760617760618,
"logits/chosen": -2.8775956630706787,
"logits/rejected": -2.8255507946014404,
"logps/chosen": -150.46177673339844,
"logps/rejected": -144.97073364257812,
"loss": 0.9436,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -3.29494047164917,
"rewards/margins": 0.0633418932557106,
"rewards/rejected": -3.3582825660705566,
"step": 330
},
{
"epoch": 0.07,
"learning_rate": 0.0003204633204633205,
"logits/chosen": -2.8166918754577637,
"logits/rejected": -2.818556308746338,
"logps/chosen": -171.34437561035156,
"logps/rejected": -172.5870361328125,
"loss": 1.0895,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -4.722414016723633,
"rewards/margins": 0.23760518431663513,
"rewards/rejected": -4.960019111633301,
"step": 340
},
{
"epoch": 0.07,
"learning_rate": 0.00033011583011583015,
"logits/chosen": -2.9005284309387207,
"logits/rejected": -2.905046224594116,
"logps/chosen": -179.5493621826172,
"logps/rejected": -189.4880828857422,
"loss": 1.4019,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -4.879184722900391,
"rewards/margins": 0.2637065649032593,
"rewards/rejected": -5.142890930175781,
"step": 350
},
{
"epoch": 0.07,
"learning_rate": 0.00033976833976833977,
"logits/chosen": -2.4955551624298096,
"logits/rejected": -2.5379605293273926,
"logps/chosen": -142.00270080566406,
"logps/rejected": -147.41220092773438,
"loss": 0.7586,
"rewards/accuracies": 0.625,
"rewards/chosen": -3.0069820880889893,
"rewards/margins": 0.49537187814712524,
"rewards/rejected": -3.502354383468628,
"step": 360
},
{
"epoch": 0.07,
"learning_rate": 0.00034942084942084944,
"logits/chosen": -2.451601266860962,
"logits/rejected": -2.4406824111938477,
"logps/chosen": -177.74746704101562,
"logps/rejected": -181.7180633544922,
"loss": 1.6042,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -6.1398024559021,
"rewards/margins": -0.07387089729309082,
"rewards/rejected": -6.065931797027588,
"step": 370
},
{
"epoch": 0.07,
"learning_rate": 0.0003590733590733591,
"logits/chosen": -2.4471051692962646,
"logits/rejected": -2.409393548965454,
"logps/chosen": -182.06051635742188,
"logps/rejected": -163.97035217285156,
"loss": 2.4314,
"rewards/accuracies": 0.4375,
"rewards/chosen": -6.733994483947754,
"rewards/margins": -1.0469824075698853,
"rewards/rejected": -5.687012672424316,
"step": 380
},
{
"epoch": 0.08,
"learning_rate": 0.0003687258687258687,
"logits/chosen": -2.8171439170837402,
"logits/rejected": -2.7251698970794678,
"logps/chosen": -160.04652404785156,
"logps/rejected": -142.2588348388672,
"loss": 1.0093,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -3.551713466644287,
"rewards/margins": 0.07490300387144089,
"rewards/rejected": -3.6266167163848877,
"step": 390
},
{
"epoch": 0.08,
"learning_rate": 0.0003783783783783784,
"logits/chosen": -2.8222527503967285,
"logits/rejected": -2.8754923343658447,
"logps/chosen": -138.70736694335938,
"logps/rejected": -142.30128479003906,
"loss": 0.945,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -3.170539140701294,
"rewards/margins": 0.011356920003890991,
"rewards/rejected": -3.1818957328796387,
"step": 400
},
{
"epoch": 0.08,
"eval_logits/chosen": -2.9538896083831787,
"eval_logits/rejected": -2.8971762657165527,
"eval_logps/chosen": -152.2897491455078,
"eval_logps/rejected": -150.13941955566406,
"eval_loss": 1.0823436975479126,
"eval_rewards/accuracies": 0.5100105404853821,
"eval_rewards/chosen": -3.548898458480835,
"eval_rewards/margins": -0.032288454473018646,
"eval_rewards/rejected": -3.5166099071502686,
"eval_runtime": 1347.2142,
"eval_samples_per_second": 0.704,
"eval_steps_per_second": 0.704,
"step": 400
},
{
"epoch": 0.08,
"learning_rate": 0.00038803088803088807,
"logits/chosen": -2.8151559829711914,
"logits/rejected": -2.8057456016540527,
"logps/chosen": -145.69772338867188,
"logps/rejected": -156.96774291992188,
"loss": 0.8857,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -3.8467044830322266,
"rewards/margins": 0.3242764472961426,
"rewards/rejected": -4.170981407165527,
"step": 410
},
{
"epoch": 0.08,
"learning_rate": 0.0003976833976833977,
"logits/chosen": -2.7274651527404785,
"logits/rejected": -2.713927745819092,
"logps/chosen": -164.06692504882812,
"logps/rejected": -149.8723907470703,
"loss": 0.9745,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -3.5636115074157715,
"rewards/margins": 0.0345739908516407,
"rewards/rejected": -3.5981857776641846,
"step": 420
},
{
"epoch": 0.08,
"learning_rate": 0.00040733590733590735,
"logits/chosen": -2.8782758712768555,
"logits/rejected": -2.8031697273254395,
"logps/chosen": -165.91473388671875,
"logps/rejected": -147.03627014160156,
"loss": 0.8281,
"rewards/accuracies": 0.625,
"rewards/chosen": -4.2417216300964355,
"rewards/margins": 0.32142549753189087,
"rewards/rejected": -4.563147068023682,
"step": 430
},
{
"epoch": 0.09,
"learning_rate": 0.000416988416988417,
"logits/chosen": -2.623711585998535,
"logits/rejected": -2.622528314590454,
"logps/chosen": -149.8426513671875,
"logps/rejected": -159.93692016601562,
"loss": 0.9961,
"rewards/accuracies": 0.5,
"rewards/chosen": -4.433084487915039,
"rewards/margins": 0.28495556116104126,
"rewards/rejected": -4.7180399894714355,
"step": 440
},
{
"epoch": 0.09,
"learning_rate": 0.00042664092664092664,
"logits/chosen": -2.580371618270874,
"logits/rejected": -2.5741231441497803,
"logps/chosen": -160.84347534179688,
"logps/rejected": -141.23475646972656,
"loss": 1.2914,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -4.076364994049072,
"rewards/margins": -0.23183032870292664,
"rewards/rejected": -3.8445351123809814,
"step": 450
},
{
"epoch": 0.09,
"learning_rate": 0.0004362934362934363,
"logits/chosen": -2.889563798904419,
"logits/rejected": -2.8342082500457764,
"logps/chosen": -186.77017211914062,
"logps/rejected": -168.42330932617188,
"loss": 1.1317,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -4.674792289733887,
"rewards/margins": -0.265504390001297,
"rewards/rejected": -4.409287929534912,
"step": 460
},
{
"epoch": 0.09,
"learning_rate": 0.000445945945945946,
"logits/chosen": -2.3731606006622314,
"logits/rejected": -2.344404697418213,
"logps/chosen": -172.8909454345703,
"logps/rejected": -175.9696502685547,
"loss": 0.9674,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -5.584943771362305,
"rewards/margins": 0.14772634208202362,
"rewards/rejected": -5.732670783996582,
"step": 470
},
{
"epoch": 0.09,
"learning_rate": 0.0004555984555984556,
"logits/chosen": -2.3436319828033447,
"logits/rejected": -2.301845073699951,
"logps/chosen": -173.07313537597656,
"logps/rejected": -169.7339630126953,
"loss": 1.2097,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -5.504385471343994,
"rewards/margins": 0.23244301974773407,
"rewards/rejected": -5.736828804016113,
"step": 480
},
{
"epoch": 0.09,
"learning_rate": 0.00046525096525096526,
"logits/chosen": -2.6778111457824707,
"logits/rejected": -2.5807526111602783,
"logps/chosen": -166.180419921875,
"logps/rejected": -168.2677001953125,
"loss": 1.2799,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -4.63643217086792,
"rewards/margins": 0.5320998430252075,
"rewards/rejected": -5.168532371520996,
"step": 490
},
{
"epoch": 0.1,
"learning_rate": 0.00047490347490347493,
"logits/chosen": -3.083743095397949,
"logits/rejected": -3.080765962600708,
"logps/chosen": -193.96841430664062,
"logps/rejected": -182.3042755126953,
"loss": 1.6246,
"rewards/accuracies": 0.5,
"rewards/chosen": -6.152979850769043,
"rewards/margins": -0.20669928193092346,
"rewards/rejected": -5.946280479431152,
"step": 500
},
{
"epoch": 0.1,
"learning_rate": 0.0004777992277992278,
"logits/chosen": -2.692495584487915,
"logits/rejected": -2.6421055793762207,
"logps/chosen": -172.01806640625,
"logps/rejected": -165.9178466796875,
"loss": 1.2994,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -4.898122787475586,
"rewards/margins": 0.04086846113204956,
"rewards/rejected": -4.938991546630859,
"step": 510
},
{
"epoch": 0.1,
"learning_rate": 0.0004874517374517375,
"logits/chosen": -2.7154297828674316,
"logits/rejected": -2.5865087509155273,
"logps/chosen": -134.430419921875,
"logps/rejected": -144.77151489257812,
"loss": 1.0245,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -3.9665591716766357,
"rewards/margins": 0.17584654688835144,
"rewards/rejected": -4.1424055099487305,
"step": 520
},
{
"epoch": 0.1,
"learning_rate": 0.0004961389961389962,
"logits/chosen": -2.863107442855835,
"logits/rejected": -2.8306002616882324,
"logps/chosen": -269.8907165527344,
"logps/rejected": -273.8556823730469,
"loss": 3.3909,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -13.715133666992188,
"rewards/margins": 0.3205181956291199,
"rewards/rejected": -14.035652160644531,
"step": 530
},
{
"epoch": 0.1,
"learning_rate": 0.0004999979503849796,
"logits/chosen": -3.0216221809387207,
"logits/rejected": -3.014930248260498,
"logps/chosen": -194.19422912597656,
"logps/rejected": -201.08251953125,
"loss": 2.3037,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -7.459778785705566,
"rewards/margins": -0.22785942256450653,
"rewards/rejected": -7.231919288635254,
"step": 540
},
{
"epoch": 0.11,
"learning_rate": 0.0004999854250815602,
"logits/chosen": -2.967331647872925,
"logits/rejected": -2.933845043182373,
"logps/chosen": -215.81240844726562,
"logps/rejected": -204.9075927734375,
"loss": 2.1232,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -7.889649868011475,
"rewards/margins": 0.17769476771354675,
"rewards/rejected": -8.067344665527344,
"step": 550
},
{
"epoch": 0.11,
"learning_rate": 0.00049996151371953,
"logits/chosen": -3.052489757537842,
"logits/rejected": -3.0332350730895996,
"logps/chosen": -181.4810791015625,
"logps/rejected": -172.9596405029297,
"loss": 1.3367,
"rewards/accuracies": 0.5,
"rewards/chosen": -5.431277275085449,
"rewards/margins": 0.03274815157055855,
"rewards/rejected": -5.464025020599365,
"step": 560
},
{
"epoch": 0.11,
"learning_rate": 0.0004999262173879769,
"logits/chosen": -3.040531635284424,
"logits/rejected": -3.037515163421631,
"logps/chosen": -179.90150451660156,
"logps/rejected": -185.0644989013672,
"loss": 1.4866,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -6.061680316925049,
"rewards/margins": 0.6980069279670715,
"rewards/rejected": -6.759686470031738,
"step": 570
},
{
"epoch": 0.11,
"learning_rate": 0.0004998795376945392,
"logits/chosen": -3.021232843399048,
"logits/rejected": -2.9936585426330566,
"logps/chosen": -175.13389587402344,
"logps/rejected": -156.2028350830078,
"loss": 1.7235,
"rewards/accuracies": 0.375,
"rewards/chosen": -5.4631123542785645,
"rewards/margins": -0.5610149502754211,
"rewards/rejected": -4.902098178863525,
"step": 580
},
{
"epoch": 0.11,
"learning_rate": 0.0004998214767653319,
"logits/chosen": -2.944594621658325,
"logits/rejected": -3.0027194023132324,
"logps/chosen": -189.1737823486328,
"logps/rejected": -175.7019500732422,
"loss": 1.9699,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -6.499063014984131,
"rewards/margins": -0.3209795355796814,
"rewards/rejected": -6.178082466125488,
"step": 590
},
{
"epoch": 0.12,
"learning_rate": 0.0004997520372448494,
"logits/chosen": -2.8185458183288574,
"logits/rejected": -2.798320770263672,
"logps/chosen": -262.44500732421875,
"logps/rejected": -245.83889770507812,
"loss": 3.8268,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -12.636558532714844,
"rewards/margins": -0.5035432577133179,
"rewards/rejected": -12.133015632629395,
"step": 600
},
{
"epoch": 0.12,
"eval_logits/chosen": -2.9935925006866455,
"eval_logits/rejected": -2.961137533187866,
"eval_logps/chosen": -184.5051727294922,
"eval_logps/rejected": -181.61184692382812,
"eval_loss": 1.3598365783691406,
"eval_rewards/accuracies": 0.5193312168121338,
"eval_rewards/chosen": -6.594781875610352,
"eval_rewards/margins": 0.12169010192155838,
"eval_rewards/rejected": -6.7164716720581055,
"eval_runtime": 1314.8357,
"eval_samples_per_second": 0.728,
"eval_steps_per_second": 0.728,
"step": 600
},
{
"epoch": 0.12,
"learning_rate": 0.0004996712222958462,
"logits/chosen": -2.9863028526306152,
"logits/rejected": -2.841834306716919,
"logps/chosen": -234.49893188476562,
"logps/rejected": -215.51123046875,
"loss": 3.4529,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -11.542932510375977,
"rewards/margins": -1.085506796836853,
"rewards/rejected": -10.457425117492676,
"step": 610
},
{
"epoch": 0.12,
"learning_rate": 0.0004995790355991916,
"logits/chosen": -2.9887356758117676,
"logits/rejected": -2.887108564376831,
"logps/chosen": -202.21853637695312,
"logps/rejected": -198.64749145507812,
"loss": 1.6552,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -7.725579261779785,
"rewards/margins": 0.2432982176542282,
"rewards/rejected": -7.96887731552124,
"step": 620
},
{
"epoch": 0.12,
"learning_rate": 0.0004994754813537031,
"logits/chosen": -3.116293430328369,
"logits/rejected": -3.126661539077759,
"logps/chosen": -201.38604736328125,
"logps/rejected": -192.84515380859375,
"loss": 1.9923,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -8.627758979797363,
"rewards/margins": -0.4443356990814209,
"rewards/rejected": -8.183423042297363,
"step": 630
},
{
"epoch": 0.12,
"learning_rate": 0.000499372567166064,
"logits/chosen": -3.480961561203003,
"logits/rejected": -3.476128339767456,
"logps/chosen": -226.2942657470703,
"logps/rejected": -198.20462036132812,
"loss": 3.5697,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -10.370187759399414,
"rewards/margins": -1.4912974834442139,
"rewards/rejected": -8.878890037536621,
"step": 640
},
{
"epoch": 0.13,
"learning_rate": 0.0004992474279997049,
"logits/chosen": -3.380039930343628,
"logits/rejected": -3.3540236949920654,
"logps/chosen": -183.65228271484375,
"logps/rejected": -192.43350219726562,
"loss": 2.0561,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -6.808587551116943,
"rewards/margins": 1.2175410985946655,
"rewards/rejected": -8.026129722595215,
"step": 650
},
{
"epoch": 0.13,
"learning_rate": 0.0004991109363882065,
"logits/chosen": -2.5853641033172607,
"logits/rejected": -2.6103484630584717,
"logps/chosen": -464.68310546875,
"logps/rejected": -447.2250061035156,
"loss": 11.4738,
"rewards/accuracies": 0.5625,
"rewards/chosen": -34.34156036376953,
"rewards/margins": -1.95975661277771,
"rewards/rejected": -32.381797790527344,
"step": 660
},
{
"epoch": 0.13,
"learning_rate": 0.0004989630985483375,
"logits/chosen": -2.7174885272979736,
"logits/rejected": -2.707397937774658,
"logps/chosen": -447.7940979003906,
"logps/rejected": -407.99493408203125,
"loss": 12.9625,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -32.60967254638672,
"rewards/margins": -3.206895112991333,
"rewards/rejected": -29.40277671813965,
"step": 670
},
{
"epoch": 0.13,
"learning_rate": 0.0004988203490218075,
"logits/chosen": -2.946742296218872,
"logits/rejected": -2.8993031978607178,
"logps/chosen": -445.21142578125,
"logps/rejected": -432.021240234375,
"loss": 10.7741,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -32.348819732666016,
"rewards/margins": -1.2561819553375244,
"rewards/rejected": -31.092632293701172,
"step": 680
},
{
"epoch": 0.13,
"learning_rate": 0.0004986509723258511,
"logits/chosen": -3.1020426750183105,
"logits/rejected": -3.133068561553955,
"logps/chosen": -427.81756591796875,
"logps/rejected": -416.2047424316406,
"loss": 11.4242,
"rewards/accuracies": 0.4375,
"rewards/chosen": -31.230037689208984,
"rewards/margins": -0.655289351940155,
"rewards/rejected": -30.57474708557129,
"step": 690
},
{
"epoch": 0.14,
"learning_rate": 0.0004984702703514565,
"logits/chosen": -3.0160446166992188,
"logits/rejected": -3.0138049125671387,
"logps/chosen": -433.4644470214844,
"logps/rejected": -405.3623046875,
"loss": 10.8165,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -30.836090087890625,
"rewards/margins": -2.514590263366699,
"rewards/rejected": -28.321496963500977,
"step": 700
},
{
"epoch": 0.14,
"learning_rate": 0.0004982782513290365,
"logits/chosen": -3.1978919506073,
"logits/rejected": -3.197380542755127,
"logps/chosen": -422.511962890625,
"logps/rejected": -402.30938720703125,
"loss": 12.8189,
"rewards/accuracies": 0.4375,
"rewards/chosen": -30.785781860351562,
"rewards/margins": -2.3963069915771484,
"rewards/rejected": -28.389474868774414,
"step": 710
},
{
"epoch": 0.14,
"learning_rate": 0.0004980749240044603,
"logits/chosen": -3.1342532634735107,
"logits/rejected": -3.1338047981262207,
"logps/chosen": -403.13494873046875,
"logps/rejected": -357.790771484375,
"loss": 11.5675,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -28.892202377319336,
"rewards/margins": -3.723827362060547,
"rewards/rejected": -25.168371200561523,
"step": 720
},
{
"epoch": 0.14,
"learning_rate": 0.0004978602976386554,
"logits/chosen": -3.0739312171936035,
"logits/rejected": -3.0738184452056885,
"logps/chosen": -381.2265625,
"logps/rejected": -378.4680480957031,
"loss": 11.5225,
"rewards/accuracies": 0.5,
"rewards/chosen": -27.621994018554688,
"rewards/margins": -0.9019744992256165,
"rewards/rejected": -26.720022201538086,
"step": 730
},
{
"epoch": 0.14,
"learning_rate": 0.0004976343820071849,
"logits/chosen": -3.166983127593994,
"logits/rejected": -3.1671650409698486,
"logps/chosen": -408.42071533203125,
"logps/rejected": -387.2364196777344,
"loss": 13.9818,
"rewards/accuracies": 0.5,
"rewards/chosen": -29.39678382873535,
"rewards/margins": -2.448943614959717,
"rewards/rejected": -26.94784164428711,
"step": 740
},
{
"epoch": 0.14,
"learning_rate": 0.0004973971873998035,
"logits/chosen": -3.0561656951904297,
"logits/rejected": -3.0557007789611816,
"logps/chosen": -417.0025329589844,
"logps/rejected": -349.56463623046875,
"loss": 12.3073,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -29.997058868408203,
"rewards/margins": -5.650521278381348,
"rewards/rejected": -24.346534729003906,
"step": 750
},
{
"epoch": 0.15,
"learning_rate": 0.0004971487246199875,
"logits/chosen": -3.0265376567840576,
"logits/rejected": -3.0265283584594727,
"logps/chosen": -434.55419921875,
"logps/rejected": -381.22808837890625,
"loss": 12.0398,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -30.918407440185547,
"rewards/margins": -4.258307456970215,
"rewards/rejected": -26.66009521484375,
"step": 760
},
{
"epoch": 0.15,
"learning_rate": 0.000496889004984444,
"logits/chosen": -2.8932366371154785,
"logits/rejected": -2.895204544067383,
"logps/chosen": -396.3167419433594,
"logps/rejected": -428.53839111328125,
"loss": 9.4104,
"rewards/accuracies": 0.5625,
"rewards/chosen": -28.4284610748291,
"rewards/margins": 1.9928890466690063,
"rewards/rejected": -30.42134666442871,
"step": 770
},
{
"epoch": 0.15,
"learning_rate": 0.0004966180403225946,
"logits/chosen": -2.895068407058716,
"logits/rejected": -2.894937753677368,
"logps/chosen": -395.245849609375,
"logps/rejected": -378.4429626464844,
"loss": 10.2846,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -28.237285614013672,
"rewards/margins": -1.503316879272461,
"rewards/rejected": -26.733972549438477,
"step": 780
},
{
"epoch": 0.15,
"learning_rate": 0.0004963358429760368,
"logits/chosen": -2.551323652267456,
"logits/rejected": -2.5523290634155273,
"logps/chosen": -477.17327880859375,
"logps/rejected": -427.474853515625,
"loss": 12.1626,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -35.30048370361328,
"rewards/margins": -4.13530158996582,
"rewards/rejected": -31.165185928344727,
"step": 790
},
{
"epoch": 0.15,
"learning_rate": 0.0004960424257979822,
"logits/chosen": -2.7914838790893555,
"logits/rejected": -2.790367841720581,
"logps/chosen": -478.9364318847656,
"logps/rejected": -461.02655029296875,
"loss": 10.3404,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -35.25132369995117,
"rewards/margins": -1.6512939929962158,
"rewards/rejected": -33.60003662109375,
"step": 800
},
{
"epoch": 0.15,
"eval_logits/chosen": -3.0016679763793945,
"eval_logits/rejected": -3.0014870166778564,
"eval_logps/chosen": -441.87054443359375,
"eval_logps/rejected": -399.2597961425781,
"eval_loss": 11.33322811126709,
"eval_rewards/accuracies": 0.4555903971195221,
"eval_rewards/chosen": -32.331321716308594,
"eval_rewards/margins": -3.850048303604126,
"eval_rewards/rejected": -28.481277465820312,
"eval_runtime": 1312.1317,
"eval_samples_per_second": 0.729,
"eval_steps_per_second": 0.729,
"step": 800
},
{
"epoch": 0.16,
"learning_rate": 0.0004957378021526705,
"logits/chosen": -2.9228155612945557,
"logits/rejected": -2.925412654876709,
"logps/chosen": -481.1300354003906,
"logps/rejected": -465.1952209472656,
"loss": 11.3707,
"rewards/accuracies": 0.4375,
"rewards/chosen": -35.0019416809082,
"rewards/margins": -1.9142730236053467,
"rewards/rejected": -33.08766555786133,
"step": 810
},
{
"epoch": 0.16,
"learning_rate": 0.0004954219859147614,
"logits/chosen": -3.0219039916992188,
"logits/rejected": -3.0174221992492676,
"logps/chosen": -364.8722839355469,
"logps/rejected": -295.5256652832031,
"loss": 12.3607,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -25.810903549194336,
"rewards/margins": -5.978564262390137,
"rewards/rejected": -19.832340240478516,
"step": 820
},
{
"epoch": 0.16,
"learning_rate": 0.0004950949914687023,
"logits/chosen": -3.118417263031006,
"logits/rejected": -3.1218113899230957,
"logps/chosen": -469.4906311035156,
"logps/rejected": -412.5103454589844,
"loss": 11.9496,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -33.402565002441406,
"rewards/margins": -4.156603813171387,
"rewards/rejected": -29.245960235595703,
"step": 830
},
{
"epoch": 0.16,
"learning_rate": 0.0004947568337080732,
"logits/chosen": -3.0231635570526123,
"logits/rejected": -3.0243794918060303,
"logps/chosen": -382.8542785644531,
"logps/rejected": -346.9595031738281,
"loss": 9.7701,
"rewards/accuracies": 0.4375,
"rewards/chosen": -27.092443466186523,
"rewards/margins": -3.154633045196533,
"rewards/rejected": -23.93781089782715,
"step": 840
},
{
"epoch": 0.16,
"learning_rate": 0.0004944075280349084,
"logits/chosen": -3.034963846206665,
"logits/rejected": -3.0339653491973877,
"logps/chosen": -385.3253173828125,
"logps/rejected": -367.23638916015625,
"loss": 9.2328,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -27.111583709716797,
"rewards/margins": -1.6716159582138062,
"rewards/rejected": -25.43996810913086,
"step": 850
},
{
"epoch": 0.17,
"learning_rate": 0.0004940470903589948,
"logits/chosen": -3.1586310863494873,
"logits/rejected": -3.128281831741333,
"logps/chosen": -487.345703125,
"logps/rejected": -389.9637756347656,
"loss": 12.7587,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -34.73926544189453,
"rewards/margins": -6.996462821960449,
"rewards/rejected": -27.7428035736084,
"step": 860
},
{
"epoch": 0.17,
"learning_rate": 0.0004936755370971475,
"logits/chosen": -2.9109790325164795,
"logits/rejected": -2.888288974761963,
"logps/chosen": -463.9207458496094,
"logps/rejected": -376.2655334472656,
"loss": 13.8618,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -34.083106994628906,
"rewards/margins": -7.244679927825928,
"rewards/rejected": -26.838430404663086,
"step": 870
},
{
"epoch": 0.17,
"learning_rate": 0.0004932928851724621,
"logits/chosen": -2.8432798385620117,
"logits/rejected": -2.8495278358459473,
"logps/chosen": -365.90679931640625,
"logps/rejected": -361.8638000488281,
"loss": 8.7379,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -26.055978775024414,
"rewards/margins": -0.3134794235229492,
"rewards/rejected": -25.742502212524414,
"step": 880
},
{
"epoch": 0.17,
"learning_rate": 0.0004928991520135436,
"logits/chosen": -2.6536898612976074,
"logits/rejected": -2.6361289024353027,
"logps/chosen": -519.7141723632812,
"logps/rejected": -389.46575927734375,
"loss": 15.3084,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -37.92496871948242,
"rewards/margins": -10.107343673706055,
"rewards/rejected": -27.817623138427734,
"step": 890
},
{
"epoch": 0.17,
"learning_rate": 0.0004924943555537128,
"logits/chosen": -3.1115312576293945,
"logits/rejected": -3.0791449546813965,
"logps/chosen": -469.66351318359375,
"logps/rejected": -401.14508056640625,
"loss": 13.3952,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -33.486183166503906,
"rewards/margins": -5.449090480804443,
"rewards/rejected": -28.037090301513672,
"step": 900
},
{
"epoch": 0.18,
"learning_rate": 0.0004920785142301893,
"logits/chosen": -2.454453945159912,
"logits/rejected": -2.4585988521575928,
"logps/chosen": -463.24517822265625,
"logps/rejected": -402.23773193359375,
"loss": 13.7904,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -34.2692985534668,
"rewards/margins": -5.860762596130371,
"rewards/rejected": -28.40853500366211,
"step": 910
},
{
"epoch": 0.18,
"learning_rate": 0.0004916516469832524,
"logits/chosen": -2.8716561794281006,
"logits/rejected": -2.8634109497070312,
"logps/chosen": -347.35198974609375,
"logps/rejected": -340.46942138671875,
"loss": 7.8634,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -24.902542114257812,
"rewards/margins": -1.0167404413223267,
"rewards/rejected": -23.885799407958984,
"step": 920
},
{
"epoch": 0.18,
"learning_rate": 0.0004912137732553772,
"logits/chosen": -3.2272415161132812,
"logits/rejected": -3.2235121726989746,
"logps/chosen": -450.973876953125,
"logps/rejected": -462.0083923339844,
"loss": 9.2953,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -32.52794647216797,
"rewards/margins": 0.38686689734458923,
"rewards/rejected": -32.91481399536133,
"step": 930
},
{
"epoch": 0.18,
"learning_rate": 0.0004907649129903504,
"logits/chosen": -2.647204875946045,
"logits/rejected": -2.6482150554656982,
"logps/chosen": -373.737060546875,
"logps/rejected": -383.5912780761719,
"loss": 7.0717,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -26.5350284576416,
"rewards/margins": 0.7499195337295532,
"rewards/rejected": -27.284948348999023,
"step": 940
},
{
"epoch": 0.18,
"learning_rate": 0.0004903050866323608,
"logits/chosen": -3.079465627670288,
"logits/rejected": -3.0794615745544434,
"logps/chosen": -395.6184997558594,
"logps/rejected": -406.3051452636719,
"loss": 9.1411,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -29.57635498046875,
"rewards/margins": 0.8788874745368958,
"rewards/rejected": -30.45524024963379,
"step": 950
},
{
"epoch": 0.19,
"learning_rate": 0.000489834315125069,
"logits/chosen": -3.1981568336486816,
"logits/rejected": -3.1923341751098633,
"logps/chosen": -453.5596618652344,
"logps/rejected": -425.4774475097656,
"loss": 11.2943,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -32.61115646362305,
"rewards/margins": -2.69136118888855,
"rewards/rejected": -29.9197998046875,
"step": 960
},
{
"epoch": 0.19,
"learning_rate": 0.0004893526199106531,
"logits/chosen": -2.876206874847412,
"logits/rejected": -2.881593942642212,
"logps/chosen": -433.71636962890625,
"logps/rejected": -391.15692138671875,
"loss": 10.9992,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -31.739410400390625,
"rewards/margins": -3.6809983253479004,
"rewards/rejected": -28.058406829833984,
"step": 970
},
{
"epoch": 0.19,
"learning_rate": 0.0004888600229288316,
"logits/chosen": -2.865589141845703,
"logits/rejected": -2.8664183616638184,
"logps/chosen": -359.43023681640625,
"logps/rejected": -321.70599365234375,
"loss": 7.9915,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -25.906265258789062,
"rewards/margins": -3.289003849029541,
"rewards/rejected": -22.617259979248047,
"step": 980
},
{
"epoch": 0.19,
"learning_rate": 0.0004883565466158652,
"logits/chosen": -2.8116517066955566,
"logits/rejected": -2.782489776611328,
"logps/chosen": -494.1553649902344,
"logps/rejected": -430.222412109375,
"loss": 13.6529,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -36.09266662597656,
"rewards/margins": -5.284867286682129,
"rewards/rejected": -30.807796478271484,
"step": 990
},
{
"epoch": 0.19,
"learning_rate": 0.0004878422139035341,
"logits/chosen": -2.4114489555358887,
"logits/rejected": -2.377622604370117,
"logps/chosen": -482.96856689453125,
"logps/rejected": -437.3125,
"loss": 11.137,
"rewards/accuracies": 0.5,
"rewards/chosen": -34.935813903808594,
"rewards/margins": -3.0001707077026367,
"rewards/rejected": -31.93564224243164,
"step": 1000
},
{
"epoch": 0.19,
"eval_logits/chosen": -2.2959094047546387,
"eval_logits/rejected": -2.2838947772979736,
"eval_logps/chosen": -438.8805236816406,
"eval_logps/rejected": -399.5718688964844,
"eval_loss": 10.402800559997559,
"eval_rewards/accuracies": 0.45036572217941284,
"eval_rewards/chosen": -32.032310485839844,
"eval_rewards/margins": -3.519833564758301,
"eval_rewards/rejected": -28.51247787475586,
"eval_runtime": 1313.6418,
"eval_samples_per_second": 0.729,
"eval_steps_per_second": 0.729,
"step": 1000
},
{
"epoch": 0.18,
"learning_rate": 0.0004917027842051741,
"logits/chosen": -2.3743691444396973,
"logits/rejected": -2.3645715713500977,
"logps/chosen": -467.918701171875,
"logps/rejected": -392.2208557128906,
"loss": 13.2044,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -34.59047317504883,
"rewards/margins": -6.761924743652344,
"rewards/rejected": -27.82854652404785,
"step": 1010
},
{
"epoch": 0.18,
"learning_rate": 0.0004913043488808868,
"logits/chosen": -2.2389774322509766,
"logits/rejected": -2.229212760925293,
"logps/chosen": -486.40362548828125,
"logps/rejected": -460.124267578125,
"loss": 12.247,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -36.01793670654297,
"rewards/margins": -2.4814937114715576,
"rewards/rejected": -33.53643798828125,
"step": 1020
},
{
"epoch": 0.18,
"learning_rate": 0.0004909379125837757,
"logits/chosen": -2.3661270141601562,
"logits/rejected": -2.35339093208313,
"logps/chosen": -450.0957946777344,
"logps/rejected": -419.64453125,
"loss": 9.5754,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -32.51788330078125,
"rewards/margins": -2.8007171154022217,
"rewards/rejected": -29.717166900634766,
"step": 1030
},
{
"epoch": 0.18,
"learning_rate": 0.000490689498823928,
"logits/chosen": -2.502885341644287,
"logits/rejected": -2.475445032119751,
"logps/chosen": -544.1683349609375,
"logps/rejected": -447.0438537597656,
"loss": 14.0802,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -40.440181732177734,
"rewards/margins": -7.666708946228027,
"rewards/rejected": -32.773475646972656,
"step": 1040
},
{
"epoch": 0.19,
"learning_rate": 0.0004903107023416835,
"logits/chosen": -2.5913939476013184,
"logits/rejected": -2.5680718421936035,
"logps/chosen": -486.11907958984375,
"logps/rejected": -368.26251220703125,
"loss": 13.0584,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -35.45328140258789,
"rewards/margins": -9.003652572631836,
"rewards/rejected": -26.449630737304688,
"step": 1050
},
{
"epoch": 0.19,
"learning_rate": 0.0004898811381307269,
"logits/chosen": -2.5621819496154785,
"logits/rejected": -2.5630691051483154,
"logps/chosen": -415.2149353027344,
"logps/rejected": -372.393798828125,
"loss": 10.9069,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -30.74907875061035,
"rewards/margins": -3.5560336112976074,
"rewards/rejected": -27.193042755126953,
"step": 1060
},
{
"epoch": 0.19,
"learning_rate": 0.0004894424536834149,
"logits/chosen": -2.6120645999908447,
"logits/rejected": -2.604825019836426,
"logps/chosen": -513.4226684570312,
"logps/rejected": -459.19647216796875,
"loss": 11.3672,
"rewards/accuracies": 0.4375,
"rewards/chosen": -39.00333023071289,
"rewards/margins": -4.482884883880615,
"rewards/rejected": -34.520442962646484,
"step": 1070
},
{
"epoch": 0.19,
"learning_rate": 0.000488994665678449,
"logits/chosen": -2.880282163619995,
"logits/rejected": -2.8795719146728516,
"logps/chosen": -424.77874755859375,
"logps/rejected": -367.4000244140625,
"loss": 11.0388,
"rewards/accuracies": 0.4375,
"rewards/chosen": -31.353382110595703,
"rewards/margins": -5.197685718536377,
"rewards/rejected": -26.15569496154785,
"step": 1080
},
{
"epoch": 0.19,
"learning_rate": 0.0004885377911406459,
"logits/chosen": -2.947252035140991,
"logits/rejected": -2.940441370010376,
"logps/chosen": -442.61651611328125,
"logps/rejected": -374.4497985839844,
"loss": 11.9975,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -31.82815170288086,
"rewards/margins": -5.211056709289551,
"rewards/rejected": -26.617095947265625,
"step": 1090
},
{
"epoch": 0.19,
"learning_rate": 0.00048807184744029076,
"logits/chosen": -2.9204514026641846,
"logits/rejected": -2.922818660736084,
"logps/chosen": -416.2978515625,
"logps/rejected": -395.44378662109375,
"loss": 9.0951,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -30.16245460510254,
"rewards/margins": -1.954272985458374,
"rewards/rejected": -28.208179473876953,
"step": 1100
},
{
"epoch": 0.2,
"learning_rate": 0.00048759685229247675,
"logits/chosen": -2.950378179550171,
"logits/rejected": -2.9522385597229004,
"logps/chosen": -456.4190368652344,
"logps/rejected": -417.6458435058594,
"loss": 10.6616,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -33.50844192504883,
"rewards/margins": -3.696442127227783,
"rewards/rejected": -29.811996459960938,
"step": 1110
},
{
"epoch": 0.2,
"learning_rate": 0.0004872103512563103,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 24.7455,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1120
},
{
"epoch": 0.2,
"learning_rate": 0.00048716163259071837,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 41.8228,
"rewards/accuracies": 0.375,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1130
},
{
"epoch": 0.2,
"learning_rate": 0.000487112823756431,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 110.7265,
"rewards/accuracies": 0.25,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1140
},
{
"epoch": 0.2,
"learning_rate": 0.000487112823756431,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 245.3322,
"rewards/accuracies": 0.2750000059604645,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1150
},
{
"epoch": 0.2,
"learning_rate": 0.000487112823756431,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 1041.9771,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1160
},
{
"epoch": 0.21,
"learning_rate": 0.0004870639247720053,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 437.3346,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1170
},
{
"epoch": 0.21,
"learning_rate": 0.0004870639247720053,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 123.7268,
"rewards/accuracies": 0.22499999403953552,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1180
},
{
"epoch": 0.21,
"learning_rate": 0.0004870639247720053,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 315.4085,
"rewards/accuracies": 0.23749999701976776,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1190
},
{
"epoch": 0.21,
"learning_rate": 0.0004870639247720053,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 52542.0625,
"rewards/accuracies": 0.25,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1200
},
{
"epoch": 0.21,
"eval_logits/chosen": NaN,
"eval_logits/rejected": NaN,
"eval_logps/chosen": NaN,
"eval_logps/rejected": NaN,
"eval_loss": NaN,
"eval_rewards/accuracies": 0.2244604378938675,
"eval_rewards/chosen": NaN,
"eval_rewards/margins": NaN,
"eval_rewards/rejected": NaN,
"eval_runtime": 988.4543,
"eval_samples_per_second": 0.703,
"eval_steps_per_second": 0.703,
"step": 1200
},
{
"epoch": 0.21,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 197.8537,
"rewards/accuracies": 0.20000000298023224,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1210
},
{
"epoch": 0.22,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 4813.8465,
"rewards/accuracies": 0.13750000298023224,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1220
},
{
"epoch": 0.22,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 2048.3082,
"rewards/accuracies": 0.25,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1230
},
{
"epoch": 0.22,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 143.0693,
"rewards/accuracies": 0.16249999403953552,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1240
},
{
"epoch": 0.22,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 16874.5531,
"rewards/accuracies": 0.21250000596046448,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1250
},
{
"epoch": 0.22,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 2509.9437,
"rewards/accuracies": 0.17499999701976776,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1260
},
{
"epoch": 0.22,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 9635.2938,
"rewards/accuracies": 0.1875,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1270
},
{
"epoch": 0.23,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 3857.9113,
"rewards/accuracies": 0.13750000298023224,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1280
},
{
"epoch": 0.23,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 4572.7609,
"rewards/accuracies": 0.20000000298023224,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1290
},
{
"epoch": 0.23,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 606.0347,
"rewards/accuracies": 0.15000000596046448,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1300
},
{
"epoch": 0.23,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 1759.0598,
"rewards/accuracies": 0.17499999701976776,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1310
},
{
"epoch": 0.23,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 14436.2953,
"rewards/accuracies": 0.21250000596046448,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1320
},
{
"epoch": 0.23,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 386264.375,
"rewards/accuracies": 0.0625,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1330
},
{
"epoch": 0.24,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 8430.657,
"rewards/accuracies": 0.1875,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1340
},
{
"epoch": 0.24,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 6939.275,
"rewards/accuracies": 0.13750000298023224,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1350
},
{
"epoch": 0.24,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 4295.0949,
"rewards/accuracies": 0.20000000298023224,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1360
},
{
"epoch": 0.24,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 17283.3672,
"rewards/accuracies": 0.20000000298023224,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1370
},
{
"epoch": 0.24,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 24895.7469,
"rewards/accuracies": 0.20000000298023224,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1380
},
{
"epoch": 0.25,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 45136.4094,
"rewards/accuracies": 0.17499999701976776,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1390
},
{
"epoch": 0.25,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 165.0189,
"rewards/accuracies": 0.20000000298023224,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1400
},
{
"epoch": 0.25,
"eval_logits/chosen": NaN,
"eval_logits/rejected": NaN,
"eval_logps/chosen": NaN,
"eval_logps/rejected": NaN,
"eval_loss": NaN,
"eval_rewards/accuracies": 0.17553956806659698,
"eval_rewards/chosen": NaN,
"eval_rewards/margins": NaN,
"eval_rewards/rejected": NaN,
"eval_runtime": 987.1061,
"eval_samples_per_second": 0.704,
"eval_steps_per_second": 0.704,
"step": 1400
},
{
"epoch": 0.25,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 8688.7203,
"rewards/accuracies": 0.22499999403953552,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1410
},
{
"epoch": 0.25,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 9484.9836,
"rewards/accuracies": 0.1875,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1420
},
{
"epoch": 0.25,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 9308.6922,
"rewards/accuracies": 0.17499999701976776,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1430
},
{
"epoch": 0.25,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 2905.9373,
"rewards/accuracies": 0.22499999403953552,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1440
},
{
"epoch": 0.26,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 4738.7867,
"rewards/accuracies": 0.20000000298023224,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1450
},
{
"epoch": 0.26,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 424.0728,
"rewards/accuracies": 0.20000000298023224,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1460
},
{
"epoch": 0.26,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 2181.5133,
"rewards/accuracies": 0.13750000298023224,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1470
},
{
"epoch": 0.26,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 23597.5219,
"rewards/accuracies": 0.16249999403953552,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1480
},
{
"epoch": 0.26,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 1259.9596,
"rewards/accuracies": 0.25,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1490
},
{
"epoch": 0.26,
"learning_rate": 0.0004870149356560326,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 7475.4719,
"rewards/accuracies": 0.21250000596046448,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 1500
}
],
"logging_steps": 10,
"max_steps": 5662,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}