{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 5811, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 8.591065292096219e-10, "logits/chosen": -2.810119152069092, "logits/rejected": -2.8539578914642334, "logps/chosen": -108.88716125488281, "logps/rejected": -104.7931137084961, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 8.59106529209622e-09, "logits/chosen": -3.0777981281280518, "logits/rejected": -3.0556678771972656, "logps/chosen": -324.0378112792969, "logps/rejected": -248.84950256347656, "loss": 0.6931, "rewards/accuracies": 0.4444444477558136, "rewards/chosen": 0.004365404602140188, "rewards/margins": 0.002458281349390745, "rewards/rejected": 0.0019071230199187994, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.718213058419244e-08, "logits/chosen": -3.031554698944092, "logits/rejected": -2.9927072525024414, "logps/chosen": -246.7428741455078, "logps/rejected": -176.8910675048828, "loss": 0.6913, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.003744876477867365, "rewards/margins": -0.005839090794324875, "rewards/rejected": 0.0020942138507962227, "step": 20 }, { "epoch": 0.02, "learning_rate": 2.5773195876288656e-08, "logits/chosen": -3.0333802700042725, "logits/rejected": -3.027919292449951, "logps/chosen": -308.7424621582031, "logps/rejected": -265.59039306640625, "loss": 0.6904, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": 0.005396988708525896, "rewards/margins": 0.00396696338430047, "rewards/rejected": 0.001430025091394782, "step": 30 }, { "epoch": 0.02, "learning_rate": 3.436426116838488e-08, "logits/chosen": -2.9431519508361816, "logits/rejected": -2.97038197517395, "logps/chosen": -315.57135009765625, "logps/rejected": -228.1820068359375, "loss": 0.682, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.0027233201544731855, "rewards/margins": 0.021390482783317566, "rewards/rejected": -0.018667161464691162, "step": 40 }, { "epoch": 0.03, "learning_rate": 4.29553264604811e-08, "logits/chosen": -3.1190497875213623, "logits/rejected": -3.098569393157959, "logps/chosen": -262.5951232910156, "logps/rejected": -206.78384399414062, "loss": 0.6735, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.01897308975458145, "rewards/margins": 0.03295915946364403, "rewards/rejected": -0.013986068777740002, "step": 50 }, { "epoch": 0.03, "learning_rate": 5.154639175257731e-08, "logits/chosen": -3.0057833194732666, "logits/rejected": -2.970109224319458, "logps/chosen": -257.2474365234375, "logps/rejected": -242.32852172851562, "loss": 0.665, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.03294278308749199, "rewards/margins": 0.05787893012166023, "rewards/rejected": -0.024936143308877945, "step": 60 }, { "epoch": 0.04, "learning_rate": 6.013745704467354e-08, "logits/chosen": -3.073664426803589, "logits/rejected": -3.024251699447632, "logps/chosen": -308.711669921875, "logps/rejected": -248.31295776367188, "loss": 0.6557, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.04562750831246376, "rewards/margins": 0.09679891914129257, "rewards/rejected": -0.051171403378248215, "step": 70 }, { "epoch": 0.04, "learning_rate": 6.872852233676976e-08, "logits/chosen": -3.061481475830078, "logits/rejected": -3.0335938930511475, "logps/chosen": -289.26007080078125, "logps/rejected": -253.003173828125, "loss": 0.6305, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.061396338045597076, "rewards/margins": 0.12416829913854599, "rewards/rejected": -0.06277195364236832, "step": 80 }, { "epoch": 0.05, "learning_rate": 7.731958762886598e-08, "logits/chosen": -3.0796360969543457, "logits/rejected": -3.086453914642334, "logps/chosen": -309.4039611816406, "logps/rejected": -254.5652618408203, "loss": 0.6248, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.048405859619379044, "rewards/margins": 0.1540774703025818, "rewards/rejected": -0.10567160695791245, "step": 90 }, { "epoch": 0.05, "learning_rate": 8.59106529209622e-08, "logits/chosen": -2.9399971961975098, "logits/rejected": -2.9413506984710693, "logps/chosen": -268.4250793457031, "logps/rejected": -196.02508544921875, "loss": 0.6144, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.03557150438427925, "rewards/margins": 0.18206295371055603, "rewards/rejected": -0.14649145305156708, "step": 100 }, { "epoch": 0.05, "eval_logits/chosen": -3.018644332885742, "eval_logits/rejected": -3.0044751167297363, "eval_logps/chosen": -270.5843200683594, "eval_logps/rejected": -230.69760131835938, "eval_loss": 0.5937883257865906, "eval_rewards/accuracies": 0.722000002861023, "eval_rewards/chosen": 0.05668351799249649, "eval_rewards/margins": 0.2780429720878601, "eval_rewards/rejected": -0.22135944664478302, "eval_runtime": 299.8772, "eval_samples_per_second": 6.669, "eval_steps_per_second": 0.417, "step": 100 }, { "epoch": 0.06, "learning_rate": 9.450171821305841e-08, "logits/chosen": -3.010746717453003, "logits/rejected": -2.995668888092041, "logps/chosen": -268.6666259765625, "logps/rejected": -245.5867156982422, "loss": 0.6057, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.011533960700035095, "rewards/margins": 0.28680604696273804, "rewards/rejected": -0.2752721309661865, "step": 110 }, { "epoch": 0.06, "learning_rate": 1.0309278350515462e-07, "logits/chosen": -3.0465500354766846, "logits/rejected": -3.0174379348754883, "logps/chosen": -226.70651245117188, "logps/rejected": -212.8759002685547, "loss": 0.5731, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.026695668697357178, "rewards/margins": 0.24723270535469055, "rewards/rejected": -0.2739284038543701, "step": 120 }, { "epoch": 0.07, "learning_rate": 1.1168384879725086e-07, "logits/chosen": -3.089245319366455, "logits/rejected": -3.1066346168518066, "logps/chosen": -315.1956481933594, "logps/rejected": -240.29312133789062, "loss": 0.5804, "rewards/accuracies": 0.6875, "rewards/chosen": 0.13610997796058655, "rewards/margins": 0.4461982846260071, "rewards/rejected": -0.31008821725845337, "step": 130 }, { "epoch": 0.07, "learning_rate": 1.202749140893471e-07, "logits/chosen": -3.0325512886047363, "logits/rejected": -2.9741909503936768, "logps/chosen": -294.88067626953125, "logps/rejected": -272.0750427246094, "loss": 0.5569, "rewards/accuracies": 0.75, "rewards/chosen": 0.009326432831585407, "rewards/margins": 0.5915461182594299, "rewards/rejected": -0.5822197198867798, "step": 140 }, { "epoch": 0.08, "learning_rate": 1.2886597938144328e-07, "logits/chosen": -3.084364414215088, "logits/rejected": -3.02875018119812, "logps/chosen": -287.40118408203125, "logps/rejected": -246.5475616455078, "loss": 0.5471, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.1425856351852417, "rewards/margins": 0.7196646928787231, "rewards/rejected": -0.5770790576934814, "step": 150 }, { "epoch": 0.08, "learning_rate": 1.3745704467353952e-07, "logits/chosen": -2.9751551151275635, "logits/rejected": -2.9700520038604736, "logps/chosen": -297.21075439453125, "logps/rejected": -244.6047821044922, "loss": 0.5369, "rewards/accuracies": 0.6875, "rewards/chosen": 0.10728351771831512, "rewards/margins": 0.6044571399688721, "rewards/rejected": -0.49717360734939575, "step": 160 }, { "epoch": 0.09, "learning_rate": 1.4604810996563573e-07, "logits/chosen": -3.058936595916748, "logits/rejected": -3.021108627319336, "logps/chosen": -282.1290283203125, "logps/rejected": -237.5272979736328, "loss": 0.4839, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.15872260928153992, "rewards/margins": 0.9850108027458191, "rewards/rejected": -0.826288104057312, "step": 170 }, { "epoch": 0.09, "learning_rate": 1.5463917525773197e-07, "logits/chosen": -3.039640426635742, "logits/rejected": -2.9874491691589355, "logps/chosen": -234.4727325439453, "logps/rejected": -210.0829620361328, "loss": 0.508, "rewards/accuracies": 0.75, "rewards/chosen": 0.05743076652288437, "rewards/margins": 0.7193694114685059, "rewards/rejected": -0.6619385480880737, "step": 180 }, { "epoch": 0.1, "learning_rate": 1.6323024054982818e-07, "logits/chosen": -2.985429525375366, "logits/rejected": -2.9715933799743652, "logps/chosen": -263.69964599609375, "logps/rejected": -266.5009765625, "loss": 0.5669, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.04090666025876999, "rewards/margins": 0.6870480179786682, "rewards/rejected": -0.6461412906646729, "step": 190 }, { "epoch": 0.1, "learning_rate": 1.718213058419244e-07, "logits/chosen": -2.942009449005127, "logits/rejected": -2.9293673038482666, "logps/chosen": -269.390625, "logps/rejected": -192.42050170898438, "loss": 0.4957, "rewards/accuracies": 0.8125, "rewards/chosen": 0.21698591113090515, "rewards/margins": 0.8756136894226074, "rewards/rejected": -0.6586278080940247, "step": 200 }, { "epoch": 0.1, "eval_logits/chosen": -2.9714457988739014, "eval_logits/rejected": -2.955587148666382, "eval_logps/chosen": -270.5447998046875, "eval_logps/rejected": -235.9661102294922, "eval_loss": 0.5132176280021667, "eval_rewards/accuracies": 0.7459999918937683, "eval_rewards/chosen": 0.060630541294813156, "eval_rewards/margins": 0.8088454008102417, "eval_rewards/rejected": -0.7482149004936218, "eval_runtime": 301.0289, "eval_samples_per_second": 6.644, "eval_steps_per_second": 0.415, "step": 200 }, { "epoch": 0.11, "learning_rate": 1.804123711340206e-07, "logits/chosen": -2.9341390132904053, "logits/rejected": -2.880955696105957, "logps/chosen": -255.9530029296875, "logps/rejected": -239.8656768798828, "loss": 0.5635, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.19516435265541077, "rewards/margins": 0.5975160598754883, "rewards/rejected": -0.7926804423332214, "step": 210 }, { "epoch": 0.11, "learning_rate": 1.8900343642611682e-07, "logits/chosen": -3.0241286754608154, "logits/rejected": -2.9803478717803955, "logps/chosen": -285.34161376953125, "logps/rejected": -238.00674438476562, "loss": 0.5215, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.06667652726173401, "rewards/margins": 0.7439771890640259, "rewards/rejected": -0.8106536865234375, "step": 220 }, { "epoch": 0.12, "learning_rate": 1.9759450171821303e-07, "logits/chosen": -3.0200541019439697, "logits/rejected": -2.9749975204467773, "logps/chosen": -290.6888427734375, "logps/rejected": -253.60012817382812, "loss": 0.5113, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.03626754879951477, "rewards/margins": 0.693548321723938, "rewards/rejected": -0.7298158407211304, "step": 230 }, { "epoch": 0.12, "learning_rate": 2.0618556701030925e-07, "logits/chosen": -3.031228542327881, "logits/rejected": -2.997079610824585, "logps/chosen": -332.36529541015625, "logps/rejected": -227.00833129882812, "loss": 0.5376, "rewards/accuracies": 0.75, "rewards/chosen": -0.07132077217102051, "rewards/margins": 0.716058075428009, "rewards/rejected": -0.7873787879943848, "step": 240 }, { "epoch": 0.13, "learning_rate": 2.1477663230240549e-07, "logits/chosen": -3.1058244705200195, "logits/rejected": -3.0761656761169434, "logps/chosen": -272.7492980957031, "logps/rejected": -253.9503173828125, "loss": 0.5161, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.07308956235647202, "rewards/margins": 0.9320653676986694, "rewards/rejected": -0.8589757680892944, "step": 250 }, { "epoch": 0.13, "learning_rate": 2.2336769759450173e-07, "logits/chosen": -3.094557285308838, "logits/rejected": -3.033235549926758, "logps/chosen": -298.0885925292969, "logps/rejected": -246.3491973876953, "loss": 0.524, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.17855298519134521, "rewards/margins": 0.9676022529602051, "rewards/rejected": -0.7890492081642151, "step": 260 }, { "epoch": 0.14, "learning_rate": 2.3195876288659794e-07, "logits/chosen": -3.0374608039855957, "logits/rejected": -3.03932523727417, "logps/chosen": -274.2452697753906, "logps/rejected": -235.6359100341797, "loss": 0.4844, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.02701665833592415, "rewards/margins": 0.7995314002037048, "rewards/rejected": -0.772514820098877, "step": 270 }, { "epoch": 0.14, "learning_rate": 2.405498281786942e-07, "logits/chosen": -2.99436354637146, "logits/rejected": -2.9718544483184814, "logps/chosen": -309.51019287109375, "logps/rejected": -250.88412475585938, "loss": 0.5128, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.08803629130125046, "rewards/margins": 1.0304720401763916, "rewards/rejected": -0.9424357414245605, "step": 280 }, { "epoch": 0.15, "learning_rate": 2.4914089347079036e-07, "logits/chosen": -2.9639785289764404, "logits/rejected": -2.948111057281494, "logps/chosen": -290.60498046875, "logps/rejected": -282.6380615234375, "loss": 0.5165, "rewards/accuracies": 0.6875, "rewards/chosen": 0.08333762735128403, "rewards/margins": 0.8697144389152527, "rewards/rejected": -0.7863768339157104, "step": 290 }, { "epoch": 0.15, "learning_rate": 2.5773195876288655e-07, "logits/chosen": -3.0124526023864746, "logits/rejected": -3.0208213329315186, "logps/chosen": -260.4809875488281, "logps/rejected": -248.8876953125, "loss": 0.5257, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.06663007289171219, "rewards/margins": 1.012616515159607, "rewards/rejected": -1.0792466402053833, "step": 300 }, { "epoch": 0.15, "eval_logits/chosen": -2.998927116394043, "eval_logits/rejected": -2.98532772064209, "eval_logps/chosen": -271.51171875, "eval_logps/rejected": -238.74551391601562, "eval_loss": 0.4975211024284363, "eval_rewards/accuracies": 0.7519999742507935, "eval_rewards/chosen": -0.03605831041932106, "eval_rewards/margins": 0.9900941252708435, "eval_rewards/rejected": -1.026152491569519, "eval_runtime": 299.8592, "eval_samples_per_second": 6.67, "eval_steps_per_second": 0.417, "step": 300 }, { "epoch": 0.16, "learning_rate": 2.663230240549828e-07, "logits/chosen": -3.0704574584960938, "logits/rejected": -3.070812702178955, "logps/chosen": -263.6881408691406, "logps/rejected": -226.4467010498047, "loss": 0.5101, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.10588334500789642, "rewards/margins": 0.8894448280334473, "rewards/rejected": -0.9953282475471497, "step": 310 }, { "epoch": 0.17, "learning_rate": 2.7491408934707903e-07, "logits/chosen": -3.0514838695526123, "logits/rejected": -3.0334386825561523, "logps/chosen": -296.7230224609375, "logps/rejected": -235.6595916748047, "loss": 0.4537, "rewards/accuracies": 0.8125, "rewards/chosen": -0.14300963282585144, "rewards/margins": 1.31166672706604, "rewards/rejected": -1.4546763896942139, "step": 320 }, { "epoch": 0.17, "learning_rate": 2.835051546391752e-07, "logits/chosen": -3.0409178733825684, "logits/rejected": -3.0209853649139404, "logps/chosen": -302.88775634765625, "logps/rejected": -250.03475952148438, "loss": 0.4813, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.18107786774635315, "rewards/margins": 1.172987699508667, "rewards/rejected": -1.3540656566619873, "step": 330 }, { "epoch": 0.18, "learning_rate": 2.9209621993127146e-07, "logits/chosen": -3.000317335128784, "logits/rejected": -2.9957940578460693, "logps/chosen": -285.11346435546875, "logps/rejected": -246.8708953857422, "loss": 0.5306, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17257550358772278, "rewards/margins": 0.7849529981613159, "rewards/rejected": -0.9575284719467163, "step": 340 }, { "epoch": 0.18, "learning_rate": 3.006872852233677e-07, "logits/chosen": -3.086730480194092, "logits/rejected": -3.0460643768310547, "logps/chosen": -232.62631225585938, "logps/rejected": -228.4468231201172, "loss": 0.4653, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13442695140838623, "rewards/margins": 1.164433240890503, "rewards/rejected": -1.2988600730895996, "step": 350 }, { "epoch": 0.19, "learning_rate": 3.0927835051546394e-07, "logits/chosen": -3.075042247772217, "logits/rejected": -3.080418109893799, "logps/chosen": -265.05126953125, "logps/rejected": -218.3927001953125, "loss": 0.4768, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.11520049721002579, "rewards/margins": 1.164591908454895, "rewards/rejected": -1.0493913888931274, "step": 360 }, { "epoch": 0.19, "learning_rate": 3.178694158075601e-07, "logits/chosen": -3.0801005363464355, "logits/rejected": -3.0352022647857666, "logps/chosen": -252.08358764648438, "logps/rejected": -205.0348663330078, "loss": 0.4763, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.09602154791355133, "rewards/margins": 1.2616920471191406, "rewards/rejected": -1.16567063331604, "step": 370 }, { "epoch": 0.2, "learning_rate": 3.2646048109965636e-07, "logits/chosen": -3.02718448638916, "logits/rejected": -3.018253803253174, "logps/chosen": -240.01785278320312, "logps/rejected": -218.6881103515625, "loss": 0.5578, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08774475753307343, "rewards/margins": 0.9482590556144714, "rewards/rejected": -1.036003828048706, "step": 380 }, { "epoch": 0.2, "learning_rate": 3.3505154639175255e-07, "logits/chosen": -3.1347148418426514, "logits/rejected": -3.1124050617218018, "logps/chosen": -255.76754760742188, "logps/rejected": -214.763427734375, "loss": 0.4347, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12095893919467926, "rewards/margins": 1.100426435470581, "rewards/rejected": -1.2213853597640991, "step": 390 }, { "epoch": 0.21, "learning_rate": 3.436426116838488e-07, "logits/chosen": -3.159269332885742, "logits/rejected": -3.1348633766174316, "logps/chosen": -253.93325805664062, "logps/rejected": -193.1920928955078, "loss": 0.556, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3844020664691925, "rewards/margins": 0.7617012858390808, "rewards/rejected": -1.1461035013198853, "step": 400 }, { "epoch": 0.21, "eval_logits/chosen": -3.0931246280670166, "eval_logits/rejected": -3.084690809249878, "eval_logps/chosen": -272.16705322265625, "eval_logps/rejected": -240.4776153564453, "eval_loss": 0.4935062527656555, "eval_rewards/accuracies": 0.7760000228881836, "eval_rewards/chosen": -0.10159354656934738, "eval_rewards/margins": 1.0977704524993896, "eval_rewards/rejected": -1.1993640661239624, "eval_runtime": 295.6794, "eval_samples_per_second": 6.764, "eval_steps_per_second": 0.423, "step": 400 }, { "epoch": 0.21, "learning_rate": 3.5223367697594503e-07, "logits/chosen": -3.0230696201324463, "logits/rejected": -2.964069128036499, "logps/chosen": -306.8080749511719, "logps/rejected": -221.2211151123047, "loss": 0.4107, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.09494096785783768, "rewards/margins": 1.1914355754852295, "rewards/rejected": -1.2863763570785522, "step": 410 }, { "epoch": 0.22, "learning_rate": 3.608247422680412e-07, "logits/chosen": -3.0272717475891113, "logits/rejected": -2.9719436168670654, "logps/chosen": -280.74859619140625, "logps/rejected": -237.41921997070312, "loss": 0.4879, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.24850067496299744, "rewards/margins": 1.2745717763900757, "rewards/rejected": -1.523072361946106, "step": 420 }, { "epoch": 0.22, "learning_rate": 3.6941580756013745e-07, "logits/chosen": -3.0856499671936035, "logits/rejected": -3.03281831741333, "logps/chosen": -242.9888916015625, "logps/rejected": -221.92031860351562, "loss": 0.4766, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6125269532203674, "rewards/margins": 1.3152192831039429, "rewards/rejected": -1.9277461767196655, "step": 430 }, { "epoch": 0.23, "learning_rate": 3.7800687285223364e-07, "logits/chosen": -3.0084025859832764, "logits/rejected": -3.060877561569214, "logps/chosen": -287.5694885253906, "logps/rejected": -285.35040283203125, "loss": 0.5522, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.3822043538093567, "rewards/margins": 1.0044233798980713, "rewards/rejected": -1.3866277933120728, "step": 440 }, { "epoch": 0.23, "learning_rate": 3.865979381443299e-07, "logits/chosen": -3.0907702445983887, "logits/rejected": -3.085932970046997, "logps/chosen": -262.8836975097656, "logps/rejected": -250.1670379638672, "loss": 0.4844, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.1354902982711792, "rewards/margins": 0.8856312036514282, "rewards/rejected": -1.0211213827133179, "step": 450 }, { "epoch": 0.24, "learning_rate": 3.9518900343642607e-07, "logits/chosen": -3.0575408935546875, "logits/rejected": -3.0283780097961426, "logps/chosen": -269.05120849609375, "logps/rejected": -255.0241241455078, "loss": 0.5529, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3627804219722748, "rewards/margins": 1.0761009454727173, "rewards/rejected": -1.438881278038025, "step": 460 }, { "epoch": 0.24, "learning_rate": 4.037800687285223e-07, "logits/chosen": -3.123262405395508, "logits/rejected": -3.0892791748046875, "logps/chosen": -317.90850830078125, "logps/rejected": -207.1751708984375, "loss": 0.4478, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.31438368558883667, "rewards/margins": 1.2641030550003052, "rewards/rejected": -1.578486680984497, "step": 470 }, { "epoch": 0.25, "learning_rate": 4.123711340206185e-07, "logits/chosen": -3.19368314743042, "logits/rejected": -3.1357719898223877, "logps/chosen": -285.27130126953125, "logps/rejected": -249.72097778320312, "loss": 0.482, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.15568087995052338, "rewards/margins": 1.13084077835083, "rewards/rejected": -1.2865216732025146, "step": 480 }, { "epoch": 0.25, "learning_rate": 4.209621993127148e-07, "logits/chosen": -2.9999001026153564, "logits/rejected": -2.989119052886963, "logps/chosen": -263.2889099121094, "logps/rejected": -240.6525421142578, "loss": 0.4095, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3779570460319519, "rewards/margins": 1.3387038707733154, "rewards/rejected": -1.7166610956192017, "step": 490 }, { "epoch": 0.26, "learning_rate": 4.2955326460481097e-07, "logits/chosen": -3.199814558029175, "logits/rejected": -3.170172691345215, "logps/chosen": -274.55523681640625, "logps/rejected": -257.8343505859375, "loss": 0.5409, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5398232936859131, "rewards/margins": 0.9405809640884399, "rewards/rejected": -1.4804041385650635, "step": 500 }, { "epoch": 0.26, "eval_logits/chosen": -3.076664686203003, "eval_logits/rejected": -3.0543596744537354, "eval_logps/chosen": -275.1524963378906, "eval_logps/rejected": -244.3592071533203, "eval_loss": 0.4952601194381714, "eval_rewards/accuracies": 0.777999997138977, "eval_rewards/chosen": -0.40013551712036133, "eval_rewards/margins": 1.187387466430664, "eval_rewards/rejected": -1.5875229835510254, "eval_runtime": 298.7439, "eval_samples_per_second": 6.695, "eval_steps_per_second": 0.418, "step": 500 }, { "epoch": 0.26, "learning_rate": 4.381443298969072e-07, "logits/chosen": -3.0252814292907715, "logits/rejected": -3.044193744659424, "logps/chosen": -288.96246337890625, "logps/rejected": -249.6056671142578, "loss": 0.5222, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.6331709027290344, "rewards/margins": 0.7817397117614746, "rewards/rejected": -1.4149106740951538, "step": 510 }, { "epoch": 0.27, "learning_rate": 4.4673539518900345e-07, "logits/chosen": -2.9890036582946777, "logits/rejected": -2.975595474243164, "logps/chosen": -250.694580078125, "logps/rejected": -223.31900024414062, "loss": 0.5092, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5249029397964478, "rewards/margins": 1.4869499206542969, "rewards/rejected": -2.011852741241455, "step": 520 }, { "epoch": 0.27, "learning_rate": 4.5532646048109964e-07, "logits/chosen": -3.036130428314209, "logits/rejected": -3.005101442337036, "logps/chosen": -279.5271911621094, "logps/rejected": -233.37759399414062, "loss": 0.5129, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4594394266605377, "rewards/margins": 1.0354315042495728, "rewards/rejected": -1.494870901107788, "step": 530 }, { "epoch": 0.28, "learning_rate": 4.639175257731959e-07, "logits/chosen": -3.071089267730713, "logits/rejected": -3.0566036701202393, "logps/chosen": -280.0428161621094, "logps/rejected": -255.1620635986328, "loss": 0.5469, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6389147043228149, "rewards/margins": 0.8219264149665833, "rewards/rejected": -1.4608410596847534, "step": 540 }, { "epoch": 0.28, "learning_rate": 4.7250859106529206e-07, "logits/chosen": -3.052964687347412, "logits/rejected": -3.027625560760498, "logps/chosen": -269.45050048828125, "logps/rejected": -242.0845489501953, "loss": 0.5201, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5871966481208801, "rewards/margins": 1.219684362411499, "rewards/rejected": -1.8068811893463135, "step": 550 }, { "epoch": 0.29, "learning_rate": 4.810996563573884e-07, "logits/chosen": -3.0290699005126953, "logits/rejected": -2.9819164276123047, "logps/chosen": -308.82049560546875, "logps/rejected": -262.578369140625, "loss": 0.5215, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4544792175292969, "rewards/margins": 1.2229182720184326, "rewards/rejected": -1.67739737033844, "step": 560 }, { "epoch": 0.29, "learning_rate": 4.896907216494845e-07, "logits/chosen": -3.095975160598755, "logits/rejected": -3.038904905319214, "logps/chosen": -278.1103515625, "logps/rejected": -263.58197021484375, "loss": 0.4939, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7382952570915222, "rewards/margins": 0.9173868894577026, "rewards/rejected": -1.6556819677352905, "step": 570 }, { "epoch": 0.3, "learning_rate": 4.982817869415807e-07, "logits/chosen": -3.1059720516204834, "logits/rejected": -3.0128486156463623, "logps/chosen": -273.25616455078125, "logps/rejected": -216.08547973632812, "loss": 0.545, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.3375282287597656, "rewards/margins": 1.2573668956756592, "rewards/rejected": -1.5948951244354248, "step": 580 }, { "epoch": 0.3, "learning_rate": 4.992350353796136e-07, "logits/chosen": -3.0131020545959473, "logits/rejected": -3.0127346515655518, "logps/chosen": -247.0398406982422, "logps/rejected": -247.3001708984375, "loss": 0.4945, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.33573365211486816, "rewards/margins": 1.4024873971939087, "rewards/rejected": -1.7382211685180664, "step": 590 }, { "epoch": 0.31, "learning_rate": 4.982788296041308e-07, "logits/chosen": -3.1070754528045654, "logits/rejected": -3.0217490196228027, "logps/chosen": -250.99771118164062, "logps/rejected": -224.89303588867188, "loss": 0.5161, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5898759961128235, "rewards/margins": 1.1816720962524414, "rewards/rejected": -1.7715480327606201, "step": 600 }, { "epoch": 0.31, "eval_logits/chosen": -3.0461089611053467, "eval_logits/rejected": -3.0234925746917725, "eval_logps/chosen": -274.2987976074219, "eval_logps/rejected": -242.63465881347656, "eval_loss": 0.5194836854934692, "eval_rewards/accuracies": 0.7419999837875366, "eval_rewards/chosen": -0.31476885080337524, "eval_rewards/margins": 1.100299596786499, "eval_rewards/rejected": -1.4150683879852295, "eval_runtime": 299.932, "eval_samples_per_second": 6.668, "eval_steps_per_second": 0.417, "step": 600 }, { "epoch": 0.31, "learning_rate": 4.973226238286479e-07, "logits/chosen": -3.0412392616271973, "logits/rejected": -3.001218318939209, "logps/chosen": -323.3726501464844, "logps/rejected": -270.83984375, "loss": 0.5421, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24728074669837952, "rewards/margins": 1.267622947692871, "rewards/rejected": -1.5149036645889282, "step": 610 }, { "epoch": 0.32, "learning_rate": 4.96366418053165e-07, "logits/chosen": -3.121735095977783, "logits/rejected": -3.0834250450134277, "logps/chosen": -280.3814392089844, "logps/rejected": -269.0967712402344, "loss": 0.5492, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1741463840007782, "rewards/margins": 0.844623863697052, "rewards/rejected": -1.0187702178955078, "step": 620 }, { "epoch": 0.33, "learning_rate": 4.954102122776821e-07, "logits/chosen": -3.1227710247039795, "logits/rejected": -3.003854513168335, "logps/chosen": -253.03939819335938, "logps/rejected": -200.9890899658203, "loss": 0.4995, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1886899173259735, "rewards/margins": 1.2468225955963135, "rewards/rejected": -1.4355127811431885, "step": 630 }, { "epoch": 0.33, "learning_rate": 4.944540065021993e-07, "logits/chosen": -2.9064483642578125, "logits/rejected": -2.925783634185791, "logps/chosen": -238.77737426757812, "logps/rejected": -206.6617431640625, "loss": 0.4772, "rewards/accuracies": 0.8125, "rewards/chosen": -0.456102192401886, "rewards/margins": 1.4002044200897217, "rewards/rejected": -1.856306791305542, "step": 640 }, { "epoch": 0.34, "learning_rate": 4.934978007267163e-07, "logits/chosen": -3.025247812271118, "logits/rejected": -2.9976277351379395, "logps/chosen": -279.1222839355469, "logps/rejected": -253.5683135986328, "loss": 0.8206, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.023301448673009872, "rewards/margins": 1.3701492547988892, "rewards/rejected": -1.3934507369995117, "step": 650 }, { "epoch": 0.34, "learning_rate": 4.925415949512335e-07, "logits/chosen": -2.996875524520874, "logits/rejected": -2.918454170227051, "logps/chosen": -331.8418884277344, "logps/rejected": -253.6525115966797, "loss": 0.5038, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.24123434722423553, "rewards/margins": 1.4405347108840942, "rewards/rejected": -1.6817691326141357, "step": 660 }, { "epoch": 0.35, "learning_rate": 4.915853891757506e-07, "logits/chosen": -2.8935065269470215, "logits/rejected": -2.898484706878662, "logps/chosen": -197.3555908203125, "logps/rejected": -241.32046508789062, "loss": 0.5704, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24199140071868896, "rewards/margins": 0.7882445454597473, "rewards/rejected": -1.030236005783081, "step": 670 }, { "epoch": 0.35, "learning_rate": 4.906291834002677e-07, "logits/chosen": -2.962968349456787, "logits/rejected": -2.9006264209747314, "logps/chosen": -281.5236511230469, "logps/rejected": -256.7275085449219, "loss": 0.5252, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.608171820640564, "rewards/margins": 0.9431388974189758, "rewards/rejected": -1.5513107776641846, "step": 680 }, { "epoch": 0.36, "learning_rate": 4.896729776247848e-07, "logits/chosen": -2.9835548400878906, "logits/rejected": -2.9527204036712646, "logps/chosen": -289.92987060546875, "logps/rejected": -251.1087188720703, "loss": 0.4655, "rewards/accuracies": 0.8125, "rewards/chosen": -0.38817405700683594, "rewards/margins": 1.28240168094635, "rewards/rejected": -1.6705758571624756, "step": 690 }, { "epoch": 0.36, "learning_rate": 4.88716771849302e-07, "logits/chosen": -3.0054280757904053, "logits/rejected": -2.911468029022217, "logps/chosen": -329.260009765625, "logps/rejected": -274.3800964355469, "loss": 0.4913, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.265338271856308, "rewards/margins": 1.6260671615600586, "rewards/rejected": -1.8914053440093994, "step": 700 }, { "epoch": 0.36, "eval_logits/chosen": -2.9585862159729004, "eval_logits/rejected": -2.9301576614379883, "eval_logps/chosen": -277.00439453125, "eval_logps/rejected": -247.15345764160156, "eval_loss": 0.5227752923965454, "eval_rewards/accuracies": 0.7799999713897705, "eval_rewards/chosen": -0.5853266716003418, "eval_rewards/margins": 1.2816225290298462, "eval_rewards/rejected": -1.866949200630188, "eval_runtime": 297.2392, "eval_samples_per_second": 6.729, "eval_steps_per_second": 0.421, "step": 700 }, { "epoch": 0.37, "learning_rate": 4.87760566073819e-07, "logits/chosen": -2.9115710258483887, "logits/rejected": -2.9119551181793213, "logps/chosen": -302.12396240234375, "logps/rejected": -242.7657470703125, "loss": 0.5125, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.7263463139533997, "rewards/margins": 1.2942359447479248, "rewards/rejected": -2.020582675933838, "step": 710 }, { "epoch": 0.37, "learning_rate": 4.868043602983362e-07, "logits/chosen": -3.0313706398010254, "logits/rejected": -2.987967014312744, "logps/chosen": -302.0757751464844, "logps/rejected": -299.59466552734375, "loss": 0.5112, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.8701627850532532, "rewards/margins": 1.6429879665374756, "rewards/rejected": -2.513150691986084, "step": 720 }, { "epoch": 0.38, "learning_rate": 4.858481545228533e-07, "logits/chosen": -3.0496833324432373, "logits/rejected": -2.9578769207000732, "logps/chosen": -324.0188293457031, "logps/rejected": -285.79510498046875, "loss": 0.485, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7672210931777954, "rewards/margins": 1.4055955410003662, "rewards/rejected": -2.172816514968872, "step": 730 }, { "epoch": 0.38, "learning_rate": 4.848919487473704e-07, "logits/chosen": -2.9866185188293457, "logits/rejected": -2.962374687194824, "logps/chosen": -300.18084716796875, "logps/rejected": -295.59954833984375, "loss": 0.5883, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8200961947441101, "rewards/margins": 1.3473241329193115, "rewards/rejected": -2.1674201488494873, "step": 740 }, { "epoch": 0.39, "learning_rate": 4.839357429718875e-07, "logits/chosen": -3.005702257156372, "logits/rejected": -3.033315658569336, "logps/chosen": -272.52850341796875, "logps/rejected": -243.59927368164062, "loss": 0.5489, "rewards/accuracies": 0.75, "rewards/chosen": -0.6764557957649231, "rewards/margins": 1.2463300228118896, "rewards/rejected": -1.922785997390747, "step": 750 }, { "epoch": 0.39, "learning_rate": 4.829795371964047e-07, "logits/chosen": -3.028440237045288, "logits/rejected": -2.97921085357666, "logps/chosen": -304.7507019042969, "logps/rejected": -267.3523254394531, "loss": 0.5266, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5715482831001282, "rewards/margins": 1.3217369318008423, "rewards/rejected": -1.8932850360870361, "step": 760 }, { "epoch": 0.4, "learning_rate": 4.820233314209217e-07, "logits/chosen": -2.8253636360168457, "logits/rejected": -2.8096349239349365, "logps/chosen": -256.4940490722656, "logps/rejected": -226.732666015625, "loss": 0.5891, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7520522475242615, "rewards/margins": 1.1242681741714478, "rewards/rejected": -1.876320242881775, "step": 770 }, { "epoch": 0.4, "learning_rate": 4.810671256454389e-07, "logits/chosen": -2.8110451698303223, "logits/rejected": -2.8266806602478027, "logps/chosen": -302.3857116699219, "logps/rejected": -285.738525390625, "loss": 0.4987, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6822640895843506, "rewards/margins": 1.6061044931411743, "rewards/rejected": -2.2883687019348145, "step": 780 }, { "epoch": 0.41, "learning_rate": 4.80110919869956e-07, "logits/chosen": -2.858147382736206, "logits/rejected": -2.80432391166687, "logps/chosen": -298.38995361328125, "logps/rejected": -229.829345703125, "loss": 0.5486, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6031957864761353, "rewards/margins": 1.3144261837005615, "rewards/rejected": -1.9176222085952759, "step": 790 }, { "epoch": 0.41, "learning_rate": 4.791547140944731e-07, "logits/chosen": -2.7301101684570312, "logits/rejected": -2.7294387817382812, "logps/chosen": -222.55526733398438, "logps/rejected": -228.2888641357422, "loss": 0.4724, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5060822367668152, "rewards/margins": 1.4019149541854858, "rewards/rejected": -1.9079973697662354, "step": 800 }, { "epoch": 0.41, "eval_logits/chosen": -2.8297252655029297, "eval_logits/rejected": -2.798793315887451, "eval_logps/chosen": -277.2220764160156, "eval_logps/rejected": -249.0490264892578, "eval_loss": 0.514238178730011, "eval_rewards/accuracies": 0.7620000243186951, "eval_rewards/chosen": -0.6070927977561951, "eval_rewards/margins": 1.4494118690490723, "eval_rewards/rejected": -2.056504487991333, "eval_runtime": 297.9515, "eval_samples_per_second": 6.713, "eval_steps_per_second": 0.42, "step": 800 }, { "epoch": 0.42, "learning_rate": 4.781985083189902e-07, "logits/chosen": -2.810962677001953, "logits/rejected": -2.766624927520752, "logps/chosen": -255.8228759765625, "logps/rejected": -262.8910217285156, "loss": 0.5184, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5636113286018372, "rewards/margins": 1.295597791671753, "rewards/rejected": -1.8592088222503662, "step": 810 }, { "epoch": 0.42, "learning_rate": 4.772423025435074e-07, "logits/chosen": -2.8432140350341797, "logits/rejected": -2.8048148155212402, "logps/chosen": -279.9248962402344, "logps/rejected": -271.9587707519531, "loss": 0.5416, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6159058213233948, "rewards/margins": 0.8555160760879517, "rewards/rejected": -1.4714218378067017, "step": 820 }, { "epoch": 0.43, "learning_rate": 4.762860967680244e-07, "logits/chosen": -2.9207639694213867, "logits/rejected": -2.9197988510131836, "logps/chosen": -242.93972778320312, "logps/rejected": -200.70346069335938, "loss": 0.5519, "rewards/accuracies": 0.75, "rewards/chosen": -0.5735063552856445, "rewards/margins": 1.0570865869522095, "rewards/rejected": -1.630592703819275, "step": 830 }, { "epoch": 0.43, "learning_rate": 4.7532989099254154e-07, "logits/chosen": -2.903390407562256, "logits/rejected": -2.811583995819092, "logps/chosen": -262.1546936035156, "logps/rejected": -235.65078735351562, "loss": 0.5592, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7549904584884644, "rewards/margins": 0.8606253862380981, "rewards/rejected": -1.6156158447265625, "step": 840 }, { "epoch": 0.44, "learning_rate": 4.7437368521705866e-07, "logits/chosen": -2.9836201667785645, "logits/rejected": -2.9336819648742676, "logps/chosen": -252.79244995117188, "logps/rejected": -262.43499755859375, "loss": 0.5252, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.45973125100135803, "rewards/margins": 1.5895912647247314, "rewards/rejected": -2.0493226051330566, "step": 850 }, { "epoch": 0.44, "learning_rate": 4.7341747944157577e-07, "logits/chosen": -2.9847192764282227, "logits/rejected": -2.968533754348755, "logps/chosen": -274.5762023925781, "logps/rejected": -252.141357421875, "loss": 0.5585, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6592631340026855, "rewards/margins": 1.054240107536316, "rewards/rejected": -1.7135032415390015, "step": 860 }, { "epoch": 0.45, "learning_rate": 4.724612736660929e-07, "logits/chosen": -2.8903841972351074, "logits/rejected": -2.8222813606262207, "logps/chosen": -276.86968994140625, "logps/rejected": -245.29885864257812, "loss": 0.4805, "rewards/accuracies": 0.8125, "rewards/chosen": -0.3650582730770111, "rewards/margins": 1.8204562664031982, "rewards/rejected": -2.1855146884918213, "step": 870 }, { "epoch": 0.45, "learning_rate": 4.7150506789061006e-07, "logits/chosen": -2.984184741973877, "logits/rejected": -2.9287283420562744, "logps/chosen": -291.3039855957031, "logps/rejected": -292.13543701171875, "loss": 0.503, "rewards/accuracies": 0.8125, "rewards/chosen": -0.7998818159103394, "rewards/margins": 1.5341354608535767, "rewards/rejected": -2.334017515182495, "step": 880 }, { "epoch": 0.46, "learning_rate": 4.7054886211512717e-07, "logits/chosen": -2.9736297130584717, "logits/rejected": -2.954500913619995, "logps/chosen": -281.38250732421875, "logps/rejected": -250.498779296875, "loss": 0.5356, "rewards/accuracies": 0.75, "rewards/chosen": -0.46949291229248047, "rewards/margins": 1.1354753971099854, "rewards/rejected": -1.6049684286117554, "step": 890 }, { "epoch": 0.46, "learning_rate": 4.695926563396443e-07, "logits/chosen": -3.040417432785034, "logits/rejected": -2.989995002746582, "logps/chosen": -276.5390930175781, "logps/rejected": -237.54824829101562, "loss": 0.5157, "rewards/accuracies": 0.75, "rewards/chosen": -0.8211701512336731, "rewards/margins": 0.9814871549606323, "rewards/rejected": -1.8026573657989502, "step": 900 }, { "epoch": 0.46, "eval_logits/chosen": -2.9777884483337402, "eval_logits/rejected": -2.946300506591797, "eval_logps/chosen": -277.0157470703125, "eval_logps/rejected": -246.65028381347656, "eval_loss": 0.5049863457679749, "eval_rewards/accuracies": 0.765999972820282, "eval_rewards/chosen": -0.5864599347114563, "eval_rewards/margins": 1.230170488357544, "eval_rewards/rejected": -1.8166306018829346, "eval_runtime": 297.0482, "eval_samples_per_second": 6.733, "eval_steps_per_second": 0.421, "step": 900 }, { "epoch": 0.47, "learning_rate": 4.686364505641614e-07, "logits/chosen": -2.992205858230591, "logits/rejected": -2.9321579933166504, "logps/chosen": -276.30682373046875, "logps/rejected": -238.8515167236328, "loss": 0.4941, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6449284553527832, "rewards/margins": 1.4866305589675903, "rewards/rejected": -2.131558895111084, "step": 910 }, { "epoch": 0.47, "learning_rate": 4.676802447886785e-07, "logits/chosen": -2.90757155418396, "logits/rejected": -2.8897616863250732, "logps/chosen": -264.0755310058594, "logps/rejected": -240.0446014404297, "loss": 0.5385, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5407442450523376, "rewards/margins": 1.37613844871521, "rewards/rejected": -1.916882872581482, "step": 920 }, { "epoch": 0.48, "learning_rate": 4.6672403901319564e-07, "logits/chosen": -2.92992901802063, "logits/rejected": -2.9072909355163574, "logps/chosen": -258.2342224121094, "logps/rejected": -241.68197631835938, "loss": 0.4592, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22730322182178497, "rewards/margins": 1.7852048873901367, "rewards/rejected": -2.012507915496826, "step": 930 }, { "epoch": 0.49, "learning_rate": 4.6576783323771275e-07, "logits/chosen": -2.8399574756622314, "logits/rejected": -2.8085784912109375, "logps/chosen": -229.1579132080078, "logps/rejected": -221.8878173828125, "loss": 0.4922, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.6926137804985046, "rewards/margins": 1.2121039628982544, "rewards/rejected": -1.9047178030014038, "step": 940 }, { "epoch": 0.49, "learning_rate": 4.6481162746222987e-07, "logits/chosen": -2.837979793548584, "logits/rejected": -2.820255994796753, "logps/chosen": -288.9910583496094, "logps/rejected": -254.75588989257812, "loss": 0.5064, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.9452236294746399, "rewards/margins": 1.558538794517517, "rewards/rejected": -2.503762722015381, "step": 950 }, { "epoch": 0.5, "learning_rate": 4.63855421686747e-07, "logits/chosen": -2.897672176361084, "logits/rejected": -2.847825527191162, "logps/chosen": -291.0904846191406, "logps/rejected": -248.66873168945312, "loss": 0.5254, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5009971261024475, "rewards/margins": 1.568277359008789, "rewards/rejected": -2.069274663925171, "step": 960 }, { "epoch": 0.5, "learning_rate": 4.628992159112641e-07, "logits/chosen": -2.8176021575927734, "logits/rejected": -2.863546848297119, "logps/chosen": -266.3712158203125, "logps/rejected": -269.98291015625, "loss": 0.5196, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.17499732971191406, "rewards/margins": 1.6475900411605835, "rewards/rejected": -1.822587251663208, "step": 970 }, { "epoch": 0.51, "learning_rate": 4.6194301013578116e-07, "logits/chosen": -2.889280319213867, "logits/rejected": -2.88883376121521, "logps/chosen": -320.40277099609375, "logps/rejected": -254.3000946044922, "loss": 0.5441, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.4573570191860199, "rewards/margins": 1.5372388362884521, "rewards/rejected": -1.9945958852767944, "step": 980 }, { "epoch": 0.51, "learning_rate": 4.609868043602983e-07, "logits/chosen": -2.9552786350250244, "logits/rejected": -2.889835834503174, "logps/chosen": -253.4173583984375, "logps/rejected": -232.3767852783203, "loss": 0.4735, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4632430672645569, "rewards/margins": 1.0684535503387451, "rewards/rejected": -1.5316965579986572, "step": 990 }, { "epoch": 0.52, "learning_rate": 4.600305985848154e-07, "logits/chosen": -2.904284954071045, "logits/rejected": -2.867764472961426, "logps/chosen": -245.16519165039062, "logps/rejected": -240.11782836914062, "loss": 0.4641, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.47291144728660583, "rewards/margins": 1.0328925848007202, "rewards/rejected": -1.5058040618896484, "step": 1000 }, { "epoch": 0.52, "eval_logits/chosen": -2.92161226272583, "eval_logits/rejected": -2.891594171524048, "eval_logps/chosen": -276.3018798828125, "eval_logps/rejected": -248.4610595703125, "eval_loss": 0.5090581774711609, "eval_rewards/accuracies": 0.7580000162124634, "eval_rewards/chosen": -0.515073835849762, "eval_rewards/margins": 1.4826339483261108, "eval_rewards/rejected": -1.997707724571228, "eval_runtime": 299.1136, "eval_samples_per_second": 6.686, "eval_steps_per_second": 0.418, "step": 1000 }, { "epoch": 0.52, "learning_rate": 4.590743928093325e-07, "logits/chosen": -2.7496089935302734, "logits/rejected": -2.7067320346832275, "logps/chosen": -304.1459655761719, "logps/rejected": -259.58782958984375, "loss": 0.5741, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7305110692977905, "rewards/margins": 1.1352968215942383, "rewards/rejected": -1.8658077716827393, "step": 1010 }, { "epoch": 0.53, "learning_rate": 4.581181870338497e-07, "logits/chosen": -2.8158769607543945, "logits/rejected": -2.7898595333099365, "logps/chosen": -316.39007568359375, "logps/rejected": -285.505615234375, "loss": 0.4861, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3617403507232666, "rewards/margins": 1.6614677906036377, "rewards/rejected": -2.0232081413269043, "step": 1020 }, { "epoch": 0.53, "learning_rate": 4.571619812583668e-07, "logits/chosen": -2.8434338569641113, "logits/rejected": -2.816493272781372, "logps/chosen": -268.98223876953125, "logps/rejected": -274.27459716796875, "loss": 0.4956, "rewards/accuracies": 0.75, "rewards/chosen": -0.6085302233695984, "rewards/margins": 1.38003671169281, "rewards/rejected": -1.9885669946670532, "step": 1030 }, { "epoch": 0.54, "learning_rate": 4.562057754828839e-07, "logits/chosen": -2.8416907787323, "logits/rejected": -2.7812013626098633, "logps/chosen": -283.7867431640625, "logps/rejected": -249.9698028564453, "loss": 0.551, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.5633155703544617, "rewards/margins": 1.5839368104934692, "rewards/rejected": -2.1472525596618652, "step": 1040 }, { "epoch": 0.54, "learning_rate": 4.55249569707401e-07, "logits/chosen": -2.910600185394287, "logits/rejected": -2.8899829387664795, "logps/chosen": -235.5374298095703, "logps/rejected": -255.29824829101562, "loss": 0.4963, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8693474531173706, "rewards/margins": 1.2028529644012451, "rewards/rejected": -2.072200298309326, "step": 1050 }, { "epoch": 0.55, "learning_rate": 4.5429336393191814e-07, "logits/chosen": -2.8421430587768555, "logits/rejected": -2.862175703048706, "logps/chosen": -250.13607788085938, "logps/rejected": -224.20864868164062, "loss": 0.5393, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6483901739120483, "rewards/margins": 1.0726064443588257, "rewards/rejected": -1.720996618270874, "step": 1060 }, { "epoch": 0.55, "learning_rate": 4.5333715815643525e-07, "logits/chosen": -2.9626762866973877, "logits/rejected": -2.9626893997192383, "logps/chosen": -299.9131774902344, "logps/rejected": -270.77386474609375, "loss": 0.5598, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0119389295578003, "rewards/margins": 1.3936216831207275, "rewards/rejected": -2.4055607318878174, "step": 1070 }, { "epoch": 0.56, "learning_rate": 4.5238095238095237e-07, "logits/chosen": -2.9695396423339844, "logits/rejected": -2.9188308715820312, "logps/chosen": -283.611328125, "logps/rejected": -250.3421630859375, "loss": 0.5169, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1383975744247437, "rewards/margins": 1.2842782735824585, "rewards/rejected": -2.422675609588623, "step": 1080 }, { "epoch": 0.56, "learning_rate": 4.514247466054695e-07, "logits/chosen": -2.906083345413208, "logits/rejected": -2.8836545944213867, "logps/chosen": -230.08450317382812, "logps/rejected": -224.95791625976562, "loss": 0.5159, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9879177808761597, "rewards/margins": 1.4775643348693848, "rewards/rejected": -2.465482234954834, "step": 1090 }, { "epoch": 0.57, "learning_rate": 4.504685408299866e-07, "logits/chosen": -2.810950756072998, "logits/rejected": -2.805164098739624, "logps/chosen": -303.5326843261719, "logps/rejected": -280.25927734375, "loss": 0.5558, "rewards/accuracies": 0.75, "rewards/chosen": -1.0075418949127197, "rewards/margins": 1.3667891025543213, "rewards/rejected": -2.374330759048462, "step": 1100 }, { "epoch": 0.57, "eval_logits/chosen": -2.8914411067962646, "eval_logits/rejected": -2.860114336013794, "eval_logps/chosen": -279.2667541503906, "eval_logps/rejected": -249.60362243652344, "eval_loss": 0.49709653854370117, "eval_rewards/accuracies": 0.7699999809265137, "eval_rewards/chosen": -0.8115612268447876, "eval_rewards/margins": 1.3004019260406494, "eval_rewards/rejected": -2.1119627952575684, "eval_runtime": 296.2613, "eval_samples_per_second": 6.751, "eval_steps_per_second": 0.422, "step": 1100 }, { "epoch": 0.57, "learning_rate": 4.495123350545037e-07, "logits/chosen": -2.894946336746216, "logits/rejected": -2.880427122116089, "logps/chosen": -302.75360107421875, "logps/rejected": -287.68927001953125, "loss": 0.5009, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.7256338596343994, "rewards/margins": 1.2386404275894165, "rewards/rejected": -1.9642740488052368, "step": 1110 }, { "epoch": 0.58, "learning_rate": 4.4855612927902083e-07, "logits/chosen": -2.8273870944976807, "logits/rejected": -2.8010201454162598, "logps/chosen": -305.2731628417969, "logps/rejected": -255.99533081054688, "loss": 0.5916, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2193191051483154, "rewards/margins": 1.1791822910308838, "rewards/rejected": -2.398501396179199, "step": 1120 }, { "epoch": 0.58, "learning_rate": 4.4759992350353795e-07, "logits/chosen": -2.8475892543792725, "logits/rejected": -2.7523796558380127, "logps/chosen": -290.9092712402344, "logps/rejected": -231.7282257080078, "loss": 0.4341, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9197354316711426, "rewards/margins": 1.6278235912322998, "rewards/rejected": -2.5475590229034424, "step": 1130 }, { "epoch": 0.59, "learning_rate": 4.46643717728055e-07, "logits/chosen": -2.773775815963745, "logits/rejected": -2.7185730934143066, "logps/chosen": -249.8427734375, "logps/rejected": -257.61114501953125, "loss": 0.5095, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.9906557202339172, "rewards/margins": 1.4055380821228027, "rewards/rejected": -2.396193742752075, "step": 1140 }, { "epoch": 0.59, "learning_rate": 4.4568751195257213e-07, "logits/chosen": -2.783116340637207, "logits/rejected": -2.7151153087615967, "logps/chosen": -330.0415344238281, "logps/rejected": -256.75262451171875, "loss": 0.5247, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.5400577783584595, "rewards/margins": 1.7343899011611938, "rewards/rejected": -2.2744476795196533, "step": 1150 }, { "epoch": 0.6, "learning_rate": 4.447313061770893e-07, "logits/chosen": -2.7966766357421875, "logits/rejected": -2.7340331077575684, "logps/chosen": -263.78033447265625, "logps/rejected": -267.43609619140625, "loss": 0.5293, "rewards/accuracies": 0.8125, "rewards/chosen": -0.573074460029602, "rewards/margins": 1.5856841802597046, "rewards/rejected": -2.1587586402893066, "step": 1160 }, { "epoch": 0.6, "learning_rate": 4.437751004016064e-07, "logits/chosen": -2.684180498123169, "logits/rejected": -2.628032684326172, "logps/chosen": -229.15811157226562, "logps/rejected": -241.14602661132812, "loss": 0.5069, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.6570131778717041, "rewards/margins": 1.2180211544036865, "rewards/rejected": -1.8750343322753906, "step": 1170 }, { "epoch": 0.61, "learning_rate": 4.4281889462612353e-07, "logits/chosen": -2.8334476947784424, "logits/rejected": -2.8391318321228027, "logps/chosen": -269.18353271484375, "logps/rejected": -228.5656280517578, "loss": 0.4841, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.5598064064979553, "rewards/margins": 1.3947551250457764, "rewards/rejected": -1.9545615911483765, "step": 1180 }, { "epoch": 0.61, "learning_rate": 4.4186268885064064e-07, "logits/chosen": -2.858812093734741, "logits/rejected": -2.861037492752075, "logps/chosen": -296.47418212890625, "logps/rejected": -253.0843505859375, "loss": 0.5356, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.768179178237915, "rewards/margins": 1.159069299697876, "rewards/rejected": -1.9272483587265015, "step": 1190 }, { "epoch": 0.62, "learning_rate": 4.4090648307515776e-07, "logits/chosen": -2.862907648086548, "logits/rejected": -2.834665298461914, "logps/chosen": -216.3045196533203, "logps/rejected": -189.45518493652344, "loss": 0.4877, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8047316670417786, "rewards/margins": 1.0661463737487793, "rewards/rejected": -1.8708778619766235, "step": 1200 }, { "epoch": 0.62, "eval_logits/chosen": -2.8769757747650146, "eval_logits/rejected": -2.833991765975952, "eval_logps/chosen": -276.7474060058594, "eval_logps/rejected": -247.43191528320312, "eval_loss": 0.5092260837554932, "eval_rewards/accuracies": 0.7639999985694885, "eval_rewards/chosen": -0.5596281886100769, "eval_rewards/margins": 1.3351647853851318, "eval_rewards/rejected": -1.894792914390564, "eval_runtime": 296.8641, "eval_samples_per_second": 6.737, "eval_steps_per_second": 0.421, "step": 1200 }, { "epoch": 0.62, "learning_rate": 4.399502772996749e-07, "logits/chosen": -2.869292736053467, "logits/rejected": -2.847054958343506, "logps/chosen": -274.1513671875, "logps/rejected": -256.44647216796875, "loss": 0.4977, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.8532268404960632, "rewards/margins": 1.425225019454956, "rewards/rejected": -2.278452157974243, "step": 1210 }, { "epoch": 0.63, "learning_rate": 4.38994071524192e-07, "logits/chosen": -2.8490569591522217, "logits/rejected": -2.857327938079834, "logps/chosen": -254.9918670654297, "logps/rejected": -223.4118194580078, "loss": 0.4921, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.870094895362854, "rewards/margins": 1.205338478088379, "rewards/rejected": -2.0754332542419434, "step": 1220 }, { "epoch": 0.64, "learning_rate": 4.380378657487091e-07, "logits/chosen": -2.90724778175354, "logits/rejected": -2.839764356613159, "logps/chosen": -317.3240051269531, "logps/rejected": -304.71771240234375, "loss": 0.4931, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.45973819494247437, "rewards/margins": 1.6618239879608154, "rewards/rejected": -2.1215622425079346, "step": 1230 }, { "epoch": 0.64, "learning_rate": 4.370816599732262e-07, "logits/chosen": -2.9106605052948, "logits/rejected": -2.865180730819702, "logps/chosen": -310.6402282714844, "logps/rejected": -236.48153686523438, "loss": 0.4856, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.191367506980896, "rewards/margins": 1.2832874059677124, "rewards/rejected": -2.4746549129486084, "step": 1240 }, { "epoch": 0.65, "learning_rate": 4.3612545419774334e-07, "logits/chosen": -2.8365566730499268, "logits/rejected": -2.8342230319976807, "logps/chosen": -237.7244110107422, "logps/rejected": -268.4637451171875, "loss": 0.5643, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1750293970108032, "rewards/margins": 0.9123395085334778, "rewards/rejected": -2.0873687267303467, "step": 1250 }, { "epoch": 0.65, "learning_rate": 4.3516924842226045e-07, "logits/chosen": -2.8220834732055664, "logits/rejected": -2.7954087257385254, "logps/chosen": -284.5871887207031, "logps/rejected": -250.57449340820312, "loss": 0.5149, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7969790101051331, "rewards/margins": 1.5051862001419067, "rewards/rejected": -2.3021652698516846, "step": 1260 }, { "epoch": 0.66, "learning_rate": 4.3421304264677757e-07, "logits/chosen": -2.867050886154175, "logits/rejected": -2.848658323287964, "logps/chosen": -258.2459411621094, "logps/rejected": -223.54580688476562, "loss": 0.4854, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.8680087924003601, "rewards/margins": 1.5067569017410278, "rewards/rejected": -2.374765634536743, "step": 1270 }, { "epoch": 0.66, "learning_rate": 4.332568368712947e-07, "logits/chosen": -2.969085931777954, "logits/rejected": -2.9144904613494873, "logps/chosen": -273.5484313964844, "logps/rejected": -251.82192993164062, "loss": 0.5912, "rewards/accuracies": 0.75, "rewards/chosen": -1.0105066299438477, "rewards/margins": 1.149637222290039, "rewards/rejected": -2.1601438522338867, "step": 1280 }, { "epoch": 0.67, "learning_rate": 4.323006310958118e-07, "logits/chosen": -2.9112117290496826, "logits/rejected": -2.872497320175171, "logps/chosen": -316.62298583984375, "logps/rejected": -268.509765625, "loss": 0.4789, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8553665280342102, "rewards/margins": 1.4781516790390015, "rewards/rejected": -2.3335182666778564, "step": 1290 }, { "epoch": 0.67, "learning_rate": 4.313444253203289e-07, "logits/chosen": -2.8557441234588623, "logits/rejected": -2.847517490386963, "logps/chosen": -256.849609375, "logps/rejected": -240.433837890625, "loss": 0.4922, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8805469274520874, "rewards/margins": 1.5468670129776, "rewards/rejected": -2.4274144172668457, "step": 1300 }, { "epoch": 0.67, "eval_logits/chosen": -2.8517472743988037, "eval_logits/rejected": -2.8187196254730225, "eval_logps/chosen": -280.490966796875, "eval_logps/rejected": -252.22872924804688, "eval_loss": 0.5181106925010681, "eval_rewards/accuracies": 0.7459999918937683, "eval_rewards/chosen": -0.9339839220046997, "eval_rewards/margins": 1.4404925107955933, "eval_rewards/rejected": -2.374476671218872, "eval_runtime": 296.964, "eval_samples_per_second": 6.735, "eval_steps_per_second": 0.421, "step": 1300 }, { "epoch": 0.68, "learning_rate": 4.3038821954484603e-07, "logits/chosen": -2.860063076019287, "logits/rejected": -2.794776439666748, "logps/chosen": -283.9671325683594, "logps/rejected": -255.06192016601562, "loss": 0.464, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1593186855316162, "rewards/margins": 1.200408697128296, "rewards/rejected": -2.359727382659912, "step": 1310 }, { "epoch": 0.68, "learning_rate": 4.2943201376936315e-07, "logits/chosen": -2.7973389625549316, "logits/rejected": -2.769406318664551, "logps/chosen": -282.5267028808594, "logps/rejected": -258.59527587890625, "loss": 0.5268, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1852004528045654, "rewards/margins": 1.5480226278305054, "rewards/rejected": -2.7332231998443604, "step": 1320 }, { "epoch": 0.69, "learning_rate": 4.2847580799388026e-07, "logits/chosen": -2.8179421424865723, "logits/rejected": -2.78226900100708, "logps/chosen": -297.841552734375, "logps/rejected": -285.6810302734375, "loss": 0.5583, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.291656255722046, "rewards/margins": 1.1764315366744995, "rewards/rejected": -2.468087911605835, "step": 1330 }, { "epoch": 0.69, "learning_rate": 4.275196022183974e-07, "logits/chosen": -2.7832727432250977, "logits/rejected": -2.721656322479248, "logps/chosen": -301.8523254394531, "logps/rejected": -235.78750610351562, "loss": 0.4971, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.1573271751403809, "rewards/margins": 1.297183632850647, "rewards/rejected": -2.4545111656188965, "step": 1340 }, { "epoch": 0.7, "learning_rate": 4.265633964429145e-07, "logits/chosen": -2.8424277305603027, "logits/rejected": -2.831519365310669, "logps/chosen": -269.0021667480469, "logps/rejected": -215.4640350341797, "loss": 0.5687, "rewards/accuracies": 0.6875, "rewards/chosen": -1.104570746421814, "rewards/margins": 0.9818031191825867, "rewards/rejected": -2.086374044418335, "step": 1350 }, { "epoch": 0.7, "learning_rate": 4.256071906674316e-07, "logits/chosen": -2.841625690460205, "logits/rejected": -2.8271470069885254, "logps/chosen": -314.56719970703125, "logps/rejected": -280.16204833984375, "loss": 0.5947, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9873099327087402, "rewards/margins": 1.2393622398376465, "rewards/rejected": -2.226672410964966, "step": 1360 }, { "epoch": 0.71, "learning_rate": 4.246509848919487e-07, "logits/chosen": -2.875816822052002, "logits/rejected": -2.875920057296753, "logps/chosen": -262.7657165527344, "logps/rejected": -266.3171691894531, "loss": 0.5325, "rewards/accuracies": 0.75, "rewards/chosen": -0.8712446093559265, "rewards/margins": 1.4807698726654053, "rewards/rejected": -2.3520145416259766, "step": 1370 }, { "epoch": 0.71, "learning_rate": 4.2369477911646584e-07, "logits/chosen": -2.901019811630249, "logits/rejected": -2.860865592956543, "logps/chosen": -275.1546325683594, "logps/rejected": -237.35043334960938, "loss": 0.5662, "rewards/accuracies": 0.75, "rewards/chosen": -1.190070390701294, "rewards/margins": 1.1778628826141357, "rewards/rejected": -2.3679332733154297, "step": 1380 }, { "epoch": 0.72, "learning_rate": 4.2273857334098296e-07, "logits/chosen": -2.8525962829589844, "logits/rejected": -2.7851364612579346, "logps/chosen": -280.11444091796875, "logps/rejected": -238.8421630859375, "loss": 0.55, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.947921633720398, "rewards/margins": 1.1795740127563477, "rewards/rejected": -2.127495527267456, "step": 1390 }, { "epoch": 0.72, "learning_rate": 4.2178236756550007e-07, "logits/chosen": -2.8216989040374756, "logits/rejected": -2.7960610389709473, "logps/chosen": -291.2049255371094, "logps/rejected": -238.7577362060547, "loss": 0.5515, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9337828755378723, "rewards/margins": 1.1081970930099487, "rewards/rejected": -2.041980266571045, "step": 1400 }, { "epoch": 0.72, "eval_logits/chosen": -2.870413303375244, "eval_logits/rejected": -2.8487894535064697, "eval_logps/chosen": -281.02386474609375, "eval_logps/rejected": -250.60337829589844, "eval_loss": 0.5081000924110413, "eval_rewards/accuracies": 0.7440000176429749, "eval_rewards/chosen": -0.987274169921875, "eval_rewards/margins": 1.2246668338775635, "eval_rewards/rejected": -2.2119410037994385, "eval_runtime": 297.404, "eval_samples_per_second": 6.725, "eval_steps_per_second": 0.42, "step": 1400 }, { "epoch": 0.73, "learning_rate": 4.208261617900172e-07, "logits/chosen": -2.8915770053863525, "logits/rejected": -2.8828485012054443, "logps/chosen": -284.6546325683594, "logps/rejected": -214.5458984375, "loss": 0.4857, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0735814571380615, "rewards/margins": 1.3267624378204346, "rewards/rejected": -2.400344133377075, "step": 1410 }, { "epoch": 0.73, "learning_rate": 4.198699560145343e-07, "logits/chosen": -2.7232089042663574, "logits/rejected": -2.735395908355713, "logps/chosen": -261.86456298828125, "logps/rejected": -242.8522186279297, "loss": 0.5901, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9799901247024536, "rewards/margins": 1.3254430294036865, "rewards/rejected": -2.3054332733154297, "step": 1420 }, { "epoch": 0.74, "learning_rate": 4.189137502390514e-07, "logits/chosen": -2.8386452198028564, "logits/rejected": -2.7790274620056152, "logps/chosen": -276.7398376464844, "logps/rejected": -271.81732177734375, "loss": 0.5506, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8171237707138062, "rewards/margins": 1.1944704055786133, "rewards/rejected": -2.01159405708313, "step": 1430 }, { "epoch": 0.74, "learning_rate": 4.179575444635686e-07, "logits/chosen": -2.8361704349517822, "logits/rejected": -2.781494617462158, "logps/chosen": -331.5342712402344, "logps/rejected": -278.0868225097656, "loss": 0.5754, "rewards/accuracies": 0.75, "rewards/chosen": -0.726187527179718, "rewards/margins": 1.3111015558242798, "rewards/rejected": -2.0372891426086426, "step": 1440 }, { "epoch": 0.75, "learning_rate": 4.170013386880857e-07, "logits/chosen": -2.763598918914795, "logits/rejected": -2.744124412536621, "logps/chosen": -274.3957824707031, "logps/rejected": -286.14544677734375, "loss": 0.4956, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.8051531910896301, "rewards/margins": 1.4850131273269653, "rewards/rejected": -2.2901663780212402, "step": 1450 }, { "epoch": 0.75, "learning_rate": 4.1604513291260277e-07, "logits/chosen": -2.7877087593078613, "logits/rejected": -2.7506332397460938, "logps/chosen": -263.64764404296875, "logps/rejected": -259.18243408203125, "loss": 0.5687, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7930214405059814, "rewards/margins": 1.4395453929901123, "rewards/rejected": -2.2325668334960938, "step": 1460 }, { "epoch": 0.76, "learning_rate": 4.150889271371199e-07, "logits/chosen": -2.735426425933838, "logits/rejected": -2.7236385345458984, "logps/chosen": -284.9507751464844, "logps/rejected": -236.884521484375, "loss": 0.5582, "rewards/accuracies": 0.75, "rewards/chosen": -0.9369648098945618, "rewards/margins": 1.3734034299850464, "rewards/rejected": -2.310368299484253, "step": 1470 }, { "epoch": 0.76, "learning_rate": 4.14132721361637e-07, "logits/chosen": -2.770085334777832, "logits/rejected": -2.688161611557007, "logps/chosen": -275.33111572265625, "logps/rejected": -206.7180938720703, "loss": 0.4587, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0923352241516113, "rewards/margins": 1.6801226139068604, "rewards/rejected": -2.7724575996398926, "step": 1480 }, { "epoch": 0.77, "learning_rate": 4.131765155861541e-07, "logits/chosen": -2.7762527465820312, "logits/rejected": -2.747938632965088, "logps/chosen": -239.770751953125, "logps/rejected": -215.8168182373047, "loss": 0.5012, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.7464852333068848, "rewards/margins": 1.6632707118988037, "rewards/rejected": -2.4097559452056885, "step": 1490 }, { "epoch": 0.77, "learning_rate": 4.1222030981067123e-07, "logits/chosen": -2.8420047760009766, "logits/rejected": -2.8117308616638184, "logps/chosen": -303.0367126464844, "logps/rejected": -269.77996826171875, "loss": 0.4349, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0034947395324707, "rewards/margins": 1.6452404260635376, "rewards/rejected": -2.6487350463867188, "step": 1500 }, { "epoch": 0.77, "eval_logits/chosen": -2.8601479530334473, "eval_logits/rejected": -2.840158700942993, "eval_logps/chosen": -280.1994323730469, "eval_logps/rejected": -252.74588012695312, "eval_loss": 0.4996170699596405, "eval_rewards/accuracies": 0.7580000162124634, "eval_rewards/chosen": -0.9048290252685547, "eval_rewards/margins": 1.5213594436645508, "eval_rewards/rejected": -2.4261887073516846, "eval_runtime": 297.5672, "eval_samples_per_second": 6.721, "eval_steps_per_second": 0.42, "step": 1500 }, { "epoch": 0.78, "learning_rate": 4.1126410403518835e-07, "logits/chosen": -2.8034896850585938, "logits/rejected": -2.779625415802002, "logps/chosen": -248.8442840576172, "logps/rejected": -269.21832275390625, "loss": 0.5365, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1126397848129272, "rewards/margins": 1.5354644060134888, "rewards/rejected": -2.648104190826416, "step": 1510 }, { "epoch": 0.78, "learning_rate": 4.1030789825970546e-07, "logits/chosen": -2.775416851043701, "logits/rejected": -2.771148443222046, "logps/chosen": -294.32366943359375, "logps/rejected": -273.7279052734375, "loss": 0.5477, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8498045206069946, "rewards/margins": 1.3190572261810303, "rewards/rejected": -2.1688618659973145, "step": 1520 }, { "epoch": 0.79, "learning_rate": 4.093516924842226e-07, "logits/chosen": -2.7791056632995605, "logits/rejected": -2.7741143703460693, "logps/chosen": -285.6884460449219, "logps/rejected": -259.9753723144531, "loss": 0.4439, "rewards/accuracies": 0.75, "rewards/chosen": -0.6743859648704529, "rewards/margins": 1.2439590692520142, "rewards/rejected": -1.9183450937271118, "step": 1530 }, { "epoch": 0.8, "learning_rate": 4.083954867087397e-07, "logits/chosen": -2.916766405105591, "logits/rejected": -2.8723397254943848, "logps/chosen": -262.5208740234375, "logps/rejected": -250.9338836669922, "loss": 0.4902, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7652831673622131, "rewards/margins": 1.256225347518921, "rewards/rejected": -2.0215084552764893, "step": 1540 }, { "epoch": 0.8, "learning_rate": 4.074392809332568e-07, "logits/chosen": -2.8186182975769043, "logits/rejected": -2.783643960952759, "logps/chosen": -318.96734619140625, "logps/rejected": -273.291259765625, "loss": 0.5233, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7978024482727051, "rewards/margins": 2.1130380630493164, "rewards/rejected": -2.9108405113220215, "step": 1550 }, { "epoch": 0.81, "learning_rate": 4.064830751577739e-07, "logits/chosen": -2.8384575843811035, "logits/rejected": -2.792706251144409, "logps/chosen": -263.0948791503906, "logps/rejected": -248.63986206054688, "loss": 0.474, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6642600297927856, "rewards/margins": 1.660636305809021, "rewards/rejected": -2.3248965740203857, "step": 1560 }, { "epoch": 0.81, "learning_rate": 4.0552686938229104e-07, "logits/chosen": -2.845766305923462, "logits/rejected": -2.8493919372558594, "logps/chosen": -261.2109375, "logps/rejected": -236.23056030273438, "loss": 0.4627, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5801820755004883, "rewards/margins": 1.506900429725647, "rewards/rejected": -2.087082624435425, "step": 1570 }, { "epoch": 0.82, "learning_rate": 4.045706636068082e-07, "logits/chosen": -2.8144640922546387, "logits/rejected": -2.752319097518921, "logps/chosen": -277.097412109375, "logps/rejected": -248.9197235107422, "loss": 0.4852, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9225671887397766, "rewards/margins": 1.5256555080413818, "rewards/rejected": -2.4482226371765137, "step": 1580 }, { "epoch": 0.82, "learning_rate": 4.036144578313253e-07, "logits/chosen": -2.780768871307373, "logits/rejected": -2.7448782920837402, "logps/chosen": -280.2907409667969, "logps/rejected": -259.012451171875, "loss": 0.58, "rewards/accuracies": 0.75, "rewards/chosen": -0.9570592641830444, "rewards/margins": 1.3608639240264893, "rewards/rejected": -2.3179233074188232, "step": 1590 }, { "epoch": 0.83, "learning_rate": 4.0265825205584244e-07, "logits/chosen": -2.801021099090576, "logits/rejected": -2.766148805618286, "logps/chosen": -294.86181640625, "logps/rejected": -266.83282470703125, "loss": 0.5446, "rewards/accuracies": 0.75, "rewards/chosen": -0.9832652807235718, "rewards/margins": 1.3677330017089844, "rewards/rejected": -2.3509984016418457, "step": 1600 }, { "epoch": 0.83, "eval_logits/chosen": -2.7852587699890137, "eval_logits/rejected": -2.7610068321228027, "eval_logps/chosen": -279.8681335449219, "eval_logps/rejected": -252.8737030029297, "eval_loss": 0.4926547408103943, "eval_rewards/accuracies": 0.765999972820282, "eval_rewards/chosen": -0.8716984987258911, "eval_rewards/margins": 1.5672730207443237, "eval_rewards/rejected": -2.438971519470215, "eval_runtime": 297.4562, "eval_samples_per_second": 6.724, "eval_steps_per_second": 0.42, "step": 1600 }, { "epoch": 0.83, "learning_rate": 4.0170204628035956e-07, "logits/chosen": -2.779371976852417, "logits/rejected": -2.779296398162842, "logps/chosen": -227.3587188720703, "logps/rejected": -220.8697052001953, "loss": 0.5078, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.7682415246963501, "rewards/margins": 1.6325304508209229, "rewards/rejected": -2.4007718563079834, "step": 1610 }, { "epoch": 0.84, "learning_rate": 4.007458405048766e-07, "logits/chosen": -2.79868745803833, "logits/rejected": -2.7551894187927246, "logps/chosen": -313.44866943359375, "logps/rejected": -284.7059631347656, "loss": 0.4781, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0337600708007812, "rewards/margins": 1.553781270980835, "rewards/rejected": -2.587541103363037, "step": 1620 }, { "epoch": 0.84, "learning_rate": 3.9978963472939373e-07, "logits/chosen": -2.781686782836914, "logits/rejected": -2.7553551197052, "logps/chosen": -282.82470703125, "logps/rejected": -247.119384765625, "loss": 0.4834, "rewards/accuracies": 0.75, "rewards/chosen": -0.8043079376220703, "rewards/margins": 1.690171480178833, "rewards/rejected": -2.4944794178009033, "step": 1630 }, { "epoch": 0.85, "learning_rate": 3.9883342895391085e-07, "logits/chosen": -2.7971036434173584, "logits/rejected": -2.7414002418518066, "logps/chosen": -324.1792907714844, "logps/rejected": -256.4706726074219, "loss": 0.4813, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9165623784065247, "rewards/margins": 1.7748088836669922, "rewards/rejected": -2.691371440887451, "step": 1640 }, { "epoch": 0.85, "learning_rate": 3.9787722317842796e-07, "logits/chosen": -2.839597225189209, "logits/rejected": -2.805170774459839, "logps/chosen": -296.72198486328125, "logps/rejected": -214.109619140625, "loss": 0.5182, "rewards/accuracies": 0.75, "rewards/chosen": -0.6080407500267029, "rewards/margins": 1.669585943222046, "rewards/rejected": -2.2776267528533936, "step": 1650 }, { "epoch": 0.86, "learning_rate": 3.969210174029451e-07, "logits/chosen": -2.8030974864959717, "logits/rejected": -2.746521472930908, "logps/chosen": -282.7402038574219, "logps/rejected": -277.02227783203125, "loss": 0.4973, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.145887851715088, "rewards/margins": 1.514013648033142, "rewards/rejected": -2.6599013805389404, "step": 1660 }, { "epoch": 0.86, "learning_rate": 3.959648116274622e-07, "logits/chosen": -2.822636365890503, "logits/rejected": -2.7770602703094482, "logps/chosen": -274.1295471191406, "logps/rejected": -237.7220458984375, "loss": 0.6338, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1833336353302002, "rewards/margins": 1.1599174737930298, "rewards/rejected": -2.3432514667510986, "step": 1670 }, { "epoch": 0.87, "learning_rate": 3.950086058519793e-07, "logits/chosen": -2.8634159564971924, "logits/rejected": -2.855304479598999, "logps/chosen": -250.9813232421875, "logps/rejected": -238.8641357421875, "loss": 0.5555, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0109765529632568, "rewards/margins": 1.2353746891021729, "rewards/rejected": -2.2463512420654297, "step": 1680 }, { "epoch": 0.87, "learning_rate": 3.9405240007649643e-07, "logits/chosen": -2.9426653385162354, "logits/rejected": -2.9147517681121826, "logps/chosen": -265.8846130371094, "logps/rejected": -255.80490112304688, "loss": 0.4857, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.008003830909729, "rewards/margins": 1.246358871459961, "rewards/rejected": -2.2543625831604004, "step": 1690 }, { "epoch": 0.88, "learning_rate": 3.9309619430101354e-07, "logits/chosen": -2.8152334690093994, "logits/rejected": -2.780714750289917, "logps/chosen": -261.5868225097656, "logps/rejected": -245.407470703125, "loss": 0.5242, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9333425760269165, "rewards/margins": 1.554662823677063, "rewards/rejected": -2.4880051612854004, "step": 1700 }, { "epoch": 0.88, "eval_logits/chosen": -2.8524723052978516, "eval_logits/rejected": -2.8269340991973877, "eval_logps/chosen": -278.135498046875, "eval_logps/rejected": -249.865478515625, "eval_loss": 0.48644253611564636, "eval_rewards/accuracies": 0.777999997138977, "eval_rewards/chosen": -0.6984347105026245, "eval_rewards/margins": 1.4397144317626953, "eval_rewards/rejected": -2.1381492614746094, "eval_runtime": 297.4083, "eval_samples_per_second": 6.725, "eval_steps_per_second": 0.42, "step": 1700 }, { "epoch": 0.88, "learning_rate": 3.9213998852553066e-07, "logits/chosen": -2.858901262283325, "logits/rejected": -2.8314156532287598, "logps/chosen": -326.45538330078125, "logps/rejected": -269.11468505859375, "loss": 0.5017, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.9739869832992554, "rewards/margins": 1.2920843362808228, "rewards/rejected": -2.2660715579986572, "step": 1710 }, { "epoch": 0.89, "learning_rate": 3.9118378275004783e-07, "logits/chosen": -2.879965305328369, "logits/rejected": -2.8734488487243652, "logps/chosen": -274.63604736328125, "logps/rejected": -316.09521484375, "loss": 0.5748, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.099215030670166, "rewards/margins": 1.0683257579803467, "rewards/rejected": -2.167541027069092, "step": 1720 }, { "epoch": 0.89, "learning_rate": 3.9022757697456494e-07, "logits/chosen": -2.7558462619781494, "logits/rejected": -2.771763324737549, "logps/chosen": -331.04693603515625, "logps/rejected": -281.0125427246094, "loss": 0.4806, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7285557985305786, "rewards/margins": 1.2618799209594727, "rewards/rejected": -1.9904358386993408, "step": 1730 }, { "epoch": 0.9, "learning_rate": 3.8927137119908206e-07, "logits/chosen": -2.8156943321228027, "logits/rejected": -2.782925844192505, "logps/chosen": -299.8418884277344, "logps/rejected": -229.73574829101562, "loss": 0.5426, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9351360201835632, "rewards/margins": 1.2108604907989502, "rewards/rejected": -2.14599609375, "step": 1740 }, { "epoch": 0.9, "learning_rate": 3.883151654235992e-07, "logits/chosen": -2.8140454292297363, "logits/rejected": -2.774932861328125, "logps/chosen": -295.07867431640625, "logps/rejected": -263.8828125, "loss": 0.4908, "rewards/accuracies": 0.8125, "rewards/chosen": -0.813959002494812, "rewards/margins": 1.7653522491455078, "rewards/rejected": -2.579310894012451, "step": 1750 }, { "epoch": 0.91, "learning_rate": 3.873589596481163e-07, "logits/chosen": -2.8576390743255615, "logits/rejected": -2.8543241024017334, "logps/chosen": -293.5692443847656, "logps/rejected": -266.193359375, "loss": 0.5689, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.2586266994476318, "rewards/margins": 1.2890112400054932, "rewards/rejected": -2.547637939453125, "step": 1760 }, { "epoch": 0.91, "learning_rate": 3.864027538726334e-07, "logits/chosen": -2.7836225032806396, "logits/rejected": -2.7786474227905273, "logps/chosen": -277.83740234375, "logps/rejected": -254.81494140625, "loss": 0.5213, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8805364370346069, "rewards/margins": 1.7172577381134033, "rewards/rejected": -2.5977942943573, "step": 1770 }, { "epoch": 0.92, "learning_rate": 3.8544654809715047e-07, "logits/chosen": -2.7854583263397217, "logits/rejected": -2.757338523864746, "logps/chosen": -291.8116760253906, "logps/rejected": -260.56121826171875, "loss": 0.531, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1720925569534302, "rewards/margins": 1.3734238147735596, "rewards/rejected": -2.5455162525177, "step": 1780 }, { "epoch": 0.92, "learning_rate": 3.844903423216676e-07, "logits/chosen": -2.767277240753174, "logits/rejected": -2.7258496284484863, "logps/chosen": -269.7626647949219, "logps/rejected": -236.54165649414062, "loss": 0.5328, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7634437680244446, "rewards/margins": 1.3117390871047974, "rewards/rejected": -2.0751829147338867, "step": 1790 }, { "epoch": 0.93, "learning_rate": 3.835341365461847e-07, "logits/chosen": -2.716766834259033, "logits/rejected": -2.6774024963378906, "logps/chosen": -272.6806640625, "logps/rejected": -222.8508758544922, "loss": 0.5266, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.6215282678604126, "rewards/margins": 1.5075757503509521, "rewards/rejected": -2.129103899002075, "step": 1800 }, { "epoch": 0.93, "eval_logits/chosen": -2.7715063095092773, "eval_logits/rejected": -2.7381045818328857, "eval_logps/chosen": -276.5621032714844, "eval_logps/rejected": -247.96278381347656, "eval_loss": 0.501970112323761, "eval_rewards/accuracies": 0.7760000228881836, "eval_rewards/chosen": -0.5410944819450378, "eval_rewards/margins": 1.406785011291504, "eval_rewards/rejected": -1.9478795528411865, "eval_runtime": 297.6711, "eval_samples_per_second": 6.719, "eval_steps_per_second": 0.42, "step": 1800 }, { "epoch": 0.93, "learning_rate": 3.825779307707018e-07, "logits/chosen": -2.7607715129852295, "logits/rejected": -2.7069694995880127, "logps/chosen": -208.4616241455078, "logps/rejected": -229.7495574951172, "loss": 0.5221, "rewards/accuracies": 0.75, "rewards/chosen": -0.6570498943328857, "rewards/margins": 1.241496205329895, "rewards/rejected": -1.8985458612442017, "step": 1810 }, { "epoch": 0.94, "learning_rate": 3.8162172499521893e-07, "logits/chosen": -2.776834726333618, "logits/rejected": -2.709933042526245, "logps/chosen": -266.1866455078125, "logps/rejected": -218.61087036132812, "loss": 0.5243, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8236967921257019, "rewards/margins": 1.372011423110962, "rewards/rejected": -2.1957080364227295, "step": 1820 }, { "epoch": 0.94, "learning_rate": 3.8066551921973605e-07, "logits/chosen": -2.7101306915283203, "logits/rejected": -2.7025110721588135, "logps/chosen": -261.57611083984375, "logps/rejected": -236.56466674804688, "loss": 0.4898, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6180542707443237, "rewards/margins": 1.3089244365692139, "rewards/rejected": -1.9269788265228271, "step": 1830 }, { "epoch": 0.95, "learning_rate": 3.7970931344425316e-07, "logits/chosen": -2.691938877105713, "logits/rejected": -2.637636661529541, "logps/chosen": -298.92431640625, "logps/rejected": -240.8381805419922, "loss": 0.4783, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7817220091819763, "rewards/margins": 1.1453922986984253, "rewards/rejected": -1.9271142482757568, "step": 1840 }, { "epoch": 0.96, "learning_rate": 3.787531076687703e-07, "logits/chosen": -2.5811028480529785, "logits/rejected": -2.577949285507202, "logps/chosen": -250.656494140625, "logps/rejected": -207.6524658203125, "loss": 0.5401, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.8317922353744507, "rewards/margins": 1.5317630767822266, "rewards/rejected": -2.363555669784546, "step": 1850 }, { "epoch": 0.96, "learning_rate": 3.7779690189328745e-07, "logits/chosen": -2.7253241539001465, "logits/rejected": -2.7099854946136475, "logps/chosen": -261.5533142089844, "logps/rejected": -238.36221313476562, "loss": 0.4759, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.91804438829422, "rewards/margins": 1.307298183441162, "rewards/rejected": -2.2253427505493164, "step": 1860 }, { "epoch": 0.97, "learning_rate": 3.7684069611780456e-07, "logits/chosen": -2.7499070167541504, "logits/rejected": -2.6969666481018066, "logps/chosen": -284.4208068847656, "logps/rejected": -250.7512664794922, "loss": 0.503, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5610580444335938, "rewards/margins": 1.5138256549835205, "rewards/rejected": -2.0748836994171143, "step": 1870 }, { "epoch": 0.97, "learning_rate": 3.758844903423217e-07, "logits/chosen": -2.7842278480529785, "logits/rejected": -2.731440305709839, "logps/chosen": -256.0408630371094, "logps/rejected": -248.4185791015625, "loss": 0.5077, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7275162935256958, "rewards/margins": 1.5901552438735962, "rewards/rejected": -2.317671060562134, "step": 1880 }, { "epoch": 0.98, "learning_rate": 3.749282845668388e-07, "logits/chosen": -2.7996678352355957, "logits/rejected": -2.7667319774627686, "logps/chosen": -303.9225769042969, "logps/rejected": -270.538330078125, "loss": 0.506, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9399793744087219, "rewards/margins": 1.2743322849273682, "rewards/rejected": -2.2143118381500244, "step": 1890 }, { "epoch": 0.98, "learning_rate": 3.739720787913559e-07, "logits/chosen": -2.7126071453094482, "logits/rejected": -2.6714975833892822, "logps/chosen": -266.16204833984375, "logps/rejected": -232.2299041748047, "loss": 0.498, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9018408060073853, "rewards/margins": 1.3833215236663818, "rewards/rejected": -2.2851624488830566, "step": 1900 }, { "epoch": 0.98, "eval_logits/chosen": -2.766378879547119, "eval_logits/rejected": -2.7298452854156494, "eval_logps/chosen": -278.0451965332031, "eval_logps/rejected": -248.81500244140625, "eval_loss": 0.5085920691490173, "eval_rewards/accuracies": 0.7639999985694885, "eval_rewards/chosen": -0.6894029378890991, "eval_rewards/margins": 1.3436976671218872, "eval_rewards/rejected": -2.0331006050109863, "eval_runtime": 299.0083, "eval_samples_per_second": 6.689, "eval_steps_per_second": 0.418, "step": 1900 }, { "epoch": 0.99, "learning_rate": 3.73015873015873e-07, "logits/chosen": -2.7295749187469482, "logits/rejected": -2.746682643890381, "logps/chosen": -278.3253479003906, "logps/rejected": -262.86627197265625, "loss": 0.5109, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7267035245895386, "rewards/margins": 0.9839091300964355, "rewards/rejected": -1.7106126546859741, "step": 1910 }, { "epoch": 0.99, "learning_rate": 3.7205966724039014e-07, "logits/chosen": -2.6448540687561035, "logits/rejected": -2.645981550216675, "logps/chosen": -288.9250183105469, "logps/rejected": -249.8538055419922, "loss": 0.5003, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9050509333610535, "rewards/margins": 1.1149990558624268, "rewards/rejected": -2.020050048828125, "step": 1920 }, { "epoch": 1.0, "learning_rate": 3.711034614649072e-07, "logits/chosen": -2.7751498222351074, "logits/rejected": -2.698579788208008, "logps/chosen": -307.6148376464844, "logps/rejected": -244.74441528320312, "loss": 0.4483, "rewards/accuracies": 0.8125, "rewards/chosen": -0.7877746224403381, "rewards/margins": 1.6704633235931396, "rewards/rejected": -2.458237886428833, "step": 1930 }, { "epoch": 1.0, "learning_rate": 3.701472556894243e-07, "logits/chosen": -2.634181261062622, "logits/rejected": -2.642824172973633, "logps/chosen": -251.3314971923828, "logps/rejected": -285.38165283203125, "loss": 0.3676, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.13440167903900146, "rewards/margins": 2.4729697704315186, "rewards/rejected": -2.6073713302612305, "step": 1940 }, { "epoch": 1.01, "learning_rate": 3.6919104991394144e-07, "logits/chosen": -2.7603354454040527, "logits/rejected": -2.731630325317383, "logps/chosen": -261.4866027832031, "logps/rejected": -264.4168701171875, "loss": 0.0836, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.2352325916290283, "rewards/margins": 5.749847412109375, "rewards/rejected": -4.514614105224609, "step": 1950 }, { "epoch": 1.01, "learning_rate": 3.6823484413845855e-07, "logits/chosen": -2.6434993743896484, "logits/rejected": -2.6232972145080566, "logps/chosen": -262.1065979003906, "logps/rejected": -287.74688720703125, "loss": 0.0745, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.580449640750885, "rewards/margins": 5.3898138999938965, "rewards/rejected": -4.809364318847656, "step": 1960 }, { "epoch": 1.02, "learning_rate": 3.6727863836297567e-07, "logits/chosen": -2.6330463886260986, "logits/rejected": -2.614422559738159, "logps/chosen": -244.67660522460938, "logps/rejected": -267.0379333496094, "loss": 0.0937, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 1.0386600494384766, "rewards/margins": 6.133784294128418, "rewards/rejected": -5.095124244689941, "step": 1970 }, { "epoch": 1.02, "learning_rate": 3.663224325874928e-07, "logits/chosen": -2.607079029083252, "logits/rejected": -2.5714335441589355, "logps/chosen": -243.2287139892578, "logps/rejected": -281.1238708496094, "loss": 0.1006, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.41417521238327026, "rewards/margins": 5.176269054412842, "rewards/rejected": -4.7620930671691895, "step": 1980 }, { "epoch": 1.03, "learning_rate": 3.653662268120099e-07, "logits/chosen": -2.5927295684814453, "logits/rejected": -2.5773234367370605, "logps/chosen": -241.07821655273438, "logps/rejected": -316.1360778808594, "loss": 0.0684, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 1.1088030338287354, "rewards/margins": 5.955197334289551, "rewards/rejected": -4.846394062042236, "step": 1990 }, { "epoch": 1.03, "learning_rate": 3.6441002103652707e-07, "logits/chosen": -2.6101760864257812, "logits/rejected": -2.565425157546997, "logps/chosen": -264.8179626464844, "logps/rejected": -296.73468017578125, "loss": 0.0664, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.5726389288902283, "rewards/margins": 6.380110263824463, "rewards/rejected": -5.807471752166748, "step": 2000 }, { "epoch": 1.03, "eval_logits/chosen": -2.6604604721069336, "eval_logits/rejected": -2.613698720932007, "eval_logps/chosen": -282.8529968261719, "eval_logps/rejected": -260.2071533203125, "eval_loss": 0.513712465763092, "eval_rewards/accuracies": 0.7620000243186951, "eval_rewards/chosen": -1.1701849699020386, "eval_rewards/margins": 2.0021331310272217, "eval_rewards/rejected": -3.17231822013855, "eval_runtime": 296.246, "eval_samples_per_second": 6.751, "eval_steps_per_second": 0.422, "step": 2000 }, { "epoch": 1.04, "learning_rate": 3.634538152610442e-07, "logits/chosen": -2.6072421073913574, "logits/rejected": -2.6007983684539795, "logps/chosen": -271.474853515625, "logps/rejected": -304.2977294921875, "loss": 0.0641, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6836616396903992, "rewards/margins": 6.16135311126709, "rewards/rejected": -5.477691173553467, "step": 2010 }, { "epoch": 1.04, "learning_rate": 3.624976094855613e-07, "logits/chosen": -2.552222490310669, "logits/rejected": -2.539797067642212, "logps/chosen": -255.1763916015625, "logps/rejected": -268.0435485839844, "loss": 0.0654, "rewards/accuracies": 1.0, "rewards/chosen": 0.6575755476951599, "rewards/margins": 5.849895477294922, "rewards/rejected": -5.192319393157959, "step": 2020 }, { "epoch": 1.05, "learning_rate": 3.615414037100784e-07, "logits/chosen": -2.5665032863616943, "logits/rejected": -2.5033249855041504, "logps/chosen": -281.29022216796875, "logps/rejected": -272.2781677246094, "loss": 0.0794, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.328784704208374, "rewards/margins": 5.696002006530762, "rewards/rejected": -5.367217063903809, "step": 2030 }, { "epoch": 1.05, "learning_rate": 3.6058519793459553e-07, "logits/chosen": -2.5166163444519043, "logits/rejected": -2.5345325469970703, "logps/chosen": -240.7671661376953, "logps/rejected": -250.6477508544922, "loss": 0.0894, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.7341517210006714, "rewards/margins": 5.550318241119385, "rewards/rejected": -4.816165924072266, "step": 2040 }, { "epoch": 1.06, "learning_rate": 3.5962899215911265e-07, "logits/chosen": -2.593902587890625, "logits/rejected": -2.5292165279388428, "logps/chosen": -237.86239624023438, "logps/rejected": -280.6207580566406, "loss": 0.0518, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.4292508065700531, "rewards/margins": 5.5217485427856445, "rewards/rejected": -5.092497825622559, "step": 2050 }, { "epoch": 1.06, "learning_rate": 3.5867278638362976e-07, "logits/chosen": -2.593247175216675, "logits/rejected": -2.5439682006835938, "logps/chosen": -305.3078918457031, "logps/rejected": -318.10394287109375, "loss": 0.0645, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.7294039130210876, "rewards/margins": 6.271230220794678, "rewards/rejected": -5.5418267250061035, "step": 2060 }, { "epoch": 1.07, "learning_rate": 3.577165806081469e-07, "logits/chosen": -2.617610454559326, "logits/rejected": -2.591120481491089, "logps/chosen": -252.36483764648438, "logps/rejected": -263.81060791015625, "loss": 0.1051, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.2951541841030121, "rewards/margins": 5.425782680511475, "rewards/rejected": -5.13062858581543, "step": 2070 }, { "epoch": 1.07, "learning_rate": 3.56760374832664e-07, "logits/chosen": -2.684091567993164, "logits/rejected": -2.570307493209839, "logps/chosen": -280.3758544921875, "logps/rejected": -314.53021240234375, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": 1.4124327898025513, "rewards/margins": 7.719065189361572, "rewards/rejected": -6.306632041931152, "step": 2080 }, { "epoch": 1.08, "learning_rate": 3.5580416905718106e-07, "logits/chosen": -2.590378522872925, "logits/rejected": -2.546607732772827, "logps/chosen": -300.8092346191406, "logps/rejected": -268.47479248046875, "loss": 0.1004, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4799577295780182, "rewards/margins": 5.814119338989258, "rewards/rejected": -5.33416223526001, "step": 2090 }, { "epoch": 1.08, "learning_rate": 3.5484796328169817e-07, "logits/chosen": -2.576677083969116, "logits/rejected": -2.5376296043395996, "logps/chosen": -259.6259765625, "logps/rejected": -255.3178253173828, "loss": 0.0698, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.22541138529777527, "rewards/margins": 5.577375411987305, "rewards/rejected": -5.351963996887207, "step": 2100 }, { "epoch": 1.08, "eval_logits/chosen": -2.669224739074707, "eval_logits/rejected": -2.6219115257263184, "eval_logps/chosen": -284.7966003417969, "eval_logps/rejected": -264.1526794433594, "eval_loss": 0.5326563715934753, "eval_rewards/accuracies": 0.7680000066757202, "eval_rewards/chosen": -1.3645479679107666, "eval_rewards/margins": 2.2023234367370605, "eval_rewards/rejected": -3.566871166229248, "eval_runtime": 298.4532, "eval_samples_per_second": 6.701, "eval_steps_per_second": 0.419, "step": 2100 }, { "epoch": 1.09, "learning_rate": 3.538917575062153e-07, "logits/chosen": -2.5463109016418457, "logits/rejected": -2.5333731174468994, "logps/chosen": -258.64874267578125, "logps/rejected": -310.0502014160156, "loss": 0.0481, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.3605276346206665, "rewards/margins": 6.687548637390137, "rewards/rejected": -6.32702112197876, "step": 2110 }, { "epoch": 1.09, "learning_rate": 3.529355517307324e-07, "logits/chosen": -2.5875303745269775, "logits/rejected": -2.510730266571045, "logps/chosen": -245.9647979736328, "logps/rejected": -296.9500732421875, "loss": 0.0642, "rewards/accuracies": 1.0, "rewards/chosen": 0.650764524936676, "rewards/margins": 7.069943904876709, "rewards/rejected": -6.419180393218994, "step": 2120 }, { "epoch": 1.1, "learning_rate": 3.519793459552495e-07, "logits/chosen": -2.5960545539855957, "logits/rejected": -2.567936658859253, "logps/chosen": -264.7110900878906, "logps/rejected": -300.5650939941406, "loss": 0.111, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.45437049865722656, "rewards/margins": 5.234097480773926, "rewards/rejected": -5.688467979431152, "step": 2130 }, { "epoch": 1.1, "learning_rate": 3.510231401797667e-07, "logits/chosen": -2.5926661491394043, "logits/rejected": -2.551384925842285, "logps/chosen": -302.0068359375, "logps/rejected": -296.42669677734375, "loss": 0.0941, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4076964855194092, "rewards/margins": 5.991205215454102, "rewards/rejected": -6.39890193939209, "step": 2140 }, { "epoch": 1.11, "learning_rate": 3.500669344042838e-07, "logits/chosen": -2.660684585571289, "logits/rejected": -2.563539505004883, "logps/chosen": -253.825439453125, "logps/rejected": -288.27764892578125, "loss": 0.0651, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.30672699213027954, "rewards/margins": 5.953993797302246, "rewards/rejected": -6.260720729827881, "step": 2150 }, { "epoch": 1.12, "learning_rate": 3.491107286288009e-07, "logits/chosen": -2.648838758468628, "logits/rejected": -2.5848755836486816, "logps/chosen": -266.71893310546875, "logps/rejected": -304.06243896484375, "loss": 0.0881, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.042765144258737564, "rewards/margins": 6.342136859893799, "rewards/rejected": -6.384902000427246, "step": 2160 }, { "epoch": 1.12, "learning_rate": 3.4815452285331803e-07, "logits/chosen": -2.5549755096435547, "logits/rejected": -2.5574491024017334, "logps/chosen": -282.77789306640625, "logps/rejected": -277.75860595703125, "loss": 0.0885, "rewards/accuracies": 0.9375, "rewards/chosen": -0.24789047241210938, "rewards/margins": 5.591107368469238, "rewards/rejected": -5.838997840881348, "step": 2170 }, { "epoch": 1.13, "learning_rate": 3.4719831707783515e-07, "logits/chosen": -2.5701591968536377, "logits/rejected": -2.532634735107422, "logps/chosen": -297.8733825683594, "logps/rejected": -325.23663330078125, "loss": 0.0784, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.12840011715888977, "rewards/margins": 7.079614162445068, "rewards/rejected": -7.208014488220215, "step": 2180 }, { "epoch": 1.13, "learning_rate": 3.4624211130235227e-07, "logits/chosen": -2.673182964324951, "logits/rejected": -2.5937423706054688, "logps/chosen": -249.6380615234375, "logps/rejected": -269.7789306640625, "loss": 0.0777, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5297588109970093, "rewards/margins": 6.0325798988342285, "rewards/rejected": -6.562338352203369, "step": 2190 }, { "epoch": 1.14, "learning_rate": 3.452859055268694e-07, "logits/chosen": -2.686246156692505, "logits/rejected": -2.6082072257995605, "logps/chosen": -254.46072387695312, "logps/rejected": -270.94268798828125, "loss": 0.0715, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.22074970602989197, "rewards/margins": 6.174362659454346, "rewards/rejected": -6.39511251449585, "step": 2200 }, { "epoch": 1.14, "eval_logits/chosen": -2.739684820175171, "eval_logits/rejected": -2.694882392883301, "eval_logps/chosen": -291.6701354980469, "eval_logps/rejected": -270.4673156738281, "eval_loss": 0.5423225164413452, "eval_rewards/accuracies": 0.7620000243186951, "eval_rewards/chosen": -2.0518994331359863, "eval_rewards/margins": 2.146435022354126, "eval_rewards/rejected": -4.198334693908691, "eval_runtime": 299.094, "eval_samples_per_second": 6.687, "eval_steps_per_second": 0.418, "step": 2200 }, { "epoch": 1.14, "learning_rate": 3.443296997513865e-07, "logits/chosen": -2.7505545616149902, "logits/rejected": -2.7318546772003174, "logps/chosen": -296.1077880859375, "logps/rejected": -329.1132507324219, "loss": 0.0644, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.44653525948524475, "rewards/margins": 6.901062965393066, "rewards/rejected": -6.454527378082275, "step": 2210 }, { "epoch": 1.15, "learning_rate": 3.433734939759036e-07, "logits/chosen": -2.696167469024658, "logits/rejected": -2.6370933055877686, "logps/chosen": -286.5255432128906, "logps/rejected": -339.99163818359375, "loss": 0.1059, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.624750554561615, "rewards/margins": 8.09054946899414, "rewards/rejected": -7.465798854827881, "step": 2220 }, { "epoch": 1.15, "learning_rate": 3.4241728820042073e-07, "logits/chosen": -2.603076696395874, "logits/rejected": -2.5586435794830322, "logps/chosen": -243.6158905029297, "logps/rejected": -307.83331298828125, "loss": 0.083, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.5600066781044006, "rewards/margins": 5.806141376495361, "rewards/rejected": -6.366147518157959, "step": 2230 }, { "epoch": 1.16, "learning_rate": 3.4146108242493784e-07, "logits/chosen": -2.673128128051758, "logits/rejected": -2.6862502098083496, "logps/chosen": -236.10879516601562, "logps/rejected": -281.21832275390625, "loss": 0.0947, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.06583935022354126, "rewards/margins": 5.878711700439453, "rewards/rejected": -5.812871932983398, "step": 2240 }, { "epoch": 1.16, "learning_rate": 3.405048766494549e-07, "logits/chosen": -2.659985065460205, "logits/rejected": -2.6177918910980225, "logps/chosen": -289.13067626953125, "logps/rejected": -277.3434143066406, "loss": 0.1001, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.20128068327903748, "rewards/margins": 6.173056602478027, "rewards/rejected": -5.971776008605957, "step": 2250 }, { "epoch": 1.17, "learning_rate": 3.39548670873972e-07, "logits/chosen": -2.5996155738830566, "logits/rejected": -2.5736541748046875, "logps/chosen": -308.55535888671875, "logps/rejected": -343.0357360839844, "loss": 0.1096, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.6509183049201965, "rewards/margins": 7.569863796234131, "rewards/rejected": -6.9189453125, "step": 2260 }, { "epoch": 1.17, "learning_rate": 3.3859246509848914e-07, "logits/chosen": -2.6264021396636963, "logits/rejected": -2.614025592803955, "logps/chosen": -267.51007080078125, "logps/rejected": -306.4515380859375, "loss": 0.0809, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.013781326822936535, "rewards/margins": 6.145535945892334, "rewards/rejected": -6.1593170166015625, "step": 2270 }, { "epoch": 1.18, "learning_rate": 3.376362593230063e-07, "logits/chosen": -2.6009116172790527, "logits/rejected": -2.6116909980773926, "logps/chosen": -251.38931274414062, "logps/rejected": -315.5372009277344, "loss": 0.0738, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.17372338473796844, "rewards/margins": 6.592174530029297, "rewards/rejected": -6.765897274017334, "step": 2280 }, { "epoch": 1.18, "learning_rate": 3.366800535475234e-07, "logits/chosen": -2.62135648727417, "logits/rejected": -2.5482916831970215, "logps/chosen": -286.6933288574219, "logps/rejected": -283.2353515625, "loss": 0.0764, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5743136405944824, "rewards/margins": 7.015399932861328, "rewards/rejected": -6.4410858154296875, "step": 2290 }, { "epoch": 1.19, "learning_rate": 3.3572384777204054e-07, "logits/chosen": -2.680253028869629, "logits/rejected": -2.6442158222198486, "logps/chosen": -292.6054992675781, "logps/rejected": -284.4306640625, "loss": 0.0548, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.3452996611595154, "rewards/margins": 6.642431735992432, "rewards/rejected": -6.2971320152282715, "step": 2300 }, { "epoch": 1.19, "eval_logits/chosen": -2.642500877380371, "eval_logits/rejected": -2.5995521545410156, "eval_logps/chosen": -288.6898498535156, "eval_logps/rejected": -269.0300598144531, "eval_loss": 0.5458693504333496, "eval_rewards/accuracies": 0.7699999809265137, "eval_rewards/chosen": -1.7538715600967407, "eval_rewards/margins": 2.3007359504699707, "eval_rewards/rejected": -4.05460786819458, "eval_runtime": 297.5625, "eval_samples_per_second": 6.721, "eval_steps_per_second": 0.42, "step": 2300 }, { "epoch": 1.19, "learning_rate": 3.3476764199655765e-07, "logits/chosen": -2.574187994003296, "logits/rejected": -2.549806594848633, "logps/chosen": -210.81497192382812, "logps/rejected": -279.15716552734375, "loss": 0.0779, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1547781527042389, "rewards/margins": 6.8966169357299805, "rewards/rejected": -6.741837978363037, "step": 2310 }, { "epoch": 1.2, "learning_rate": 3.3381143622107477e-07, "logits/chosen": -2.5899195671081543, "logits/rejected": -2.5857739448547363, "logps/chosen": -304.3234558105469, "logps/rejected": -318.6616516113281, "loss": 0.0478, "rewards/accuracies": 1.0, "rewards/chosen": 0.32505854964256287, "rewards/margins": 7.544722080230713, "rewards/rejected": -7.219663143157959, "step": 2320 }, { "epoch": 1.2, "learning_rate": 3.328552304455919e-07, "logits/chosen": -2.6164278984069824, "logits/rejected": -2.6033473014831543, "logps/chosen": -260.3442687988281, "logps/rejected": -289.3071594238281, "loss": 0.0729, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.08277694880962372, "rewards/margins": 6.275121688842773, "rewards/rejected": -6.357898235321045, "step": 2330 }, { "epoch": 1.21, "learning_rate": 3.31899024670109e-07, "logits/chosen": -2.6337881088256836, "logits/rejected": -2.551001787185669, "logps/chosen": -280.34246826171875, "logps/rejected": -293.72711181640625, "loss": 0.0607, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.17926494777202606, "rewards/margins": 6.4748077392578125, "rewards/rejected": -6.6540727615356445, "step": 2340 }, { "epoch": 1.21, "learning_rate": 3.309428188946261e-07, "logits/chosen": -2.562415599822998, "logits/rejected": -2.542945384979248, "logps/chosen": -250.19735717773438, "logps/rejected": -277.0072326660156, "loss": 0.0894, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.4253063201904297, "rewards/margins": 5.756030082702637, "rewards/rejected": -6.181336879730225, "step": 2350 }, { "epoch": 1.22, "learning_rate": 3.2998661311914323e-07, "logits/chosen": -2.665956497192383, "logits/rejected": -2.605408191680908, "logps/chosen": -261.0832214355469, "logps/rejected": -300.29302978515625, "loss": 0.0591, "rewards/accuracies": 1.0, "rewards/chosen": 0.27276185154914856, "rewards/margins": 6.997501373291016, "rewards/rejected": -6.724739074707031, "step": 2360 }, { "epoch": 1.22, "learning_rate": 3.2903040734366035e-07, "logits/chosen": -2.6756982803344727, "logits/rejected": -2.642782211303711, "logps/chosen": -248.23556518554688, "logps/rejected": -303.837646484375, "loss": 0.0849, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.3109845519065857, "rewards/margins": 6.526381015777588, "rewards/rejected": -6.215396404266357, "step": 2370 }, { "epoch": 1.23, "learning_rate": 3.2807420156817746e-07, "logits/chosen": -2.573936939239502, "logits/rejected": -2.576326847076416, "logps/chosen": -243.3905792236328, "logps/rejected": -271.8777160644531, "loss": 0.0749, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.18687370419502258, "rewards/margins": 5.986743927001953, "rewards/rejected": -5.799870014190674, "step": 2380 }, { "epoch": 1.23, "learning_rate": 3.271179957926946e-07, "logits/chosen": -2.662365198135376, "logits/rejected": -2.6264593601226807, "logps/chosen": -281.0094909667969, "logps/rejected": -295.4397888183594, "loss": 0.0852, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.3190084993839264, "rewards/margins": 6.439042568206787, "rewards/rejected": -6.758051872253418, "step": 2390 }, { "epoch": 1.24, "learning_rate": 3.261617900172117e-07, "logits/chosen": -2.617631435394287, "logits/rejected": -2.6006240844726562, "logps/chosen": -238.6389923095703, "logps/rejected": -291.9773864746094, "loss": 0.0897, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.4103802740573883, "rewards/margins": 6.404238700866699, "rewards/rejected": -5.99385929107666, "step": 2400 }, { "epoch": 1.24, "eval_logits/chosen": -2.6870288848876953, "eval_logits/rejected": -2.651212692260742, "eval_logps/chosen": -287.7002258300781, "eval_logps/rejected": -265.7117004394531, "eval_loss": 0.5316546559333801, "eval_rewards/accuracies": 0.7639999985694885, "eval_rewards/chosen": -1.6549092531204224, "eval_rewards/margins": 2.067864179611206, "eval_rewards/rejected": -3.722773551940918, "eval_runtime": 299.878, "eval_samples_per_second": 6.669, "eval_steps_per_second": 0.417, "step": 2400 }, { "epoch": 1.24, "learning_rate": 3.2520558424172876e-07, "logits/chosen": -2.6913504600524902, "logits/rejected": -2.6211981773376465, "logps/chosen": -267.68817138671875, "logps/rejected": -276.04193115234375, "loss": 0.0703, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.30733993649482727, "rewards/margins": 6.186968803405762, "rewards/rejected": -5.879629611968994, "step": 2410 }, { "epoch": 1.25, "learning_rate": 3.242493784662459e-07, "logits/chosen": -2.6455018520355225, "logits/rejected": -2.625913381576538, "logps/chosen": -256.44488525390625, "logps/rejected": -287.00726318359375, "loss": 0.0744, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.23726698756217957, "rewards/margins": 6.133591651916504, "rewards/rejected": -6.370858192443848, "step": 2420 }, { "epoch": 1.25, "learning_rate": 3.2329317269076304e-07, "logits/chosen": -2.65103816986084, "logits/rejected": -2.6203525066375732, "logps/chosen": -281.08306884765625, "logps/rejected": -334.32568359375, "loss": 0.0902, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.14011399447917938, "rewards/margins": 7.048730373382568, "rewards/rejected": -6.908616542816162, "step": 2430 }, { "epoch": 1.26, "learning_rate": 3.2233696691528016e-07, "logits/chosen": -2.5833468437194824, "logits/rejected": -2.5940117835998535, "logps/chosen": -253.43359375, "logps/rejected": -312.6004638671875, "loss": 0.0887, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.03585803508758545, "rewards/margins": 6.511855125427246, "rewards/rejected": -6.475996971130371, "step": 2440 }, { "epoch": 1.26, "learning_rate": 3.2138076113979727e-07, "logits/chosen": -2.586594581604004, "logits/rejected": -2.5869569778442383, "logps/chosen": -274.05706787109375, "logps/rejected": -318.03045654296875, "loss": 0.096, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.25964584946632385, "rewards/margins": 6.5868659019470215, "rewards/rejected": -6.8465118408203125, "step": 2450 }, { "epoch": 1.27, "learning_rate": 3.204245553643144e-07, "logits/chosen": -2.582857608795166, "logits/rejected": -2.5616345405578613, "logps/chosen": -307.5089111328125, "logps/rejected": -302.73980712890625, "loss": 0.0696, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.01987607404589653, "rewards/margins": 6.0425639152526855, "rewards/rejected": -6.022687911987305, "step": 2460 }, { "epoch": 1.28, "learning_rate": 3.194683495888315e-07, "logits/chosen": -2.6513664722442627, "logits/rejected": -2.628636360168457, "logps/chosen": -296.8031005859375, "logps/rejected": -277.1523742675781, "loss": 0.0925, "rewards/accuracies": 0.9375, "rewards/chosen": 0.0834658145904541, "rewards/margins": 5.658120155334473, "rewards/rejected": -5.574653625488281, "step": 2470 }, { "epoch": 1.28, "learning_rate": 3.185121438133486e-07, "logits/chosen": -2.6223368644714355, "logits/rejected": -2.6064722537994385, "logps/chosen": -280.4040222167969, "logps/rejected": -355.2570495605469, "loss": 0.083, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.3895830512046814, "rewards/margins": 6.714540958404541, "rewards/rejected": -6.324957847595215, "step": 2480 }, { "epoch": 1.29, "learning_rate": 3.1755593803786574e-07, "logits/chosen": -2.655653953552246, "logits/rejected": -2.6335813999176025, "logps/chosen": -228.7120819091797, "logps/rejected": -280.1087646484375, "loss": 0.0821, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3908086121082306, "rewards/margins": 5.965733528137207, "rewards/rejected": -6.356542110443115, "step": 2490 }, { "epoch": 1.29, "learning_rate": 3.1659973226238285e-07, "logits/chosen": -2.6471128463745117, "logits/rejected": -2.619386672973633, "logps/chosen": -270.0829772949219, "logps/rejected": -283.39752197265625, "loss": 0.0842, "rewards/accuracies": 0.9375, "rewards/chosen": 0.08627805858850479, "rewards/margins": 6.602423191070557, "rewards/rejected": -6.5161452293396, "step": 2500 }, { "epoch": 1.29, "eval_logits/chosen": -2.684290885925293, "eval_logits/rejected": -2.6529762744903564, "eval_logps/chosen": -294.15118408203125, "eval_logps/rejected": -273.7510681152344, "eval_loss": 0.5710099935531616, "eval_rewards/accuracies": 0.765999972820282, "eval_rewards/chosen": -2.300004482269287, "eval_rewards/margins": 2.226706027984619, "eval_rewards/rejected": -4.526710510253906, "eval_runtime": 300.4148, "eval_samples_per_second": 6.657, "eval_steps_per_second": 0.416, "step": 2500 }, { "epoch": 1.3, "learning_rate": 3.1564352648689997e-07, "logits/chosen": -2.7135045528411865, "logits/rejected": -2.6824328899383545, "logps/chosen": -302.2213439941406, "logps/rejected": -292.8287353515625, "loss": 0.1094, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.13228394091129303, "rewards/margins": 6.033846855163574, "rewards/rejected": -6.166131019592285, "step": 2510 }, { "epoch": 1.3, "learning_rate": 3.146873207114171e-07, "logits/chosen": -2.6831634044647217, "logits/rejected": -2.643829822540283, "logps/chosen": -239.953369140625, "logps/rejected": -290.42901611328125, "loss": 0.0793, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24583473801612854, "rewards/margins": 7.089083671569824, "rewards/rejected": -6.843249320983887, "step": 2520 }, { "epoch": 1.31, "learning_rate": 3.137311149359342e-07, "logits/chosen": -2.7059950828552246, "logits/rejected": -2.6721725463867188, "logps/chosen": -309.16510009765625, "logps/rejected": -334.92529296875, "loss": 0.077, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.09306098520755768, "rewards/margins": 6.407054901123047, "rewards/rejected": -6.31399393081665, "step": 2530 }, { "epoch": 1.31, "learning_rate": 3.127749091604513e-07, "logits/chosen": -2.7140984535217285, "logits/rejected": -2.7086362838745117, "logps/chosen": -302.92572021484375, "logps/rejected": -342.76318359375, "loss": 0.091, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.17233842611312866, "rewards/margins": 7.973818778991699, "rewards/rejected": -7.801480293273926, "step": 2540 }, { "epoch": 1.32, "learning_rate": 3.1181870338496843e-07, "logits/chosen": -2.6527652740478516, "logits/rejected": -2.6552717685699463, "logps/chosen": -207.1505584716797, "logps/rejected": -294.25811767578125, "loss": 0.0893, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.10080035030841827, "rewards/margins": 6.058371067047119, "rewards/rejected": -6.159171104431152, "step": 2550 }, { "epoch": 1.32, "learning_rate": 3.108624976094856e-07, "logits/chosen": -2.6768598556518555, "logits/rejected": -2.66867733001709, "logps/chosen": -259.58905029296875, "logps/rejected": -266.781982421875, "loss": 0.081, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.8046671152114868, "rewards/margins": 6.409237861633301, "rewards/rejected": -5.604570388793945, "step": 2560 }, { "epoch": 1.33, "learning_rate": 3.0990629183400266e-07, "logits/chosen": -2.6110267639160156, "logits/rejected": -2.597078800201416, "logps/chosen": -286.2091369628906, "logps/rejected": -312.1487121582031, "loss": 0.076, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.18477150797843933, "rewards/margins": 7.183107852935791, "rewards/rejected": -6.998335838317871, "step": 2570 }, { "epoch": 1.33, "learning_rate": 3.089500860585198e-07, "logits/chosen": -2.6049301624298096, "logits/rejected": -2.606522560119629, "logps/chosen": -232.1073760986328, "logps/rejected": -264.76092529296875, "loss": 0.1077, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7609155774116516, "rewards/margins": 5.248475074768066, "rewards/rejected": -6.009390830993652, "step": 2580 }, { "epoch": 1.34, "learning_rate": 3.079938802830369e-07, "logits/chosen": -2.691229820251465, "logits/rejected": -2.6520042419433594, "logps/chosen": -254.48287963867188, "logps/rejected": -234.5712890625, "loss": 0.067, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.14560198783874512, "rewards/margins": 5.6187334060668945, "rewards/rejected": -5.764335632324219, "step": 2590 }, { "epoch": 1.34, "learning_rate": 3.07037674507554e-07, "logits/chosen": -2.6273081302642822, "logits/rejected": -2.630744457244873, "logps/chosen": -296.9062805175781, "logps/rejected": -350.70074462890625, "loss": 0.1321, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.28318193554878235, "rewards/margins": 7.059614658355713, "rewards/rejected": -6.776432991027832, "step": 2600 }, { "epoch": 1.34, "eval_logits/chosen": -2.734260082244873, "eval_logits/rejected": -2.7094430923461914, "eval_logps/chosen": -289.38946533203125, "eval_logps/rejected": -267.0450439453125, "eval_loss": 0.5334489941596985, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -1.8238333463668823, "eval_rewards/margins": 2.032271385192871, "eval_rewards/rejected": -3.8561043739318848, "eval_runtime": 298.2165, "eval_samples_per_second": 6.707, "eval_steps_per_second": 0.419, "step": 2600 }, { "epoch": 1.35, "learning_rate": 3.060814687320711e-07, "logits/chosen": -2.6680526733398438, "logits/rejected": -2.6304268836975098, "logps/chosen": -281.00341796875, "logps/rejected": -281.87396240234375, "loss": 0.0959, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2273908108472824, "rewards/margins": 6.250962257385254, "rewards/rejected": -6.023571968078613, "step": 2610 }, { "epoch": 1.35, "learning_rate": 3.0512526295658824e-07, "logits/chosen": -2.6950268745422363, "logits/rejected": -2.6460578441619873, "logps/chosen": -286.7471923828125, "logps/rejected": -297.45050048828125, "loss": 0.1119, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.0691387727856636, "rewards/margins": 6.203841209411621, "rewards/rejected": -6.134702682495117, "step": 2620 }, { "epoch": 1.36, "learning_rate": 3.0416905718110536e-07, "logits/chosen": -2.748300075531006, "logits/rejected": -2.7518250942230225, "logps/chosen": -246.3492889404297, "logps/rejected": -267.51470947265625, "loss": 0.0881, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.12618187069892883, "rewards/margins": 6.258318901062012, "rewards/rejected": -6.3845014572143555, "step": 2630 }, { "epoch": 1.36, "learning_rate": 3.0321285140562247e-07, "logits/chosen": -2.769554615020752, "logits/rejected": -2.716841459274292, "logps/chosen": -250.5794219970703, "logps/rejected": -274.2592468261719, "loss": 0.1548, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0954054594039917, "rewards/margins": 5.371593475341797, "rewards/rejected": -6.466999053955078, "step": 2640 }, { "epoch": 1.37, "learning_rate": 3.022566456301396e-07, "logits/chosen": -2.8216865062713623, "logits/rejected": -2.7998435497283936, "logps/chosen": -262.3215026855469, "logps/rejected": -294.09063720703125, "loss": 0.1213, "rewards/accuracies": 1.0, "rewards/chosen": -0.2869683802127838, "rewards/margins": 6.307840347290039, "rewards/rejected": -6.594809055328369, "step": 2650 }, { "epoch": 1.37, "learning_rate": 3.013004398546567e-07, "logits/chosen": -2.724449396133423, "logits/rejected": -2.644949197769165, "logps/chosen": -270.1006774902344, "logps/rejected": -294.309814453125, "loss": 0.075, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.46882614493370056, "rewards/margins": 6.1476945877075195, "rewards/rejected": -6.616520881652832, "step": 2660 }, { "epoch": 1.38, "learning_rate": 3.003442340791738e-07, "logits/chosen": -2.8228209018707275, "logits/rejected": -2.819795608520508, "logps/chosen": -264.2869567871094, "logps/rejected": -294.621826171875, "loss": 0.0879, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.5818114280700684, "rewards/margins": 7.002253532409668, "rewards/rejected": -7.5840654373168945, "step": 2670 }, { "epoch": 1.38, "learning_rate": 2.9938802830369093e-07, "logits/chosen": -2.795841693878174, "logits/rejected": -2.807483434677124, "logps/chosen": -265.37322998046875, "logps/rejected": -302.5888671875, "loss": 0.1097, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.6445812582969666, "rewards/margins": 6.2356858253479, "rewards/rejected": -6.880267143249512, "step": 2680 }, { "epoch": 1.39, "learning_rate": 2.9843182252820805e-07, "logits/chosen": -2.715318441390991, "logits/rejected": -2.7065510749816895, "logps/chosen": -251.9051971435547, "logps/rejected": -291.34918212890625, "loss": 0.0851, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.3353874683380127, "rewards/margins": 6.849355220794678, "rewards/rejected": -7.184743404388428, "step": 2690 }, { "epoch": 1.39, "learning_rate": 2.974756167527252e-07, "logits/chosen": -2.794362783432007, "logits/rejected": -2.784069061279297, "logps/chosen": -243.68063354492188, "logps/rejected": -289.0542297363281, "loss": 0.0862, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5554946660995483, "rewards/margins": 5.511413097381592, "rewards/rejected": -6.06690788269043, "step": 2700 }, { "epoch": 1.39, "eval_logits/chosen": -2.71694016456604, "eval_logits/rejected": -2.69527268409729, "eval_logps/chosen": -289.63067626953125, "eval_logps/rejected": -267.99761962890625, "eval_loss": 0.5442701578140259, "eval_rewards/accuracies": 0.7519999742507935, "eval_rewards/chosen": -1.8479559421539307, "eval_rewards/margins": 2.103407621383667, "eval_rewards/rejected": -3.9513633251190186, "eval_runtime": 302.0105, "eval_samples_per_second": 6.622, "eval_steps_per_second": 0.414, "step": 2700 }, { "epoch": 1.4, "learning_rate": 2.9651941097724233e-07, "logits/chosen": -2.7475249767303467, "logits/rejected": -2.725043296813965, "logps/chosen": -282.15606689453125, "logps/rejected": -271.9478759765625, "loss": 0.1062, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.22154350578784943, "rewards/margins": 5.806771755218506, "rewards/rejected": -6.028315544128418, "step": 2710 }, { "epoch": 1.4, "learning_rate": 2.9556320520175945e-07, "logits/chosen": -2.6693906784057617, "logits/rejected": -2.645045757293701, "logps/chosen": -268.8005676269531, "logps/rejected": -278.6295471191406, "loss": 0.1059, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.5214961767196655, "rewards/margins": 6.091368198394775, "rewards/rejected": -6.612864017486572, "step": 2720 }, { "epoch": 1.41, "learning_rate": 2.946069994262765e-07, "logits/chosen": -2.7901053428649902, "logits/rejected": -2.7124366760253906, "logps/chosen": -295.39605712890625, "logps/rejected": -323.22674560546875, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": -0.1495104283094406, "rewards/margins": 6.738787651062012, "rewards/rejected": -6.888298034667969, "step": 2730 }, { "epoch": 1.41, "learning_rate": 2.9365079365079363e-07, "logits/chosen": -2.758653163909912, "logits/rejected": -2.6714606285095215, "logps/chosen": -279.1718444824219, "logps/rejected": -265.2136535644531, "loss": 0.1065, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.2586991488933563, "rewards/margins": 6.57647705078125, "rewards/rejected": -6.835176944732666, "step": 2740 }, { "epoch": 1.42, "learning_rate": 2.9269458787531074e-07, "logits/chosen": -2.671984910964966, "logits/rejected": -2.6374223232269287, "logps/chosen": -245.06051635742188, "logps/rejected": -311.33538818359375, "loss": 0.0834, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7288433313369751, "rewards/margins": 5.889183521270752, "rewards/rejected": -6.6180267333984375, "step": 2750 }, { "epoch": 1.42, "learning_rate": 2.9173838209982786e-07, "logits/chosen": -2.746222972869873, "logits/rejected": -2.675706624984741, "logps/chosen": -267.20440673828125, "logps/rejected": -248.35855102539062, "loss": 0.0966, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5821000337600708, "rewards/margins": 5.743124961853027, "rewards/rejected": -6.325225353240967, "step": 2760 }, { "epoch": 1.43, "learning_rate": 2.90782176324345e-07, "logits/chosen": -2.694669485092163, "logits/rejected": -2.655134677886963, "logps/chosen": -325.7140808105469, "logps/rejected": -293.95166015625, "loss": 0.066, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.05179126188158989, "rewards/margins": 6.887429237365723, "rewards/rejected": -6.83563756942749, "step": 2770 }, { "epoch": 1.44, "learning_rate": 2.898259705488621e-07, "logits/chosen": -2.818296432495117, "logits/rejected": -2.813457727432251, "logps/chosen": -266.9579162597656, "logps/rejected": -275.5874938964844, "loss": 0.0825, "rewards/accuracies": 1.0, "rewards/chosen": -0.4114197790622711, "rewards/margins": 6.127683639526367, "rewards/rejected": -6.539102077484131, "step": 2780 }, { "epoch": 1.44, "learning_rate": 2.888697647733792e-07, "logits/chosen": -2.7553584575653076, "logits/rejected": -2.7237467765808105, "logps/chosen": -323.9237060546875, "logps/rejected": -323.90655517578125, "loss": 0.0819, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.25978711247444153, "rewards/margins": 7.298235893249512, "rewards/rejected": -7.038449287414551, "step": 2790 }, { "epoch": 1.45, "learning_rate": 2.879135589978963e-07, "logits/chosen": -2.657649278640747, "logits/rejected": -2.7081754207611084, "logps/chosen": -239.7465362548828, "logps/rejected": -296.0489196777344, "loss": 0.0954, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.0186956487596035, "rewards/margins": 5.776867866516113, "rewards/rejected": -5.758172035217285, "step": 2800 }, { "epoch": 1.45, "eval_logits/chosen": -2.712128162384033, "eval_logits/rejected": -2.689998149871826, "eval_logps/chosen": -290.46826171875, "eval_logps/rejected": -268.4657897949219, "eval_loss": 0.5472421646118164, "eval_rewards/accuracies": 0.7620000243186951, "eval_rewards/chosen": -1.9317100048065186, "eval_rewards/margins": 2.066469669342041, "eval_rewards/rejected": -3.9981796741485596, "eval_runtime": 302.1561, "eval_samples_per_second": 6.619, "eval_steps_per_second": 0.414, "step": 2800 }, { "epoch": 1.45, "learning_rate": 2.8695735322241344e-07, "logits/chosen": -2.733955144882202, "logits/rejected": -2.7273309230804443, "logps/chosen": -277.91851806640625, "logps/rejected": -294.29095458984375, "loss": 0.0969, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4033392369747162, "rewards/margins": 6.4010772705078125, "rewards/rejected": -5.997737884521484, "step": 2810 }, { "epoch": 1.46, "learning_rate": 2.8600114744693055e-07, "logits/chosen": -2.6529266834259033, "logits/rejected": -2.619670867919922, "logps/chosen": -246.15811157226562, "logps/rejected": -305.12042236328125, "loss": 0.0762, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.3458639979362488, "rewards/margins": 7.0330352783203125, "rewards/rejected": -6.68717098236084, "step": 2820 }, { "epoch": 1.46, "learning_rate": 2.8504494167144767e-07, "logits/chosen": -2.7030186653137207, "logits/rejected": -2.6916511058807373, "logps/chosen": -265.45953369140625, "logps/rejected": -304.0247802734375, "loss": 0.0815, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.11693648248910904, "rewards/margins": 6.367062568664551, "rewards/rejected": -6.483999729156494, "step": 2830 }, { "epoch": 1.47, "learning_rate": 2.8408873589596484e-07, "logits/chosen": -2.731295108795166, "logits/rejected": -2.6814284324645996, "logps/chosen": -263.04351806640625, "logps/rejected": -281.86529541015625, "loss": 0.0685, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.13037601113319397, "rewards/margins": 6.561248779296875, "rewards/rejected": -6.691624641418457, "step": 2840 }, { "epoch": 1.47, "learning_rate": 2.8313253012048195e-07, "logits/chosen": -2.591728687286377, "logits/rejected": -2.601846218109131, "logps/chosen": -229.76559448242188, "logps/rejected": -327.7354431152344, "loss": 0.0748, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.1586235761642456, "rewards/margins": 7.073062896728516, "rewards/rejected": -6.914440155029297, "step": 2850 }, { "epoch": 1.48, "learning_rate": 2.8217632434499907e-07, "logits/chosen": -2.754502296447754, "logits/rejected": -2.707017421722412, "logps/chosen": -289.90240478515625, "logps/rejected": -316.56988525390625, "loss": 0.0788, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.3420160114765167, "rewards/margins": 6.77099084854126, "rewards/rejected": -7.11300802230835, "step": 2860 }, { "epoch": 1.48, "learning_rate": 2.812201185695162e-07, "logits/chosen": -2.682600259780884, "logits/rejected": -2.6694142818450928, "logps/chosen": -238.49545288085938, "logps/rejected": -300.5420837402344, "loss": 0.0725, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1682194173336029, "rewards/margins": 5.978898048400879, "rewards/rejected": -6.1471171379089355, "step": 2870 }, { "epoch": 1.49, "learning_rate": 2.802639127940333e-07, "logits/chosen": -2.7259023189544678, "logits/rejected": -2.651639461517334, "logps/chosen": -265.7987365722656, "logps/rejected": -257.5804138183594, "loss": 0.114, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.36955079436302185, "rewards/margins": 6.632456302642822, "rewards/rejected": -7.002006530761719, "step": 2880 }, { "epoch": 1.49, "learning_rate": 2.7930770701855036e-07, "logits/chosen": -2.7521653175354004, "logits/rejected": -2.6949660778045654, "logps/chosen": -270.8799133300781, "logps/rejected": -282.875, "loss": 0.122, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.07829879969358444, "rewards/margins": 6.847074031829834, "rewards/rejected": -6.76877498626709, "step": 2890 }, { "epoch": 1.5, "learning_rate": 2.783515012430675e-07, "logits/chosen": -2.6613528728485107, "logits/rejected": -2.6102874279022217, "logps/chosen": -269.69146728515625, "logps/rejected": -276.59228515625, "loss": 0.0979, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.2745317220687866, "rewards/margins": 6.088465213775635, "rewards/rejected": -6.362997531890869, "step": 2900 }, { "epoch": 1.5, "eval_logits/chosen": -2.678755044937134, "eval_logits/rejected": -2.646636724472046, "eval_logps/chosen": -292.6034240722656, "eval_logps/rejected": -270.4625549316406, "eval_loss": 0.5471131205558777, "eval_rewards/accuracies": 0.7540000081062317, "eval_rewards/chosen": -2.145230531692505, "eval_rewards/margins": 2.0526273250579834, "eval_rewards/rejected": -4.197857856750488, "eval_runtime": 300.0931, "eval_samples_per_second": 6.665, "eval_steps_per_second": 0.417, "step": 2900 }, { "epoch": 1.5, "learning_rate": 2.773952954675846e-07, "logits/chosen": -2.661973237991333, "logits/rejected": -2.6546902656555176, "logps/chosen": -267.7713317871094, "logps/rejected": -256.48779296875, "loss": 0.0672, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.36151501536369324, "rewards/margins": 5.869540691375732, "rewards/rejected": -6.231055736541748, "step": 2910 }, { "epoch": 1.51, "learning_rate": 2.764390896921017e-07, "logits/chosen": -2.712329626083374, "logits/rejected": -2.6236677169799805, "logps/chosen": -299.66705322265625, "logps/rejected": -286.8455505371094, "loss": 0.0836, "rewards/accuracies": 0.9375, "rewards/chosen": -0.04764469712972641, "rewards/margins": 6.375118255615234, "rewards/rejected": -6.422762870788574, "step": 2920 }, { "epoch": 1.51, "learning_rate": 2.754828839166188e-07, "logits/chosen": -2.586707592010498, "logits/rejected": -2.588397264480591, "logps/chosen": -257.25665283203125, "logps/rejected": -272.2353820800781, "loss": 0.0692, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.04652285575866699, "rewards/margins": 7.283473014831543, "rewards/rejected": -7.329996585845947, "step": 2930 }, { "epoch": 1.52, "learning_rate": 2.7452667814113594e-07, "logits/chosen": -2.7274789810180664, "logits/rejected": -2.6998512744903564, "logps/chosen": -285.88995361328125, "logps/rejected": -302.61798095703125, "loss": 0.0848, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.24309101700782776, "rewards/margins": 6.804354667663574, "rewards/rejected": -6.561263084411621, "step": 2940 }, { "epoch": 1.52, "learning_rate": 2.7357047236565306e-07, "logits/chosen": -2.642345666885376, "logits/rejected": -2.656949281692505, "logps/chosen": -255.52157592773438, "logps/rejected": -290.87457275390625, "loss": 0.0661, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.14165842533111572, "rewards/margins": 6.226951599121094, "rewards/rejected": -6.085293292999268, "step": 2950 }, { "epoch": 1.53, "learning_rate": 2.7261426659017017e-07, "logits/chosen": -2.6985981464385986, "logits/rejected": -2.6935291290283203, "logps/chosen": -259.4523010253906, "logps/rejected": -285.291748046875, "loss": 0.0821, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.24156935513019562, "rewards/margins": 5.883909225463867, "rewards/rejected": -6.125478744506836, "step": 2960 }, { "epoch": 1.53, "learning_rate": 2.716580608146873e-07, "logits/chosen": -2.7121007442474365, "logits/rejected": -2.680630922317505, "logps/chosen": -323.2484436035156, "logps/rejected": -312.9270324707031, "loss": 0.0678, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.317016065120697, "rewards/margins": 7.44516134262085, "rewards/rejected": -7.12814474105835, "step": 2970 }, { "epoch": 1.54, "learning_rate": 2.7070185503920446e-07, "logits/chosen": -2.5762784481048584, "logits/rejected": -2.556363344192505, "logps/chosen": -247.0843963623047, "logps/rejected": -302.10186767578125, "loss": 0.0652, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.16331888735294342, "rewards/margins": 6.702650547027588, "rewards/rejected": -6.865969657897949, "step": 2980 }, { "epoch": 1.54, "learning_rate": 2.6974564926372157e-07, "logits/chosen": -2.768145799636841, "logits/rejected": -2.686316967010498, "logps/chosen": -287.52178955078125, "logps/rejected": -308.16510009765625, "loss": 0.0648, "rewards/accuracies": 1.0, "rewards/chosen": 0.07840989530086517, "rewards/margins": 7.003350257873535, "rewards/rejected": -6.924940586090088, "step": 2990 }, { "epoch": 1.55, "learning_rate": 2.687894434882387e-07, "logits/chosen": -2.7189621925354004, "logits/rejected": -2.6321442127227783, "logps/chosen": -240.61349487304688, "logps/rejected": -254.2037811279297, "loss": 0.0732, "rewards/accuracies": 0.9375, "rewards/chosen": -0.35617923736572266, "rewards/margins": 5.727667331695557, "rewards/rejected": -6.083846092224121, "step": 3000 }, { "epoch": 1.55, "eval_logits/chosen": -2.698098659515381, "eval_logits/rejected": -2.671609401702881, "eval_logps/chosen": -291.4029235839844, "eval_logps/rejected": -270.5026550292969, "eval_loss": 0.551217794418335, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -2.0251784324645996, "eval_rewards/margins": 2.176687717437744, "eval_rewards/rejected": -4.201866149902344, "eval_runtime": 300.5162, "eval_samples_per_second": 6.655, "eval_steps_per_second": 0.416, "step": 3000 }, { "epoch": 1.55, "learning_rate": 2.678332377127558e-07, "logits/chosen": -2.71705961227417, "logits/rejected": -2.6564738750457764, "logps/chosen": -300.3543701171875, "logps/rejected": -306.4097595214844, "loss": 0.0827, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2911555767059326, "rewards/margins": 7.625485897064209, "rewards/rejected": -7.334330081939697, "step": 3010 }, { "epoch": 1.56, "learning_rate": 2.668770319372729e-07, "logits/chosen": -2.7231059074401855, "logits/rejected": -2.6993746757507324, "logps/chosen": -267.49078369140625, "logps/rejected": -254.30569458007812, "loss": 0.0776, "rewards/accuracies": 0.9375, "rewards/chosen": 0.07040025293827057, "rewards/margins": 5.528401851654053, "rewards/rejected": -5.458001613616943, "step": 3020 }, { "epoch": 1.56, "learning_rate": 2.6592082616179004e-07, "logits/chosen": -2.5468788146972656, "logits/rejected": -2.5402424335479736, "logps/chosen": -196.69125366210938, "logps/rejected": -264.7854919433594, "loss": 0.0868, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.013843962922692299, "rewards/margins": 5.93640661239624, "rewards/rejected": -5.950250625610352, "step": 3030 }, { "epoch": 1.57, "learning_rate": 2.649646203863071e-07, "logits/chosen": -2.5948565006256104, "logits/rejected": -2.6171257495880127, "logps/chosen": -252.2794647216797, "logps/rejected": -301.02874755859375, "loss": 0.0587, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2185756266117096, "rewards/margins": 6.193685531616211, "rewards/rejected": -5.975109577178955, "step": 3040 }, { "epoch": 1.57, "learning_rate": 2.640084146108242e-07, "logits/chosen": -2.654193878173828, "logits/rejected": -2.633950710296631, "logps/chosen": -303.9206237792969, "logps/rejected": -296.4111022949219, "loss": 0.0733, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1474265307188034, "rewards/margins": 6.6915788650512695, "rewards/rejected": -6.839005470275879, "step": 3050 }, { "epoch": 1.58, "learning_rate": 2.6305220883534133e-07, "logits/chosen": -2.668823719024658, "logits/rejected": -2.5736072063446045, "logps/chosen": -236.24972534179688, "logps/rejected": -276.8191833496094, "loss": 0.0712, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.06152166798710823, "rewards/margins": 6.0490899085998535, "rewards/rejected": -6.1106109619140625, "step": 3060 }, { "epoch": 1.58, "learning_rate": 2.6209600305985845e-07, "logits/chosen": -2.6920838356018066, "logits/rejected": -2.642549991607666, "logps/chosen": -274.13934326171875, "logps/rejected": -306.42523193359375, "loss": 0.1021, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5211185812950134, "rewards/margins": 7.7590436935424805, "rewards/rejected": -7.2379255294799805, "step": 3070 }, { "epoch": 1.59, "learning_rate": 2.6113979728437556e-07, "logits/chosen": -2.7744197845458984, "logits/rejected": -2.754517078399658, "logps/chosen": -300.2927551269531, "logps/rejected": -293.2771911621094, "loss": 0.1094, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.10171057283878326, "rewards/margins": 6.362521171569824, "rewards/rejected": -6.260810375213623, "step": 3080 }, { "epoch": 1.6, "learning_rate": 2.601835915088927e-07, "logits/chosen": -2.571173906326294, "logits/rejected": -2.569072961807251, "logps/chosen": -233.9067840576172, "logps/rejected": -281.82879638671875, "loss": 0.1037, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.19151082634925842, "rewards/margins": 6.079700946807861, "rewards/rejected": -5.888190269470215, "step": 3090 }, { "epoch": 1.6, "learning_rate": 2.592273857334098e-07, "logits/chosen": -2.63468074798584, "logits/rejected": -2.6281745433807373, "logps/chosen": -259.4231262207031, "logps/rejected": -289.1959228515625, "loss": 0.0799, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.1904747486114502, "rewards/margins": 6.4702324867248535, "rewards/rejected": -6.279757022857666, "step": 3100 }, { "epoch": 1.6, "eval_logits/chosen": -2.7142674922943115, "eval_logits/rejected": -2.6703028678894043, "eval_logps/chosen": -290.0393371582031, "eval_logps/rejected": -267.2228698730469, "eval_loss": 0.5415002107620239, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -1.888822078704834, "eval_rewards/margins": 1.9850670099258423, "eval_rewards/rejected": -3.873889207839966, "eval_runtime": 299.388, "eval_samples_per_second": 6.68, "eval_steps_per_second": 0.418, "step": 3100 }, { "epoch": 1.61, "learning_rate": 2.582711799579269e-07, "logits/chosen": -2.7312028408050537, "logits/rejected": -2.7376887798309326, "logps/chosen": -292.3309326171875, "logps/rejected": -306.7789611816406, "loss": 0.0849, "rewards/accuracies": 0.9375, "rewards/chosen": -0.12501433491706848, "rewards/margins": 6.4690046310424805, "rewards/rejected": -6.594018459320068, "step": 3110 }, { "epoch": 1.61, "learning_rate": 2.573149741824441e-07, "logits/chosen": -2.7475574016571045, "logits/rejected": -2.6412527561187744, "logps/chosen": -301.6943664550781, "logps/rejected": -288.72088623046875, "loss": 0.0913, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.25444021821022034, "rewards/margins": 6.481881618499756, "rewards/rejected": -6.227441310882568, "step": 3120 }, { "epoch": 1.62, "learning_rate": 2.563587684069612e-07, "logits/chosen": -2.6992063522338867, "logits/rejected": -2.66088604927063, "logps/chosen": -297.042236328125, "logps/rejected": -300.57843017578125, "loss": 0.0612, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.2802169919013977, "rewards/margins": 6.343818664550781, "rewards/rejected": -6.624035835266113, "step": 3130 }, { "epoch": 1.62, "learning_rate": 2.554025626314783e-07, "logits/chosen": -2.7833545207977295, "logits/rejected": -2.6407017707824707, "logps/chosen": -273.5995788574219, "logps/rejected": -312.2384948730469, "loss": 0.096, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5391936898231506, "rewards/margins": 6.512132167816162, "rewards/rejected": -7.051326751708984, "step": 3140 }, { "epoch": 1.63, "learning_rate": 2.544463568559954e-07, "logits/chosen": -2.8172030448913574, "logits/rejected": -2.6880176067352295, "logps/chosen": -309.4474792480469, "logps/rejected": -321.9583435058594, "loss": 0.0707, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.013124823570251465, "rewards/margins": 6.90747594833374, "rewards/rejected": -6.894351005554199, "step": 3150 }, { "epoch": 1.63, "learning_rate": 2.5349015108051254e-07, "logits/chosen": -2.7259583473205566, "logits/rejected": -2.6957221031188965, "logps/chosen": -251.1798858642578, "logps/rejected": -275.9666748046875, "loss": 0.0833, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.09325708448886871, "rewards/margins": 6.963329315185547, "rewards/rejected": -6.8700714111328125, "step": 3160 }, { "epoch": 1.64, "learning_rate": 2.5253394530502966e-07, "logits/chosen": -2.6244192123413086, "logits/rejected": -2.589078426361084, "logps/chosen": -324.7796936035156, "logps/rejected": -322.0079650878906, "loss": 0.0591, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2864592969417572, "rewards/margins": 7.4498138427734375, "rewards/rejected": -7.16335391998291, "step": 3170 }, { "epoch": 1.64, "learning_rate": 2.5157773952954677e-07, "logits/chosen": -2.804133892059326, "logits/rejected": -2.7516913414001465, "logps/chosen": -306.5554504394531, "logps/rejected": -300.6597900390625, "loss": 0.0944, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.08758392184972763, "rewards/margins": 6.123711109161377, "rewards/rejected": -6.211295127868652, "step": 3180 }, { "epoch": 1.65, "learning_rate": 2.506215337540639e-07, "logits/chosen": -2.680217742919922, "logits/rejected": -2.6095592975616455, "logps/chosen": -257.2310485839844, "logps/rejected": -300.31072998046875, "loss": 0.082, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.01602376066148281, "rewards/margins": 6.316248416900635, "rewards/rejected": -6.332272052764893, "step": 3190 }, { "epoch": 1.65, "learning_rate": 2.4966532797858095e-07, "logits/chosen": -2.6483314037323, "logits/rejected": -2.5556366443634033, "logps/chosen": -279.1159362792969, "logps/rejected": -281.1722106933594, "loss": 0.07, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2574574947357178, "rewards/margins": 7.106630802154541, "rewards/rejected": -6.849173069000244, "step": 3200 }, { "epoch": 1.65, "eval_logits/chosen": -2.700206995010376, "eval_logits/rejected": -2.6565675735473633, "eval_logps/chosen": -289.6077575683594, "eval_logps/rejected": -268.78326416015625, "eval_loss": 0.5399491190910339, "eval_rewards/accuracies": 0.7639999985694885, "eval_rewards/chosen": -1.8456586599349976, "eval_rewards/margins": 2.184269666671753, "eval_rewards/rejected": -4.029928207397461, "eval_runtime": 300.5168, "eval_samples_per_second": 6.655, "eval_steps_per_second": 0.416, "step": 3200 }, { "epoch": 1.66, "learning_rate": 2.4870912220309807e-07, "logits/chosen": -2.6509642601013184, "logits/rejected": -2.6520817279815674, "logps/chosen": -273.5619201660156, "logps/rejected": -275.95147705078125, "loss": 0.0892, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.26873114705085754, "rewards/margins": 5.601580619812012, "rewards/rejected": -5.870312690734863, "step": 3210 }, { "epoch": 1.66, "learning_rate": 2.477529164276152e-07, "logits/chosen": -2.8350400924682617, "logits/rejected": -2.743255138397217, "logps/chosen": -274.95733642578125, "logps/rejected": -308.00567626953125, "loss": 0.0899, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.03380532190203667, "rewards/margins": 6.9070143699646, "rewards/rejected": -6.873208522796631, "step": 3220 }, { "epoch": 1.67, "learning_rate": 2.4679671065213235e-07, "logits/chosen": -2.786475658416748, "logits/rejected": -2.7416672706604004, "logps/chosen": -283.60577392578125, "logps/rejected": -320.0465393066406, "loss": 0.0866, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.037688374519348145, "rewards/margins": 6.618558406829834, "rewards/rejected": -6.580870151519775, "step": 3230 }, { "epoch": 1.67, "learning_rate": 2.4584050487664947e-07, "logits/chosen": -2.762442111968994, "logits/rejected": -2.637181282043457, "logps/chosen": -284.50079345703125, "logps/rejected": -291.54669189453125, "loss": 0.0466, "rewards/accuracies": 1.0, "rewards/chosen": 0.18414416909217834, "rewards/margins": 6.982729434967041, "rewards/rejected": -6.798585414886475, "step": 3240 }, { "epoch": 1.68, "learning_rate": 2.448842991011666e-07, "logits/chosen": -2.7496209144592285, "logits/rejected": -2.670368194580078, "logps/chosen": -291.591796875, "logps/rejected": -293.0029602050781, "loss": 0.0933, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.37867432832717896, "rewards/margins": 6.212046146392822, "rewards/rejected": -6.590720176696777, "step": 3250 }, { "epoch": 1.68, "learning_rate": 2.439280933256837e-07, "logits/chosen": -2.6777236461639404, "logits/rejected": -2.6595215797424316, "logps/chosen": -238.927978515625, "logps/rejected": -308.22271728515625, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": 0.11050692945718765, "rewards/margins": 7.144793510437012, "rewards/rejected": -7.0342864990234375, "step": 3260 }, { "epoch": 1.69, "learning_rate": 2.429718875502008e-07, "logits/chosen": -2.5337674617767334, "logits/rejected": -2.5987701416015625, "logps/chosen": -258.04766845703125, "logps/rejected": -331.2449035644531, "loss": 0.0687, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.22830796241760254, "rewards/margins": 7.673661708831787, "rewards/rejected": -7.4453535079956055, "step": 3270 }, { "epoch": 1.69, "learning_rate": 2.420156817747179e-07, "logits/chosen": -2.686455726623535, "logits/rejected": -2.5261008739471436, "logps/chosen": -270.0923767089844, "logps/rejected": -264.47076416015625, "loss": 0.0651, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2098160982131958, "rewards/margins": 6.175947189331055, "rewards/rejected": -6.385763168334961, "step": 3280 }, { "epoch": 1.7, "learning_rate": 2.41059475999235e-07, "logits/chosen": -2.6996235847473145, "logits/rejected": -2.5689122676849365, "logps/chosen": -245.3484649658203, "logps/rejected": -278.2618408203125, "loss": 0.0797, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.643414318561554, "rewards/margins": 5.8331074714660645, "rewards/rejected": -6.4765214920043945, "step": 3290 }, { "epoch": 1.7, "learning_rate": 2.4010327022375216e-07, "logits/chosen": -2.7380359172821045, "logits/rejected": -2.7304420471191406, "logps/chosen": -302.05413818359375, "logps/rejected": -321.96063232421875, "loss": 0.0808, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.16380396485328674, "rewards/margins": 7.5016984939575195, "rewards/rejected": -7.337894439697266, "step": 3300 }, { "epoch": 1.7, "eval_logits/chosen": -2.7340078353881836, "eval_logits/rejected": -2.68426513671875, "eval_logps/chosen": -293.4576416015625, "eval_logps/rejected": -274.8385314941406, "eval_loss": 0.5593692660331726, "eval_rewards/accuracies": 0.7639999985694885, "eval_rewards/chosen": -2.23065185546875, "eval_rewards/margins": 2.4048030376434326, "eval_rewards/rejected": -4.6354546546936035, "eval_runtime": 302.5894, "eval_samples_per_second": 6.61, "eval_steps_per_second": 0.413, "step": 3300 }, { "epoch": 1.71, "learning_rate": 2.391470644482693e-07, "logits/chosen": -2.7412731647491455, "logits/rejected": -2.7463791370391846, "logps/chosen": -284.0135192871094, "logps/rejected": -313.3152770996094, "loss": 0.0728, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.21982452273368835, "rewards/margins": 6.8465423583984375, "rewards/rejected": -6.626717567443848, "step": 3310 }, { "epoch": 1.71, "learning_rate": 2.3819085867278636e-07, "logits/chosen": -2.659803867340088, "logits/rejected": -2.6273205280303955, "logps/chosen": -213.12435913085938, "logps/rejected": -251.2199249267578, "loss": 0.0828, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0200145244598389, "rewards/margins": 5.845543384552002, "rewards/rejected": -6.865557670593262, "step": 3320 }, { "epoch": 1.72, "learning_rate": 2.3723465289730348e-07, "logits/chosen": -2.778775453567505, "logits/rejected": -2.728415012359619, "logps/chosen": -302.9705505371094, "logps/rejected": -256.00201416015625, "loss": 0.0958, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.004472860600799322, "rewards/margins": 5.540419578552246, "rewards/rejected": -5.54489278793335, "step": 3330 }, { "epoch": 1.72, "learning_rate": 2.362784471218206e-07, "logits/chosen": -2.744642734527588, "logits/rejected": -2.6768505573272705, "logps/chosen": -259.7644348144531, "logps/rejected": -312.2467956542969, "loss": 0.0621, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.14360444247722626, "rewards/margins": 7.058108329772949, "rewards/rejected": -6.914504051208496, "step": 3340 }, { "epoch": 1.73, "learning_rate": 2.353222413463377e-07, "logits/chosen": -2.674807071685791, "logits/rejected": -2.620729684829712, "logps/chosen": -267.46630859375, "logps/rejected": -311.1983642578125, "loss": 0.0585, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.08036639541387558, "rewards/margins": 7.000423431396484, "rewards/rejected": -7.080790042877197, "step": 3350 }, { "epoch": 1.73, "learning_rate": 2.3436603557085483e-07, "logits/chosen": -2.680379629135132, "logits/rejected": -2.690441131591797, "logps/chosen": -278.5157775878906, "logps/rejected": -315.74798583984375, "loss": 0.0741, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.24022746086120605, "rewards/margins": 6.1009416580200195, "rewards/rejected": -6.341168403625488, "step": 3360 }, { "epoch": 1.74, "learning_rate": 2.3340982979537197e-07, "logits/chosen": -2.704501152038574, "logits/rejected": -2.6352505683898926, "logps/chosen": -342.2712097167969, "logps/rejected": -336.1389465332031, "loss": 0.0836, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.4207138121128082, "rewards/margins": 6.910533905029297, "rewards/rejected": -6.489820957183838, "step": 3370 }, { "epoch": 1.74, "learning_rate": 2.3245362401988909e-07, "logits/chosen": -2.704493761062622, "logits/rejected": -2.6764588356018066, "logps/chosen": -286.54571533203125, "logps/rejected": -298.95361328125, "loss": 0.0822, "rewards/accuracies": 1.0, "rewards/chosen": 0.39437445998191833, "rewards/margins": 7.7266998291015625, "rewards/rejected": -7.3323259353637695, "step": 3380 }, { "epoch": 1.75, "learning_rate": 2.314974182444062e-07, "logits/chosen": -2.733954906463623, "logits/rejected": -2.712998867034912, "logps/chosen": -265.21197509765625, "logps/rejected": -303.2349853515625, "loss": 0.0623, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1423777341842651, "rewards/margins": 6.146125793457031, "rewards/rejected": -7.288504123687744, "step": 3390 }, { "epoch": 1.76, "learning_rate": 2.305412124689233e-07, "logits/chosen": -2.6285359859466553, "logits/rejected": -2.5890676975250244, "logps/chosen": -263.24896240234375, "logps/rejected": -293.53497314453125, "loss": 0.0501, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.26912063360214233, "rewards/margins": 7.229107856750488, "rewards/rejected": -7.498227596282959, "step": 3400 }, { "epoch": 1.76, "eval_logits/chosen": -2.6943509578704834, "eval_logits/rejected": -2.642679214477539, "eval_logps/chosen": -296.305908203125, "eval_logps/rejected": -278.03448486328125, "eval_loss": 0.5704072117805481, "eval_rewards/accuracies": 0.765999972820282, "eval_rewards/chosen": -2.515477180480957, "eval_rewards/margins": 2.4395759105682373, "eval_rewards/rejected": -4.955053806304932, "eval_runtime": 301.3731, "eval_samples_per_second": 6.636, "eval_steps_per_second": 0.415, "step": 3400 }, { "epoch": 1.76, "learning_rate": 2.295850066934404e-07, "logits/chosen": -2.695094347000122, "logits/rejected": -2.6034932136535645, "logps/chosen": -278.7093200683594, "logps/rejected": -310.04681396484375, "loss": 0.0762, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.8295940160751343, "rewards/margins": 6.8828444480896, "rewards/rejected": -7.712438106536865, "step": 3410 }, { "epoch": 1.77, "learning_rate": 2.2862880091795752e-07, "logits/chosen": -2.7348952293395996, "logits/rejected": -2.684424877166748, "logps/chosen": -269.9104309082031, "logps/rejected": -328.19610595703125, "loss": 0.0801, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.4503478407859802, "rewards/margins": 7.062024116516113, "rewards/rejected": -7.512372016906738, "step": 3420 }, { "epoch": 1.77, "learning_rate": 2.2767259514247464e-07, "logits/chosen": -2.749939441680908, "logits/rejected": -2.7340288162231445, "logps/chosen": -270.1997985839844, "logps/rejected": -326.0268249511719, "loss": 0.0772, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.09085409343242645, "rewards/margins": 6.569288730621338, "rewards/rejected": -6.6601433753967285, "step": 3430 }, { "epoch": 1.78, "learning_rate": 2.2671638936699178e-07, "logits/chosen": -2.6335549354553223, "logits/rejected": -2.623380184173584, "logps/chosen": -264.9308776855469, "logps/rejected": -305.0411682128906, "loss": 0.07, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.14353379607200623, "rewards/margins": 6.764189720153809, "rewards/rejected": -6.620657444000244, "step": 3440 }, { "epoch": 1.78, "learning_rate": 2.257601835915089e-07, "logits/chosen": -2.742511034011841, "logits/rejected": -2.719316005706787, "logps/chosen": -324.2193603515625, "logps/rejected": -318.42327880859375, "loss": 0.0674, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2821727395057678, "rewards/margins": 7.147420406341553, "rewards/rejected": -6.8652472496032715, "step": 3450 }, { "epoch": 1.79, "learning_rate": 2.24803977816026e-07, "logits/chosen": -2.6763834953308105, "logits/rejected": -2.627119779586792, "logps/chosen": -280.9945068359375, "logps/rejected": -283.1276550292969, "loss": 0.1153, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19678916037082672, "rewards/margins": 6.571404933929443, "rewards/rejected": -6.76819372177124, "step": 3460 }, { "epoch": 1.79, "learning_rate": 2.2384777204054313e-07, "logits/chosen": -2.783391237258911, "logits/rejected": -2.6957125663757324, "logps/chosen": -286.83990478515625, "logps/rejected": -304.48968505859375, "loss": 0.0711, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.5841490626335144, "rewards/margins": 7.363424777984619, "rewards/rejected": -6.779275417327881, "step": 3470 }, { "epoch": 1.8, "learning_rate": 2.2289156626506022e-07, "logits/chosen": -2.6243674755096436, "logits/rejected": -2.5819458961486816, "logps/chosen": -290.9930419921875, "logps/rejected": -303.67974853515625, "loss": 0.072, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.38888758420944214, "rewards/margins": 7.5612592697143555, "rewards/rejected": -7.9501471519470215, "step": 3480 }, { "epoch": 1.8, "learning_rate": 2.2193536048957733e-07, "logits/chosen": -2.8293557167053223, "logits/rejected": -2.742828130722046, "logps/chosen": -313.91278076171875, "logps/rejected": -317.4000549316406, "loss": 0.096, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.0450088270008564, "rewards/margins": 6.599936485290527, "rewards/rejected": -6.554927825927734, "step": 3490 }, { "epoch": 1.81, "learning_rate": 2.2097915471409445e-07, "logits/chosen": -2.7180981636047363, "logits/rejected": -2.7207627296447754, "logps/chosen": -282.8636779785156, "logps/rejected": -304.4784240722656, "loss": 0.061, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.5040356516838074, "rewards/margins": 6.437845706939697, "rewards/rejected": -6.941880702972412, "step": 3500 }, { "epoch": 1.81, "eval_logits/chosen": -2.7404065132141113, "eval_logits/rejected": -2.708589792251587, "eval_logps/chosen": -293.3233947753906, "eval_logps/rejected": -273.42083740234375, "eval_loss": 0.5561814904212952, "eval_rewards/accuracies": 0.7599999904632568, "eval_rewards/chosen": -2.2172250747680664, "eval_rewards/margins": 2.2764604091644287, "eval_rewards/rejected": -4.493685722351074, "eval_runtime": 302.9106, "eval_samples_per_second": 6.603, "eval_steps_per_second": 0.413, "step": 3500 }, { "epoch": 1.81, "learning_rate": 2.200229489386116e-07, "logits/chosen": -2.741328716278076, "logits/rejected": -2.7085936069488525, "logps/chosen": -282.50091552734375, "logps/rejected": -330.7566223144531, "loss": 0.0508, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.4392349123954773, "rewards/margins": 7.699382781982422, "rewards/rejected": -8.138618469238281, "step": 3510 }, { "epoch": 1.82, "learning_rate": 2.190667431631287e-07, "logits/chosen": -2.688417434692383, "logits/rejected": -2.695262908935547, "logps/chosen": -270.17486572265625, "logps/rejected": -363.36553955078125, "loss": 0.1207, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.14541302621364594, "rewards/margins": 7.5206708908081055, "rewards/rejected": -7.666083335876465, "step": 3520 }, { "epoch": 1.82, "learning_rate": 2.1811053738764582e-07, "logits/chosen": -2.640284776687622, "logits/rejected": -2.6383633613586426, "logps/chosen": -243.1065673828125, "logps/rejected": -314.18792724609375, "loss": 0.0634, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.18995524942874908, "rewards/margins": 6.731640815734863, "rewards/rejected": -6.921595573425293, "step": 3530 }, { "epoch": 1.83, "learning_rate": 2.1715433161216294e-07, "logits/chosen": -2.6342291831970215, "logits/rejected": -2.6640098094940186, "logps/chosen": -270.2723388671875, "logps/rejected": -315.5150451660156, "loss": 0.0751, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2791522741317749, "rewards/margins": 6.453176975250244, "rewards/rejected": -6.73232889175415, "step": 3540 }, { "epoch": 1.83, "learning_rate": 2.1619812583668005e-07, "logits/chosen": -2.678250789642334, "logits/rejected": -2.6710121631622314, "logps/chosen": -256.47808837890625, "logps/rejected": -284.9803771972656, "loss": 0.0761, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.6811217069625854, "rewards/margins": 6.993284702301025, "rewards/rejected": -7.6744065284729, "step": 3550 }, { "epoch": 1.84, "learning_rate": 2.1524192006119714e-07, "logits/chosen": -2.679922342300415, "logits/rejected": -2.6698668003082275, "logps/chosen": -256.57073974609375, "logps/rejected": -293.0118408203125, "loss": 0.0698, "rewards/accuracies": 1.0, "rewards/chosen": -1.2304511070251465, "rewards/margins": 6.001145362854004, "rewards/rejected": -7.23159646987915, "step": 3560 }, { "epoch": 1.84, "learning_rate": 2.1428571428571426e-07, "logits/chosen": -2.687896490097046, "logits/rejected": -2.6100172996520996, "logps/chosen": -251.7646026611328, "logps/rejected": -273.5567932128906, "loss": 0.0454, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9797040820121765, "rewards/margins": 6.288348197937012, "rewards/rejected": -7.268052577972412, "step": 3570 }, { "epoch": 1.85, "learning_rate": 2.133295085102314e-07, "logits/chosen": -2.590709924697876, "logits/rejected": -2.507107973098755, "logps/chosen": -256.82061767578125, "logps/rejected": -288.29132080078125, "loss": 0.0449, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.220476508140564, "rewards/margins": 7.095966339111328, "rewards/rejected": -8.31644344329834, "step": 3580 }, { "epoch": 1.85, "learning_rate": 2.1237330273474851e-07, "logits/chosen": -2.6958794593811035, "logits/rejected": -2.6530776023864746, "logps/chosen": -309.91583251953125, "logps/rejected": -307.0137634277344, "loss": 0.0964, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.5907996892929077, "rewards/margins": 7.160977363586426, "rewards/rejected": -7.751777648925781, "step": 3590 }, { "epoch": 1.86, "learning_rate": 2.1141709695926563e-07, "logits/chosen": -2.672590970993042, "logits/rejected": -2.6199073791503906, "logps/chosen": -275.89251708984375, "logps/rejected": -277.8507080078125, "loss": 0.0979, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.5147138833999634, "rewards/margins": 7.132517337799072, "rewards/rejected": -7.647230625152588, "step": 3600 }, { "epoch": 1.86, "eval_logits/chosen": -2.676504373550415, "eval_logits/rejected": -2.63808536529541, "eval_logps/chosen": -297.6461181640625, "eval_logps/rejected": -278.80682373046875, "eval_loss": 0.5656457543373108, "eval_rewards/accuracies": 0.7519999742507935, "eval_rewards/chosen": -2.6494967937469482, "eval_rewards/margins": 2.3827850818634033, "eval_rewards/rejected": -5.032281875610352, "eval_runtime": 302.5456, "eval_samples_per_second": 6.611, "eval_steps_per_second": 0.413, "step": 3600 }, { "epoch": 1.86, "learning_rate": 2.1046089118378275e-07, "logits/chosen": -2.69031023979187, "logits/rejected": -2.6448917388916016, "logps/chosen": -263.35003662109375, "logps/rejected": -321.37274169921875, "loss": 0.0665, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.4072280824184418, "rewards/margins": 7.940283298492432, "rewards/rejected": -8.347511291503906, "step": 3610 }, { "epoch": 1.87, "learning_rate": 2.0950468540829986e-07, "logits/chosen": -2.6567223072052, "logits/rejected": -2.6022956371307373, "logps/chosen": -280.3554992675781, "logps/rejected": -290.8537292480469, "loss": 0.0922, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.44895225763320923, "rewards/margins": 7.379188537597656, "rewards/rejected": -7.828141212463379, "step": 3620 }, { "epoch": 1.87, "learning_rate": 2.0854847963281698e-07, "logits/chosen": -2.6773931980133057, "logits/rejected": -2.565727710723877, "logps/chosen": -306.9902648925781, "logps/rejected": -301.0198669433594, "loss": 0.0857, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.3986026644706726, "rewards/margins": 7.065288543701172, "rewards/rejected": -7.463891506195068, "step": 3630 }, { "epoch": 1.88, "learning_rate": 2.0759227385733407e-07, "logits/chosen": -2.4982149600982666, "logits/rejected": -2.458949327468872, "logps/chosen": -307.76922607421875, "logps/rejected": -307.8131408691406, "loss": 0.0704, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.26402151584625244, "rewards/margins": 7.018546104431152, "rewards/rejected": -7.282568454742432, "step": 3640 }, { "epoch": 1.88, "learning_rate": 2.066360680818512e-07, "logits/chosen": -2.6682722568511963, "logits/rejected": -2.592215061187744, "logps/chosen": -331.39129638671875, "logps/rejected": -314.46356201171875, "loss": 0.0729, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.12419804185628891, "rewards/margins": 7.393265724182129, "rewards/rejected": -7.269067287445068, "step": 3650 }, { "epoch": 1.89, "learning_rate": 2.0567986230636832e-07, "logits/chosen": -2.648822784423828, "logits/rejected": -2.575870990753174, "logps/chosen": -257.9449157714844, "logps/rejected": -305.73406982421875, "loss": 0.0953, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.2408134937286377, "rewards/margins": 7.346316337585449, "rewards/rejected": -7.58712911605835, "step": 3660 }, { "epoch": 1.89, "learning_rate": 2.0472365653088544e-07, "logits/chosen": -2.6813411712646484, "logits/rejected": -2.612908363342285, "logps/chosen": -304.7392272949219, "logps/rejected": -299.3567810058594, "loss": 0.0756, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.8448777198791504, "rewards/margins": 6.271629810333252, "rewards/rejected": -7.116507053375244, "step": 3670 }, { "epoch": 1.9, "learning_rate": 2.0376745075540256e-07, "logits/chosen": -2.702895164489746, "logits/rejected": -2.6631016731262207, "logps/chosen": -324.67694091796875, "logps/rejected": -321.83392333984375, "loss": 0.0663, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.580236554145813, "rewards/margins": 6.977442741394043, "rewards/rejected": -7.55767822265625, "step": 3680 }, { "epoch": 1.91, "learning_rate": 2.0281124497991967e-07, "logits/chosen": -2.6680264472961426, "logits/rejected": -2.6557843685150146, "logps/chosen": -284.6235656738281, "logps/rejected": -324.8849182128906, "loss": 0.1062, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.3347598910331726, "rewards/margins": 6.98086404800415, "rewards/rejected": -7.3156232833862305, "step": 3690 }, { "epoch": 1.91, "learning_rate": 2.018550392044368e-07, "logits/chosen": -2.6927990913391113, "logits/rejected": -2.640979290008545, "logps/chosen": -279.6319580078125, "logps/rejected": -350.6274719238281, "loss": 0.0631, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.232979416847229, "rewards/margins": 7.658511161804199, "rewards/rejected": -7.891491889953613, "step": 3700 }, { "epoch": 1.91, "eval_logits/chosen": -2.6818108558654785, "eval_logits/rejected": -2.6407127380371094, "eval_logps/chosen": -296.2057189941406, "eval_logps/rejected": -276.4331359863281, "eval_loss": 0.5667564272880554, "eval_rewards/accuracies": 0.7559999823570251, "eval_rewards/chosen": -2.5054566860198975, "eval_rewards/margins": 2.2894585132598877, "eval_rewards/rejected": -4.794915676116943, "eval_runtime": 301.041, "eval_samples_per_second": 6.644, "eval_steps_per_second": 0.415, "step": 3700 }, { "epoch": 1.92, "learning_rate": 2.0089883342895388e-07, "logits/chosen": -2.7003157138824463, "logits/rejected": -2.6551907062530518, "logps/chosen": -314.6983337402344, "logps/rejected": -279.4690246582031, "loss": 0.0751, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.11149871349334717, "rewards/margins": 7.081504821777344, "rewards/rejected": -7.1930036544799805, "step": 3710 }, { "epoch": 1.92, "learning_rate": 1.9994262765347102e-07, "logits/chosen": -2.723034381866455, "logits/rejected": -2.627500534057617, "logps/chosen": -287.4389343261719, "logps/rejected": -261.7805480957031, "loss": 0.0768, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5197538733482361, "rewards/margins": 6.516218662261963, "rewards/rejected": -7.035972595214844, "step": 3720 }, { "epoch": 1.93, "learning_rate": 1.9898642187798813e-07, "logits/chosen": -2.5865015983581543, "logits/rejected": -2.56524920463562, "logps/chosen": -290.79486083984375, "logps/rejected": -328.8044738769531, "loss": 0.0591, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.0011775374878197908, "rewards/margins": 8.083017349243164, "rewards/rejected": -8.081838607788086, "step": 3730 }, { "epoch": 1.93, "learning_rate": 1.9803021610250525e-07, "logits/chosen": -2.7334277629852295, "logits/rejected": -2.7040934562683105, "logps/chosen": -237.2183074951172, "logps/rejected": -288.6593322753906, "loss": 0.0791, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.4630967080593109, "rewards/margins": 6.583277702331543, "rewards/rejected": -7.046375274658203, "step": 3740 }, { "epoch": 1.94, "learning_rate": 1.9707401032702237e-07, "logits/chosen": -2.5976366996765137, "logits/rejected": -2.5525100231170654, "logps/chosen": -261.64508056640625, "logps/rejected": -296.0137939453125, "loss": 0.0514, "rewards/accuracies": 1.0, "rewards/chosen": -0.5384448170661926, "rewards/margins": 6.905440330505371, "rewards/rejected": -7.443885803222656, "step": 3750 }, { "epoch": 1.94, "learning_rate": 1.9611780455153948e-07, "logits/chosen": -2.697441577911377, "logits/rejected": -2.7309083938598633, "logps/chosen": -240.364013671875, "logps/rejected": -301.3322448730469, "loss": 0.0921, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.21534188091754913, "rewards/margins": 6.707180976867676, "rewards/rejected": -6.922522068023682, "step": 3760 }, { "epoch": 1.95, "learning_rate": 1.951615987760566e-07, "logits/chosen": -2.6353142261505127, "logits/rejected": -2.5878772735595703, "logps/chosen": -280.3433532714844, "logps/rejected": -305.8084411621094, "loss": 0.0782, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7940788269042969, "rewards/margins": 6.9906744956970215, "rewards/rejected": -7.78475284576416, "step": 3770 }, { "epoch": 1.95, "learning_rate": 1.942053930005737e-07, "logits/chosen": -2.793308734893799, "logits/rejected": -2.7050063610076904, "logps/chosen": -291.4981689453125, "logps/rejected": -308.5917053222656, "loss": 0.0587, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6085634827613831, "rewards/margins": 7.077228546142578, "rewards/rejected": -7.685791969299316, "step": 3780 }, { "epoch": 1.96, "learning_rate": 1.9324918722509086e-07, "logits/chosen": -2.645498514175415, "logits/rejected": -2.691744089126587, "logps/chosen": -265.4901428222656, "logps/rejected": -334.2118835449219, "loss": 0.0877, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1245644092559814, "rewards/margins": 7.277432441711426, "rewards/rejected": -8.401995658874512, "step": 3790 }, { "epoch": 1.96, "learning_rate": 1.9229298144960794e-07, "logits/chosen": -2.718837261199951, "logits/rejected": -2.6985907554626465, "logps/chosen": -283.3094177246094, "logps/rejected": -304.67669677734375, "loss": 0.1202, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5752776861190796, "rewards/margins": 7.11301326751709, "rewards/rejected": -7.688291072845459, "step": 3800 }, { "epoch": 1.96, "eval_logits/chosen": -2.7124974727630615, "eval_logits/rejected": -2.671638011932373, "eval_logps/chosen": -297.732177734375, "eval_logps/rejected": -275.73297119140625, "eval_loss": 0.5678086280822754, "eval_rewards/accuracies": 0.7580000162124634, "eval_rewards/chosen": -2.658102035522461, "eval_rewards/margins": 2.0667974948883057, "eval_rewards/rejected": -4.7248992919921875, "eval_runtime": 302.3235, "eval_samples_per_second": 6.615, "eval_steps_per_second": 0.413, "step": 3800 }, { "epoch": 1.97, "learning_rate": 1.9133677567412506e-07, "logits/chosen": -2.7177176475524902, "logits/rejected": -2.6701908111572266, "logps/chosen": -288.8520202636719, "logps/rejected": -305.2877197265625, "loss": 0.0885, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9760677218437195, "rewards/margins": 6.295001029968262, "rewards/rejected": -7.271068572998047, "step": 3810 }, { "epoch": 1.97, "learning_rate": 1.9038056989864218e-07, "logits/chosen": -2.6947813034057617, "logits/rejected": -2.6857385635375977, "logps/chosen": -250.64999389648438, "logps/rejected": -273.1249694824219, "loss": 0.1056, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.41516774892807007, "rewards/margins": 5.893454551696777, "rewards/rejected": -6.308621883392334, "step": 3820 }, { "epoch": 1.98, "learning_rate": 1.894243641231593e-07, "logits/chosen": -2.49280047416687, "logits/rejected": -2.485044002532959, "logps/chosen": -268.3055725097656, "logps/rejected": -257.1871337890625, "loss": 0.0927, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.7658621072769165, "rewards/margins": 6.147838592529297, "rewards/rejected": -6.913701057434082, "step": 3830 }, { "epoch": 1.98, "learning_rate": 1.884681583476764e-07, "logits/chosen": -2.7352261543273926, "logits/rejected": -2.720942497253418, "logps/chosen": -289.215087890625, "logps/rejected": -305.9485778808594, "loss": 0.0578, "rewards/accuracies": 0.9375, "rewards/chosen": -1.321073055267334, "rewards/margins": 5.713677406311035, "rewards/rejected": -7.034750938415527, "step": 3840 }, { "epoch": 1.99, "learning_rate": 1.8751195257219352e-07, "logits/chosen": -2.7396533489227295, "logits/rejected": -2.7132935523986816, "logps/chosen": -256.33660888671875, "logps/rejected": -292.3953552246094, "loss": 0.0822, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.813510537147522, "rewards/margins": 6.204690456390381, "rewards/rejected": -7.018200874328613, "step": 3850 }, { "epoch": 1.99, "learning_rate": 1.8655574679671067e-07, "logits/chosen": -2.714446544647217, "logits/rejected": -2.670003890991211, "logps/chosen": -292.04962158203125, "logps/rejected": -316.65447998046875, "loss": 0.1048, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.8342695236206055, "rewards/margins": 6.873841285705566, "rewards/rejected": -7.7081098556518555, "step": 3860 }, { "epoch": 2.0, "learning_rate": 1.8559954102122778e-07, "logits/chosen": -2.723661184310913, "logits/rejected": -2.6586456298828125, "logps/chosen": -279.65679931640625, "logps/rejected": -303.198486328125, "loss": 0.093, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.4174138903617859, "rewards/margins": 6.7883734703063965, "rewards/rejected": -7.205787658691406, "step": 3870 }, { "epoch": 2.0, "learning_rate": 1.8464333524574487e-07, "logits/chosen": -2.7239067554473877, "logits/rejected": -2.6996660232543945, "logps/chosen": -256.20709228515625, "logps/rejected": -281.5430908203125, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -0.3029792308807373, "rewards/margins": 7.203469276428223, "rewards/rejected": -7.506447792053223, "step": 3880 }, { "epoch": 2.01, "learning_rate": 1.8368712947026199e-07, "logits/chosen": -2.7349143028259277, "logits/rejected": -2.69462251663208, "logps/chosen": -285.8705749511719, "logps/rejected": -302.57537841796875, "loss": 0.014, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.05275885388255119, "rewards/margins": 7.5370941162109375, "rewards/rejected": -7.589852809906006, "step": 3890 }, { "epoch": 2.01, "learning_rate": 1.827309236947791e-07, "logits/chosen": -2.6346869468688965, "logits/rejected": -2.6188418865203857, "logps/chosen": -245.5050506591797, "logps/rejected": -325.5300598144531, "loss": 0.022, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.2961425185203552, "rewards/margins": 8.76488971710205, "rewards/rejected": -9.061031341552734, "step": 3900 }, { "epoch": 2.01, "eval_logits/chosen": -2.7125446796417236, "eval_logits/rejected": -2.6679580211639404, "eval_logps/chosen": -298.0444030761719, "eval_logps/rejected": -280.1555480957031, "eval_loss": 0.5656534433364868, "eval_rewards/accuracies": 0.7720000147819519, "eval_rewards/chosen": -2.6893272399902344, "eval_rewards/margins": 2.4778311252593994, "eval_rewards/rejected": -5.167158603668213, "eval_runtime": 300.3774, "eval_samples_per_second": 6.658, "eval_steps_per_second": 0.416, "step": 3900 }, { "epoch": 2.02, "learning_rate": 1.8177471791929622e-07, "logits/chosen": -2.6352615356445312, "logits/rejected": -2.6043148040771484, "logps/chosen": -264.2796325683594, "logps/rejected": -364.203369140625, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.25604504346847534, "rewards/margins": 8.365068435668945, "rewards/rejected": -8.621111869812012, "step": 3910 }, { "epoch": 2.02, "learning_rate": 1.8081851214381333e-07, "logits/chosen": -2.5759568214416504, "logits/rejected": -2.56921648979187, "logps/chosen": -275.2734069824219, "logps/rejected": -348.78875732421875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -0.5250979661941528, "rewards/margins": 8.301558494567871, "rewards/rejected": -8.826656341552734, "step": 3920 }, { "epoch": 2.03, "learning_rate": 1.7986230636833047e-07, "logits/chosen": -2.6585121154785156, "logits/rejected": -2.5779411792755127, "logps/chosen": -225.691650390625, "logps/rejected": -295.9264831542969, "loss": 0.0179, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5348653197288513, "rewards/margins": 7.498388767242432, "rewards/rejected": -8.033254623413086, "step": 3930 }, { "epoch": 2.03, "learning_rate": 1.789061005928476e-07, "logits/chosen": -2.7394919395446777, "logits/rejected": -2.6301112174987793, "logps/chosen": -310.79754638671875, "logps/rejected": -293.79046630859375, "loss": 0.0221, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.4678827226161957, "rewards/margins": 7.93142032623291, "rewards/rejected": -8.39930248260498, "step": 3940 }, { "epoch": 2.04, "learning_rate": 1.7794989481736468e-07, "logits/chosen": -2.7052078247070312, "logits/rejected": -2.637937307357788, "logps/chosen": -296.1398010253906, "logps/rejected": -321.91900634765625, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -0.4104957580566406, "rewards/margins": 8.665520668029785, "rewards/rejected": -9.076016426086426, "step": 3950 }, { "epoch": 2.04, "learning_rate": 1.769936890418818e-07, "logits/chosen": -2.66807222366333, "logits/rejected": -2.6150898933410645, "logps/chosen": -270.62188720703125, "logps/rejected": -317.2550048828125, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": 0.22330737113952637, "rewards/margins": 8.861315727233887, "rewards/rejected": -8.638009071350098, "step": 3960 }, { "epoch": 2.05, "learning_rate": 1.760374832663989e-07, "logits/chosen": -2.611741542816162, "logits/rejected": -2.5388216972351074, "logps/chosen": -275.30755615234375, "logps/rejected": -330.19219970703125, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.39820218086242676, "rewards/margins": 8.278533935546875, "rewards/rejected": -8.676736831665039, "step": 3970 }, { "epoch": 2.05, "learning_rate": 1.7508127749091603e-07, "logits/chosen": -2.68839955329895, "logits/rejected": -2.650808811187744, "logps/chosen": -281.78289794921875, "logps/rejected": -326.82745361328125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.6049606204032898, "rewards/margins": 8.425558090209961, "rewards/rejected": -9.030518531799316, "step": 3980 }, { "epoch": 2.06, "learning_rate": 1.7412507171543314e-07, "logits/chosen": -2.673745632171631, "logits/rejected": -2.614142894744873, "logps/chosen": -274.6407775878906, "logps/rejected": -293.92449951171875, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.0188729763031006, "rewards/margins": 8.362761497497559, "rewards/rejected": -9.381634712219238, "step": 3990 }, { "epoch": 2.07, "learning_rate": 1.7316886593995028e-07, "logits/chosen": -2.684051513671875, "logits/rejected": -2.6487350463867188, "logps/chosen": -255.32919311523438, "logps/rejected": -315.4376220703125, "loss": 0.0177, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.5388384461402893, "rewards/margins": 9.196451187133789, "rewards/rejected": -9.73529052734375, "step": 4000 }, { "epoch": 2.07, "eval_logits/chosen": -2.691579580307007, "eval_logits/rejected": -2.64309024810791, "eval_logps/chosen": -304.6116943359375, "eval_logps/rejected": -291.3919372558594, "eval_loss": 0.6171462535858154, "eval_rewards/accuracies": 0.7680000066757202, "eval_rewards/chosen": -3.346055746078491, "eval_rewards/margins": 2.9447388648986816, "eval_rewards/rejected": -6.290794372558594, "eval_runtime": 300.442, "eval_samples_per_second": 6.657, "eval_steps_per_second": 0.416, "step": 4000 }, { "epoch": 2.07, "learning_rate": 1.722126601644674e-07, "logits/chosen": -2.6704154014587402, "logits/rejected": -2.551290273666382, "logps/chosen": -283.76666259765625, "logps/rejected": -310.22857666015625, "loss": 0.0218, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.4321608543395996, "rewards/margins": 8.552752494812012, "rewards/rejected": -8.984914779663086, "step": 4010 }, { "epoch": 2.08, "learning_rate": 1.7125645438898452e-07, "logits/chosen": -2.6245718002319336, "logits/rejected": -2.6031408309936523, "logps/chosen": -276.89520263671875, "logps/rejected": -321.8138122558594, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.7024073004722595, "rewards/margins": 9.13112735748291, "rewards/rejected": -9.833534240722656, "step": 4020 }, { "epoch": 2.08, "learning_rate": 1.703002486135016e-07, "logits/chosen": -2.6619951725006104, "logits/rejected": -2.646958112716675, "logps/chosen": -293.88006591796875, "logps/rejected": -332.16082763671875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -0.06791017204523087, "rewards/margins": 9.907838821411133, "rewards/rejected": -9.975748062133789, "step": 4030 }, { "epoch": 2.09, "learning_rate": 1.6934404283801872e-07, "logits/chosen": -2.6504015922546387, "logits/rejected": -2.5901706218719482, "logps/chosen": -283.5688171386719, "logps/rejected": -338.7544860839844, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.1157902255654335, "rewards/margins": 9.678095817565918, "rewards/rejected": -9.793886184692383, "step": 4040 }, { "epoch": 2.09, "learning_rate": 1.6838783706253584e-07, "logits/chosen": -2.6505672931671143, "logits/rejected": -2.600067615509033, "logps/chosen": -248.70849609375, "logps/rejected": -300.7835388183594, "loss": 0.0163, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5731819272041321, "rewards/margins": 8.532538414001465, "rewards/rejected": -9.105721473693848, "step": 4050 }, { "epoch": 2.1, "learning_rate": 1.6743163128705295e-07, "logits/chosen": -2.6042139530181885, "logits/rejected": -2.545107364654541, "logps/chosen": -260.8901672363281, "logps/rejected": -328.5799255371094, "loss": 0.0193, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0383368730545044, "rewards/margins": 9.177366256713867, "rewards/rejected": -10.215703964233398, "step": 4060 }, { "epoch": 2.1, "learning_rate": 1.664754255115701e-07, "logits/chosen": -2.617219924926758, "logits/rejected": -2.5846307277679443, "logps/chosen": -256.7451477050781, "logps/rejected": -329.6984558105469, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -0.6538764834403992, "rewards/margins": 9.410051345825195, "rewards/rejected": -10.063928604125977, "step": 4070 }, { "epoch": 2.11, "learning_rate": 1.655192197360872e-07, "logits/chosen": -2.779808521270752, "logits/rejected": -2.682321548461914, "logps/chosen": -315.32232666015625, "logps/rejected": -357.68408203125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.6023126840591431, "rewards/margins": 9.032594680786133, "rewards/rejected": -9.634907722473145, "step": 4080 }, { "epoch": 2.11, "learning_rate": 1.6456301396060433e-07, "logits/chosen": -2.686439037322998, "logits/rejected": -2.628729820251465, "logps/chosen": -306.7290344238281, "logps/rejected": -362.8133239746094, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.5802473425865173, "rewards/margins": 8.59655475616455, "rewards/rejected": -9.176801681518555, "step": 4090 }, { "epoch": 2.12, "learning_rate": 1.6360680818512144e-07, "logits/chosen": -2.633235216140747, "logits/rejected": -2.62958025932312, "logps/chosen": -264.7658996582031, "logps/rejected": -323.9909362792969, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.38502559065818787, "rewards/margins": 8.613489151000977, "rewards/rejected": -8.998516082763672, "step": 4100 }, { "epoch": 2.12, "eval_logits/chosen": -2.670144557952881, "eval_logits/rejected": -2.622467041015625, "eval_logps/chosen": -304.59942626953125, "eval_logps/rejected": -292.2874450683594, "eval_loss": 0.6389336585998535, "eval_rewards/accuracies": 0.765999972820282, "eval_rewards/chosen": -3.344829559326172, "eval_rewards/margins": 3.0355160236358643, "eval_rewards/rejected": -6.380346298217773, "eval_runtime": 301.6754, "eval_samples_per_second": 6.63, "eval_steps_per_second": 0.414, "step": 4100 }, { "epoch": 2.12, "learning_rate": 1.6265060240963853e-07, "logits/chosen": -2.7134909629821777, "logits/rejected": -2.6375508308410645, "logps/chosen": -307.7324523925781, "logps/rejected": -317.06622314453125, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.44676604866981506, "rewards/margins": 8.252876281738281, "rewards/rejected": -8.6996431350708, "step": 4110 }, { "epoch": 2.13, "learning_rate": 1.6169439663415565e-07, "logits/chosen": -2.729085922241211, "logits/rejected": -2.639957904815674, "logps/chosen": -287.22015380859375, "logps/rejected": -338.83001708984375, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5336463451385498, "rewards/margins": 10.47472858428955, "rewards/rejected": -11.008374214172363, "step": 4120 }, { "epoch": 2.13, "learning_rate": 1.6073819085867276e-07, "logits/chosen": -2.7341055870056152, "logits/rejected": -2.757556676864624, "logps/chosen": -270.1176452636719, "logps/rejected": -388.94757080078125, "loss": 0.013, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2091827690601349, "rewards/margins": 9.677942276000977, "rewards/rejected": -9.887125968933105, "step": 4130 }, { "epoch": 2.14, "learning_rate": 1.597819850831899e-07, "logits/chosen": -2.6629955768585205, "logits/rejected": -2.578319549560547, "logps/chosen": -250.00076293945312, "logps/rejected": -334.855712890625, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.4083684980869293, "rewards/margins": 9.928139686584473, "rewards/rejected": -10.336507797241211, "step": 4140 }, { "epoch": 2.14, "learning_rate": 1.5882577930770702e-07, "logits/chosen": -2.7194714546203613, "logits/rejected": -2.620279312133789, "logps/chosen": -283.8021545410156, "logps/rejected": -331.04412841796875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.6026127934455872, "rewards/margins": 9.564842224121094, "rewards/rejected": -10.167454719543457, "step": 4150 }, { "epoch": 2.15, "learning_rate": 1.5786957353222414e-07, "logits/chosen": -2.687295436859131, "logits/rejected": -2.585244655609131, "logps/chosen": -325.5843200683594, "logps/rejected": -326.9093017578125, "loss": 0.0169, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.12477810680866241, "rewards/margins": 9.427237510681152, "rewards/rejected": -9.55201530456543, "step": 4160 }, { "epoch": 2.15, "learning_rate": 1.5691336775674125e-07, "logits/chosen": -2.6061997413635254, "logits/rejected": -2.583631992340088, "logps/chosen": -257.86517333984375, "logps/rejected": -322.37945556640625, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -0.44339266419410706, "rewards/margins": 10.549286842346191, "rewards/rejected": -10.99267864227295, "step": 4170 }, { "epoch": 2.16, "learning_rate": 1.5595716198125837e-07, "logits/chosen": -2.646427631378174, "logits/rejected": -2.572059392929077, "logps/chosen": -298.1983947753906, "logps/rejected": -348.5059814453125, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -1.2386928796768188, "rewards/margins": 9.014298439025879, "rewards/rejected": -10.252991676330566, "step": 4180 }, { "epoch": 2.16, "learning_rate": 1.5500095620577546e-07, "logits/chosen": -2.745795488357544, "logits/rejected": -2.687631130218506, "logps/chosen": -265.0744934082031, "logps/rejected": -290.81414794921875, "loss": 0.0126, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.1059540510177612, "rewards/margins": 8.500246047973633, "rewards/rejected": -9.606199264526367, "step": 4190 }, { "epoch": 2.17, "learning_rate": 1.5404475043029257e-07, "logits/chosen": -2.689143657684326, "logits/rejected": -2.657597303390503, "logps/chosen": -265.9485778808594, "logps/rejected": -319.83935546875, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.9064074754714966, "rewards/margins": 9.307806015014648, "rewards/rejected": -10.214213371276855, "step": 4200 }, { "epoch": 2.17, "eval_logits/chosen": -2.679666042327881, "eval_logits/rejected": -2.63228702545166, "eval_logps/chosen": -306.5373229980469, "eval_logps/rejected": -294.5120849609375, "eval_loss": 0.6562466621398926, "eval_rewards/accuracies": 0.7620000243186951, "eval_rewards/chosen": -3.5386173725128174, "eval_rewards/margins": 3.064192295074463, "eval_rewards/rejected": -6.602809906005859, "eval_runtime": 299.9162, "eval_samples_per_second": 6.669, "eval_steps_per_second": 0.417, "step": 4200 }, { "epoch": 2.17, "learning_rate": 1.5308854465480971e-07, "logits/chosen": -2.7112841606140137, "logits/rejected": -2.6639649868011475, "logps/chosen": -277.16107177734375, "logps/rejected": -296.5805358886719, "loss": 0.0116, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.49038490653038025, "rewards/margins": 8.583968162536621, "rewards/rejected": -9.07435417175293, "step": 4210 }, { "epoch": 2.18, "learning_rate": 1.5213233887932683e-07, "logits/chosen": -2.64880633354187, "logits/rejected": -2.590536594390869, "logps/chosen": -312.7170104980469, "logps/rejected": -328.259521484375, "loss": 0.0065, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9708864092826843, "rewards/margins": 8.74626350402832, "rewards/rejected": -9.71714973449707, "step": 4220 }, { "epoch": 2.18, "learning_rate": 1.5117613310384395e-07, "logits/chosen": -2.682318925857544, "logits/rejected": -2.6444616317749023, "logps/chosen": -293.54766845703125, "logps/rejected": -340.8854064941406, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.087975263595581, "rewards/margins": 10.280842781066895, "rewards/rejected": -11.368818283081055, "step": 4230 }, { "epoch": 2.19, "learning_rate": 1.5021992732836106e-07, "logits/chosen": -2.6920106410980225, "logits/rejected": -2.6487114429473877, "logps/chosen": -290.64080810546875, "logps/rejected": -374.7381896972656, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -1.0875754356384277, "rewards/margins": 10.12022590637207, "rewards/rejected": -11.207801818847656, "step": 4240 }, { "epoch": 2.19, "learning_rate": 1.4926372155287818e-07, "logits/chosen": -2.6744678020477295, "logits/rejected": -2.684854030609131, "logps/chosen": -235.4513397216797, "logps/rejected": -328.29443359375, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -1.1996930837631226, "rewards/margins": 9.253260612487793, "rewards/rejected": -10.452953338623047, "step": 4250 }, { "epoch": 2.2, "learning_rate": 1.483075157773953e-07, "logits/chosen": -2.7144248485565186, "logits/rejected": -2.6114330291748047, "logps/chosen": -287.950927734375, "logps/rejected": -306.99176025390625, "loss": 0.0094, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7344351410865784, "rewards/margins": 8.17457389831543, "rewards/rejected": -8.909008979797363, "step": 4260 }, { "epoch": 2.2, "learning_rate": 1.4735131000191238e-07, "logits/chosen": -2.6291093826293945, "logits/rejected": -2.6125636100769043, "logps/chosen": -247.571533203125, "logps/rejected": -277.3638610839844, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.6242862939834595, "rewards/margins": 8.818742752075195, "rewards/rejected": -9.443029403686523, "step": 4270 }, { "epoch": 2.21, "learning_rate": 1.4639510422642952e-07, "logits/chosen": -2.675595283508301, "logits/rejected": -2.637773275375366, "logps/chosen": -250.6396942138672, "logps/rejected": -305.4952697753906, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.2030136585235596, "rewards/margins": 8.31075668334961, "rewards/rejected": -9.513771057128906, "step": 4280 }, { "epoch": 2.21, "learning_rate": 1.4543889845094664e-07, "logits/chosen": -2.618609666824341, "logits/rejected": -2.6568849086761475, "logps/chosen": -308.46539306640625, "logps/rejected": -380.2574157714844, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -0.23853091895580292, "rewards/margins": 10.959972381591797, "rewards/rejected": -11.198503494262695, "step": 4290 }, { "epoch": 2.22, "learning_rate": 1.4448269267546376e-07, "logits/chosen": -2.652076244354248, "logits/rejected": -2.6042404174804688, "logps/chosen": -331.9740295410156, "logps/rejected": -353.1656494140625, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 0.22926807403564453, "rewards/margins": 10.5140962600708, "rewards/rejected": -10.284828186035156, "step": 4300 }, { "epoch": 2.22, "eval_logits/chosen": -2.6678168773651123, "eval_logits/rejected": -2.619150400161743, "eval_logps/chosen": -308.19952392578125, "eval_logps/rejected": -297.47637939453125, "eval_loss": 0.6742202639579773, "eval_rewards/accuracies": 0.7559999823570251, "eval_rewards/chosen": -3.704840898513794, "eval_rewards/margins": 3.194397449493408, "eval_rewards/rejected": -6.899238586425781, "eval_runtime": 300.3339, "eval_samples_per_second": 6.659, "eval_steps_per_second": 0.416, "step": 4300 }, { "epoch": 2.23, "learning_rate": 1.4352648689998087e-07, "logits/chosen": -2.7097830772399902, "logits/rejected": -2.593984365463257, "logps/chosen": -259.73486328125, "logps/rejected": -311.3151550292969, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.753538966178894, "rewards/margins": 9.248102188110352, "rewards/rejected": -10.001642227172852, "step": 4310 }, { "epoch": 2.23, "learning_rate": 1.42570281124498e-07, "logits/chosen": -2.680701494216919, "logits/rejected": -2.6096949577331543, "logps/chosen": -305.2640075683594, "logps/rejected": -359.37835693359375, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.4520825743675232, "rewards/margins": 9.839849472045898, "rewards/rejected": -10.2919340133667, "step": 4320 }, { "epoch": 2.24, "learning_rate": 1.416140753490151e-07, "logits/chosen": -2.6360256671905518, "logits/rejected": -2.669020414352417, "logps/chosen": -304.3402099609375, "logps/rejected": -366.10369873046875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.6026782989501953, "rewards/margins": 10.772412300109863, "rewards/rejected": -11.375089645385742, "step": 4330 }, { "epoch": 2.24, "learning_rate": 1.4065786957353222e-07, "logits/chosen": -2.6518890857696533, "logits/rejected": -2.6714396476745605, "logps/chosen": -288.86322021484375, "logps/rejected": -343.5704040527344, "loss": 0.0143, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.0478026866912842, "rewards/margins": 9.44815731048584, "rewards/rejected": -10.495959281921387, "step": 4340 }, { "epoch": 2.25, "learning_rate": 1.3970166379804933e-07, "logits/chosen": -2.6951656341552734, "logits/rejected": -2.616739273071289, "logps/chosen": -326.5705261230469, "logps/rejected": -336.5369873046875, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.3368417024612427, "rewards/margins": 9.160501480102539, "rewards/rejected": -10.497343063354492, "step": 4350 }, { "epoch": 2.25, "learning_rate": 1.3874545802256645e-07, "logits/chosen": -2.711392641067505, "logits/rejected": -2.6601951122283936, "logps/chosen": -275.40130615234375, "logps/rejected": -315.70989990234375, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.0467510223388672, "rewards/margins": 9.312707901000977, "rewards/rejected": -10.359457969665527, "step": 4360 }, { "epoch": 2.26, "learning_rate": 1.3778925224708357e-07, "logits/chosen": -2.5673935413360596, "logits/rejected": -2.5685951709747314, "logps/chosen": -266.0102233886719, "logps/rejected": -328.2872009277344, "loss": 0.0068, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.188098669052124, "rewards/margins": 9.597440719604492, "rewards/rejected": -10.785538673400879, "step": 4370 }, { "epoch": 2.26, "learning_rate": 1.3683304647160068e-07, "logits/chosen": -2.599808931350708, "logits/rejected": -2.571666717529297, "logps/chosen": -257.6767272949219, "logps/rejected": -352.75115966796875, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.1264151334762573, "rewards/margins": 10.437751770019531, "rewards/rejected": -11.564168930053711, "step": 4380 }, { "epoch": 2.27, "learning_rate": 1.358768406961178e-07, "logits/chosen": -2.595768928527832, "logits/rejected": -2.6167728900909424, "logps/chosen": -254.9929656982422, "logps/rejected": -339.8115234375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.3337209224700928, "rewards/margins": 10.193733215332031, "rewards/rejected": -11.527453422546387, "step": 4390 }, { "epoch": 2.27, "learning_rate": 1.349206349206349e-07, "logits/chosen": -2.634752035140991, "logits/rejected": -2.6203811168670654, "logps/chosen": -250.7345733642578, "logps/rejected": -308.99835205078125, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -2.247837781906128, "rewards/margins": 8.695775985717773, "rewards/rejected": -10.943613052368164, "step": 4400 }, { "epoch": 2.27, "eval_logits/chosen": -2.645358085632324, "eval_logits/rejected": -2.5974912643432617, "eval_logps/chosen": -312.79296875, "eval_logps/rejected": -303.3212585449219, "eval_loss": 0.6981696486473083, "eval_rewards/accuracies": 0.7680000066757202, "eval_rewards/chosen": -4.164183616638184, "eval_rewards/margins": 3.319542169570923, "eval_rewards/rejected": -7.483725070953369, "eval_runtime": 300.4701, "eval_samples_per_second": 6.656, "eval_steps_per_second": 0.416, "step": 4400 }, { "epoch": 2.28, "learning_rate": 1.3396442914515203e-07, "logits/chosen": -2.5926036834716797, "logits/rejected": -2.5172619819641113, "logps/chosen": -292.2915344238281, "logps/rejected": -357.282958984375, "loss": 0.0094, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6816883087158203, "rewards/margins": 10.120698928833008, "rewards/rejected": -11.802387237548828, "step": 4410 }, { "epoch": 2.28, "learning_rate": 1.3300822336966917e-07, "logits/chosen": -2.655643939971924, "logits/rejected": -2.5098280906677246, "logps/chosen": -315.7123107910156, "logps/rejected": -347.8288879394531, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.6822350025177002, "rewards/margins": 11.133058547973633, "rewards/rejected": -11.815293312072754, "step": 4420 }, { "epoch": 2.29, "learning_rate": 1.3205201759418626e-07, "logits/chosen": -2.5708107948303223, "logits/rejected": -2.4846396446228027, "logps/chosen": -311.37249755859375, "logps/rejected": -330.8601989746094, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -0.6858748197555542, "rewards/margins": 9.757244110107422, "rewards/rejected": -10.44311809539795, "step": 4430 }, { "epoch": 2.29, "learning_rate": 1.3109581181870338e-07, "logits/chosen": -2.7022705078125, "logits/rejected": -2.6619653701782227, "logps/chosen": -325.13134765625, "logps/rejected": -335.73382568359375, "loss": 0.0094, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.3868844509124756, "rewards/margins": 10.037075996398926, "rewards/rejected": -10.423959732055664, "step": 4440 }, { "epoch": 2.3, "learning_rate": 1.301396060432205e-07, "logits/chosen": -2.6541895866394043, "logits/rejected": -2.559623956680298, "logps/chosen": -318.50872802734375, "logps/rejected": -341.30523681640625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.30984628200531, "rewards/margins": 9.713074684143066, "rewards/rejected": -11.022921562194824, "step": 4450 }, { "epoch": 2.3, "learning_rate": 1.291834002677376e-07, "logits/chosen": -2.5706753730773926, "logits/rejected": -2.5507564544677734, "logps/chosen": -281.9523620605469, "logps/rejected": -352.04986572265625, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -1.5520942211151123, "rewards/margins": 9.727930068969727, "rewards/rejected": -11.280024528503418, "step": 4460 }, { "epoch": 2.31, "learning_rate": 1.2822719449225472e-07, "logits/chosen": -2.5962116718292236, "logits/rejected": -2.560507297515869, "logps/chosen": -223.4073944091797, "logps/rejected": -311.93670654296875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.894232153892517, "rewards/margins": 9.102167129516602, "rewards/rejected": -10.996397972106934, "step": 4470 }, { "epoch": 2.31, "learning_rate": 1.2727098871677184e-07, "logits/chosen": -2.677396774291992, "logits/rejected": -2.623908519744873, "logps/chosen": -348.23760986328125, "logps/rejected": -390.755859375, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -0.8249503374099731, "rewards/margins": 10.461699485778809, "rewards/rejected": -11.286649703979492, "step": 4480 }, { "epoch": 2.32, "learning_rate": 1.2631478294128898e-07, "logits/chosen": -2.7189507484436035, "logits/rejected": -2.6741905212402344, "logps/chosen": -275.53314208984375, "logps/rejected": -339.3215026855469, "loss": 0.0241, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.085033655166626, "rewards/margins": 9.075556755065918, "rewards/rejected": -10.160591125488281, "step": 4490 }, { "epoch": 2.32, "learning_rate": 1.253585771658061e-07, "logits/chosen": -2.619215965270996, "logits/rejected": -2.6242549419403076, "logps/chosen": -251.1781005859375, "logps/rejected": -334.31689453125, "loss": 0.0173, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1404932737350464, "rewards/margins": 8.950601577758789, "rewards/rejected": -10.091094017028809, "step": 4500 }, { "epoch": 2.32, "eval_logits/chosen": -2.6393561363220215, "eval_logits/rejected": -2.596702814102173, "eval_logps/chosen": -310.2903747558594, "eval_logps/rejected": -297.9649963378906, "eval_loss": 0.6661145091056824, "eval_rewards/accuracies": 0.765999972820282, "eval_rewards/chosen": -3.9139232635498047, "eval_rewards/margins": 3.0341811180114746, "eval_rewards/rejected": -6.948104381561279, "eval_runtime": 301.1722, "eval_samples_per_second": 6.641, "eval_steps_per_second": 0.415, "step": 4500 }, { "epoch": 2.33, "learning_rate": 1.2440237139032319e-07, "logits/chosen": -2.7100770473480225, "logits/rejected": -2.663496494293213, "logps/chosen": -296.59197998046875, "logps/rejected": -317.30499267578125, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.1978495121002197, "rewards/margins": 9.186239242553711, "rewards/rejected": -10.384088516235352, "step": 4510 }, { "epoch": 2.33, "learning_rate": 1.234461656148403e-07, "logits/chosen": -2.6586012840270996, "logits/rejected": -2.6368064880371094, "logps/chosen": -323.82513427734375, "logps/rejected": -360.9985046386719, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.9345844984054565, "rewards/margins": 10.220632553100586, "rewards/rejected": -11.155217170715332, "step": 4520 }, { "epoch": 2.34, "learning_rate": 1.2248995983935742e-07, "logits/chosen": -2.680159091949463, "logits/rejected": -2.663702964782715, "logps/chosen": -302.0219421386719, "logps/rejected": -340.0579528808594, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.9645177721977234, "rewards/margins": 9.70927619934082, "rewards/rejected": -10.673794746398926, "step": 4530 }, { "epoch": 2.34, "learning_rate": 1.2153375406387456e-07, "logits/chosen": -2.6001861095428467, "logits/rejected": -2.597421169281006, "logps/chosen": -296.09771728515625, "logps/rejected": -371.7466735839844, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.9812561273574829, "rewards/margins": 10.29119873046875, "rewards/rejected": -11.272455215454102, "step": 4540 }, { "epoch": 2.35, "learning_rate": 1.2057754828839165e-07, "logits/chosen": -2.639939546585083, "logits/rejected": -2.6484570503234863, "logps/chosen": -273.16802978515625, "logps/rejected": -337.97332763671875, "loss": 0.0118, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.728509247303009, "rewards/margins": 9.519789695739746, "rewards/rejected": -10.248300552368164, "step": 4550 }, { "epoch": 2.35, "learning_rate": 1.1962134251290876e-07, "logits/chosen": -2.6198954582214355, "logits/rejected": -2.5811574459075928, "logps/chosen": -298.90081787109375, "logps/rejected": -304.1781311035156, "loss": 0.0194, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.5390739440917969, "rewards/margins": 8.423462867736816, "rewards/rejected": -9.962536811828613, "step": 4560 }, { "epoch": 2.36, "learning_rate": 1.1866513673742588e-07, "logits/chosen": -2.658489465713501, "logits/rejected": -2.6300225257873535, "logps/chosen": -298.3923034667969, "logps/rejected": -292.21429443359375, "loss": 0.0205, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.46941104531288147, "rewards/margins": 8.323554992675781, "rewards/rejected": -8.79296588897705, "step": 4570 }, { "epoch": 2.36, "learning_rate": 1.1770893096194301e-07, "logits/chosen": -2.557232141494751, "logits/rejected": -2.510739326477051, "logps/chosen": -286.90557861328125, "logps/rejected": -323.8365173339844, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.8109794855117798, "rewards/margins": 9.279717445373535, "rewards/rejected": -10.090696334838867, "step": 4580 }, { "epoch": 2.37, "learning_rate": 1.1675272518646012e-07, "logits/chosen": -2.633720636367798, "logits/rejected": -2.6340861320495605, "logps/chosen": -241.53549194335938, "logps/rejected": -319.45574951171875, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.45409899950027466, "rewards/margins": 8.88463020324707, "rewards/rejected": -9.338728904724121, "step": 4590 }, { "epoch": 2.37, "learning_rate": 1.1579651941097724e-07, "logits/chosen": -2.6740808486938477, "logits/rejected": -2.641120672225952, "logps/chosen": -316.9101257324219, "logps/rejected": -327.01593017578125, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7234556674957275, "rewards/margins": 9.027227401733398, "rewards/rejected": -9.75068187713623, "step": 4600 }, { "epoch": 2.37, "eval_logits/chosen": -2.606816291809082, "eval_logits/rejected": -2.5628256797790527, "eval_logps/chosen": -308.2720642089844, "eval_logps/rejected": -296.76300048828125, "eval_loss": 0.6605738997459412, "eval_rewards/accuracies": 0.7639999985694885, "eval_rewards/chosen": -3.712094306945801, "eval_rewards/margins": 3.1158080101013184, "eval_rewards/rejected": -6.827902317047119, "eval_runtime": 300.7538, "eval_samples_per_second": 6.65, "eval_steps_per_second": 0.416, "step": 4600 }, { "epoch": 2.38, "learning_rate": 1.1484031363549436e-07, "logits/chosen": -2.508571147918701, "logits/rejected": -2.490762710571289, "logps/chosen": -304.9242248535156, "logps/rejected": -335.79913330078125, "loss": 0.0131, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -1.0555249452590942, "rewards/margins": 9.724864959716797, "rewards/rejected": -10.780390739440918, "step": 4610 }, { "epoch": 2.39, "learning_rate": 1.1388410786001147e-07, "logits/chosen": -2.606189012527466, "logits/rejected": -2.6098804473876953, "logps/chosen": -260.5215759277344, "logps/rejected": -345.4276123046875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.055306315422058, "rewards/margins": 8.989636421203613, "rewards/rejected": -10.044942855834961, "step": 4620 }, { "epoch": 2.39, "learning_rate": 1.1292790208452859e-07, "logits/chosen": -2.602384090423584, "logits/rejected": -2.5431814193725586, "logps/chosen": -231.57400512695312, "logps/rejected": -317.2869567871094, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7374894618988037, "rewards/margins": 9.130033493041992, "rewards/rejected": -9.867525100708008, "step": 4630 }, { "epoch": 2.4, "learning_rate": 1.119716963090457e-07, "logits/chosen": -2.5372633934020996, "logits/rejected": -2.529026985168457, "logps/chosen": -253.7849884033203, "logps/rejected": -338.38812255859375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.3635234832763672, "rewards/margins": 9.181906700134277, "rewards/rejected": -10.545430183410645, "step": 4640 }, { "epoch": 2.4, "learning_rate": 1.1101549053356282e-07, "logits/chosen": -2.589287757873535, "logits/rejected": -2.546631336212158, "logps/chosen": -292.82049560546875, "logps/rejected": -336.3973083496094, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.0994226932525635, "rewards/margins": 9.20947265625, "rewards/rejected": -10.308894157409668, "step": 4650 }, { "epoch": 2.41, "learning_rate": 1.1005928475807993e-07, "logits/chosen": -2.5391857624053955, "logits/rejected": -2.514249324798584, "logps/chosen": -240.534912109375, "logps/rejected": -316.34271240234375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.430264949798584, "rewards/margins": 8.712211608886719, "rewards/rejected": -10.142476081848145, "step": 4660 }, { "epoch": 2.41, "learning_rate": 1.0910307898259705e-07, "logits/chosen": -2.475175380706787, "logits/rejected": -2.4967644214630127, "logps/chosen": -259.62890625, "logps/rejected": -365.22540283203125, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9143625497817993, "rewards/margins": 9.281826972961426, "rewards/rejected": -10.196188926696777, "step": 4670 }, { "epoch": 2.42, "learning_rate": 1.0814687320711418e-07, "logits/chosen": -2.4412999153137207, "logits/rejected": -2.4557924270629883, "logps/chosen": -230.18643188476562, "logps/rejected": -358.99578857421875, "loss": 0.01, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9413540959358215, "rewards/margins": 10.510141372680664, "rewards/rejected": -11.451495170593262, "step": 4680 }, { "epoch": 2.42, "learning_rate": 1.0719066743163128e-07, "logits/chosen": -2.5392403602600098, "logits/rejected": -2.531977415084839, "logps/chosen": -321.71453857421875, "logps/rejected": -349.1875305175781, "loss": 0.0168, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.1652792692184448, "rewards/margins": 9.923773765563965, "rewards/rejected": -11.089054107666016, "step": 4690 }, { "epoch": 2.43, "learning_rate": 1.062344616561484e-07, "logits/chosen": -2.553591251373291, "logits/rejected": -2.520071268081665, "logps/chosen": -287.4483642578125, "logps/rejected": -355.64996337890625, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.7835440039634705, "rewards/margins": 10.432835578918457, "rewards/rejected": -11.216379165649414, "step": 4700 }, { "epoch": 2.43, "eval_logits/chosen": -2.561335563659668, "eval_logits/rejected": -2.5127322673797607, "eval_logps/chosen": -310.2392883300781, "eval_logps/rejected": -300.09649658203125, "eval_loss": 0.6704944968223572, "eval_rewards/accuracies": 0.7680000066757202, "eval_rewards/chosen": -3.908813238143921, "eval_rewards/margins": 3.2524404525756836, "eval_rewards/rejected": -7.161253929138184, "eval_runtime": 300.5289, "eval_samples_per_second": 6.655, "eval_steps_per_second": 0.416, "step": 4700 }, { "epoch": 2.43, "learning_rate": 1.0527825588066551e-07, "logits/chosen": -2.5191619396209717, "logits/rejected": -2.5092837810516357, "logps/chosen": -290.0946350097656, "logps/rejected": -342.290771484375, "loss": 0.0103, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5244763493537903, "rewards/margins": 11.548242568969727, "rewards/rejected": -12.07271957397461, "step": 4710 }, { "epoch": 2.44, "learning_rate": 1.0432205010518264e-07, "logits/chosen": -2.528475522994995, "logits/rejected": -2.5448966026306152, "logps/chosen": -250.07003784179688, "logps/rejected": -357.0660400390625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.9180407524108887, "rewards/margins": 10.637187004089355, "rewards/rejected": -11.555229187011719, "step": 4720 }, { "epoch": 2.44, "learning_rate": 1.0336584432969974e-07, "logits/chosen": -2.4682259559631348, "logits/rejected": -2.4316627979278564, "logps/chosen": -244.00253295898438, "logps/rejected": -322.43280029296875, "loss": 0.0116, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7326822280883789, "rewards/margins": 9.885066032409668, "rewards/rejected": -10.617748260498047, "step": 4730 }, { "epoch": 2.45, "learning_rate": 1.0240963855421686e-07, "logits/chosen": -2.4702906608581543, "logits/rejected": -2.4408233165740967, "logps/chosen": -328.27734375, "logps/rejected": -347.71484375, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.8403548002243042, "rewards/margins": 9.786865234375, "rewards/rejected": -10.62722110748291, "step": 4740 }, { "epoch": 2.45, "learning_rate": 1.0145343277873399e-07, "logits/chosen": -2.5148329734802246, "logits/rejected": -2.5080223083496094, "logps/chosen": -317.0351867675781, "logps/rejected": -354.51104736328125, "loss": 0.0106, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0553228855133057, "rewards/margins": 9.899486541748047, "rewards/rejected": -10.95481014251709, "step": 4750 }, { "epoch": 2.46, "learning_rate": 1.004972270032511e-07, "logits/chosen": -2.392381191253662, "logits/rejected": -2.311124086380005, "logps/chosen": -252.53857421875, "logps/rejected": -287.5732727050781, "loss": 0.012, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.5313899517059326, "rewards/margins": 9.025603294372559, "rewards/rejected": -10.55699348449707, "step": 4760 }, { "epoch": 2.46, "learning_rate": 9.95410212277682e-08, "logits/chosen": -2.542147397994995, "logits/rejected": -2.5188517570495605, "logps/chosen": -286.36468505859375, "logps/rejected": -352.60162353515625, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.2542285919189453, "rewards/margins": 10.117773056030273, "rewards/rejected": -11.372002601623535, "step": 4770 }, { "epoch": 2.47, "learning_rate": 9.858481545228532e-08, "logits/chosen": -2.5597620010375977, "logits/rejected": -2.46887469291687, "logps/chosen": -299.46929931640625, "logps/rejected": -323.3019104003906, "loss": 0.0087, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2001690864562988, "rewards/margins": 9.905837059020996, "rewards/rejected": -11.10600757598877, "step": 4780 }, { "epoch": 2.47, "learning_rate": 9.762860967680245e-08, "logits/chosen": -2.473726749420166, "logits/rejected": -2.386307954788208, "logps/chosen": -299.240478515625, "logps/rejected": -367.3982849121094, "loss": 0.0081, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.1765098571777344, "rewards/margins": 10.347665786743164, "rewards/rejected": -12.524175643920898, "step": 4790 }, { "epoch": 2.48, "learning_rate": 9.667240390131957e-08, "logits/chosen": -2.590724229812622, "logits/rejected": -2.485705614089966, "logps/chosen": -312.03411865234375, "logps/rejected": -355.8626403808594, "loss": 0.0099, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.032738447189331, "rewards/margins": 10.802017211914062, "rewards/rejected": -11.834755897521973, "step": 4800 }, { "epoch": 2.48, "eval_logits/chosen": -2.5658154487609863, "eval_logits/rejected": -2.516890048980713, "eval_logps/chosen": -310.987548828125, "eval_logps/rejected": -301.03643798828125, "eval_loss": 0.6825354099273682, "eval_rewards/accuracies": 0.7720000147819519, "eval_rewards/chosen": -3.9836413860321045, "eval_rewards/margins": 3.27160382270813, "eval_rewards/rejected": -7.255245208740234, "eval_runtime": 301.6322, "eval_samples_per_second": 6.631, "eval_steps_per_second": 0.414, "step": 4800 }, { "epoch": 2.48, "learning_rate": 9.571619812583667e-08, "logits/chosen": -2.4577534198760986, "logits/rejected": -2.455472946166992, "logps/chosen": -291.69390869140625, "logps/rejected": -345.99505615234375, "loss": 0.0141, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.8659383654594421, "rewards/margins": 9.381586074829102, "rewards/rejected": -10.24752426147461, "step": 4810 }, { "epoch": 2.49, "learning_rate": 9.47599923503538e-08, "logits/chosen": -2.563683032989502, "logits/rejected": -2.500837802886963, "logps/chosen": -276.25091552734375, "logps/rejected": -345.19488525390625, "loss": 0.0231, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.059242606163025, "rewards/margins": 9.535378456115723, "rewards/rejected": -10.594621658325195, "step": 4820 }, { "epoch": 2.49, "learning_rate": 9.380378657487091e-08, "logits/chosen": -2.5754013061523438, "logits/rejected": -2.565793037414551, "logps/chosen": -277.8792419433594, "logps/rejected": -331.2759094238281, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.173643946647644, "rewards/margins": 9.848978042602539, "rewards/rejected": -11.022623062133789, "step": 4830 }, { "epoch": 2.5, "learning_rate": 9.284758079938803e-08, "logits/chosen": -2.5554699897766113, "logits/rejected": -2.4842638969421387, "logps/chosen": -283.8736572265625, "logps/rejected": -321.87579345703125, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.062213659286499, "rewards/margins": 9.168981552124023, "rewards/rejected": -10.231194496154785, "step": 4840 }, { "epoch": 2.5, "learning_rate": 9.189137502390513e-08, "logits/chosen": -2.5778510570526123, "logits/rejected": -2.5440096855163574, "logps/chosen": -286.9813537597656, "logps/rejected": -396.63922119140625, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.4980794191360474, "rewards/margins": 9.38342571258545, "rewards/rejected": -10.88150405883789, "step": 4850 }, { "epoch": 2.51, "learning_rate": 9.093516924842226e-08, "logits/chosen": -2.5628631114959717, "logits/rejected": -2.538412570953369, "logps/chosen": -267.45501708984375, "logps/rejected": -355.0173034667969, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -0.669926643371582, "rewards/margins": 10.511675834655762, "rewards/rejected": -11.181602478027344, "step": 4860 }, { "epoch": 2.51, "learning_rate": 8.997896347293938e-08, "logits/chosen": -2.601468563079834, "logits/rejected": -2.582242488861084, "logps/chosen": -285.9410705566406, "logps/rejected": -378.1112976074219, "loss": 0.0155, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9583877325057983, "rewards/margins": 9.609073638916016, "rewards/rejected": -10.567461013793945, "step": 4870 }, { "epoch": 2.52, "learning_rate": 8.902275769745648e-08, "logits/chosen": -2.5055062770843506, "logits/rejected": -2.5199389457702637, "logps/chosen": -252.2093505859375, "logps/rejected": -352.6201477050781, "loss": 0.0115, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.0273876190185547, "rewards/margins": 10.693878173828125, "rewards/rejected": -11.72126579284668, "step": 4880 }, { "epoch": 2.52, "learning_rate": 8.806655192197361e-08, "logits/chosen": -2.4830613136291504, "logits/rejected": -2.5039305686950684, "logps/chosen": -244.4785919189453, "logps/rejected": -365.1302795410156, "loss": 0.0077, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.3728035092353821, "rewards/margins": 11.747212409973145, "rewards/rejected": -12.120016098022461, "step": 4890 }, { "epoch": 2.53, "learning_rate": 8.711034614649072e-08, "logits/chosen": -2.563028573989868, "logits/rejected": -2.5216782093048096, "logps/chosen": -270.1450500488281, "logps/rejected": -354.80352783203125, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.764126181602478, "rewards/margins": 10.060375213623047, "rewards/rejected": -11.824501991271973, "step": 4900 }, { "epoch": 2.53, "eval_logits/chosen": -2.5843636989593506, "eval_logits/rejected": -2.5330135822296143, "eval_logps/chosen": -313.6849365234375, "eval_logps/rejected": -306.07098388671875, "eval_loss": 0.6937812566757202, "eval_rewards/accuracies": 0.765999972820282, "eval_rewards/chosen": -4.253377914428711, "eval_rewards/margins": 3.5053179264068604, "eval_rewards/rejected": -7.758696556091309, "eval_runtime": 300.2355, "eval_samples_per_second": 6.661, "eval_steps_per_second": 0.416, "step": 4900 }, { "epoch": 2.53, "learning_rate": 8.615414037100784e-08, "logits/chosen": -2.5201663970947266, "logits/rejected": -2.455021381378174, "logps/chosen": -345.52008056640625, "logps/rejected": -342.25579833984375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.6654549241065979, "rewards/margins": 10.094911575317383, "rewards/rejected": -10.76036548614502, "step": 4910 }, { "epoch": 2.54, "learning_rate": 8.519793459552494e-08, "logits/chosen": -2.5423104763031006, "logits/rejected": -2.4734034538269043, "logps/chosen": -264.29180908203125, "logps/rejected": -338.65118408203125, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -1.032165288925171, "rewards/margins": 10.210509300231934, "rewards/rejected": -11.242673873901367, "step": 4920 }, { "epoch": 2.55, "learning_rate": 8.424172882004207e-08, "logits/chosen": -2.5510902404785156, "logits/rejected": -2.5576791763305664, "logps/chosen": -282.1768798828125, "logps/rejected": -373.37408447265625, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -0.5426809787750244, "rewards/margins": 10.031122207641602, "rewards/rejected": -10.57380485534668, "step": 4930 }, { "epoch": 2.55, "learning_rate": 8.328552304455919e-08, "logits/chosen": -2.5924887657165527, "logits/rejected": -2.5332224369049072, "logps/chosen": -251.1088409423828, "logps/rejected": -318.834228515625, "loss": 0.0142, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5188874006271362, "rewards/margins": 9.845662117004395, "rewards/rejected": -10.36454963684082, "step": 4940 }, { "epoch": 2.56, "learning_rate": 8.23293172690763e-08, "logits/chosen": -2.5838980674743652, "logits/rejected": -2.5227537155151367, "logps/chosen": -282.2577819824219, "logps/rejected": -345.3276672363281, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.00644490122795105, "rewards/margins": 11.34768295288086, "rewards/rejected": -11.354127883911133, "step": 4950 }, { "epoch": 2.56, "learning_rate": 8.137311149359343e-08, "logits/chosen": -2.6914446353912354, "logits/rejected": -2.566591739654541, "logps/chosen": -332.66265869140625, "logps/rejected": -341.6344299316406, "loss": 0.0042, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.37903502583503723, "rewards/margins": 11.008275985717773, "rewards/rejected": -11.387311935424805, "step": 4960 }, { "epoch": 2.57, "learning_rate": 8.041690571811053e-08, "logits/chosen": -2.585280418395996, "logits/rejected": -2.60071063041687, "logps/chosen": -288.63519287109375, "logps/rejected": -351.8276672363281, "loss": 0.0299, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1368919610977173, "rewards/margins": 11.00975227355957, "rewards/rejected": -12.146644592285156, "step": 4970 }, { "epoch": 2.57, "learning_rate": 7.946069994262765e-08, "logits/chosen": -2.5482683181762695, "logits/rejected": -2.5048727989196777, "logps/chosen": -263.3780517578125, "logps/rejected": -324.5472717285156, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.8058077096939087, "rewards/margins": 10.41413688659668, "rewards/rejected": -11.219945907592773, "step": 4980 }, { "epoch": 2.58, "learning_rate": 7.850449416714476e-08, "logits/chosen": -2.600778102874756, "logits/rejected": -2.5648064613342285, "logps/chosen": -305.68170166015625, "logps/rejected": -346.5186462402344, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.30596125125885, "rewards/margins": 9.398767471313477, "rewards/rejected": -10.704729080200195, "step": 4990 }, { "epoch": 2.58, "learning_rate": 7.754828839166188e-08, "logits/chosen": -2.571073055267334, "logits/rejected": -2.5455102920532227, "logps/chosen": -278.08050537109375, "logps/rejected": -354.3592224121094, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.2086985111236572, "rewards/margins": 9.833106994628906, "rewards/rejected": -11.041807174682617, "step": 5000 }, { "epoch": 2.58, "eval_logits/chosen": -2.582568645477295, "eval_logits/rejected": -2.5329596996307373, "eval_logps/chosen": -314.1288146972656, "eval_logps/rejected": -306.4033508300781, "eval_loss": 0.6948726773262024, "eval_rewards/accuracies": 0.765999972820282, "eval_rewards/chosen": -4.29776668548584, "eval_rewards/margins": 3.4941699504852295, "eval_rewards/rejected": -7.79193639755249, "eval_runtime": 298.8189, "eval_samples_per_second": 6.693, "eval_steps_per_second": 0.418, "step": 5000 }, { "epoch": 2.59, "learning_rate": 7.6592082616179e-08, "logits/chosen": -2.5867271423339844, "logits/rejected": -2.5527195930480957, "logps/chosen": -324.69915771484375, "logps/rejected": -377.70257568359375, "loss": 0.0118, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.7334973812103271, "rewards/margins": 10.30841064453125, "rewards/rejected": -12.041908264160156, "step": 5010 }, { "epoch": 2.59, "learning_rate": 7.563587684069611e-08, "logits/chosen": -2.5030980110168457, "logits/rejected": -2.520484447479248, "logps/chosen": -287.6493225097656, "logps/rejected": -358.873291015625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.5187357068061829, "rewards/margins": 10.844244003295898, "rewards/rejected": -11.362979888916016, "step": 5020 }, { "epoch": 2.6, "learning_rate": 7.467967106521324e-08, "logits/chosen": -2.5578770637512207, "logits/rejected": -2.527339458465576, "logps/chosen": -257.82684326171875, "logps/rejected": -265.83624267578125, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0698001384735107, "rewards/margins": 8.80046272277832, "rewards/rejected": -9.870262145996094, "step": 5030 }, { "epoch": 2.6, "learning_rate": 7.372346528973034e-08, "logits/chosen": -2.610701322555542, "logits/rejected": -2.526312828063965, "logps/chosen": -296.7576904296875, "logps/rejected": -331.4477233886719, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.9005954265594482, "rewards/margins": 9.4951171875, "rewards/rejected": -11.395713806152344, "step": 5040 }, { "epoch": 2.61, "learning_rate": 7.276725951424746e-08, "logits/chosen": -2.4974420070648193, "logits/rejected": -2.4610750675201416, "logps/chosen": -270.6094970703125, "logps/rejected": -348.76385498046875, "loss": 0.0153, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.7320085763931274, "rewards/margins": 10.474514961242676, "rewards/rejected": -12.206523895263672, "step": 5050 }, { "epoch": 2.61, "learning_rate": 7.181105373876457e-08, "logits/chosen": -2.512381076812744, "logits/rejected": -2.4702210426330566, "logps/chosen": -271.5727233886719, "logps/rejected": -311.0246276855469, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.9624974131584167, "rewards/margins": 9.830079078674316, "rewards/rejected": -10.792574882507324, "step": 5060 }, { "epoch": 2.62, "learning_rate": 7.08548479632817e-08, "logits/chosen": -2.4948513507843018, "logits/rejected": -2.4570116996765137, "logps/chosen": -316.3506774902344, "logps/rejected": -339.68853759765625, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.7037729620933533, "rewards/margins": 10.431275367736816, "rewards/rejected": -11.135048866271973, "step": 5070 }, { "epoch": 2.62, "learning_rate": 6.98986421877988e-08, "logits/chosen": -2.579561233520508, "logits/rejected": -2.521808385848999, "logps/chosen": -298.4128723144531, "logps/rejected": -363.44580078125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.250028133392334, "rewards/margins": 9.64060115814209, "rewards/rejected": -10.890630722045898, "step": 5080 }, { "epoch": 2.63, "learning_rate": 6.894243641231592e-08, "logits/chosen": -2.4587960243225098, "logits/rejected": -2.4281344413757324, "logps/chosen": -270.9654846191406, "logps/rejected": -342.9233093261719, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.1718332767486572, "rewards/margins": 10.706388473510742, "rewards/rejected": -11.87822151184082, "step": 5090 }, { "epoch": 2.63, "learning_rate": 6.798623063683305e-08, "logits/chosen": -2.4603219032287598, "logits/rejected": -2.3965721130371094, "logps/chosen": -280.44354248046875, "logps/rejected": -350.62969970703125, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.22660231590271, "rewards/margins": 11.59477424621582, "rewards/rejected": -12.821374893188477, "step": 5100 }, { "epoch": 2.63, "eval_logits/chosen": -2.5619547367095947, "eval_logits/rejected": -2.509472131729126, "eval_logps/chosen": -314.65869140625, "eval_logps/rejected": -308.5892333984375, "eval_loss": 0.7238790392875671, "eval_rewards/accuracies": 0.7639999985694885, "eval_rewards/chosen": -4.350755214691162, "eval_rewards/margins": 3.659768581390381, "eval_rewards/rejected": -8.010523796081543, "eval_runtime": 300.7886, "eval_samples_per_second": 6.649, "eval_steps_per_second": 0.416, "step": 5100 }, { "epoch": 2.64, "learning_rate": 6.703002486135017e-08, "logits/chosen": -2.472276210784912, "logits/rejected": -2.442483901977539, "logps/chosen": -250.888671875, "logps/rejected": -368.9104309082031, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.1285686492919922, "rewards/margins": 12.090982437133789, "rewards/rejected": -13.219549179077148, "step": 5110 }, { "epoch": 2.64, "learning_rate": 6.607381908586727e-08, "logits/chosen": -2.526215076446533, "logits/rejected": -2.428818941116333, "logps/chosen": -290.0104675292969, "logps/rejected": -391.9622497558594, "loss": 0.0189, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7901774644851685, "rewards/margins": 11.513010025024414, "rewards/rejected": -12.303187370300293, "step": 5120 }, { "epoch": 2.65, "learning_rate": 6.511761331038438e-08, "logits/chosen": -2.552222490310669, "logits/rejected": -2.5270204544067383, "logps/chosen": -241.63101196289062, "logps/rejected": -325.4315490722656, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.719938039779663, "rewards/margins": 9.429548263549805, "rewards/rejected": -11.149487495422363, "step": 5130 }, { "epoch": 2.65, "learning_rate": 6.416140753490151e-08, "logits/chosen": -2.5769755840301514, "logits/rejected": -2.5583062171936035, "logps/chosen": -326.62939453125, "logps/rejected": -382.25396728515625, "loss": 0.0126, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0803858041763306, "rewards/margins": 10.431987762451172, "rewards/rejected": -11.512374877929688, "step": 5140 }, { "epoch": 2.66, "learning_rate": 6.320520175941863e-08, "logits/chosen": -2.4850387573242188, "logits/rejected": -2.431705951690674, "logps/chosen": -268.9663391113281, "logps/rejected": -328.4075622558594, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.5687530040740967, "rewards/margins": 10.12572956085205, "rewards/rejected": -11.69448184967041, "step": 5150 }, { "epoch": 2.66, "learning_rate": 6.224899598393573e-08, "logits/chosen": -2.6069729328155518, "logits/rejected": -2.563819408416748, "logps/chosen": -326.76837158203125, "logps/rejected": -387.1705017089844, "loss": 0.0177, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.9824767112731934, "rewards/margins": 10.304605484008789, "rewards/rejected": -11.287081718444824, "step": 5160 }, { "epoch": 2.67, "learning_rate": 6.129279020845286e-08, "logits/chosen": -2.618408441543579, "logits/rejected": -2.4645755290985107, "logps/chosen": -276.08221435546875, "logps/rejected": -314.79351806640625, "loss": 0.0076, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.246117353439331, "rewards/margins": 10.326366424560547, "rewards/rejected": -11.572484016418457, "step": 5170 }, { "epoch": 2.67, "learning_rate": 6.033658443296998e-08, "logits/chosen": -2.59645938873291, "logits/rejected": -2.5549280643463135, "logps/chosen": -305.74566650390625, "logps/rejected": -360.2120361328125, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.6957218647003174, "rewards/margins": 10.49669075012207, "rewards/rejected": -12.192411422729492, "step": 5180 }, { "epoch": 2.68, "learning_rate": 5.9380378657487085e-08, "logits/chosen": -2.56986927986145, "logits/rejected": -2.562441349029541, "logps/chosen": -304.5476989746094, "logps/rejected": -355.4156494140625, "loss": 0.0057, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3462127447128296, "rewards/margins": 10.37061595916748, "rewards/rejected": -11.716829299926758, "step": 5190 }, { "epoch": 2.68, "learning_rate": 5.842417288200421e-08, "logits/chosen": -2.591763973236084, "logits/rejected": -2.607100486755371, "logps/chosen": -318.5129699707031, "logps/rejected": -368.0654602050781, "loss": 0.0074, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1077558994293213, "rewards/margins": 10.874872207641602, "rewards/rejected": -11.982629776000977, "step": 5200 }, { "epoch": 2.68, "eval_logits/chosen": -2.5890767574310303, "eval_logits/rejected": -2.5378119945526123, "eval_logps/chosen": -318.5146789550781, "eval_logps/rejected": -313.303466796875, "eval_loss": 0.739378035068512, "eval_rewards/accuracies": 0.765999972820282, "eval_rewards/chosen": -4.736356258392334, "eval_rewards/margins": 3.745591878890991, "eval_rewards/rejected": -8.481947898864746, "eval_runtime": 299.196, "eval_samples_per_second": 6.685, "eval_steps_per_second": 0.418, "step": 5200 }, { "epoch": 2.69, "learning_rate": 5.7467967106521317e-08, "logits/chosen": -2.5239500999450684, "logits/rejected": -2.5438942909240723, "logps/chosen": -223.29531860351562, "logps/rejected": -347.07537841796875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.5479010343551636, "rewards/margins": 10.697844505310059, "rewards/rejected": -12.245744705200195, "step": 5210 }, { "epoch": 2.69, "learning_rate": 5.651176133103844e-08, "logits/chosen": -2.6476407051086426, "logits/rejected": -2.590280771255493, "logps/chosen": -288.53741455078125, "logps/rejected": -378.91717529296875, "loss": 0.0125, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.879062294960022, "rewards/margins": 11.550715446472168, "rewards/rejected": -12.429778099060059, "step": 5220 }, { "epoch": 2.7, "learning_rate": 5.555555555555555e-08, "logits/chosen": -2.51558256149292, "logits/rejected": -2.4594240188598633, "logps/chosen": -275.41180419921875, "logps/rejected": -302.51751708984375, "loss": 0.0091, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2985538244247437, "rewards/margins": 10.125242233276367, "rewards/rejected": -11.423796653747559, "step": 5230 }, { "epoch": 2.71, "learning_rate": 5.459934978007267e-08, "logits/chosen": -2.5778565406799316, "logits/rejected": -2.54152250289917, "logps/chosen": -298.53887939453125, "logps/rejected": -359.42413330078125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.5041391849517822, "rewards/margins": 11.008560180664062, "rewards/rejected": -12.512699127197266, "step": 5240 }, { "epoch": 2.71, "learning_rate": 5.3643144004589786e-08, "logits/chosen": -2.5127241611480713, "logits/rejected": -2.4316296577453613, "logps/chosen": -334.34368896484375, "logps/rejected": -361.0766906738281, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4334515631198883, "rewards/margins": 11.821561813354492, "rewards/rejected": -12.255014419555664, "step": 5250 }, { "epoch": 2.72, "learning_rate": 5.26869382291069e-08, "logits/chosen": -2.5240254402160645, "logits/rejected": -2.513014316558838, "logps/chosen": -255.37158203125, "logps/rejected": -314.8083190917969, "loss": 0.0093, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.473276138305664, "rewards/margins": 10.024567604064941, "rewards/rejected": -11.497844696044922, "step": 5260 }, { "epoch": 2.72, "learning_rate": 5.173073245362402e-08, "logits/chosen": -2.5280280113220215, "logits/rejected": -2.4819343090057373, "logps/chosen": -300.80059814453125, "logps/rejected": -369.16351318359375, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -2.161496877670288, "rewards/margins": 9.86224365234375, "rewards/rejected": -12.0237398147583, "step": 5270 }, { "epoch": 2.73, "learning_rate": 5.077452667814113e-08, "logits/chosen": -2.5500526428222656, "logits/rejected": -2.4608266353607178, "logps/chosen": -255.2032012939453, "logps/rejected": -345.13336181640625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.8557249307632446, "rewards/margins": 11.390253067016602, "rewards/rejected": -13.245976448059082, "step": 5280 }, { "epoch": 2.73, "learning_rate": 4.981832090265825e-08, "logits/chosen": -2.5720067024230957, "logits/rejected": -2.509190082550049, "logps/chosen": -265.0337829589844, "logps/rejected": -347.77569580078125, "loss": 0.0043, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3671785593032837, "rewards/margins": 11.596514701843262, "rewards/rejected": -12.96369457244873, "step": 5290 }, { "epoch": 2.74, "learning_rate": 4.8862115127175364e-08, "logits/chosen": -2.655913829803467, "logits/rejected": -2.5226495265960693, "logps/chosen": -314.6326599121094, "logps/rejected": -358.582275390625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.139702081680298, "rewards/margins": 10.213295936584473, "rewards/rejected": -12.352998733520508, "step": 5300 }, { "epoch": 2.74, "eval_logits/chosen": -2.605203151702881, "eval_logits/rejected": -2.553877353668213, "eval_logps/chosen": -317.5019226074219, "eval_logps/rejected": -312.4739990234375, "eval_loss": 0.7335207462310791, "eval_rewards/accuracies": 0.7720000147819519, "eval_rewards/chosen": -4.635079860687256, "eval_rewards/margins": 3.7639193534851074, "eval_rewards/rejected": -8.39900016784668, "eval_runtime": 297.7404, "eval_samples_per_second": 6.717, "eval_steps_per_second": 0.42, "step": 5300 }, { "epoch": 2.74, "learning_rate": 4.790590935169248e-08, "logits/chosen": -2.5588815212249756, "logits/rejected": -2.5604608058929443, "logps/chosen": -266.3474426269531, "logps/rejected": -430.2942810058594, "loss": 0.0132, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.032094717025757, "rewards/margins": 10.764850616455078, "rewards/rejected": -12.796945571899414, "step": 5310 }, { "epoch": 2.75, "learning_rate": 4.69497035762096e-08, "logits/chosen": -2.53786301612854, "logits/rejected": -2.4553189277648926, "logps/chosen": -265.3934326171875, "logps/rejected": -339.6708068847656, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -2.617504119873047, "rewards/margins": 10.523672103881836, "rewards/rejected": -13.1411771774292, "step": 5320 }, { "epoch": 2.75, "learning_rate": 4.599349780072671e-08, "logits/chosen": -2.5566892623901367, "logits/rejected": -2.4526925086975098, "logps/chosen": -304.0606994628906, "logps/rejected": -329.9059753417969, "loss": 0.0096, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.4853826761245728, "rewards/margins": 10.810678482055664, "rewards/rejected": -12.296060562133789, "step": 5330 }, { "epoch": 2.76, "learning_rate": 4.5037292025243834e-08, "logits/chosen": -2.5405123233795166, "logits/rejected": -2.550758123397827, "logps/chosen": -308.7420349121094, "logps/rejected": -389.48101806640625, "loss": 0.0118, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.569156289100647, "rewards/margins": 10.718561172485352, "rewards/rejected": -12.287717819213867, "step": 5340 }, { "epoch": 2.76, "learning_rate": 4.408108624976094e-08, "logits/chosen": -2.648345470428467, "logits/rejected": -2.5815377235412598, "logps/chosen": -283.7870788574219, "logps/rejected": -385.6964416503906, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.0726970434188843, "rewards/margins": 12.462373733520508, "rewards/rejected": -13.535069465637207, "step": 5350 }, { "epoch": 2.77, "learning_rate": 4.3124880474278065e-08, "logits/chosen": -2.61507248878479, "logits/rejected": -2.591545581817627, "logps/chosen": -261.71783447265625, "logps/rejected": -331.97210693359375, "loss": 0.0211, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.4280383586883545, "rewards/margins": 10.27877426147461, "rewards/rejected": -11.706812858581543, "step": 5360 }, { "epoch": 2.77, "learning_rate": 4.2168674698795174e-08, "logits/chosen": -2.5542023181915283, "logits/rejected": -2.4944653511047363, "logps/chosen": -257.595947265625, "logps/rejected": -365.1188659667969, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.7011358737945557, "rewards/margins": 10.8641357421875, "rewards/rejected": -12.565271377563477, "step": 5370 }, { "epoch": 2.78, "learning_rate": 4.1212468923312296e-08, "logits/chosen": -2.4464447498321533, "logits/rejected": -2.421036720275879, "logps/chosen": -290.8499755859375, "logps/rejected": -364.34796142578125, "loss": 0.0101, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6131134033203125, "rewards/margins": 10.417032241821289, "rewards/rejected": -12.030145645141602, "step": 5380 }, { "epoch": 2.78, "learning_rate": 4.025626314782941e-08, "logits/chosen": -2.420766592025757, "logits/rejected": -2.430091381072998, "logps/chosen": -284.9582824707031, "logps/rejected": -337.31640625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.669730544090271, "rewards/margins": 10.3810453414917, "rewards/rejected": -12.050777435302734, "step": 5390 }, { "epoch": 2.79, "learning_rate": 3.930005737234653e-08, "logits/chosen": -2.5880367755889893, "logits/rejected": -2.573629856109619, "logps/chosen": -262.8695373535156, "logps/rejected": -303.2467956542969, "loss": 0.0163, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.6934680938720703, "rewards/margins": 9.696393966674805, "rewards/rejected": -11.389862060546875, "step": 5400 }, { "epoch": 2.79, "eval_logits/chosen": -2.599304437637329, "eval_logits/rejected": -2.548959970474243, "eval_logps/chosen": -317.89239501953125, "eval_logps/rejected": -312.4419860839844, "eval_loss": 0.7316961288452148, "eval_rewards/accuracies": 0.7699999809265137, "eval_rewards/chosen": -4.674126148223877, "eval_rewards/margins": 3.721679925918579, "eval_rewards/rejected": -8.395805358886719, "eval_runtime": 300.0204, "eval_samples_per_second": 6.666, "eval_steps_per_second": 0.417, "step": 5400 }, { "epoch": 2.79, "learning_rate": 3.8343851596863644e-08, "logits/chosen": -2.597282886505127, "logits/rejected": -2.50775146484375, "logps/chosen": -255.15243530273438, "logps/rejected": -311.01910400390625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.9441089630126953, "rewards/margins": 10.98766803741455, "rewards/rejected": -11.931777000427246, "step": 5410 }, { "epoch": 2.8, "learning_rate": 3.738764582138076e-08, "logits/chosen": -2.560591220855713, "logits/rejected": -2.4639830589294434, "logps/chosen": -329.83734130859375, "logps/rejected": -376.46160888671875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.5904229879379272, "rewards/margins": 11.07619857788086, "rewards/rejected": -12.666620254516602, "step": 5420 }, { "epoch": 2.8, "learning_rate": 3.6431440045897875e-08, "logits/chosen": -2.515839099884033, "logits/rejected": -2.483527421951294, "logps/chosen": -282.5474853515625, "logps/rejected": -376.92926025390625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.3852026462554932, "rewards/margins": 10.924342155456543, "rewards/rejected": -12.309544563293457, "step": 5430 }, { "epoch": 2.81, "learning_rate": 3.547523427041499e-08, "logits/chosen": -2.64237117767334, "logits/rejected": -2.6202080249786377, "logps/chosen": -293.0863952636719, "logps/rejected": -397.6541748046875, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.9685888290405273, "rewards/margins": 12.01826286315918, "rewards/rejected": -12.986851692199707, "step": 5440 }, { "epoch": 2.81, "learning_rate": 3.4519028494932106e-08, "logits/chosen": -2.4823646545410156, "logits/rejected": -2.4257140159606934, "logps/chosen": -298.60693359375, "logps/rejected": -372.49212646484375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.65126633644104, "rewards/margins": 11.615009307861328, "rewards/rejected": -13.266276359558105, "step": 5450 }, { "epoch": 2.82, "learning_rate": 3.356282271944923e-08, "logits/chosen": -2.5652999877929688, "logits/rejected": -2.575331449508667, "logps/chosen": -265.50970458984375, "logps/rejected": -378.0349426269531, "loss": 0.0133, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2861878871917725, "rewards/margins": 11.414156913757324, "rewards/rejected": -12.70034408569336, "step": 5460 }, { "epoch": 2.82, "learning_rate": 3.260661694396634e-08, "logits/chosen": -2.6633830070495605, "logits/rejected": -2.553030490875244, "logps/chosen": -338.38525390625, "logps/rejected": -344.780517578125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.167046308517456, "rewards/margins": 11.513765335083008, "rewards/rejected": -12.68081283569336, "step": 5470 }, { "epoch": 2.83, "learning_rate": 3.165041116848346e-08, "logits/chosen": -2.5658626556396484, "logits/rejected": -2.5562121868133545, "logps/chosen": -277.5936584472656, "logps/rejected": -432.04608154296875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.6860519647598267, "rewards/margins": 11.724603652954102, "rewards/rejected": -13.41065502166748, "step": 5480 }, { "epoch": 2.83, "learning_rate": 3.0694205393000576e-08, "logits/chosen": -2.554914712905884, "logits/rejected": -2.485353469848633, "logps/chosen": -263.68682861328125, "logps/rejected": -348.3326110839844, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.5707225799560547, "rewards/margins": 11.460761070251465, "rewards/rejected": -12.031484603881836, "step": 5490 }, { "epoch": 2.84, "learning_rate": 2.9737999617517688e-08, "logits/chosen": -2.5359983444213867, "logits/rejected": -2.471630811691284, "logps/chosen": -306.6964416503906, "logps/rejected": -340.04913330078125, "loss": 0.0081, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.7643492221832275, "rewards/margins": 10.403191566467285, "rewards/rejected": -12.167540550231934, "step": 5500 }, { "epoch": 2.84, "eval_logits/chosen": -2.5815768241882324, "eval_logits/rejected": -2.5306899547576904, "eval_logps/chosen": -320.3167419433594, "eval_logps/rejected": -315.4290771484375, "eval_loss": 0.7419750094413757, "eval_rewards/accuracies": 0.7739999890327454, "eval_rewards/chosen": -4.916560649871826, "eval_rewards/margins": 3.777946710586548, "eval_rewards/rejected": -8.694506645202637, "eval_runtime": 297.0642, "eval_samples_per_second": 6.733, "eval_steps_per_second": 0.421, "step": 5500 }, { "epoch": 2.84, "learning_rate": 2.8781793842034804e-08, "logits/chosen": -2.4522650241851807, "logits/rejected": -2.330350875854492, "logps/chosen": -261.5328674316406, "logps/rejected": -335.68560791015625, "loss": 0.0164, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.278339147567749, "rewards/margins": 10.198699951171875, "rewards/rejected": -12.477038383483887, "step": 5510 }, { "epoch": 2.85, "learning_rate": 2.782558806655192e-08, "logits/chosen": -2.4790380001068115, "logits/rejected": -2.49639630317688, "logps/chosen": -275.767333984375, "logps/rejected": -372.3228759765625, "loss": 0.014, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.5172851085662842, "rewards/margins": 10.855243682861328, "rewards/rejected": -12.372529029846191, "step": 5520 }, { "epoch": 2.85, "learning_rate": 2.6869382291069035e-08, "logits/chosen": -2.5798544883728027, "logits/rejected": -2.5143635272979736, "logps/chosen": -305.1454772949219, "logps/rejected": -370.1262512207031, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.6373401880264282, "rewards/margins": 10.685625076293945, "rewards/rejected": -12.322965621948242, "step": 5530 }, { "epoch": 2.86, "learning_rate": 2.591317651558615e-08, "logits/chosen": -2.508338451385498, "logits/rejected": -2.518322467803955, "logps/chosen": -269.467529296875, "logps/rejected": -350.46337890625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.773882508277893, "rewards/margins": 10.512941360473633, "rewards/rejected": -12.286825180053711, "step": 5540 }, { "epoch": 2.87, "learning_rate": 2.4956970740103267e-08, "logits/chosen": -2.555595636367798, "logits/rejected": -2.4913864135742188, "logps/chosen": -284.4319763183594, "logps/rejected": -388.2416076660156, "loss": 0.0155, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.8780571222305298, "rewards/margins": 11.021939277648926, "rewards/rejected": -12.899995803833008, "step": 5550 }, { "epoch": 2.87, "learning_rate": 2.4000764964620386e-08, "logits/chosen": -2.5701186656951904, "logits/rejected": -2.5412697792053223, "logps/chosen": -359.2135314941406, "logps/rejected": -392.4355773925781, "loss": 0.0154, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.2382028102874756, "rewards/margins": 10.054285049438477, "rewards/rejected": -12.292488098144531, "step": 5560 }, { "epoch": 2.88, "learning_rate": 2.30445591891375e-08, "logits/chosen": -2.623213768005371, "logits/rejected": -2.5315768718719482, "logps/chosen": -332.5467834472656, "logps/rejected": -376.62481689453125, "loss": 0.0077, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2165770530700684, "rewards/margins": 11.590463638305664, "rewards/rejected": -12.807042121887207, "step": 5570 }, { "epoch": 2.88, "learning_rate": 2.2088353413654617e-08, "logits/chosen": -2.49461030960083, "logits/rejected": -2.4208273887634277, "logps/chosen": -245.84646606445312, "logps/rejected": -314.0872497558594, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -1.8297621011734009, "rewards/margins": 11.114880561828613, "rewards/rejected": -12.9446439743042, "step": 5580 }, { "epoch": 2.89, "learning_rate": 2.1132147638171733e-08, "logits/chosen": -2.5482990741729736, "logits/rejected": -2.506981611251831, "logps/chosen": -323.66943359375, "logps/rejected": -346.53009033203125, "loss": 0.0155, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7804180383682251, "rewards/margins": 11.199524879455566, "rewards/rejected": -11.97994327545166, "step": 5590 }, { "epoch": 2.89, "learning_rate": 2.0175941862688848e-08, "logits/chosen": -2.5294761657714844, "logits/rejected": -2.535964250564575, "logps/chosen": -261.10882568359375, "logps/rejected": -339.1511535644531, "loss": 0.0067, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.790201187133789, "rewards/margins": 11.019615173339844, "rewards/rejected": -12.80981731414795, "step": 5600 }, { "epoch": 2.89, "eval_logits/chosen": -2.594068765640259, "eval_logits/rejected": -2.5437283515930176, "eval_logps/chosen": -320.7321472167969, "eval_logps/rejected": -315.70770263671875, "eval_loss": 0.7368908524513245, "eval_rewards/accuracies": 0.7680000066757202, "eval_rewards/chosen": -4.958104610443115, "eval_rewards/margins": 3.7642662525177, "eval_rewards/rejected": -8.722371101379395, "eval_runtime": 299.4877, "eval_samples_per_second": 6.678, "eval_steps_per_second": 0.417, "step": 5600 }, { "epoch": 2.9, "learning_rate": 1.9219736087205964e-08, "logits/chosen": -2.54457950592041, "logits/rejected": -2.4718551635742188, "logps/chosen": -263.2037048339844, "logps/rejected": -339.2086486816406, "loss": 0.016, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.8428022861480713, "rewards/margins": 10.111788749694824, "rewards/rejected": -11.954591751098633, "step": 5610 }, { "epoch": 2.9, "learning_rate": 1.826353031172308e-08, "logits/chosen": -2.52895188331604, "logits/rejected": -2.4624314308166504, "logps/chosen": -319.7272644042969, "logps/rejected": -443.2732849121094, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.3951648473739624, "rewards/margins": 11.68992805480957, "rewards/rejected": -13.085092544555664, "step": 5620 }, { "epoch": 2.91, "learning_rate": 1.73073245362402e-08, "logits/chosen": -2.5705406665802, "logits/rejected": -2.5043656826019287, "logps/chosen": -300.62847900390625, "logps/rejected": -335.4035949707031, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -2.186084747314453, "rewards/margins": 10.557881355285645, "rewards/rejected": -12.743965148925781, "step": 5630 }, { "epoch": 2.91, "learning_rate": 1.6351118760757314e-08, "logits/chosen": -2.544924259185791, "logits/rejected": -2.5001089572906494, "logps/chosen": -283.0209045410156, "logps/rejected": -336.7970886230469, "loss": 0.0132, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.8115791082382202, "rewards/margins": 9.958032608032227, "rewards/rejected": -11.769611358642578, "step": 5640 }, { "epoch": 2.92, "learning_rate": 1.539491298527443e-08, "logits/chosen": -2.55588436126709, "logits/rejected": -2.5477097034454346, "logps/chosen": -249.8664093017578, "logps/rejected": -357.674560546875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -2.4484872817993164, "rewards/margins": 10.401117324829102, "rewards/rejected": -12.849603652954102, "step": 5650 }, { "epoch": 2.92, "learning_rate": 1.4438707209791546e-08, "logits/chosen": -2.547852039337158, "logits/rejected": -2.4804680347442627, "logps/chosen": -302.8385314941406, "logps/rejected": -329.81573486328125, "loss": 0.0149, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.3558497428894043, "rewards/margins": 10.600205421447754, "rewards/rejected": -11.956053733825684, "step": 5660 }, { "epoch": 2.93, "learning_rate": 1.3482501434308661e-08, "logits/chosen": -2.547434091567993, "logits/rejected": -2.484344005584717, "logps/chosen": -294.35064697265625, "logps/rejected": -320.4677429199219, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.5904505252838135, "rewards/margins": 10.197576522827148, "rewards/rejected": -11.788025856018066, "step": 5670 }, { "epoch": 2.93, "learning_rate": 1.2526295658825777e-08, "logits/chosen": -2.650617837905884, "logits/rejected": -2.6219732761383057, "logps/chosen": -305.6776428222656, "logps/rejected": -382.5854187011719, "loss": 0.0152, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.8251022100448608, "rewards/margins": 11.217008590698242, "rewards/rejected": -13.042112350463867, "step": 5680 }, { "epoch": 2.94, "learning_rate": 1.1570089883342895e-08, "logits/chosen": -2.489759922027588, "logits/rejected": -2.4366366863250732, "logps/chosen": -311.3402404785156, "logps/rejected": -412.3067932128906, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.8317899703979492, "rewards/margins": 11.631093978881836, "rewards/rejected": -13.462884902954102, "step": 5690 }, { "epoch": 2.94, "learning_rate": 1.061388410786001e-08, "logits/chosen": -2.534816026687622, "logits/rejected": -2.5374817848205566, "logps/chosen": -282.93951416015625, "logps/rejected": -327.12738037109375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.5196959972381592, "rewards/margins": 10.49896240234375, "rewards/rejected": -12.018656730651855, "step": 5700 }, { "epoch": 2.94, "eval_logits/chosen": -2.59464430809021, "eval_logits/rejected": -2.5442099571228027, "eval_logps/chosen": -320.8699951171875, "eval_logps/rejected": -315.9825744628906, "eval_loss": 0.7345340251922607, "eval_rewards/accuracies": 0.7720000147819519, "eval_rewards/chosen": -4.971884727478027, "eval_rewards/margins": 3.777974843978882, "eval_rewards/rejected": -8.749860763549805, "eval_runtime": 297.7462, "eval_samples_per_second": 6.717, "eval_steps_per_second": 0.42, "step": 5700 }, { "epoch": 2.95, "learning_rate": 9.657678332377126e-09, "logits/chosen": -2.497479200363159, "logits/rejected": -2.4477274417877197, "logps/chosen": -284.3431091308594, "logps/rejected": -325.3197326660156, "loss": 0.0102, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.3208651542663574, "rewards/margins": 10.637207984924316, "rewards/rejected": -11.958072662353516, "step": 5710 }, { "epoch": 2.95, "learning_rate": 8.701472556894243e-09, "logits/chosen": -2.5374457836151123, "logits/rejected": -2.405616521835327, "logps/chosen": -269.211181640625, "logps/rejected": -373.4117736816406, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.543179988861084, "rewards/margins": 10.95097541809082, "rewards/rejected": -13.49415397644043, "step": 5720 }, { "epoch": 2.96, "learning_rate": 7.745266781411359e-09, "logits/chosen": -2.5577011108398438, "logits/rejected": -2.5819551944732666, "logps/chosen": -314.5643615722656, "logps/rejected": -386.89361572265625, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -1.3582957983016968, "rewards/margins": 10.754124641418457, "rewards/rejected": -12.112419128417969, "step": 5730 }, { "epoch": 2.96, "learning_rate": 6.7890610059284754e-09, "logits/chosen": -2.55308198928833, "logits/rejected": -2.474553346633911, "logps/chosen": -246.30685424804688, "logps/rejected": -292.82403564453125, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.7753121852874756, "rewards/margins": 9.459589004516602, "rewards/rejected": -11.234902381896973, "step": 5740 }, { "epoch": 2.97, "learning_rate": 5.832855230445592e-09, "logits/chosen": -2.4907386302948, "logits/rejected": -2.467416286468506, "logps/chosen": -288.83050537109375, "logps/rejected": -346.51788330078125, "loss": 0.0149, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4427422285079956, "rewards/margins": 11.456911087036133, "rewards/rejected": -12.899653434753418, "step": 5750 }, { "epoch": 2.97, "learning_rate": 4.8766494549627085e-09, "logits/chosen": -2.5566134452819824, "logits/rejected": -2.4680893421173096, "logps/chosen": -283.3470458984375, "logps/rejected": -355.33673095703125, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.5444302558898926, "rewards/margins": 11.971147537231445, "rewards/rejected": -12.515579223632812, "step": 5760 }, { "epoch": 2.98, "learning_rate": 3.920443679479824e-09, "logits/chosen": -2.6467669010162354, "logits/rejected": -2.580441951751709, "logps/chosen": -326.4743347167969, "logps/rejected": -346.74090576171875, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -1.8142646551132202, "rewards/margins": 10.477445602416992, "rewards/rejected": -12.291709899902344, "step": 5770 }, { "epoch": 2.98, "learning_rate": 2.96423790399694e-09, "logits/chosen": -2.60914945602417, "logits/rejected": -2.5680363178253174, "logps/chosen": -299.6268005371094, "logps/rejected": -368.2593688964844, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -1.949828863143921, "rewards/margins": 9.881691932678223, "rewards/rejected": -11.831521987915039, "step": 5780 }, { "epoch": 2.99, "learning_rate": 2.008032128514056e-09, "logits/chosen": -2.5586049556732178, "logits/rejected": -2.5496246814727783, "logps/chosen": -296.46295166015625, "logps/rejected": -365.4637451171875, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.8471219539642334, "rewards/margins": 10.26219367980957, "rewards/rejected": -12.1093168258667, "step": 5790 }, { "epoch": 2.99, "learning_rate": 1.0518263530311723e-09, "logits/chosen": -2.589639663696289, "logits/rejected": -2.5423808097839355, "logps/chosen": -245.2353973388672, "logps/rejected": -353.00701904296875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.8704856634140015, "rewards/margins": 10.102082252502441, "rewards/rejected": -11.972566604614258, "step": 5800 }, { "epoch": 2.99, "eval_logits/chosen": -2.5956168174743652, "eval_logits/rejected": -2.5452351570129395, "eval_logps/chosen": -320.2925109863281, "eval_logps/rejected": -315.3341064453125, "eval_loss": 0.7337509989738464, "eval_rewards/accuracies": 0.7699999809265137, "eval_rewards/chosen": -4.9141364097595215, "eval_rewards/margins": 3.7708754539489746, "eval_rewards/rejected": -8.685011863708496, "eval_runtime": 297.2004, "eval_samples_per_second": 6.729, "eval_steps_per_second": 0.421, "step": 5800 }, { "epoch": 3.0, "learning_rate": 9.562057754828839e-11, "logits/chosen": -2.5458855628967285, "logits/rejected": -2.548276901245117, "logps/chosen": -248.52377319335938, "logps/rejected": -351.306640625, "loss": 0.0125, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.725280523300171, "rewards/margins": 9.489947319030762, "rewards/rejected": -11.215229988098145, "step": 5810 }, { "epoch": 3.0, "step": 5811, "total_flos": 0.0, "train_loss": 0.2059708398652644, "train_runtime": 84831.0718, "train_samples_per_second": 2.191, "train_steps_per_second": 0.069 } ], "logging_steps": 10, "max_steps": 5811, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }