diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -3,1145 +3,2269 @@ "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, - "global_step": 782, + "global_step": 1563, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "learning_rate": 6.329113924050633e-09, + "learning_rate": 3.1847133757961784e-09, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -403.3199157714844, - "logps/real": -443.6107177734375, - "loss": 4.7453, - "rewards/accuracies": 0.5, - "rewards/generated": -12.943833351135254, - "rewards/margins": -2.53641414642334, - "rewards/real": -15.48024845123291, + "logps/generated": -539.3351440429688, + "logps/real": -367.9522399902344, + "loss": 1.6159, + "rewards/accuracies": 0.625, + "rewards/generated": -21.398611068725586, + "rewards/margins": 5.284192085266113, + "rewards/real": -16.114418029785156, "step": 1 }, { "epoch": 0.01, - "learning_rate": 6.329113924050633e-08, + "learning_rate": 3.184713375796178e-08, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -487.2853088378906, - "logps/real": -384.8033142089844, - "loss": 3.4516, - "rewards/accuracies": 0.6111111044883728, - "rewards/generated": -19.51491928100586, - "rewards/margins": 6.083561420440674, - "rewards/real": -13.431358337402344, + "logps/generated": -466.6612548828125, + "logps/real": -352.3954162597656, + "loss": 2.9155, + "rewards/accuracies": 0.6944444179534912, + "rewards/generated": -16.62398910522461, + "rewards/margins": 6.3880696296691895, + "rewards/real": -10.235919952392578, "step": 10 }, { - "epoch": 0.03, - "learning_rate": 1.2658227848101266e-07, + "epoch": 0.01, + "learning_rate": 6.369426751592356e-08, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -473.446044921875, - "logps/real": -413.171630859375, - "loss": 3.9705, - "rewards/accuracies": 0.637499988079071, - "rewards/generated": -18.367900848388672, - "rewards/margins": 3.9024269580841064, - "rewards/real": -14.465472221374512, + "logps/generated": -494.484375, + "logps/real": -385.1441955566406, + "loss": 3.3003, + "rewards/accuracies": 0.699999988079071, + "rewards/generated": -20.39480972290039, + "rewards/margins": 6.397805213928223, + "rewards/real": -13.997003555297852, "step": 20 }, { - "epoch": 0.04, - "learning_rate": 1.89873417721519e-07, + "epoch": 0.02, + "learning_rate": 9.554140127388536e-08, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -510.4585876464844, - "logps/real": -408.74212646484375, - "loss": 3.6955, - "rewards/accuracies": 0.6875, - "rewards/generated": -21.059778213500977, - "rewards/margins": 7.762242317199707, - "rewards/real": -13.297533988952637, + "logps/generated": -502.4696350097656, + "logps/real": -407.5299987792969, + "loss": 4.0551, + "rewards/accuracies": 0.5874999761581421, + "rewards/generated": -21.359249114990234, + "rewards/margins": 6.890555381774902, + "rewards/real": -14.4686918258667, "step": 30 }, { - "epoch": 0.05, - "learning_rate": 2.5316455696202533e-07, + "epoch": 0.03, + "learning_rate": 1.2738853503184713e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -451.734130859375, - "logps/real": -368.16998291015625, - "loss": 3.9056, - "rewards/accuracies": 0.625, - "rewards/generated": -16.805126190185547, - "rewards/margins": 4.4605584144592285, - "rewards/real": -12.344568252563477, + "logps/generated": -453.08099365234375, + "logps/real": -369.57183837890625, + "loss": 3.4836, + "rewards/accuracies": 0.675000011920929, + "rewards/generated": -16.532052993774414, + "rewards/margins": 5.033951759338379, + "rewards/real": -11.498100280761719, "step": 40 }, { - "epoch": 0.06, - "learning_rate": 3.1645569620253163e-07, + "epoch": 0.03, + "learning_rate": 1.592356687898089e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -552.9141845703125, - "logps/real": -397.95318603515625, - "loss": 2.9742, - "rewards/accuracies": 0.737500011920929, - "rewards/generated": -25.695674896240234, - "rewards/margins": 10.653547286987305, - "rewards/real": -15.042126655578613, + "logps/generated": -496.9840393066406, + "logps/real": -375.58489990234375, + "loss": 3.6846, + "rewards/accuracies": 0.762499988079071, + "rewards/generated": -21.554412841796875, + "rewards/margins": 8.116512298583984, + "rewards/real": -13.437899589538574, "step": 50 }, { - "epoch": 0.08, - "learning_rate": 3.79746835443038e-07, + "epoch": 0.04, + "learning_rate": 1.9108280254777072e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -509.5240783691406, - "logps/real": -390.6533203125, - "loss": 2.5682, - "rewards/accuracies": 0.737500011920929, - "rewards/generated": -22.568603515625, - "rewards/margins": 7.904494285583496, - "rewards/real": -14.664111137390137, + "logps/generated": -510.9554138183594, + "logps/real": -394.2112731933594, + "loss": 2.9311, + "rewards/accuracies": 0.7250000238418579, + "rewards/generated": -22.012584686279297, + "rewards/margins": 7.591399192810059, + "rewards/real": -14.421185493469238, "step": 60 }, { - "epoch": 0.09, - "learning_rate": 4.4303797468354424e-07, + "epoch": 0.04, + "learning_rate": 2.2292993630573247e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -554.5638427734375, - "logps/real": -382.84832763671875, - "loss": 1.7793, - "rewards/accuracies": 0.8374999761581421, - "rewards/generated": -28.215290069580078, - "rewards/margins": 14.945714950561523, - "rewards/real": -13.269571304321289, + "logps/generated": -500.39190673828125, + "logps/real": -409.89202880859375, + "loss": 3.2596, + "rewards/accuracies": 0.6499999761581421, + "rewards/generated": -21.793880462646484, + "rewards/margins": 6.847817897796631, + "rewards/real": -14.946063041687012, "step": 70 }, { - "epoch": 0.1, - "learning_rate": 4.992887624466572e-07, + "epoch": 0.05, + "learning_rate": 2.5477707006369425e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -574.03759765625, - "logps/real": -402.69232177734375, - "loss": 1.4194, - "rewards/accuracies": 0.800000011920929, - "rewards/generated": -28.118602752685547, - "rewards/margins": 12.523630142211914, - "rewards/real": -15.594972610473633, + "logps/generated": -502.09320068359375, + "logps/real": -387.62890625, + "loss": 2.3412, + "rewards/accuracies": 0.7124999761581421, + "rewards/generated": -21.89040184020996, + "rewards/margins": 7.6944780349731445, + "rewards/real": -14.19592571258545, "step": 80 }, { - "epoch": 0.12, - "learning_rate": 4.92176386913229e-07, + "epoch": 0.06, + "learning_rate": 2.86624203821656e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -577.7903442382812, - "logps/real": -401.81561279296875, - "loss": 1.2635, - "rewards/accuracies": 0.7749999761581421, - "rewards/generated": -30.364093780517578, - "rewards/margins": 14.755389213562012, - "rewards/real": -15.608701705932617, + "logps/generated": -557.3800048828125, + "logps/real": -414.1551818847656, + "loss": 1.9592, + "rewards/accuracies": 0.7875000238418579, + "rewards/generated": -26.10390281677246, + "rewards/margins": 10.303709983825684, + "rewards/real": -15.800193786621094, "step": 90 }, { - "epoch": 0.13, - "learning_rate": 4.850640113798008e-07, + "epoch": 0.06, + "learning_rate": 3.184713375796178e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -654.4276733398438, - "logps/real": -442.893798828125, - "loss": 0.9213, - "rewards/accuracies": 0.8999999761581421, - "rewards/generated": -35.57740020751953, - "rewards/margins": 18.7423038482666, - "rewards/real": -16.835100173950195, + "logps/generated": -543.3717651367188, + "logps/real": -368.0298156738281, + "loss": 1.7186, + "rewards/accuracies": 0.8374999761581421, + "rewards/generated": -25.229318618774414, + "rewards/margins": 12.685417175292969, + "rewards/real": -12.543901443481445, "step": 100 }, { - "epoch": 0.14, - "learning_rate": 4.779516358463727e-07, + "epoch": 0.07, + "learning_rate": 3.5031847133757957e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -637.9244384765625, - "logps/real": -426.7471618652344, - "loss": 1.0798, - "rewards/accuracies": 0.875, - "rewards/generated": -35.024356842041016, - "rewards/margins": 18.848251342773438, - "rewards/real": -16.17610740661621, + "logps/generated": -596.9669189453125, + "logps/real": -413.40350341796875, + "loss": 1.4592, + "rewards/accuracies": 0.8374999761581421, + "rewards/generated": -30.341022491455078, + "rewards/margins": 15.404217720031738, + "rewards/real": -14.936800956726074, "step": 110 }, { - "epoch": 0.15, - "learning_rate": 4.7083926031294454e-07, + "epoch": 0.08, + "learning_rate": 3.8216560509554143e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -647.3193969726562, - "logps/real": -419.98095703125, - "loss": 0.7428, - "rewards/accuracies": 0.9375, - "rewards/generated": -35.827938079833984, - "rewards/margins": 21.43227767944336, - "rewards/real": -14.395665168762207, + "logps/generated": -569.759033203125, + "logps/real": -374.6027526855469, + "loss": 1.1146, + "rewards/accuracies": 0.9125000238418579, + "rewards/generated": -26.69822120666504, + "rewards/margins": 14.171666145324707, + "rewards/real": -12.526556015014648, "step": 120 }, { - "epoch": 0.17, - "learning_rate": 4.6372688477951633e-07, + "epoch": 0.08, + "learning_rate": 4.140127388535032e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -647.6121826171875, - "logps/real": -413.42401123046875, - "loss": 0.6566, - "rewards/accuracies": 0.875, - "rewards/generated": -35.408538818359375, - "rewards/margins": 20.003904342651367, - "rewards/real": -15.404635429382324, + "logps/generated": -593.4500122070312, + "logps/real": -421.9783630371094, + "loss": 1.2263, + "rewards/accuracies": 0.862500011920929, + "rewards/generated": -31.279077529907227, + "rewards/margins": 15.886209487915039, + "rewards/real": -15.39286994934082, "step": 130 }, { - "epoch": 0.18, - "learning_rate": 4.5661450924608817e-07, + "epoch": 0.09, + "learning_rate": 4.4585987261146494e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -697.9991455078125, - "logps/real": -401.5552673339844, - "loss": 0.9421, - "rewards/accuracies": 0.9375, - "rewards/generated": -38.38452911376953, - "rewards/margins": 23.595943450927734, - "rewards/real": -14.788581848144531, + "logps/generated": -648.861328125, + "logps/real": -401.9364013671875, + "loss": 0.8198, + "rewards/accuracies": 0.8500000238418579, + "rewards/generated": -34.970481872558594, + "rewards/margins": 18.90291976928711, + "rewards/real": -16.067562103271484, "step": 140 }, { - "epoch": 0.19, - "learning_rate": 4.4950213371266e-07, + "epoch": 0.1, + "learning_rate": 4.777070063694267e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -672.8363037109375, - "logps/real": -401.61590576171875, - "loss": 0.6587, - "rewards/accuracies": 0.8999999761581421, - "rewards/generated": -38.686241149902344, - "rewards/margins": 23.072139739990234, - "rewards/real": -15.614102363586426, + "logps/generated": -638.9237670898438, + "logps/real": -419.0517578125, + "loss": 1.0322, + "rewards/accuracies": 0.862500011920929, + "rewards/generated": -34.63279342651367, + "rewards/margins": 18.07627296447754, + "rewards/real": -16.556522369384766, "step": 150 }, { - "epoch": 0.2, - "learning_rate": 4.4238975817923186e-07, + "epoch": 0.1, + "learning_rate": 4.989331436699858e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -641.0380859375, - "logps/real": -377.9221496582031, - "loss": 0.6261, - "rewards/accuracies": 0.8999999761581421, - "rewards/generated": -35.72499084472656, - "rewards/margins": 21.804719924926758, - "rewards/real": -13.920272827148438, + "logps/generated": -667.6073608398438, + "logps/real": -418.95562744140625, + "loss": 1.075, + "rewards/accuracies": 0.875, + "rewards/generated": -37.12474822998047, + "rewards/margins": 20.29035186767578, + "rewards/real": -16.834400177001953, "step": 160 }, { - "epoch": 0.22, - "learning_rate": 4.3527738264580364e-07, + "epoch": 0.11, + "learning_rate": 4.953769559032717e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -676.1116943359375, - "logps/real": -410.857421875, - "loss": 0.6189, - "rewards/accuracies": 0.9125000238418579, - "rewards/generated": -37.884979248046875, - "rewards/margins": 23.008445739746094, - "rewards/real": -14.876535415649414, + "logps/generated": -700.0158081054688, + "logps/real": -375.8499755859375, + "loss": 1.0279, + "rewards/accuracies": 0.925000011920929, + "rewards/generated": -40.3820915222168, + "rewards/margins": 25.6898136138916, + "rewards/real": -14.692278861999512, "step": 170 }, { - "epoch": 0.23, - "learning_rate": 4.2816500711237554e-07, + "epoch": 0.12, + "learning_rate": 4.918207681365576e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -654.6505737304688, - "logps/real": -370.24224853515625, - "loss": 0.3247, - "rewards/accuracies": 0.9375, - "rewards/generated": -36.68430709838867, - "rewards/margins": 23.027376174926758, - "rewards/real": -13.656933784484863, + "logps/generated": -680.1141357421875, + "logps/real": -433.60052490234375, + "loss": 0.7383, + "rewards/accuracies": 0.8999999761581421, + "rewards/generated": -36.7303352355957, + "rewards/margins": 21.14933967590332, + "rewards/real": -15.5809965133667, "step": 180 }, { - "epoch": 0.24, - "learning_rate": 4.2105263157894733e-07, + "epoch": 0.12, + "learning_rate": 4.882645803698435e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -665.4993286132812, - "logps/real": -364.58880615234375, - "loss": 0.5709, - "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -37.38275146484375, - "rewards/margins": 25.380136489868164, - "rewards/real": -12.002609252929688, + "logps/generated": -654.9556274414062, + "logps/real": -447.13519287109375, + "loss": 0.8003, + "rewards/accuracies": 0.887499988079071, + "rewards/generated": -37.65789031982422, + "rewards/margins": 19.25699806213379, + "rewards/real": -18.40089225769043, "step": 190 }, { - "epoch": 0.26, - "learning_rate": 4.1394025604551917e-07, + "epoch": 0.13, + "learning_rate": 4.847083926031294e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -684.380615234375, - "logps/real": -392.02288818359375, - "loss": 0.6823, - "rewards/accuracies": 0.8999999761581421, - "rewards/generated": -38.82280349731445, - "rewards/margins": 24.800228118896484, - "rewards/real": -14.02257251739502, + "logps/generated": -667.2574462890625, + "logps/real": -390.1756591796875, + "loss": 0.4891, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -37.77182388305664, + "rewards/margins": 22.858360290527344, + "rewards/real": -14.913459777832031, "step": 200 }, { - "epoch": 0.27, - "learning_rate": 4.06827880512091e-07, + "epoch": 0.13, + "learning_rate": 4.811522048364154e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -741.0132446289062, - "logps/real": -383.6390075683594, - "loss": 0.5369, - "rewards/accuracies": 0.925000011920929, - "rewards/generated": -43.17304229736328, - "rewards/margins": 28.80326271057129, - "rewards/real": -14.369776725769043, + "logps/generated": -710.2974853515625, + "logps/real": -364.0440673828125, + "loss": 0.4879, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -42.257808685302734, + "rewards/margins": 28.898040771484375, + "rewards/real": -13.359766960144043, "step": 210 }, { - "epoch": 0.28, - "learning_rate": 3.9971550497866285e-07, + "epoch": 0.14, + "learning_rate": 4.775960170697012e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -665.7184448242188, - "logps/real": -401.6400146484375, - "loss": 0.3696, - "rewards/accuracies": 0.9375, - "rewards/generated": -38.721866607666016, - "rewards/margins": 24.154865264892578, - "rewards/real": -14.567001342773438, + "logps/generated": -648.4423828125, + "logps/real": -361.45953369140625, + "loss": 0.543, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -36.720672607421875, + "rewards/margins": 22.87405014038086, + "rewards/real": -13.846624374389648, "step": 220 }, { - "epoch": 0.29, - "learning_rate": 3.926031294452347e-07, + "epoch": 0.15, + "learning_rate": 4.7403982930298717e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -681.5037231445312, - "logps/real": -394.03509521484375, - "loss": 0.2532, - "rewards/accuracies": 0.949999988079071, - "rewards/generated": -38.438758850097656, - "rewards/margins": 23.877817153930664, - "rewards/real": -14.560938835144043, + "logps/generated": -702.8461303710938, + "logps/real": -383.67425537109375, + "loss": 0.4983, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -40.814414978027344, + "rewards/margins": 26.6862850189209, + "rewards/real": -14.128130912780762, "step": 230 }, { - "epoch": 0.31, - "learning_rate": 3.8549075391180653e-07, + "epoch": 0.15, + "learning_rate": 4.7048364153627306e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -705.4946899414062, - "logps/real": -414.25146484375, - "loss": 0.4719, - "rewards/accuracies": 0.925000011920929, - "rewards/generated": -40.05225372314453, - "rewards/margins": 24.665658950805664, - "rewards/real": -15.386594772338867, + "logps/generated": -666.6107788085938, + "logps/real": -442.3255310058594, + "loss": 0.5706, + "rewards/accuracies": 0.9375, + "rewards/generated": -37.09008026123047, + "rewards/margins": 21.38768196105957, + "rewards/real": -15.702404975891113, "step": 240 }, { - "epoch": 0.32, - "learning_rate": 3.783783783783784e-07, + "epoch": 0.16, + "learning_rate": 4.66927453769559e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -718.469970703125, - "logps/real": -393.9696350097656, - "loss": 0.326, - "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -43.078765869140625, - "rewards/margins": 28.980609893798828, - "rewards/real": -14.09815788269043, + "logps/generated": -685.1495971679688, + "logps/real": -359.0677795410156, + "loss": 0.5271, + "rewards/accuracies": 0.9375, + "rewards/generated": -38.3968505859375, + "rewards/margins": 26.170297622680664, + "rewards/real": -12.226548194885254, "step": 250 }, { - "epoch": 0.33, - "learning_rate": 3.7126600284495016e-07, + "epoch": 0.17, + "learning_rate": 4.633712660028449e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -721.9268798828125, - "logps/real": -403.2998046875, - "loss": 0.4804, - "rewards/accuracies": 0.9375, - "rewards/generated": -41.29940414428711, - "rewards/margins": 26.352609634399414, - "rewards/real": -14.946797370910645, + "logps/generated": -679.1738891601562, + "logps/real": -367.4692077636719, + "loss": 0.4291, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -38.32642364501953, + "rewards/margins": 25.107736587524414, + "rewards/real": -13.218683242797852, "step": 260 }, { - "epoch": 0.35, - "learning_rate": 3.6415362731152206e-07, + "epoch": 0.17, + "learning_rate": 4.5981507823613085e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -690.9553833007812, - "logps/real": -374.5037536621094, - "loss": 0.3827, - "rewards/accuracies": 0.949999988079071, - "rewards/generated": -40.69682693481445, - "rewards/margins": 26.715112686157227, - "rewards/real": -13.981710433959961, + "logps/generated": -681.1062622070312, + "logps/real": -395.8605651855469, + "loss": 0.6404, + "rewards/accuracies": 0.9375, + "rewards/generated": -39.41220474243164, + "rewards/margins": 23.57551383972168, + "rewards/real": -15.836690902709961, "step": 270 }, { - "epoch": 0.36, - "learning_rate": 3.5704125177809385e-07, + "epoch": 0.18, + "learning_rate": 4.562588904694168e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -725.0104370117188, - "logps/real": -406.488037109375, - "loss": 0.4101, - "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -42.66962432861328, - "rewards/margins": 27.242467880249023, - "rewards/real": -15.427154541015625, + "logps/generated": -720.9105224609375, + "logps/real": -410.877197265625, + "loss": 0.3044, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -43.70952606201172, + "rewards/margins": 28.780017852783203, + "rewards/real": -14.929506301879883, "step": 280 }, { - "epoch": 0.37, - "learning_rate": 3.4992887624466574e-07, + "epoch": 0.19, + "learning_rate": 4.5270270270270264e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -697.3206787109375, - "logps/real": -400.65753173828125, - "loss": 0.3629, - "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -39.595970153808594, - "rewards/margins": 24.689983367919922, - "rewards/real": -14.90599250793457, + "logps/generated": -714.8857421875, + "logps/real": -394.2933654785156, + "loss": 0.2007, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -41.24427032470703, + "rewards/margins": 28.501968383789062, + "rewards/real": -12.742300987243652, "step": 290 }, { - "epoch": 0.38, - "learning_rate": 3.4281650071123753e-07, + "epoch": 0.19, + "learning_rate": 4.491465149359886e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -711.7612915039062, - "logps/real": -388.0057678222656, - "loss": 0.2122, - "rewards/accuracies": 0.949999988079071, - "rewards/generated": -42.39947509765625, - "rewards/margins": 27.109222412109375, - "rewards/real": -15.290254592895508, + "logps/generated": -682.9447021484375, + "logps/real": -367.23590087890625, + "loss": 0.4911, + "rewards/accuracies": 0.9375, + "rewards/generated": -39.695213317871094, + "rewards/margins": 26.347553253173828, + "rewards/real": -13.34765911102295, "step": 300 }, { - "epoch": 0.4, - "learning_rate": 3.3570412517780937e-07, + "epoch": 0.2, + "learning_rate": 4.4559032716927454e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -713.0751953125, - "logps/real": -423.7640686035156, - "loss": 0.3657, - "rewards/accuracies": 0.9125000238418579, - "rewards/generated": -41.861183166503906, - "rewards/margins": 26.213199615478516, - "rewards/real": -15.647982597351074, + "logps/generated": -709.5479736328125, + "logps/real": -430.88763427734375, + "loss": 0.1828, + "rewards/accuracies": 0.9375, + "rewards/generated": -42.66682052612305, + "rewards/margins": 27.468265533447266, + "rewards/real": -15.19856071472168, "step": 310 }, { - "epoch": 0.41, - "learning_rate": 3.285917496443812e-07, + "epoch": 0.2, + "learning_rate": 4.420341394025605e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -695.4622192382812, - "logps/real": -386.4324645996094, - "loss": 0.3005, - "rewards/accuracies": 0.925000011920929, - "rewards/generated": -39.45234298706055, - "rewards/margins": 25.696590423583984, - "rewards/real": -13.755752563476562, + "logps/generated": -755.2057495117188, + "logps/real": -418.7355041503906, + "loss": 0.3104, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -46.417259216308594, + "rewards/margins": 30.384328842163086, + "rewards/real": -16.032928466796875, "step": 320 }, { - "epoch": 0.42, - "learning_rate": 3.2147937411095305e-07, + "epoch": 0.21, + "learning_rate": 4.384779516358463e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -774.9369506835938, - "logps/real": -426.2724609375, - "loss": 0.2325, - "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -48.17813491821289, - "rewards/margins": 31.29937171936035, - "rewards/real": -16.87876319885254, + "logps/generated": -744.1261596679688, + "logps/real": -432.3663024902344, + "loss": 0.4376, + "rewards/accuracies": 0.887499988079071, + "rewards/generated": -45.77213668823242, + "rewards/margins": 28.370372772216797, + "rewards/real": -17.401762008666992, "step": 330 }, { - "epoch": 0.43, - "learning_rate": 3.1436699857752484e-07, + "epoch": 0.22, + "learning_rate": 4.3492176386913227e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -714.9234008789062, - "logps/real": -395.0069885253906, - "loss": 0.2981, - "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -41.547645568847656, - "rewards/margins": 27.00480079650879, - "rewards/real": -14.54284381866455, + "logps/generated": -778.1282348632812, + "logps/real": -449.26776123046875, + "loss": 0.2628, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -48.66665267944336, + "rewards/margins": 31.182621002197266, + "rewards/real": -17.484031677246094, "step": 340 }, { - "epoch": 0.45, - "learning_rate": 3.0725462304409674e-07, + "epoch": 0.22, + "learning_rate": 4.313655761024182e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -721.8656005859375, - "logps/real": -400.1606750488281, - "loss": 0.2019, - "rewards/accuracies": 0.9375, - "rewards/generated": -44.031803131103516, - "rewards/margins": 28.76715087890625, - "rewards/real": -15.264646530151367, + "logps/generated": -756.3446655273438, + "logps/real": -430.87493896484375, + "loss": 0.3925, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -46.581642150878906, + "rewards/margins": 29.290760040283203, + "rewards/real": -17.290876388549805, "step": 350 }, { - "epoch": 0.46, - "learning_rate": 3.001422475106685e-07, + "epoch": 0.23, + "learning_rate": 4.278093883357041e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -728.89599609375, - "logps/real": -470.1971130371094, - "loss": 0.5931, - "rewards/accuracies": 0.9375, - "rewards/generated": -43.573814392089844, - "rewards/margins": 24.9804630279541, - "rewards/real": -18.593351364135742, + "logps/generated": -777.9512939453125, + "logps/real": -414.18450927734375, + "loss": 0.4194, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -48.169349670410156, + "rewards/margins": 32.56087875366211, + "rewards/real": -15.608467102050781, "step": 360 }, { - "epoch": 0.47, - "learning_rate": 2.9302987197724037e-07, + "epoch": 0.24, + "learning_rate": 4.2425320056899e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -667.4353637695312, - "logps/real": -369.68328857421875, - "loss": 0.4031, - "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -38.71464920043945, - "rewards/margins": 24.013246536254883, - "rewards/real": -14.701400756835938, + "logps/generated": -776.2547607421875, + "logps/real": -459.26971435546875, + "loss": 0.1642, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -48.74531173706055, + "rewards/margins": 30.371551513671875, + "rewards/real": -18.373756408691406, "step": 370 }, { - "epoch": 0.49, - "learning_rate": 2.8591749644381226e-07, + "epoch": 0.24, + "learning_rate": 4.2069701280227595e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -708.6209716796875, - "logps/real": -395.7604675292969, - "loss": 0.3691, - "rewards/accuracies": 0.9375, - "rewards/generated": -42.240318298339844, - "rewards/margins": 26.32097816467285, - "rewards/real": -15.919347763061523, + "logps/generated": -787.847900390625, + "logps/real": -431.939697265625, + "loss": 0.1822, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -48.518409729003906, + "rewards/margins": 31.980022430419922, + "rewards/real": -16.538381576538086, "step": 380 }, { - "epoch": 0.5, - "learning_rate": 2.7880512091038405e-07, + "epoch": 0.25, + "learning_rate": 4.1714082503556185e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -766.4454345703125, - "logps/real": -423.30755615234375, - "loss": 0.2758, + "logps/generated": -782.6195678710938, + "logps/real": -409.2000732421875, + "loss": 0.1507, "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -47.435791015625, - "rewards/margins": 30.477214813232422, - "rewards/real": -16.958572387695312, + "rewards/generated": -49.06304168701172, + "rewards/margins": 32.46137237548828, + "rewards/real": -16.601669311523438, "step": 390 }, { - "epoch": 0.51, - "learning_rate": 2.716927453769559e-07, + "epoch": 0.26, + "learning_rate": 4.135846372688478e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -768.6455078125, - "logps/real": -449.0997009277344, - "loss": 0.3455, - "rewards/accuracies": 0.949999988079071, - "rewards/generated": -47.01398849487305, - "rewards/margins": 28.302906036376953, - "rewards/real": -18.711084365844727, + "logps/generated": -743.0278930664062, + "logps/real": -435.90399169921875, + "loss": 0.3316, + "rewards/accuracies": 0.9375, + "rewards/generated": -46.87841796875, + "rewards/margins": 29.552413940429688, + "rewards/real": -17.32600975036621, "step": 400 }, { - "epoch": 0.52, - "learning_rate": 2.6458036984352773e-07, + "epoch": 0.26, + "learning_rate": 4.100284495021337e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -734.2765502929688, - "logps/real": -447.5357971191406, - "loss": 0.375, - "rewards/accuracies": 0.9375, - "rewards/generated": -46.2476921081543, - "rewards/margins": 27.413455963134766, - "rewards/real": -18.834239959716797, + "logps/generated": -779.1712646484375, + "logps/real": -406.473876953125, + "loss": 0.333, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -48.82392883300781, + "rewards/margins": 33.1125602722168, + "rewards/real": -15.711362838745117, "step": 410 }, { - "epoch": 0.54, - "learning_rate": 2.574679943100996e-07, + "epoch": 0.27, + "learning_rate": 4.064722617354196e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -730.5106201171875, - "logps/real": -453.9371643066406, - "loss": 0.0816, - "rewards/accuracies": 0.925000011920929, - "rewards/generated": -45.583648681640625, - "rewards/margins": 27.334802627563477, - "rewards/real": -18.248844146728516, + "logps/generated": -774.0535888671875, + "logps/real": -417.33184814453125, + "loss": 0.2446, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -49.45975875854492, + "rewards/margins": 33.8248405456543, + "rewards/real": -15.634923934936523, "step": 420 }, { - "epoch": 0.55, - "learning_rate": 2.5035561877667136e-07, + "epoch": 0.28, + "learning_rate": 4.0291607396870553e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -766.5603637695312, - "logps/real": -438.89788818359375, - "loss": 0.1735, - "rewards/accuracies": 0.9750000238418579, - "rewards/generated": -46.9077262878418, - "rewards/margins": 29.52130126953125, - "rewards/real": -17.386432647705078, + "logps/generated": -767.9479370117188, + "logps/real": -403.72357177734375, + "loss": 0.2645, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -48.79080581665039, + "rewards/margins": 33.18938064575195, + "rewards/real": -15.601428031921387, "step": 430 }, { - "epoch": 0.56, - "learning_rate": 2.4324324324324326e-07, + "epoch": 0.28, + "learning_rate": 3.993598862019915e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -781.421630859375, - "logps/real": -405.1501770019531, - "loss": 0.3965, + "logps/generated": -754.3994140625, + "logps/real": -410.26470947265625, + "loss": 0.2166, "rewards/accuracies": 0.987500011920929, - "rewards/generated": -49.6665153503418, - "rewards/margins": 33.62762451171875, - "rewards/real": -16.03889274597168, + "rewards/generated": -46.57341766357422, + "rewards/margins": 30.815637588500977, + "rewards/real": -15.757779121398926, "step": 440 }, { - "epoch": 0.58, - "learning_rate": 2.3613086770981507e-07, + "epoch": 0.29, + "learning_rate": 3.9580369843527737e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -760.2469482421875, - "logps/real": -404.4407653808594, - "loss": 0.4128, - "rewards/accuracies": 0.949999988079071, - "rewards/generated": -47.4620361328125, - "rewards/margins": 30.4351863861084, - "rewards/real": -17.026851654052734, + "logps/generated": -870.4566650390625, + "logps/real": -408.28997802734375, + "loss": 0.2063, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -57.19744110107422, + "rewards/margins": 40.77977752685547, + "rewards/real": -16.41766357421875, "step": 450 }, { - "epoch": 0.59, - "learning_rate": 2.290184921763869e-07, + "epoch": 0.29, + "learning_rate": 3.9224751066856327e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -820.1843872070312, - "logps/real": -420.02587890625, - "loss": 0.1306, - "rewards/accuracies": 1.0, - "rewards/generated": -51.192138671875, - "rewards/margins": 35.38946533203125, - "rewards/real": -15.8026704788208, + "logps/generated": -876.7347412109375, + "logps/real": -472.7223205566406, + "loss": 0.3418, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -57.74506378173828, + "rewards/margins": 37.913551330566406, + "rewards/real": -19.831506729125977, "step": 460 }, { - "epoch": 0.6, - "learning_rate": 2.2190611664295875e-07, + "epoch": 0.3, + "learning_rate": 3.886913229018492e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -759.2930908203125, - "logps/real": -438.888916015625, - "loss": 0.2487, - "rewards/accuracies": 0.925000011920929, - "rewards/generated": -45.420188903808594, - "rewards/margins": 27.34377670288086, - "rewards/real": -18.0764102935791, + "logps/generated": -798.3450317382812, + "logps/real": -431.00628662109375, + "loss": 0.5482, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -50.90120315551758, + "rewards/margins": 32.8853645324707, + "rewards/real": -18.01584243774414, "step": 470 }, { - "epoch": 0.61, - "learning_rate": 2.1479374110953057e-07, + "epoch": 0.31, + "learning_rate": 3.851351351351351e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -777.7584838867188, - "logps/real": -388.6899108886719, - "loss": 0.3922, - "rewards/accuracies": 0.949999988079071, - "rewards/generated": -48.69025421142578, - "rewards/margins": 32.126991271972656, - "rewards/real": -16.563264846801758, + "logps/generated": -804.5404052734375, + "logps/real": -420.87066650390625, + "loss": 0.1632, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -51.20261764526367, + "rewards/margins": 34.63499069213867, + "rewards/real": -16.567626953125, "step": 480 }, { - "epoch": 0.63, - "learning_rate": 2.076813655761024e-07, + "epoch": 0.31, + "learning_rate": 3.8157894736842105e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -804.2822875976562, - "logps/real": -438.558837890625, - "loss": 0.2558, - "rewards/accuracies": 0.987500011920929, - "rewards/generated": -50.539390563964844, - "rewards/margins": 31.528533935546875, - "rewards/real": -19.0108585357666, + "logps/generated": -808.2142333984375, + "logps/real": -412.75909423828125, + "loss": 0.067, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -51.9697265625, + "rewards/margins": 33.47379684448242, + "rewards/real": -18.49593162536621, "step": 490 }, { - "epoch": 0.64, - "learning_rate": 2.0056899004267425e-07, + "epoch": 0.32, + "learning_rate": 3.7802275960170695e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -799.6202392578125, - "logps/real": -483.55096435546875, - "loss": 0.2945, - "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -50.81739807128906, - "rewards/margins": 30.668746948242188, - "rewards/real": -20.14865493774414, + "logps/generated": -833.8155517578125, + "logps/real": -421.85693359375, + "loss": 0.3303, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -53.8129768371582, + "rewards/margins": 36.602169036865234, + "rewards/real": -17.210813522338867, "step": 500 }, { - "epoch": 0.64, + "epoch": 0.32, "eval_logits/generated": -Infinity, "eval_logits/real": -Infinity, - "eval_logps/generated": -509.32861328125, - "eval_logps/real": -313.0279541015625, - "eval_loss": 0.1748354285955429, - "eval_rewards/accuracies": 0.9442675113677979, - "eval_rewards/generated": -21.789350509643555, - "eval_rewards/margins": 15.725247383117676, - "eval_rewards/real": -6.0641021728515625, - "eval_runtime": 590.026, - "eval_samples_per_second": 8.474, - "eval_steps_per_second": 0.266, + "eval_logps/generated": -529.861328125, + "eval_logps/real": -300.84613037109375, + "eval_loss": 0.20025908946990967, + "eval_rewards/accuracies": 0.9371019005775452, + "eval_rewards/generated": -23.842620849609375, + "eval_rewards/margins": 18.996700286865234, + "eval_rewards/real": -4.845921039581299, + "eval_runtime": 816.3836, + "eval_samples_per_second": 6.125, + "eval_steps_per_second": 0.192, "step": 500 }, { - "epoch": 0.65, - "learning_rate": 1.9345661450924607e-07, + "epoch": 0.33, + "learning_rate": 3.7446657183499284e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -757.3658447265625, - "logps/real": -412.8812561035156, - "loss": 0.2523, - "rewards/accuracies": 0.9750000238418579, - "rewards/generated": -45.53099822998047, - "rewards/margins": 29.076038360595703, - "rewards/real": -16.454959869384766, + "logps/generated": -865.2786254882812, + "logps/real": -424.654052734375, + "loss": 0.113, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -56.876319885253906, + "rewards/margins": 39.13337326049805, + "rewards/real": -17.74294662475586, "step": 510 }, { - "epoch": 0.66, - "learning_rate": 1.863442389758179e-07, + "epoch": 0.33, + "learning_rate": 3.709103840682788e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -780.4212036132812, - "logps/real": -437.5557556152344, - "loss": 0.2147, - "rewards/accuracies": 0.987500011920929, - "rewards/generated": -49.51309585571289, - "rewards/margins": 32.93183517456055, - "rewards/real": -16.581256866455078, + "logps/generated": -779.5700073242188, + "logps/real": -393.60321044921875, + "loss": 0.1035, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -49.612606048583984, + "rewards/margins": 33.626441955566406, + "rewards/real": -15.986169815063477, "step": 520 }, { - "epoch": 0.68, - "learning_rate": 1.7923186344238975e-07, + "epoch": 0.34, + "learning_rate": 3.6735419630156474e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -735.9564208984375, - "logps/real": -407.6651306152344, - "loss": 0.2975, - "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -44.98466491699219, - "rewards/margins": 29.423254013061523, - "rewards/real": -15.561413764953613, + "logps/generated": -776.9521484375, + "logps/real": -400.86016845703125, + "loss": 0.138, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -49.583370208740234, + "rewards/margins": 32.01411819458008, + "rewards/real": -17.569255828857422, "step": 530 }, { - "epoch": 0.69, - "learning_rate": 1.721194879089616e-07, + "epoch": 0.35, + "learning_rate": 3.637980085348506e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -807.4033203125, - "logps/real": -429.4000549316406, - "loss": 0.2254, - "rewards/accuracies": 0.949999988079071, - "rewards/generated": -51.46739959716797, - "rewards/margins": 32.75578689575195, - "rewards/real": -18.711612701416016, + "logps/generated": -768.1486206054688, + "logps/real": -421.4039611816406, + "loss": 0.3281, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -48.634918212890625, + "rewards/margins": 31.545923233032227, + "rewards/real": -17.088993072509766, "step": 540 }, { - "epoch": 0.7, - "learning_rate": 1.650071123755334e-07, + "epoch": 0.35, + "learning_rate": 3.602418207681365e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -811.77099609375, - "logps/real": -425.84649658203125, - "loss": 0.3105, + "logps/generated": -798.0943603515625, + "logps/real": -438.1573791503906, + "loss": 0.1217, "rewards/accuracies": 0.949999988079071, - "rewards/generated": -52.33929443359375, - "rewards/margins": 34.459083557128906, - "rewards/real": -17.88020896911621, + "rewards/generated": -49.93975067138672, + "rewards/margins": 32.817298889160156, + "rewards/real": -17.12244987487793, "step": 550 }, { - "epoch": 0.72, - "learning_rate": 1.5789473684210525e-07, + "epoch": 0.36, + "learning_rate": 3.5668563300142247e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -788.242919921875, - "logps/real": -440.695556640625, - "loss": 0.1702, - "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -49.456298828125, - "rewards/margins": 31.41439437866211, - "rewards/real": -18.041906356811523, + "logps/generated": -784.5931396484375, + "logps/real": -432.72039794921875, + "loss": 0.1697, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -49.35939407348633, + "rewards/margins": 30.70552635192871, + "rewards/real": -18.65386962890625, "step": 560 }, { - "epoch": 0.73, - "learning_rate": 1.507823613086771e-07, + "epoch": 0.36, + "learning_rate": 3.5312944523470837e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -752.26611328125, - "logps/real": -427.10113525390625, - "loss": 0.3281, - "rewards/accuracies": 0.9375, - "rewards/generated": -47.79071807861328, - "rewards/margins": 30.950246810913086, - "rewards/real": -16.84047508239746, + "logps/generated": -830.1300659179688, + "logps/real": -439.60400390625, + "loss": 0.1078, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -53.280181884765625, + "rewards/margins": 33.769203186035156, + "rewards/real": -19.510984420776367, "step": 570 }, { - "epoch": 0.74, - "learning_rate": 1.436699857752489e-07, + "epoch": 0.37, + "learning_rate": 3.495732574679943e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -826.0186767578125, - "logps/real": -424.66680908203125, - "loss": 0.3042, - "rewards/accuracies": 0.9750000238418579, - "rewards/generated": -51.909454345703125, - "rewards/margins": 34.6719856262207, - "rewards/real": -17.237468719482422, + "logps/generated": -795.7073364257812, + "logps/real": -419.2820739746094, + "loss": 0.3486, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -51.35248947143555, + "rewards/margins": 34.13421630859375, + "rewards/real": -17.2182674407959, "step": 580 }, { - "epoch": 0.75, - "learning_rate": 1.3655761024182077e-07, + "epoch": 0.38, + "learning_rate": 3.460170697012802e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -816.61962890625, - "logps/real": -420.100830078125, - "loss": 0.2635, - "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -52.91407012939453, - "rewards/margins": 36.203983306884766, - "rewards/real": -16.710086822509766, + "logps/generated": -867.7032470703125, + "logps/real": -411.7303771972656, + "loss": 0.1504, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -57.48298263549805, + "rewards/margins": 41.10187530517578, + "rewards/real": -16.3811092376709, "step": 590 }, { - "epoch": 0.77, - "learning_rate": 1.2944523470839261e-07, + "epoch": 0.38, + "learning_rate": 3.424608819345661e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -820.4493408203125, - "logps/real": -428.4378967285156, - "loss": 0.347, - "rewards/accuracies": 0.9375, - "rewards/generated": -51.51692581176758, - "rewards/margins": 34.30039596557617, - "rewards/real": -17.216527938842773, + "logps/generated": -883.2786865234375, + "logps/real": -425.11041259765625, + "loss": 0.26, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -58.78193283081055, + "rewards/margins": 40.10378646850586, + "rewards/real": -18.67814064025879, "step": 600 }, { - "epoch": 0.78, - "learning_rate": 1.2233285917496443e-07, + "epoch": 0.39, + "learning_rate": 3.3890469416785205e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -768.8092041015625, - "logps/real": -420.62200927734375, - "loss": 0.0588, - "rewards/accuracies": 0.987500011920929, - "rewards/generated": -47.871437072753906, - "rewards/margins": 32.33845901489258, - "rewards/real": -15.532976150512695, + "logps/generated": -849.3904418945312, + "logps/real": -451.054931640625, + "loss": 0.1136, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -56.99665451049805, + "rewards/margins": 37.19718933105469, + "rewards/real": -19.79946517944336, "step": 610 }, { - "epoch": 0.79, - "learning_rate": 1.1522048364153626e-07, + "epoch": 0.4, + "learning_rate": 3.35348506401138e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -786.0013427734375, - "logps/real": -442.2757873535156, - "loss": 0.2483, - "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -50.278663635253906, - "rewards/margins": 33.101016998291016, - "rewards/real": -17.17764663696289, + "logps/generated": -894.3114013671875, + "logps/real": -414.6637268066406, + "loss": 0.2349, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -59.90185546875, + "rewards/margins": 41.685302734375, + "rewards/real": -18.216556549072266, "step": 620 }, { - "epoch": 0.81, - "learning_rate": 1.0810810810810811e-07, + "epoch": 0.4, + "learning_rate": 3.3179231863442384e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -787.63818359375, - "logps/real": -404.19122314453125, - "loss": 0.1336, + "logps/generated": -826.98681640625, + "logps/real": -439.54559326171875, + "loss": 0.1956, "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -50.15456008911133, - "rewards/margins": 33.99864959716797, - "rewards/real": -16.15591049194336, + "rewards/generated": -53.87934494018555, + "rewards/margins": 35.65498733520508, + "rewards/real": -18.2243595123291, "step": 630 }, { - "epoch": 0.82, - "learning_rate": 1.0099573257467994e-07, + "epoch": 0.41, + "learning_rate": 3.282361308677098e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -830.99462890625, - "logps/real": -457.3229064941406, - "loss": 0.2925, - "rewards/accuracies": 0.949999988079071, - "rewards/generated": -53.06385040283203, - "rewards/margins": 34.269439697265625, - "rewards/real": -18.794404983520508, + "logps/generated": -865.1568603515625, + "logps/real": -431.68914794921875, + "loss": 0.132, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -56.41038131713867, + "rewards/margins": 37.95232391357422, + "rewards/real": -18.458057403564453, "step": 640 }, { - "epoch": 0.83, - "learning_rate": 9.388335704125178e-08, + "epoch": 0.42, + "learning_rate": 3.2467994310099573e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -777.55517578125, - "logps/real": -405.3326110839844, - "loss": 0.137, + "logps/generated": -910.2918090820312, + "logps/real": -463.79034423828125, + "loss": 0.0608, "rewards/accuracies": 1.0, - "rewards/generated": -49.94211196899414, - "rewards/margins": 32.77922821044922, - "rewards/real": -17.16288948059082, + "rewards/generated": -61.92814254760742, + "rewards/margins": 42.76028823852539, + "rewards/real": -19.167850494384766, "step": 650 }, { - "epoch": 0.84, - "learning_rate": 8.677098150782361e-08, + "epoch": 0.42, + "learning_rate": 3.211237553342817e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -775.7921142578125, - "logps/real": -408.35302734375, - "loss": 0.0992, + "logps/generated": -792.5632934570312, + "logps/real": -417.36187744140625, + "loss": 0.0828, "rewards/accuracies": 0.987500011920929, - "rewards/generated": -48.498905181884766, - "rewards/margins": 32.695289611816406, - "rewards/real": -15.803617477416992, + "rewards/generated": -51.489219665527344, + "rewards/margins": 34.43842697143555, + "rewards/real": -17.050796508789062, "step": 660 }, { - "epoch": 0.86, - "learning_rate": 7.965860597439544e-08, + "epoch": 0.43, + "learning_rate": 3.175675675675675e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -764.4813232421875, - "logps/real": -427.140625, - "loss": 0.2396, - "rewards/accuracies": 0.925000011920929, - "rewards/generated": -48.443748474121094, - "rewards/margins": 31.320148468017578, - "rewards/real": -17.123600006103516, + "logps/generated": -870.7483520507812, + "logps/real": -379.50860595703125, + "loss": 0.2451, + "rewards/accuracies": 1.0, + "rewards/generated": -59.225135803222656, + "rewards/margins": 44.224327087402344, + "rewards/real": -15.000802993774414, "step": 670 }, { - "epoch": 0.87, - "learning_rate": 7.254623044096728e-08, + "epoch": 0.44, + "learning_rate": 3.1401137980085347e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -803.4662475585938, - "logps/real": -435.402099609375, - "loss": 0.2362, - "rewards/accuracies": 0.987500011920929, - "rewards/generated": -51.644203186035156, - "rewards/margins": 33.23621368408203, - "rewards/real": -18.40799331665039, + "logps/generated": -930.4283447265625, + "logps/real": -410.5220642089844, + "loss": 0.1281, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -63.19865036010742, + "rewards/margins": 46.53211212158203, + "rewards/real": -16.666542053222656, "step": 680 }, { - "epoch": 0.88, - "learning_rate": 6.543385490753911e-08, + "epoch": 0.44, + "learning_rate": 3.104551920341394e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -863.4392700195312, - "logps/real": -439.1971130371094, - "loss": 0.1949, - "rewards/accuracies": 0.9750000238418579, - "rewards/generated": -55.93394088745117, - "rewards/margins": 36.83943176269531, - "rewards/real": -19.094507217407227, + "logps/generated": -847.4844970703125, + "logps/real": -407.2476806640625, + "loss": 0.2284, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -55.74333572387695, + "rewards/margins": 40.407684326171875, + "rewards/real": -15.335647583007812, "step": 690 }, { - "epoch": 0.9, - "learning_rate": 5.832147937411095e-08, + "epoch": 0.45, + "learning_rate": 3.068990042674253e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -779.9309692382812, - "logps/real": -411.91455078125, - "loss": 0.2666, - "rewards/accuracies": 0.9375, - "rewards/generated": -47.416011810302734, - "rewards/margins": 29.500951766967773, - "rewards/real": -17.915063858032227, + "logps/generated": -811.4222412109375, + "logps/real": -391.034912109375, + "loss": 0.142, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -53.94512939453125, + "rewards/margins": 37.505123138427734, + "rewards/real": -16.440000534057617, "step": 700 }, { - "epoch": 0.91, - "learning_rate": 5.120910384068278e-08, + "epoch": 0.45, + "learning_rate": 3.033428165007112e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -826.6921997070312, - "logps/real": -447.87762451171875, - "loss": 0.258, - "rewards/accuracies": 0.949999988079071, - "rewards/generated": -53.74137496948242, - "rewards/margins": 35.463531494140625, - "rewards/real": -18.277841567993164, + "logps/generated": -877.0598754882812, + "logps/real": -428.3775329589844, + "loss": 0.0922, + "rewards/accuracies": 1.0, + "rewards/generated": -59.027740478515625, + "rewards/margins": 40.57575988769531, + "rewards/real": -18.45196533203125, "step": 710 }, { - "epoch": 0.92, - "learning_rate": 4.4096728307254624e-08, + "epoch": 0.46, + "learning_rate": 2.9978662873399715e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -834.2398681640625, - "logps/real": -427.90966796875, - "loss": 0.1272, - "rewards/accuracies": 0.987500011920929, - "rewards/generated": -53.82038497924805, - "rewards/margins": 36.249298095703125, - "rewards/real": -17.571086883544922, + "logps/generated": -855.3162841796875, + "logps/real": -435.65301513671875, + "loss": 0.2617, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -56.983558654785156, + "rewards/margins": 38.14686965942383, + "rewards/real": -18.836681365966797, "step": 720 }, { - "epoch": 0.93, - "learning_rate": 3.698435277382646e-08, + "epoch": 0.47, + "learning_rate": 2.9623044096728305e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -838.7642822265625, - "logps/real": -436.14642333984375, - "loss": 0.2078, - "rewards/accuracies": 0.9750000238418579, - "rewards/generated": -55.14348602294922, - "rewards/margins": 37.05797576904297, - "rewards/real": -18.08551597595215, + "logps/generated": -818.3048095703125, + "logps/real": -438.0257263183594, + "loss": 0.281, + "rewards/accuracies": 0.887499988079071, + "rewards/generated": -54.11011505126953, + "rewards/margins": 37.23180389404297, + "rewards/real": -16.878313064575195, "step": 730 }, { - "epoch": 0.95, - "learning_rate": 2.9871977240398294e-08, + "epoch": 0.47, + "learning_rate": 2.92674253200569e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -798.5349731445312, - "logps/real": -427.6378479003906, - "loss": 0.2621, - "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -52.086219787597656, - "rewards/margins": 34.135223388671875, - "rewards/real": -17.95099449157715, + "logps/generated": -871.099609375, + "logps/real": -420.02880859375, + "loss": 0.1622, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -58.3928337097168, + "rewards/margins": 41.17832565307617, + "rewards/real": -17.21451187133789, "step": 740 }, { - "epoch": 0.96, - "learning_rate": 2.275960170697013e-08, + "epoch": 0.48, + "learning_rate": 2.8911806543385494e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -835.6724853515625, - "logps/real": -414.00152587890625, - "loss": 0.1564, + "logps/generated": -907.7469482421875, + "logps/real": -472.86614990234375, + "loss": 0.2422, "rewards/accuracies": 0.9750000238418579, - "rewards/generated": -52.597930908203125, - "rewards/margins": 36.33824920654297, - "rewards/real": -16.259681701660156, + "rewards/generated": -61.027503967285156, + "rewards/margins": 42.720279693603516, + "rewards/real": -18.30722427368164, "step": 750 }, { - "epoch": 0.97, - "learning_rate": 1.564722617354196e-08, + "epoch": 0.49, + "learning_rate": 2.855618776671408e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -850.904296875, - "logps/real": -392.68310546875, - "loss": 0.1515, + "logps/generated": -920.8482666015625, + "logps/real": -464.93218994140625, + "loss": 0.1303, "rewards/accuracies": 0.9750000238418579, - "rewards/generated": -55.202125549316406, - "rewards/margins": 38.95097351074219, - "rewards/real": -16.25115394592285, + "rewards/generated": -63.184059143066406, + "rewards/margins": 44.418663024902344, + "rewards/real": -18.765398025512695, "step": 760 }, { - "epoch": 0.98, - "learning_rate": 8.534850640113798e-09, + "epoch": 0.49, + "learning_rate": 2.8200568990042673e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -814.2432250976562, - "logps/real": -415.373779296875, - "loss": 0.1867, - "rewards/accuracies": 0.9624999761581421, - "rewards/generated": -53.38776397705078, - "rewards/margins": 37.246055603027344, - "rewards/real": -16.141704559326172, + "logps/generated": -957.7092895507812, + "logps/real": -442.88897705078125, + "loss": 0.3922, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -65.42414093017578, + "rewards/margins": 46.468353271484375, + "rewards/real": -18.955785751342773, "step": 770 }, { - "epoch": 1.0, - "learning_rate": 1.422475106685633e-09, + "epoch": 0.5, + "learning_rate": 2.784495021337127e-07, "logits/generated": -Infinity, "logits/real": -Infinity, - "logps/generated": -811.1026611328125, - "logps/real": -388.99835205078125, - "loss": 0.2403, - "rewards/accuracies": 0.9750000238418579, - "rewards/generated": -51.92308807373047, - "rewards/margins": 35.41986846923828, - "rewards/real": -16.503215789794922, + "logps/generated": -908.4993286132812, + "logps/real": -413.3855895996094, + "loss": 0.1754, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -63.04656982421875, + "rewards/margins": 44.62030792236328, + "rewards/real": -18.426259994506836, "step": 780 }, + { + "epoch": 0.51, + "learning_rate": 2.7489331436699857e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -863.7523193359375, + "logps/real": -465.85009765625, + "loss": 0.1958, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -56.57744216918945, + "rewards/margins": 37.282875061035156, + "rewards/real": -19.2945613861084, + "step": 790 + }, + { + "epoch": 0.51, + "learning_rate": 2.7133712660028446e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -967.6433715820312, + "logps/real": -416.7491149902344, + "loss": 0.163, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -66.82258605957031, + "rewards/margins": 49.467437744140625, + "rewards/real": -17.355154037475586, + "step": 800 + }, + { + "epoch": 0.52, + "learning_rate": 2.677809388335704e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -902.4373168945312, + "logps/real": -438.209716796875, + "loss": 0.1638, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -61.510841369628906, + "rewards/margins": 41.37336349487305, + "rewards/real": -20.13747787475586, + "step": 810 + }, + { + "epoch": 0.52, + "learning_rate": 2.642247510668563e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -977.4361572265625, + "logps/real": -426.0132751464844, + "loss": 0.1951, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -67.98822784423828, + "rewards/margins": 50.39772415161133, + "rewards/real": -17.59050941467285, + "step": 820 + }, + { + "epoch": 0.53, + "learning_rate": 2.6066856330014225e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -933.1921997070312, + "logps/real": -432.3758239746094, + "loss": 0.1135, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -64.10530090332031, + "rewards/margins": 46.14706802368164, + "rewards/real": -17.958240509033203, + "step": 830 + }, + { + "epoch": 0.54, + "learning_rate": 2.5711237553342815e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -933.9869995117188, + "logps/real": -469.08074951171875, + "loss": 0.1554, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -62.14231491088867, + "rewards/margins": 42.210784912109375, + "rewards/real": -19.931529998779297, + "step": 840 + }, + { + "epoch": 0.54, + "learning_rate": 2.5355618776671404e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -905.9029541015625, + "logps/real": -442.23382568359375, + "loss": 0.0891, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -61.816429138183594, + "rewards/margins": 43.42655944824219, + "rewards/real": -18.38986587524414, + "step": 850 + }, + { + "epoch": 0.55, + "learning_rate": 2.5e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -903.47314453125, + "logps/real": -426.867431640625, + "loss": 0.2286, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -61.971595764160156, + "rewards/margins": 43.64350891113281, + "rewards/real": -18.328088760375977, + "step": 860 + }, + { + "epoch": 0.56, + "learning_rate": 2.4644381223328594e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -904.4534301757812, + "logps/real": -448.7220153808594, + "loss": 0.0816, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -62.04343795776367, + "rewards/margins": 42.920806884765625, + "rewards/real": -19.122634887695312, + "step": 870 + }, + { + "epoch": 0.56, + "learning_rate": 2.4288762446657183e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -967.4172973632812, + "logps/real": -439.7066955566406, + "loss": 0.1422, + "rewards/accuracies": 1.0, + "rewards/generated": -68.501220703125, + "rewards/margins": 50.839942932128906, + "rewards/real": -17.66128158569336, + "step": 880 + }, + { + "epoch": 0.57, + "learning_rate": 2.393314366998578e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -949.60546875, + "logps/real": -431.2792053222656, + "loss": 0.2069, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -65.57563781738281, + "rewards/margins": 47.400535583496094, + "rewards/real": -18.175098419189453, + "step": 890 + }, + { + "epoch": 0.58, + "learning_rate": 2.3577524893314365e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -958.1769409179688, + "logps/real": -407.71038818359375, + "loss": 0.0272, + "rewards/accuracies": 1.0, + "rewards/generated": -65.54978942871094, + "rewards/margins": 49.18589401245117, + "rewards/real": -16.363895416259766, + "step": 900 + }, + { + "epoch": 0.58, + "learning_rate": 2.322190611664296e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -854.2259521484375, + "logps/real": -461.548583984375, + "loss": 0.2207, + "rewards/accuracies": 0.925000011920929, + "rewards/generated": -57.03990936279297, + "rewards/margins": 37.42232131958008, + "rewards/real": -19.617586135864258, + "step": 910 + }, + { + "epoch": 0.59, + "learning_rate": 2.2866287339971549e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -913.2406005859375, + "logps/real": -424.36065673828125, + "loss": 0.1078, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -61.39838790893555, + "rewards/margins": 44.66621780395508, + "rewards/real": -16.732179641723633, + "step": 920 + }, + { + "epoch": 0.6, + "learning_rate": 2.251066856330014e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -903.1484375, + "logps/real": -405.7799987792969, + "loss": 0.2241, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -62.2490234375, + "rewards/margins": 45.68339920043945, + "rewards/real": -16.565624237060547, + "step": 930 + }, + { + "epoch": 0.6, + "learning_rate": 2.2155049786628733e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -916.8377075195312, + "logps/real": -432.683837890625, + "loss": 0.1258, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -61.457244873046875, + "rewards/margins": 44.813026428222656, + "rewards/real": -16.64422035217285, + "step": 940 + }, + { + "epoch": 0.61, + "learning_rate": 2.1799431009957325e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -894.6036987304688, + "logps/real": -406.5244445800781, + "loss": 0.029, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -60.447288513183594, + "rewards/margins": 43.23609161376953, + "rewards/real": -17.211193084716797, + "step": 950 + }, + { + "epoch": 0.61, + "learning_rate": 2.1443812233285914e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -938.0587158203125, + "logps/real": -458.41143798828125, + "loss": 0.1849, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -64.76756286621094, + "rewards/margins": 45.54555892944336, + "rewards/real": -19.222000122070312, + "step": 960 + }, + { + "epoch": 0.62, + "learning_rate": 2.108819345661451e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -990.7427978515625, + "logps/real": -451.2483825683594, + "loss": 0.2407, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -68.94884490966797, + "rewards/margins": 48.71486282348633, + "rewards/real": -20.23398208618164, + "step": 970 + }, + { + "epoch": 0.63, + "learning_rate": 2.0732574679943098e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -833.5616455078125, + "logps/real": -393.88153076171875, + "loss": 0.162, + "rewards/accuracies": 0.9375, + "rewards/generated": -54.51671600341797, + "rewards/margins": 37.748043060302734, + "rewards/real": -16.768672943115234, + "step": 980 + }, + { + "epoch": 0.63, + "learning_rate": 2.0376955903271693e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -875.89453125, + "logps/real": -475.2669982910156, + "loss": 0.1122, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -59.66436004638672, + "rewards/margins": 39.77927780151367, + "rewards/real": -19.88509178161621, + "step": 990 + }, + { + "epoch": 0.64, + "learning_rate": 2.0021337126600283e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -917.2653198242188, + "logps/real": -410.7203674316406, + "loss": 0.0933, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -63.5474853515625, + "rewards/margins": 45.195716857910156, + "rewards/real": -18.35177230834961, + "step": 1000 + }, + { + "epoch": 0.64, + "eval_logits/generated": -Infinity, + "eval_logits/real": -Infinity, + "eval_logps/generated": -639.9600219726562, + "eval_logps/real": -298.9767761230469, + "eval_loss": 0.15978027880191803, + "eval_rewards/accuracies": 0.9609872698783875, + "eval_rewards/generated": -34.85248565673828, + "eval_rewards/margins": 30.193498611450195, + "eval_rewards/real": -4.658985614776611, + "eval_runtime": 516.5493, + "eval_samples_per_second": 9.68, + "eval_steps_per_second": 0.304, + "step": 1000 + }, + { + "epoch": 0.65, + "learning_rate": 1.9665718349928875e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -931.1051025390625, + "logps/real": -400.90509033203125, + "loss": 0.1599, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -65.04185485839844, + "rewards/margins": 48.49779510498047, + "rewards/real": -16.544055938720703, + "step": 1010 + }, + { + "epoch": 0.65, + "learning_rate": 1.931009957325747e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -962.6014404296875, + "logps/real": -403.762939453125, + "loss": 0.1063, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -66.96389770507812, + "rewards/margins": 49.26046371459961, + "rewards/real": -17.703428268432617, + "step": 1020 + }, + { + "epoch": 0.66, + "learning_rate": 1.895448079658606e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -907.5885620117188, + "logps/real": -444.3199768066406, + "loss": 0.1852, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -62.3195915222168, + "rewards/margins": 43.62810134887695, + "rewards/real": -18.691490173339844, + "step": 1030 + }, + { + "epoch": 0.67, + "learning_rate": 1.859886201991465e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -943.9777221679688, + "logps/real": -450.1148986816406, + "loss": 0.237, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -65.29325866699219, + "rewards/margins": 46.713497161865234, + "rewards/real": -18.57975959777832, + "step": 1040 + }, + { + "epoch": 0.67, + "learning_rate": 1.8243243243243243e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -988.4417724609375, + "logps/real": -427.38702392578125, + "loss": 0.0954, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -69.71871948242188, + "rewards/margins": 52.45946502685547, + "rewards/real": -17.259246826171875, + "step": 1050 + }, + { + "epoch": 0.68, + "learning_rate": 1.7887624466571835e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -911.5084228515625, + "logps/real": -415.616455078125, + "loss": 0.1856, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -62.075843811035156, + "rewards/margins": 45.55692672729492, + "rewards/real": -16.51891326904297, + "step": 1060 + }, + { + "epoch": 0.68, + "learning_rate": 1.7532005689900424e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -979.6092529296875, + "logps/real": -422.32720947265625, + "loss": 0.2408, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -68.75633239746094, + "rewards/margins": 51.622406005859375, + "rewards/real": -17.13392448425293, + "step": 1070 + }, + { + "epoch": 0.69, + "learning_rate": 1.717638691322902e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -931.5291748046875, + "logps/real": -425.2210388183594, + "loss": 0.2356, + "rewards/accuracies": 0.9375, + "rewards/generated": -65.39581298828125, + "rewards/margins": 47.234867095947266, + "rewards/real": -18.160947799682617, + "step": 1080 + }, + { + "epoch": 0.7, + "learning_rate": 1.6820768136557609e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -958.6076049804688, + "logps/real": -406.325927734375, + "loss": 0.0331, + "rewards/accuracies": 1.0, + "rewards/generated": -66.18185424804688, + "rewards/margins": 50.36635208129883, + "rewards/real": -15.815505981445312, + "step": 1090 + }, + { + "epoch": 0.7, + "learning_rate": 1.64651493598862e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -857.2650146484375, + "logps/real": -447.1287536621094, + "loss": 0.1216, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -55.9595947265625, + "rewards/margins": 37.767311096191406, + "rewards/real": -18.192276000976562, + "step": 1100 + }, + { + "epoch": 0.71, + "learning_rate": 1.6109530583214793e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -849.4425048828125, + "logps/real": -399.19500732421875, + "loss": 0.1411, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -58.21696090698242, + "rewards/margins": 42.287139892578125, + "rewards/real": -15.929832458496094, + "step": 1110 + }, + { + "epoch": 0.72, + "learning_rate": 1.5753911806543385e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -1003.6033325195312, + "logps/real": -437.3089294433594, + "loss": 0.2411, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -69.50968933105469, + "rewards/margins": 51.704078674316406, + "rewards/real": -17.805606842041016, + "step": 1120 + }, + { + "epoch": 0.72, + "learning_rate": 1.5398293029871974e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -963.5474853515625, + "logps/real": -435.29095458984375, + "loss": 0.175, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -66.86241149902344, + "rewards/margins": 49.975059509277344, + "rewards/real": -16.887353897094727, + "step": 1130 + }, + { + "epoch": 0.73, + "learning_rate": 1.504267425320057e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -963.6954956054688, + "logps/real": -415.65960693359375, + "loss": 0.0674, + "rewards/accuracies": 1.0, + "rewards/generated": -67.89158630371094, + "rewards/margins": 50.22557830810547, + "rewards/real": -17.666006088256836, + "step": 1140 + }, + { + "epoch": 0.74, + "learning_rate": 1.4687055476529158e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -963.4187622070312, + "logps/real": -441.03094482421875, + "loss": 0.256, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -65.62948608398438, + "rewards/margins": 47.720008850097656, + "rewards/real": -17.909475326538086, + "step": 1150 + }, + { + "epoch": 0.74, + "learning_rate": 1.4331436699857753e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -948.861328125, + "logps/real": -444.0084533691406, + "loss": 0.2541, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -63.19414138793945, + "rewards/margins": 43.93315887451172, + "rewards/real": -19.26097869873047, + "step": 1160 + }, + { + "epoch": 0.75, + "learning_rate": 1.3975817923186345e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -920.9754028320312, + "logps/real": -451.8453063964844, + "loss": 0.1545, + "rewards/accuracies": 1.0, + "rewards/generated": -62.874755859375, + "rewards/margins": 45.2825927734375, + "rewards/real": -17.592164993286133, + "step": 1170 + }, + { + "epoch": 0.75, + "learning_rate": 1.3620199146514935e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -962.2693481445312, + "logps/real": -381.82183837890625, + "loss": 0.1726, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -66.71905517578125, + "rewards/margins": 50.402278900146484, + "rewards/real": -16.3167667388916, + "step": 1180 + }, + { + "epoch": 0.76, + "learning_rate": 1.326458036984353e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -844.4295043945312, + "logps/real": -408.8478088378906, + "loss": 0.0521, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -55.36383819580078, + "rewards/margins": 38.550174713134766, + "rewards/real": -16.813655853271484, + "step": 1190 + }, + { + "epoch": 0.77, + "learning_rate": 1.290896159317212e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -1029.0372314453125, + "logps/real": -405.693359375, + "loss": 0.0738, + "rewards/accuracies": 1.0, + "rewards/generated": -73.27754211425781, + "rewards/margins": 56.71906661987305, + "rewards/real": -16.558481216430664, + "step": 1200 + }, + { + "epoch": 0.77, + "learning_rate": 1.255334281650071e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -955.4649658203125, + "logps/real": -442.18377685546875, + "loss": 0.1159, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -66.71927642822266, + "rewards/margins": 48.399192810058594, + "rewards/real": -18.320079803466797, + "step": 1210 + }, + { + "epoch": 0.78, + "learning_rate": 1.2197724039829303e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -925.6458129882812, + "logps/real": -418.9791564941406, + "loss": 0.115, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -63.2381477355957, + "rewards/margins": 45.51228332519531, + "rewards/real": -17.725872039794922, + "step": 1220 + }, + { + "epoch": 0.79, + "learning_rate": 1.1842105263157894e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -915.9041137695312, + "logps/real": -407.2160949707031, + "loss": 0.1614, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -62.57989501953125, + "rewards/margins": 44.657745361328125, + "rewards/real": -17.922143936157227, + "step": 1230 + }, + { + "epoch": 0.79, + "learning_rate": 1.1486486486486487e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -895.3005981445312, + "logps/real": -435.64947509765625, + "loss": 0.1643, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -61.3538932800293, + "rewards/margins": 43.27713394165039, + "rewards/real": -18.076759338378906, + "step": 1240 + }, + { + "epoch": 0.8, + "learning_rate": 1.1130867709815078e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -926.37255859375, + "logps/real": -427.218505859375, + "loss": 0.1664, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -63.830535888671875, + "rewards/margins": 45.601234436035156, + "rewards/real": -18.229299545288086, + "step": 1250 + }, + { + "epoch": 0.81, + "learning_rate": 1.077524893314367e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -968.36279296875, + "logps/real": -382.15057373046875, + "loss": 0.1394, + "rewards/accuracies": 1.0, + "rewards/generated": -66.76223754882812, + "rewards/margins": 51.3797721862793, + "rewards/real": -15.382467269897461, + "step": 1260 + }, + { + "epoch": 0.81, + "learning_rate": 1.0419630156472262e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -958.3225708007812, + "logps/real": -481.63055419921875, + "loss": 0.3112, + "rewards/accuracies": 0.925000011920929, + "rewards/generated": -67.70050048828125, + "rewards/margins": 46.94557189941406, + "rewards/real": -20.754926681518555, + "step": 1270 + }, + { + "epoch": 0.82, + "learning_rate": 1.0064011379800854e-07, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -937.0823364257812, + "logps/real": -458.1944274902344, + "loss": 0.088, + "rewards/accuracies": 0.9375, + "rewards/generated": -64.70439910888672, + "rewards/margins": 45.66063690185547, + "rewards/real": -19.043764114379883, + "step": 1280 + }, + { + "epoch": 0.83, + "learning_rate": 9.708392603129445e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -929.6287841796875, + "logps/real": -412.03436279296875, + "loss": 0.1778, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -65.4493408203125, + "rewards/margins": 48.98451614379883, + "rewards/real": -16.46481704711914, + "step": 1290 + }, + { + "epoch": 0.83, + "learning_rate": 9.352773826458037e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -1027.0244140625, + "logps/real": -444.76092529296875, + "loss": 0.1457, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -72.01765441894531, + "rewards/margins": 53.2171745300293, + "rewards/real": -18.800477981567383, + "step": 1300 + }, + { + "epoch": 0.84, + "learning_rate": 8.997155049786629e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -937.8785400390625, + "logps/real": -400.3741760253906, + "loss": 0.0517, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -64.58556365966797, + "rewards/margins": 48.209327697753906, + "rewards/real": -16.37624168395996, + "step": 1310 + }, + { + "epoch": 0.84, + "learning_rate": 8.64153627311522e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -962.7506103515625, + "logps/real": -421.57952880859375, + "loss": 0.2135, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -67.5987777709961, + "rewards/margins": 49.87495803833008, + "rewards/real": -17.72382164001465, + "step": 1320 + }, + { + "epoch": 0.85, + "learning_rate": 8.285917496443812e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -929.7264404296875, + "logps/real": -431.411376953125, + "loss": 0.0276, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -63.279151916503906, + "rewards/margins": 45.76356506347656, + "rewards/real": -17.515583038330078, + "step": 1330 + }, + { + "epoch": 0.86, + "learning_rate": 7.930298719772404e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -960.4884033203125, + "logps/real": -449.7841796875, + "loss": 0.1428, + "rewards/accuracies": 1.0, + "rewards/generated": -67.2632827758789, + "rewards/margins": 49.2481689453125, + "rewards/real": -18.015111923217773, + "step": 1340 + }, + { + "epoch": 0.86, + "learning_rate": 7.574679943100994e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -918.400390625, + "logps/real": -389.68658447265625, + "loss": 0.2082, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -64.85960388183594, + "rewards/margins": 48.430328369140625, + "rewards/real": -16.429283142089844, + "step": 1350 + }, + { + "epoch": 0.87, + "learning_rate": 7.219061166429587e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -938.8449096679688, + "logps/real": -409.8709411621094, + "loss": 0.0844, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -66.37425994873047, + "rewards/margins": 50.338191986083984, + "rewards/real": -16.036067962646484, + "step": 1360 + }, + { + "epoch": 0.88, + "learning_rate": 6.863442389758179e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -961.57958984375, + "logps/real": -456.1361389160156, + "loss": 0.1468, + "rewards/accuracies": 0.925000011920929, + "rewards/generated": -67.77473449707031, + "rewards/margins": 49.818424224853516, + "rewards/real": -17.956310272216797, + "step": 1370 + }, + { + "epoch": 0.88, + "learning_rate": 6.507823613086771e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -1005.2136840820312, + "logps/real": -452.8905334472656, + "loss": 0.2335, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -70.96513366699219, + "rewards/margins": 52.77602005004883, + "rewards/real": -18.189117431640625, + "step": 1380 + }, + { + "epoch": 0.89, + "learning_rate": 6.152204836415363e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -981.8912963867188, + "logps/real": -449.5849609375, + "loss": 0.102, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -69.54399871826172, + "rewards/margins": 50.90169143676758, + "rewards/real": -18.642309188842773, + "step": 1390 + }, + { + "epoch": 0.9, + "learning_rate": 5.796586059743954e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -939.12841796875, + "logps/real": -418.45855712890625, + "loss": 0.053, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -64.34689331054688, + "rewards/margins": 47.31360626220703, + "rewards/real": -17.03328514099121, + "step": 1400 + }, + { + "epoch": 0.9, + "learning_rate": 5.4409672830725456e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -996.6143798828125, + "logps/real": -419.6319885253906, + "loss": 0.1242, + "rewards/accuracies": 1.0, + "rewards/generated": -69.24372863769531, + "rewards/margins": 51.62445831298828, + "rewards/real": -17.619264602661133, + "step": 1410 + }, + { + "epoch": 0.91, + "learning_rate": 5.0853485064011376e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -1000.04052734375, + "logps/real": -417.8561096191406, + "loss": 0.0612, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -70.85627746582031, + "rewards/margins": 52.63084030151367, + "rewards/real": -18.22542953491211, + "step": 1420 + }, + { + "epoch": 0.91, + "learning_rate": 4.72972972972973e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -934.1195068359375, + "logps/real": -418.022705078125, + "loss": 0.2932, + "rewards/accuracies": 0.9125000238418579, + "rewards/generated": -66.13401794433594, + "rewards/margins": 50.10154342651367, + "rewards/real": -16.0324764251709, + "step": 1430 + }, + { + "epoch": 0.92, + "learning_rate": 4.374110953058322e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -913.4429931640625, + "logps/real": -439.560791015625, + "loss": 0.3329, + "rewards/accuracies": 0.925000011920929, + "rewards/generated": -63.869712829589844, + "rewards/margins": 44.8972053527832, + "rewards/real": -18.972515106201172, + "step": 1440 + }, + { + "epoch": 0.93, + "learning_rate": 4.018492176386913e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -911.0625, + "logps/real": -418.2806091308594, + "loss": 0.2745, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -61.86652755737305, + "rewards/margins": 44.91896438598633, + "rewards/real": -16.94756317138672, + "step": 1450 + }, + { + "epoch": 0.93, + "learning_rate": 3.6628733997155046e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -981.3883666992188, + "logps/real": -376.0027770996094, + "loss": 0.3734, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -68.52994537353516, + "rewards/margins": 52.24712371826172, + "rewards/real": -16.282825469970703, + "step": 1460 + }, + { + "epoch": 0.94, + "learning_rate": 3.3072546230440967e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -1051.210205078125, + "logps/real": -414.3362731933594, + "loss": 0.1266, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -75.56956481933594, + "rewards/margins": 58.789466857910156, + "rewards/real": -16.780101776123047, + "step": 1470 + }, + { + "epoch": 0.95, + "learning_rate": 2.9516358463726884e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -964.0816650390625, + "logps/real": -437.93572998046875, + "loss": 0.1666, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -67.49874877929688, + "rewards/margins": 50.19408416748047, + "rewards/real": -17.304664611816406, + "step": 1480 + }, + { + "epoch": 0.95, + "learning_rate": 2.59601706970128e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -985.89404296875, + "logps/real": -457.82720947265625, + "loss": 0.2917, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -68.58557891845703, + "rewards/margins": 50.40437698364258, + "rewards/real": -18.181203842163086, + "step": 1490 + }, + { + "epoch": 0.96, + "learning_rate": 2.240398293029872e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -966.505859375, + "logps/real": -419.21337890625, + "loss": 0.2065, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -67.9041519165039, + "rewards/margins": 51.61236572265625, + "rewards/real": -16.291778564453125, + "step": 1500 + }, + { + "epoch": 0.96, + "eval_logits/generated": -Infinity, + "eval_logits/real": -Infinity, + "eval_logps/generated": -659.6319580078125, + "eval_logps/real": -295.5523376464844, + "eval_loss": 0.1642562448978424, + "eval_rewards/accuracies": 0.962579607963562, + "eval_rewards/generated": -36.8196907043457, + "eval_rewards/margins": 32.503150939941406, + "eval_rewards/real": -4.316542148590088, + "eval_runtime": 514.2854, + "eval_samples_per_second": 9.722, + "eval_steps_per_second": 0.305, + "step": 1500 + }, + { + "epoch": 0.97, + "learning_rate": 1.8847795163584636e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -906.3391723632812, + "logps/real": -424.758544921875, + "loss": 0.1254, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -62.673912048339844, + "rewards/margins": 45.69168472290039, + "rewards/real": -16.98223114013672, + "step": 1510 + }, + { + "epoch": 0.97, + "learning_rate": 1.5291607396870554e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -930.1663208007812, + "logps/real": -418.9947204589844, + "loss": 0.1585, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -64.94093322753906, + "rewards/margins": 47.36656951904297, + "rewards/real": -17.574371337890625, + "step": 1520 + }, + { + "epoch": 0.98, + "learning_rate": 1.1735419630156473e-08, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -991.5504760742188, + "logps/real": -432.77117919921875, + "loss": 0.0505, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -69.73775482177734, + "rewards/margins": 52.04203414916992, + "rewards/real": -17.69571876525879, + "step": 1530 + }, + { + "epoch": 0.99, + "learning_rate": 8.179231863442388e-09, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -957.3475341796875, + "logps/real": -448.859619140625, + "loss": 0.1641, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -66.77298736572266, + "rewards/margins": 47.40740203857422, + "rewards/real": -19.36557960510254, + "step": 1540 + }, + { + "epoch": 0.99, + "learning_rate": 4.623044096728307e-09, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -999.8143310546875, + "logps/real": -424.55364990234375, + "loss": 0.0578, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -71.12496948242188, + "rewards/margins": 54.26820755004883, + "rewards/real": -16.85675621032715, + "step": 1550 + }, + { + "epoch": 1.0, + "learning_rate": 1.0668563300142248e-09, + "logits/generated": -Infinity, + "logits/real": -Infinity, + "logps/generated": -885.8681640625, + "logps/real": -414.7955627441406, + "loss": 0.0539, + "rewards/accuracies": 1.0, + "rewards/generated": -60.05329513549805, + "rewards/margins": 43.077919006347656, + "rewards/real": -16.975379943847656, + "step": 1560 + }, { "epoch": 1.0, - "step": 782, + "step": 1563, "total_flos": 0.0, - "train_loss": 0.6284351689954493, - "train_runtime": 6560.8804, - "train_samples_per_second": 3.81, - "train_steps_per_second": 0.119 + "train_loss": 0.42768371397759275, + "train_runtime": 16836.7545, + "train_samples_per_second": 2.97, + "train_steps_per_second": 0.093 } ], "logging_steps": 10, - "max_steps": 782, + "max_steps": 1563, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100,