{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 9375, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.33049040511727e-10, "logits/generated": -1.8826184272766113, "logits/real": -1.7995665073394775, "logps/generated": -403.8936767578125, "logps/real": -344.43768310546875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 5.3304904051172705e-09, "logits/generated": -1.7489397525787354, "logits/real": -1.9043943881988525, "logps/generated": -387.8950500488281, "logps/real": -287.7349853515625, "loss": 0.6909, "rewards/accuracies": 0.4027777910232544, "rewards/generated": 0.0008104961016215384, "rewards/margins": 0.008820068091154099, "rewards/real": 0.009630562737584114, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.0660980810234541e-08, "logits/generated": -1.6604442596435547, "logits/real": -1.7629890441894531, "logps/generated": -389.0264892578125, "logps/real": -304.28338623046875, "loss": 0.6709, "rewards/accuracies": 0.6499999761581421, "rewards/generated": -0.0583014115691185, "rewards/margins": 0.061269670724868774, "rewards/real": 0.002968253567814827, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.5991471215351812e-08, "logits/generated": -1.754969596862793, "logits/real": -1.8286685943603516, "logps/generated": -407.74810791015625, "logps/real": -312.97747802734375, "loss": 0.6054, "rewards/accuracies": 0.800000011920929, "rewards/generated": -0.26939621567726135, "rewards/margins": 0.2280093878507614, "rewards/real": -0.04138679429888725, "step": 30 }, { "epoch": 0.01, "learning_rate": 2.1321961620469082e-08, "logits/generated": -1.7067832946777344, "logits/real": -1.7931840419769287, "logps/generated": -386.2824401855469, "logps/real": -292.9158020019531, "loss": 0.5189, "rewards/accuracies": 0.800000011920929, "rewards/generated": -0.5650928616523743, "rewards/margins": 0.44205719232559204, "rewards/real": -0.12303560972213745, "step": 40 }, { "epoch": 0.02, "learning_rate": 2.6652452025586352e-08, "logits/generated": -1.7791906595230103, "logits/real": -1.9039980173110962, "logps/generated": -402.95343017578125, "logps/real": -345.21856689453125, "loss": 0.4618, "rewards/accuracies": 0.925000011920929, "rewards/generated": -0.9019126892089844, "rewards/margins": 0.7108197212219238, "rewards/real": -0.19109299778938293, "step": 50 }, { "epoch": 0.02, "learning_rate": 3.1982942430703625e-08, "logits/generated": -1.7135913372039795, "logits/real": -1.8214619159698486, "logps/generated": -382.85003662109375, "logps/real": -295.09246826171875, "loss": 0.3989, "rewards/accuracies": 0.949999988079071, "rewards/generated": -1.145892858505249, "rewards/margins": 0.9312199354171753, "rewards/real": -0.21467280387878418, "step": 60 }, { "epoch": 0.02, "learning_rate": 3.731343283582089e-08, "logits/generated": -1.6685903072357178, "logits/real": -1.7730849981307983, "logps/generated": -405.3398132324219, "logps/real": -307.63970947265625, "loss": 0.3404, "rewards/accuracies": 0.887499988079071, "rewards/generated": -1.7393802404403687, "rewards/margins": 1.3477346897125244, "rewards/real": -0.39164555072784424, "step": 70 }, { "epoch": 0.03, "learning_rate": 4.2643923240938164e-08, "logits/generated": -1.6948124170303345, "logits/real": -1.846187949180603, "logps/generated": -405.7284240722656, "logps/real": -360.30682373046875, "loss": 0.3091, "rewards/accuracies": 0.925000011920929, "rewards/generated": -2.1506481170654297, "rewards/margins": 1.6161304712295532, "rewards/real": -0.5345176458358765, "step": 80 }, { "epoch": 0.03, "learning_rate": 4.7974413646055434e-08, "logits/generated": -1.7541179656982422, "logits/real": -1.8123397827148438, "logps/generated": -416.25689697265625, "logps/real": -325.4775695800781, "loss": 0.2753, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -2.514233112335205, "rewards/margins": 2.092501640319824, "rewards/real": -0.42173153162002563, "step": 90 }, { "epoch": 0.03, "learning_rate": 5.3304904051172704e-08, "logits/generated": -1.8153146505355835, "logits/real": -1.8279272317886353, "logps/generated": -395.2728271484375, "logps/real": -335.0040588378906, "loss": 0.2524, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.448599338531494, "rewards/margins": 2.130467414855957, "rewards/real": -0.31813228130340576, "step": 100 }, { "epoch": 0.04, "learning_rate": 5.8635394456289973e-08, "logits/generated": -1.6887117624282837, "logits/real": -1.7604955434799194, "logps/generated": -447.2493591308594, "logps/real": -334.47344970703125, "loss": 0.2073, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.40751314163208, "rewards/margins": 2.9876201152801514, "rewards/real": -0.41989272832870483, "step": 110 }, { "epoch": 0.04, "learning_rate": 6.396588486140725e-08, "logits/generated": -1.6772953271865845, "logits/real": -1.7890123128890991, "logps/generated": -393.61126708984375, "logps/real": -350.9790954589844, "loss": 0.2025, "rewards/accuracies": 0.949999988079071, "rewards/generated": -2.881321668624878, "rewards/margins": 2.3837037086486816, "rewards/real": -0.4976181387901306, "step": 120 }, { "epoch": 0.04, "learning_rate": 6.929637526652451e-08, "logits/generated": -1.6851392984390259, "logits/real": -1.7750492095947266, "logps/generated": -376.60565185546875, "logps/real": -317.86749267578125, "loss": 0.1834, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -3.27082896232605, "rewards/margins": 2.8993308544158936, "rewards/real": -0.3714984655380249, "step": 130 }, { "epoch": 0.04, "learning_rate": 7.462686567164178e-08, "logits/generated": -1.5641597509384155, "logits/real": -1.7553110122680664, "logps/generated": -423.54559326171875, "logps/real": -311.1752624511719, "loss": 0.1446, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -4.2097249031066895, "rewards/margins": 3.8489441871643066, "rewards/real": -0.36078035831451416, "step": 140 }, { "epoch": 0.05, "learning_rate": 7.995735607675907e-08, "logits/generated": -1.6019731760025024, "logits/real": -1.7481319904327393, "logps/generated": -453.563720703125, "logps/real": -314.98651123046875, "loss": 0.1327, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -5.146961212158203, "rewards/margins": 4.662848472595215, "rewards/real": -0.48411256074905396, "step": 150 }, { "epoch": 0.05, "learning_rate": 8.528784648187633e-08, "logits/generated": -1.5494823455810547, "logits/real": -1.7241367101669312, "logps/generated": -444.03521728515625, "logps/real": -352.8829040527344, "loss": 0.1291, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -5.463521957397461, "rewards/margins": 4.476443290710449, "rewards/real": -0.9870781898498535, "step": 160 }, { "epoch": 0.05, "learning_rate": 9.061833688699359e-08, "logits/generated": -1.591257095336914, "logits/real": -1.7483450174331665, "logps/generated": -450.844970703125, "logps/real": -319.1195068359375, "loss": 0.1246, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -5.638657569885254, "rewards/margins": 4.8695173263549805, "rewards/real": -0.7691398859024048, "step": 170 }, { "epoch": 0.06, "learning_rate": 9.594882729211087e-08, "logits/generated": -1.5604255199432373, "logits/real": -1.6746511459350586, "logps/generated": -455.9513244628906, "logps/real": -324.9789123535156, "loss": 0.1172, "rewards/accuracies": 0.949999988079071, "rewards/generated": -6.108395576477051, "rewards/margins": 4.853656768798828, "rewards/real": -1.254738450050354, "step": 180 }, { "epoch": 0.06, "learning_rate": 1.0127931769722814e-07, "logits/generated": -1.536947250366211, "logits/real": -1.5838528871536255, "logps/generated": -464.4476623535156, "logps/real": -289.5835266113281, "loss": 0.1013, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -6.77472448348999, "rewards/margins": 5.815009593963623, "rewards/real": -0.9597145318984985, "step": 190 }, { "epoch": 0.06, "learning_rate": 1.0660980810234541e-07, "logits/generated": -1.559515357017517, "logits/real": -1.6441863775253296, "logps/generated": -485.25335693359375, "logps/real": -331.6617736816406, "loss": 0.0884, "rewards/accuracies": 0.987500011920929, "rewards/generated": -7.514554500579834, "rewards/margins": 6.487561225891113, "rewards/real": -1.0269935131072998, "step": 200 }, { "epoch": 0.07, "learning_rate": 1.1194029850746268e-07, "logits/generated": -1.542763352394104, "logits/real": -1.6364444494247437, "logps/generated": -456.9220275878906, "logps/real": -353.18841552734375, "loss": 0.0903, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -7.124857425689697, "rewards/margins": 5.564694404602051, "rewards/real": -1.5601634979248047, "step": 210 }, { "epoch": 0.07, "learning_rate": 1.1727078891257995e-07, "logits/generated": -1.5230910778045654, "logits/real": -1.661611795425415, "logps/generated": -465.93853759765625, "logps/real": -355.095703125, "loss": 0.0933, "rewards/accuracies": 0.987500011920929, "rewards/generated": -8.115549087524414, "rewards/margins": 6.8037614822387695, "rewards/real": -1.31178879737854, "step": 220 }, { "epoch": 0.07, "learning_rate": 1.226012793176972e-07, "logits/generated": -1.4759384393692017, "logits/real": -1.7497766017913818, "logps/generated": -486.30755615234375, "logps/real": -409.23126220703125, "loss": 0.0696, "rewards/accuracies": 1.0, "rewards/generated": -8.072816848754883, "rewards/margins": 6.9501166343688965, "rewards/real": -1.1227010488510132, "step": 230 }, { "epoch": 0.08, "learning_rate": 1.279317697228145e-07, "logits/generated": -1.4614694118499756, "logits/real": -1.6280324459075928, "logps/generated": -480.77569580078125, "logps/real": -312.9775085449219, "loss": 0.0667, "rewards/accuracies": 1.0, "rewards/generated": -8.565336227416992, "rewards/margins": 7.487596035003662, "rewards/real": -1.0777397155761719, "step": 240 }, { "epoch": 0.08, "learning_rate": 1.3326226012793176e-07, "logits/generated": -1.5403387546539307, "logits/real": -1.71304190158844, "logps/generated": -497.54376220703125, "logps/real": -393.9128723144531, "loss": 0.0848, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -8.98359489440918, "rewards/margins": 7.220086574554443, "rewards/real": -1.7635078430175781, "step": 250 }, { "epoch": 0.08, "learning_rate": 1.3859275053304903e-07, "logits/generated": -1.395140290260315, "logits/real": -1.6418180465698242, "logps/generated": -477.68243408203125, "logps/real": -353.9394836425781, "loss": 0.0671, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -8.58221435546875, "rewards/margins": 7.423464775085449, "rewards/real": -1.1587491035461426, "step": 260 }, { "epoch": 0.09, "learning_rate": 1.439232409381663e-07, "logits/generated": -1.4475288391113281, "logits/real": -1.5991582870483398, "logps/generated": -463.43817138671875, "logps/real": -314.2174987792969, "loss": 0.065, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -7.937777519226074, "rewards/margins": 7.0476579666137695, "rewards/real": -0.890119194984436, "step": 270 }, { "epoch": 0.09, "learning_rate": 1.4925373134328355e-07, "logits/generated": -1.4230304956436157, "logits/real": -1.5950751304626465, "logps/generated": -451.63006591796875, "logps/real": -333.942138671875, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/generated": -8.133646965026855, "rewards/margins": 7.216717720031738, "rewards/real": -0.9169293642044067, "step": 280 }, { "epoch": 0.09, "learning_rate": 1.5458422174840087e-07, "logits/generated": -1.4459788799285889, "logits/real": -1.5633540153503418, "logps/generated": -492.1522521972656, "logps/real": -360.3606262207031, "loss": 0.0527, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -9.392778396606445, "rewards/margins": 8.276235580444336, "rewards/real": -1.1165430545806885, "step": 290 }, { "epoch": 0.1, "learning_rate": 1.5991471215351813e-07, "logits/generated": -1.4453847408294678, "logits/real": -1.5341203212738037, "logps/generated": -458.6255798339844, "logps/real": -305.7059326171875, "loss": 0.0729, "rewards/accuracies": 1.0, "rewards/generated": -9.357995986938477, "rewards/margins": 8.50068187713623, "rewards/real": -0.8573150634765625, "step": 300 }, { "epoch": 0.1, "learning_rate": 1.652452025586354e-07, "logits/generated": -1.423008680343628, "logits/real": -1.6011879444122314, "logps/generated": -469.68695068359375, "logps/real": -369.8955078125, "loss": 0.0546, "rewards/accuracies": 0.987500011920929, "rewards/generated": -9.097238540649414, "rewards/margins": 8.363798141479492, "rewards/real": -0.7334394454956055, "step": 310 }, { "epoch": 0.1, "learning_rate": 1.7057569296375266e-07, "logits/generated": -1.3465330600738525, "logits/real": -1.4571083784103394, "logps/generated": -523.9188232421875, "logps/real": -312.72979736328125, "loss": 0.0486, "rewards/accuracies": 0.949999988079071, "rewards/generated": -10.398682594299316, "rewards/margins": 10.032429695129395, "rewards/real": -0.36625421047210693, "step": 320 }, { "epoch": 0.11, "learning_rate": 1.7590618336886992e-07, "logits/generated": -1.4364181756973267, "logits/real": -1.6128456592559814, "logps/generated": -487.78912353515625, "logps/real": -324.5415344238281, "loss": 0.06, "rewards/accuracies": 0.987500011920929, "rewards/generated": -9.630109786987305, "rewards/margins": 8.89991569519043, "rewards/real": -0.7301940321922302, "step": 330 }, { "epoch": 0.11, "learning_rate": 1.8123667377398718e-07, "logits/generated": -1.4497346878051758, "logits/real": -1.5822639465332031, "logps/generated": -505.73028564453125, "logps/real": -292.947265625, "loss": 0.0385, "rewards/accuracies": 0.987500011920929, "rewards/generated": -10.920869827270508, "rewards/margins": 9.890485763549805, "rewards/real": -1.030383825302124, "step": 340 }, { "epoch": 0.11, "learning_rate": 1.8656716417910447e-07, "logits/generated": -1.3929362297058105, "logits/real": -1.5212783813476562, "logps/generated": -541.8226318359375, "logps/real": -317.99932861328125, "loss": 0.0415, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -12.88868522644043, "rewards/margins": 11.808540344238281, "rewards/real": -1.0801454782485962, "step": 350 }, { "epoch": 0.12, "learning_rate": 1.9189765458422174e-07, "logits/generated": -1.3346354961395264, "logits/real": -1.5566127300262451, "logps/generated": -527.8130493164062, "logps/real": -329.54571533203125, "loss": 0.0442, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -11.514655113220215, "rewards/margins": 10.130718231201172, "rewards/real": -1.3839375972747803, "step": 360 }, { "epoch": 0.12, "learning_rate": 1.9722814498933903e-07, "logits/generated": -1.2722876071929932, "logits/real": -1.5687021017074585, "logps/generated": -506.6629943847656, "logps/real": -324.5752258300781, "loss": 0.034, "rewards/accuracies": 0.987500011920929, "rewards/generated": -11.572894096374512, "rewards/margins": 10.359813690185547, "rewards/real": -1.213080644607544, "step": 370 }, { "epoch": 0.12, "learning_rate": 2.025586353944563e-07, "logits/generated": -1.3695201873779297, "logits/real": -1.6315975189208984, "logps/generated": -464.67181396484375, "logps/real": -353.6014709472656, "loss": 0.047, "rewards/accuracies": 1.0, "rewards/generated": -10.642364501953125, "rewards/margins": 9.726277351379395, "rewards/real": -0.9160875082015991, "step": 380 }, { "epoch": 0.12, "learning_rate": 2.0788912579957355e-07, "logits/generated": -1.3165477514266968, "logits/real": -1.5160284042358398, "logps/generated": -515.6984252929688, "logps/real": -316.2281799316406, "loss": 0.0507, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -12.083415031433105, "rewards/margins": 11.66950798034668, "rewards/real": -0.41390690207481384, "step": 390 }, { "epoch": 0.13, "learning_rate": 2.1321961620469082e-07, "logits/generated": -1.1762816905975342, "logits/real": -1.505501389503479, "logps/generated": -522.8754272460938, "logps/real": -369.6803283691406, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/generated": -13.3060302734375, "rewards/margins": 12.078089714050293, "rewards/real": -1.2279411554336548, "step": 400 }, { "epoch": 0.13, "learning_rate": 2.185501066098081e-07, "logits/generated": -1.1128976345062256, "logits/real": -1.4507527351379395, "logps/generated": -563.6268310546875, "logps/real": -351.15802001953125, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/generated": -15.432531356811523, "rewards/margins": 14.190030097961426, "rewards/real": -1.2425031661987305, "step": 410 }, { "epoch": 0.13, "learning_rate": 2.2388059701492537e-07, "logits/generated": -1.210235357284546, "logits/real": -1.5492087602615356, "logps/generated": -517.4049072265625, "logps/real": -346.5636291503906, "loss": 0.0368, "rewards/accuracies": 0.987500011920929, "rewards/generated": -13.797286987304688, "rewards/margins": 12.366052627563477, "rewards/real": -1.431235671043396, "step": 420 }, { "epoch": 0.14, "learning_rate": 2.2921108742004263e-07, "logits/generated": -1.2352956533432007, "logits/real": -1.5816096067428589, "logps/generated": -529.5254516601562, "logps/real": -333.84271240234375, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/generated": -13.768930435180664, "rewards/margins": 13.00990104675293, "rewards/real": -0.7590312361717224, "step": 430 }, { "epoch": 0.14, "learning_rate": 2.345415778251599e-07, "logits/generated": -1.2411397695541382, "logits/real": -1.5679179430007935, "logps/generated": -503.085205078125, "logps/real": -343.2332458496094, "loss": 0.0391, "rewards/accuracies": 0.987500011920929, "rewards/generated": -12.77052116394043, "rewards/margins": 12.486634254455566, "rewards/real": -0.28388747572898865, "step": 440 }, { "epoch": 0.14, "learning_rate": 2.3987206823027716e-07, "logits/generated": -1.135864019393921, "logits/real": -1.5101805925369263, "logps/generated": -536.2482299804688, "logps/real": -373.39569091796875, "loss": 0.0328, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -14.448722839355469, "rewards/margins": 13.376909255981445, "rewards/real": -1.0718127489089966, "step": 450 }, { "epoch": 0.15, "learning_rate": 2.452025586353944e-07, "logits/generated": -1.1627209186553955, "logits/real": -1.4507901668548584, "logps/generated": -542.908447265625, "logps/real": -350.0838317871094, "loss": 0.0669, "rewards/accuracies": 1.0, "rewards/generated": -15.044692993164062, "rewards/margins": 14.695103645324707, "rewards/real": -0.3495886027812958, "step": 460 }, { "epoch": 0.15, "learning_rate": 2.505330490405117e-07, "logits/generated": -1.1846784353256226, "logits/real": -1.3428871631622314, "logps/generated": -531.530517578125, "logps/real": -300.04010009765625, "loss": 0.0504, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -15.481117248535156, "rewards/margins": 14.126989364624023, "rewards/real": -1.3541295528411865, "step": 470 }, { "epoch": 0.15, "learning_rate": 2.55863539445629e-07, "logits/generated": -1.2153561115264893, "logits/real": -1.4080696105957031, "logps/generated": -598.9599609375, "logps/real": -333.67205810546875, "loss": 0.0524, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.319135665893555, "rewards/margins": 16.647058486938477, "rewards/real": -1.672079086303711, "step": 480 }, { "epoch": 0.16, "learning_rate": 2.611940298507462e-07, "logits/generated": -1.1052753925323486, "logits/real": -1.4038926362991333, "logps/generated": -625.4494018554688, "logps/real": -298.06854248046875, "loss": 0.0277, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.859481811523438, "rewards/margins": 18.040180206298828, "rewards/real": -1.8193010091781616, "step": 490 }, { "epoch": 0.16, "learning_rate": 2.665245202558635e-07, "logits/generated": -1.1053588390350342, "logits/real": -1.3924005031585693, "logps/generated": -576.0348510742188, "logps/real": -353.4833068847656, "loss": 0.0337, "rewards/accuracies": 0.987500011920929, "rewards/generated": -16.51312255859375, "rewards/margins": 15.454565048217773, "rewards/real": -1.058556318283081, "step": 500 }, { "epoch": 0.16, "learning_rate": 2.7185501066098084e-07, "logits/generated": -1.1117361783981323, "logits/real": -1.4525091648101807, "logps/generated": -566.1034545898438, "logps/real": -364.39886474609375, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/generated": -17.288684844970703, "rewards/margins": 15.412053108215332, "rewards/real": -1.876631498336792, "step": 510 }, { "epoch": 0.17, "learning_rate": 2.7718550106609805e-07, "logits/generated": -0.9736140966415405, "logits/real": -1.4299156665802002, "logps/generated": -591.044921875, "logps/real": -321.2178649902344, "loss": 0.0314, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.490947723388672, "rewards/margins": 16.805511474609375, "rewards/real": -0.685435950756073, "step": 520 }, { "epoch": 0.17, "learning_rate": 2.8251599147121537e-07, "logits/generated": -1.0898406505584717, "logits/real": -1.4943970441818237, "logps/generated": -500.5962829589844, "logps/real": -335.22882080078125, "loss": 0.0277, "rewards/accuracies": 0.987500011920929, "rewards/generated": -12.250688552856445, "rewards/margins": 12.409687995910645, "rewards/real": 0.1589992791414261, "step": 530 }, { "epoch": 0.17, "learning_rate": 2.878464818763326e-07, "logits/generated": -0.9211187362670898, "logits/real": -1.2392743825912476, "logps/generated": -626.4434814453125, "logps/real": -259.01593017578125, "loss": 0.0719, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -20.40229606628418, "rewards/margins": 20.59931182861328, "rewards/real": 0.19701404869556427, "step": 540 }, { "epoch": 0.18, "learning_rate": 2.931769722814499e-07, "logits/generated": -1.020733118057251, "logits/real": -1.3917875289916992, "logps/generated": -598.0836791992188, "logps/real": -350.3652038574219, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/generated": -18.26373863220215, "rewards/margins": 18.554927825927734, "rewards/real": 0.2911873757839203, "step": 550 }, { "epoch": 0.18, "learning_rate": 2.985074626865671e-07, "logits/generated": -1.160035490989685, "logits/real": -1.3027687072753906, "logps/generated": -570.22705078125, "logps/real": -318.4032287597656, "loss": 0.0222, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -16.718929290771484, "rewards/margins": 16.853124618530273, "rewards/real": 0.1341935694217682, "step": 560 }, { "epoch": 0.18, "learning_rate": 3.038379530916844e-07, "logits/generated": -1.0827645063400269, "logits/real": -1.3972750902175903, "logps/generated": -562.3015747070312, "logps/real": -343.81829833984375, "loss": 0.0299, "rewards/accuracies": 0.987500011920929, "rewards/generated": -16.274452209472656, "rewards/margins": 15.849164009094238, "rewards/real": -0.4252890944480896, "step": 570 }, { "epoch": 0.19, "learning_rate": 3.0916844349680174e-07, "logits/generated": -0.9818236231803894, "logits/real": -1.3140885829925537, "logps/generated": -516.5469360351562, "logps/real": -355.1119689941406, "loss": 0.0385, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.40649127960205, "rewards/margins": 14.21912670135498, "rewards/real": -1.187363624572754, "step": 580 }, { "epoch": 0.19, "learning_rate": 3.1449893390191895e-07, "logits/generated": -1.0166418552398682, "logits/real": -1.3364452123641968, "logps/generated": -538.85400390625, "logps/real": -324.9503479003906, "loss": 0.0917, "rewards/accuracies": 0.949999988079071, "rewards/generated": -15.649869918823242, "rewards/margins": 15.868939399719238, "rewards/real": 0.21906885504722595, "step": 590 }, { "epoch": 0.19, "learning_rate": 3.1982942430703626e-07, "logits/generated": -1.0271713733673096, "logits/real": -1.263146162033081, "logps/generated": -567.3302612304688, "logps/real": -291.7242431640625, "loss": 0.0682, "rewards/accuracies": 1.0, "rewards/generated": -18.165842056274414, "rewards/margins": 17.980998992919922, "rewards/real": -0.1848386824131012, "step": 600 }, { "epoch": 0.2, "learning_rate": 3.2515991471215347e-07, "logits/generated": -0.9488552212715149, "logits/real": -1.2689321041107178, "logps/generated": -598.2969970703125, "logps/real": -355.51934814453125, "loss": 0.0623, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.530506134033203, "rewards/margins": 19.943836212158203, "rewards/real": -1.5866692066192627, "step": 610 }, { "epoch": 0.2, "learning_rate": 3.304904051172708e-07, "logits/generated": -1.002000331878662, "logits/real": -1.1596615314483643, "logps/generated": -631.7313232421875, "logps/real": -325.359130859375, "loss": 0.022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.148792266845703, "rewards/margins": 21.61235237121582, "rewards/real": -1.5364404916763306, "step": 620 }, { "epoch": 0.2, "learning_rate": 3.3582089552238805e-07, "logits/generated": -0.8440952301025391, "logits/real": -1.2660382986068726, "logps/generated": -635.0595092773438, "logps/real": -346.52239990234375, "loss": 0.0217, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.026508331298828, "rewards/margins": 21.37133026123047, "rewards/real": -2.6551785469055176, "step": 630 }, { "epoch": 0.2, "learning_rate": 3.411513859275053e-07, "logits/generated": -0.9189499020576477, "logits/real": -1.3209021091461182, "logps/generated": -581.1162109375, "logps/real": -347.0169372558594, "loss": 0.0424, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -20.726146697998047, "rewards/margins": 18.92522621154785, "rewards/real": -1.8009216785430908, "step": 640 }, { "epoch": 0.21, "learning_rate": 3.464818763326226e-07, "logits/generated": -0.9096330404281616, "logits/real": -1.1646416187286377, "logps/generated": -638.0400390625, "logps/real": -326.70819091796875, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/generated": -23.011674880981445, "rewards/margins": 20.560462951660156, "rewards/real": -2.4512124061584473, "step": 650 }, { "epoch": 0.21, "learning_rate": 3.5181236673773984e-07, "logits/generated": -0.8807134628295898, "logits/real": -1.0881080627441406, "logps/generated": -645.7661743164062, "logps/real": -340.04681396484375, "loss": 0.0481, "rewards/accuracies": 1.0, "rewards/generated": -24.14626693725586, "rewards/margins": 21.388887405395508, "rewards/real": -2.757380962371826, "step": 660 }, { "epoch": 0.21, "learning_rate": 3.5714285714285716e-07, "logits/generated": -1.2381738424301147, "logits/real": -1.346702218055725, "logps/generated": -547.367919921875, "logps/real": -348.85137939453125, "loss": 0.0613, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -13.925430297851562, "rewards/margins": 13.218635559082031, "rewards/real": -0.7067966461181641, "step": 670 }, { "epoch": 0.22, "learning_rate": 3.6247334754797437e-07, "logits/generated": -1.0652974843978882, "logits/real": -1.292110800743103, "logps/generated": -543.0350952148438, "logps/real": -339.61285400390625, "loss": 0.0284, "rewards/accuracies": 0.987500011920929, "rewards/generated": -16.47148895263672, "rewards/margins": 14.554428100585938, "rewards/real": -1.9170618057250977, "step": 680 }, { "epoch": 0.22, "learning_rate": 3.678038379530917e-07, "logits/generated": -0.9800616502761841, "logits/real": -1.1700372695922852, "logps/generated": -584.2894287109375, "logps/real": -363.9668273925781, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/generated": -18.656932830810547, "rewards/margins": 17.38612937927246, "rewards/real": -1.2708007097244263, "step": 690 }, { "epoch": 0.22, "learning_rate": 3.7313432835820895e-07, "logits/generated": -1.0084197521209717, "logits/real": -0.9993604421615601, "logps/generated": -538.5408325195312, "logps/real": -324.64520263671875, "loss": 0.0687, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -15.12083625793457, "rewards/margins": 14.68799877166748, "rewards/real": -0.4328370988368988, "step": 700 }, { "epoch": 0.23, "learning_rate": 3.784648187633262e-07, "logits/generated": -0.9193227887153625, "logits/real": -1.1186764240264893, "logps/generated": -556.6070556640625, "logps/real": -377.97259521484375, "loss": 0.0925, "rewards/accuracies": 0.987500011920929, "rewards/generated": -16.488893508911133, "rewards/margins": 15.544939994812012, "rewards/real": -0.943952202796936, "step": 710 }, { "epoch": 0.23, "learning_rate": 3.8379530916844347e-07, "logits/generated": -0.9582284092903137, "logits/real": -1.1462305784225464, "logps/generated": -557.4412841796875, "logps/real": -316.1856994628906, "loss": 0.0587, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.733484268188477, "rewards/margins": 17.631059646606445, "rewards/real": -0.10242464393377304, "step": 720 }, { "epoch": 0.23, "learning_rate": 3.8912579957356074e-07, "logits/generated": -0.9639987945556641, "logits/real": -1.1116389036178589, "logps/generated": -579.411376953125, "logps/real": -360.36968994140625, "loss": 0.0502, "rewards/accuracies": 1.0, "rewards/generated": -19.355396270751953, "rewards/margins": 18.144670486450195, "rewards/real": -1.2107274532318115, "step": 730 }, { "epoch": 0.24, "learning_rate": 3.9445628997867805e-07, "logits/generated": -0.7743756175041199, "logits/real": -1.1329243183135986, "logps/generated": -600.0618896484375, "logps/real": -326.8385925292969, "loss": 0.0344, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.37815284729004, "rewards/margins": 19.694599151611328, "rewards/real": -0.683555543422699, "step": 740 }, { "epoch": 0.24, "learning_rate": 3.9978678038379526e-07, "logits/generated": -0.7812870144844055, "logits/real": -0.9252561330795288, "logps/generated": -565.31787109375, "logps/real": -309.7308654785156, "loss": 0.011, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.80691909790039, "rewards/margins": 17.52267074584961, "rewards/real": -0.2842453420162201, "step": 750 }, { "epoch": 0.24, "learning_rate": 4.051172707889126e-07, "logits/generated": -0.721796452999115, "logits/real": -0.9380915760993958, "logps/generated": -636.4271850585938, "logps/real": -328.95880126953125, "loss": 0.0225, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.856843948364258, "rewards/margins": 21.279296875, "rewards/real": -0.577548623085022, "step": 760 }, { "epoch": 0.25, "learning_rate": 4.1044776119402984e-07, "logits/generated": -0.8223736882209778, "logits/real": -0.9600740671157837, "logps/generated": -642.3658447265625, "logps/real": -351.7137756347656, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/generated": -23.127605438232422, "rewards/margins": 21.979970932006836, "rewards/real": -1.1476361751556396, "step": 770 }, { "epoch": 0.25, "learning_rate": 4.157782515991471e-07, "logits/generated": -0.6737440228462219, "logits/real": -0.9427323341369629, "logps/generated": -606.7221069335938, "logps/real": -317.69134521484375, "loss": 0.1004, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -20.423994064331055, "rewards/margins": 20.49788475036621, "rewards/real": 0.0738908052444458, "step": 780 }, { "epoch": 0.25, "learning_rate": 4.2110874200426437e-07, "logits/generated": -0.5200916528701782, "logits/real": -0.9301559329032898, "logps/generated": -597.6541748046875, "logps/real": -360.3106384277344, "loss": 0.0697, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.777925491333008, "rewards/margins": 17.938873291015625, "rewards/real": -1.839052438735962, "step": 790 }, { "epoch": 0.26, "learning_rate": 4.2643923240938163e-07, "logits/generated": -0.48490971326828003, "logits/real": -0.8440488576889038, "logps/generated": -638.52392578125, "logps/real": -314.540771484375, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/generated": -23.257488250732422, "rewards/margins": 22.10464096069336, "rewards/real": -1.1528491973876953, "step": 800 }, { "epoch": 0.26, "learning_rate": 4.317697228144989e-07, "logits/generated": -0.6229613423347473, "logits/real": -0.9615219831466675, "logps/generated": -565.7908935546875, "logps/real": -279.216064453125, "loss": 0.0349, "rewards/accuracies": 1.0, "rewards/generated": -20.155855178833008, "rewards/margins": 19.441150665283203, "rewards/real": -0.7147022485733032, "step": 810 }, { "epoch": 0.26, "learning_rate": 4.371002132196162e-07, "logits/generated": -0.5994982123374939, "logits/real": -1.146607756614685, "logps/generated": -589.715087890625, "logps/real": -355.19775390625, "loss": 0.0646, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.48986053466797, "rewards/margins": 19.900867462158203, "rewards/real": -0.5889959335327148, "step": 820 }, { "epoch": 0.27, "learning_rate": 4.4243070362473347e-07, "logits/generated": -0.7427669167518616, "logits/real": -1.1212642192840576, "logps/generated": -686.93994140625, "logps/real": -332.35089111328125, "loss": 0.0821, "rewards/accuracies": 1.0, "rewards/generated": -27.279861450195312, "rewards/margins": 24.79286003112793, "rewards/real": -2.4869980812072754, "step": 830 }, { "epoch": 0.27, "learning_rate": 4.4776119402985074e-07, "logits/generated": -0.6697017550468445, "logits/real": -1.123647928237915, "logps/generated": -608.34619140625, "logps/real": -361.9219055175781, "loss": 0.2129, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -22.527164459228516, "rewards/margins": 20.38083839416504, "rewards/real": -2.1463239192962646, "step": 840 }, { "epoch": 0.27, "learning_rate": 4.53091684434968e-07, "logits/generated": -0.6786950826644897, "logits/real": -1.1746528148651123, "logps/generated": -612.7151489257812, "logps/real": -316.81231689453125, "loss": 0.0921, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -21.420848846435547, "rewards/margins": 21.539230346679688, "rewards/real": 0.11838479340076447, "step": 850 }, { "epoch": 0.28, "learning_rate": 4.5842217484008526e-07, "logits/generated": -0.647723913192749, "logits/real": -1.2561864852905273, "logps/generated": -609.9498901367188, "logps/real": -344.63385009765625, "loss": 0.0967, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -21.064952850341797, "rewards/margins": 20.77135467529297, "rewards/real": -0.29359906911849976, "step": 860 }, { "epoch": 0.28, "learning_rate": 4.637526652452025e-07, "logits/generated": -0.9066628217697144, "logits/real": -1.4026243686676025, "logps/generated": -566.8709106445312, "logps/real": -324.4098815917969, "loss": 0.0464, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -18.857772827148438, "rewards/margins": 18.00718116760254, "rewards/real": -0.8505916595458984, "step": 870 }, { "epoch": 0.28, "learning_rate": 4.690831556503198e-07, "logits/generated": -1.0487242937088013, "logits/real": -1.3853559494018555, "logps/generated": -573.9552001953125, "logps/real": -296.62115478515625, "loss": 0.028, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.28946304321289, "rewards/margins": 19.398359298706055, "rewards/real": 2.108898639678955, "step": 880 }, { "epoch": 0.28, "learning_rate": 4.744136460554371e-07, "logits/generated": -0.7771695256233215, "logits/real": -1.3479080200195312, "logps/generated": -620.6242065429688, "logps/real": -335.8597717285156, "loss": 0.0447, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -20.570037841796875, "rewards/margins": 21.122669219970703, "rewards/real": 0.552629828453064, "step": 890 }, { "epoch": 0.29, "learning_rate": 4.797441364605543e-07, "logits/generated": -1.0141932964324951, "logits/real": -1.4638208150863647, "logps/generated": -529.6288452148438, "logps/real": -303.21026611328125, "loss": 0.059, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -15.210214614868164, "rewards/margins": 16.335933685302734, "rewards/real": 1.1257202625274658, "step": 900 }, { "epoch": 0.29, "learning_rate": 4.850746268656717e-07, "logits/generated": -0.8784846067428589, "logits/real": -1.3946049213409424, "logps/generated": -570.2777099609375, "logps/real": -336.4716796875, "loss": 0.0831, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.878326416015625, "rewards/margins": 18.196462631225586, "rewards/real": 0.3181368112564087, "step": 910 }, { "epoch": 0.29, "learning_rate": 4.904051172707888e-07, "logits/generated": -0.5604439377784729, "logits/real": -1.0903195142745972, "logps/generated": -625.4072265625, "logps/real": -302.01470947265625, "loss": 0.0594, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -21.253019332885742, "rewards/margins": 21.284252166748047, "rewards/real": 0.0312324408441782, "step": 920 }, { "epoch": 0.3, "learning_rate": 4.957356076759062e-07, "logits/generated": -0.4214113652706146, "logits/real": -1.1398684978485107, "logps/generated": -675.2928466796875, "logps/real": -317.0589599609375, "loss": 0.0624, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.430103302001953, "rewards/margins": 25.21658706665039, "rewards/real": -0.21351394057273865, "step": 930 }, { "epoch": 0.3, "learning_rate": 4.998814744577456e-07, "logits/generated": -0.7198468446731567, "logits/real": -1.2382924556732178, "logps/generated": -663.3746337890625, "logps/real": -340.4453125, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/generated": -27.136676788330078, "rewards/margins": 24.233760833740234, "rewards/real": -2.902914524078369, "step": 940 }, { "epoch": 0.3, "learning_rate": 4.992888467464738e-07, "logits/generated": -0.6365340352058411, "logits/real": -1.2204135656356812, "logps/generated": -607.4127197265625, "logps/real": -347.8791809082031, "loss": 0.0395, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -22.3405818939209, "rewards/margins": 20.96994972229004, "rewards/real": -1.3706319332122803, "step": 950 }, { "epoch": 0.31, "learning_rate": 4.986962190352021e-07, "logits/generated": -0.3528062701225281, "logits/real": -1.0751874446868896, "logps/generated": -617.19384765625, "logps/real": -343.6873474121094, "loss": 0.0759, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.447063446044922, "rewards/margins": 22.974315643310547, "rewards/real": -1.4727448225021362, "step": 960 }, { "epoch": 0.31, "learning_rate": 4.981035913239302e-07, "logits/generated": -0.5571753978729248, "logits/real": -1.3170697689056396, "logps/generated": -509.511962890625, "logps/real": -326.5060729980469, "loss": 0.117, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -13.880078315734863, "rewards/margins": 14.30474853515625, "rewards/real": 0.42466872930526733, "step": 970 }, { "epoch": 0.31, "learning_rate": 4.975109636126585e-07, "logits/generated": 0.031067097559571266, "logits/real": -0.8951910734176636, "logps/generated": -644.8367309570312, "logps/real": -297.2452087402344, "loss": 0.0844, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -25.296676635742188, "rewards/margins": 24.991586685180664, "rewards/real": -0.30508843064308167, "step": 980 }, { "epoch": 0.32, "learning_rate": 4.969183359013867e-07, "logits/generated": -0.1350618600845337, "logits/real": -1.1355469226837158, "logps/generated": -546.244873046875, "logps/real": -359.3175048828125, "loss": 0.0814, "rewards/accuracies": 0.949999988079071, "rewards/generated": -19.15607452392578, "rewards/margins": 19.000545501708984, "rewards/real": -0.15552793443202972, "step": 990 }, { "epoch": 0.32, "learning_rate": 4.96325708190115e-07, "logits/generated": -0.11877751350402832, "logits/real": -1.002561092376709, "logps/generated": -697.0966796875, "logps/real": -361.0674743652344, "loss": 0.0431, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.546356201171875, "rewards/margins": 27.693737030029297, "rewards/real": -2.8526175022125244, "step": 1000 }, { "epoch": 0.32, "learning_rate": 4.957330804788432e-07, "logits/generated": -0.3785037100315094, "logits/real": -1.0688451528549194, "logps/generated": -747.1729736328125, "logps/real": -315.0045166015625, "loss": 0.0902, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -32.87823486328125, "rewards/margins": 30.961559295654297, "rewards/real": -1.9166730642318726, "step": 1010 }, { "epoch": 0.33, "learning_rate": 4.951404527675714e-07, "logits/generated": -0.4004201889038086, "logits/real": -1.2025635242462158, "logps/generated": -725.2938232421875, "logps/real": -362.38348388671875, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/generated": -32.35773849487305, "rewards/margins": 29.627965927124023, "rewards/real": -2.7297706604003906, "step": 1020 }, { "epoch": 0.33, "learning_rate": 4.945478250562996e-07, "logits/generated": -0.4088473916053772, "logits/real": -1.1282621622085571, "logps/generated": -723.2651977539062, "logps/real": -359.61248779296875, "loss": 0.0153, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.53559494018555, "rewards/margins": 28.994110107421875, "rewards/real": -4.541485786437988, "step": 1030 }, { "epoch": 0.33, "learning_rate": 4.939551973450278e-07, "logits/generated": -0.29500117897987366, "logits/real": -1.215680718421936, "logps/generated": -781.2555541992188, "logps/real": -359.85198974609375, "loss": 0.1038, "rewards/accuracies": 0.949999988079071, "rewards/generated": -36.472259521484375, "rewards/margins": 31.787761688232422, "rewards/real": -4.684496879577637, "step": 1040 }, { "epoch": 0.34, "learning_rate": 4.933625696337561e-07, "logits/generated": -0.2509937286376953, "logits/real": -1.2540266513824463, "logps/generated": -760.2052612304688, "logps/real": -320.0561828613281, "loss": 0.1264, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.826995849609375, "rewards/margins": 32.62900924682617, "rewards/real": -2.197985887527466, "step": 1050 }, { "epoch": 0.34, "learning_rate": 4.927699419224843e-07, "logits/generated": -0.141755610704422, "logits/real": -1.0157017707824707, "logps/generated": -612.1473388671875, "logps/real": -355.87091064453125, "loss": 0.0481, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -26.255905151367188, "rewards/margins": 23.14793586730957, "rewards/real": -3.107970714569092, "step": 1060 }, { "epoch": 0.34, "learning_rate": 4.921773142112125e-07, "logits/generated": 0.0805276483297348, "logits/real": -0.4895111918449402, "logps/generated": -673.2080078125, "logps/real": -388.30377197265625, "loss": 0.0606, "rewards/accuracies": 0.949999988079071, "rewards/generated": -28.200572967529297, "rewards/margins": 23.83773422241211, "rewards/real": -4.362841606140137, "step": 1070 }, { "epoch": 0.35, "learning_rate": 4.915846864999407e-07, "logits/generated": 0.006424567196518183, "logits/real": -0.6142998337745667, "logps/generated": -738.597900390625, "logps/real": -373.8583984375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/generated": -33.54041290283203, "rewards/margins": 28.793270111083984, "rewards/real": -4.7471418380737305, "step": 1080 }, { "epoch": 0.35, "learning_rate": 4.909920587886689e-07, "logits/generated": -0.3021875023841858, "logits/real": -1.0494592189788818, "logps/generated": -647.1887817382812, "logps/real": -317.3564453125, "loss": 0.052, "rewards/accuracies": 1.0, "rewards/generated": -26.67519187927246, "rewards/margins": 26.018726348876953, "rewards/real": -0.6564682722091675, "step": 1090 }, { "epoch": 0.35, "learning_rate": 4.903994310773972e-07, "logits/generated": -0.07100073248147964, "logits/real": -0.7578374147415161, "logps/generated": -597.0128173828125, "logps/real": -317.7532958984375, "loss": 0.1071, "rewards/accuracies": 0.9375, "rewards/generated": -20.897869110107422, "rewards/margins": 21.192420959472656, "rewards/real": 0.29454854130744934, "step": 1100 }, { "epoch": 0.36, "learning_rate": 4.898068033661254e-07, "logits/generated": 0.026780009269714355, "logits/real": -0.961300253868103, "logps/generated": -630.4881591796875, "logps/real": -365.9146728515625, "loss": 0.0805, "rewards/accuracies": 0.949999988079071, "rewards/generated": -26.19057846069336, "rewards/margins": 25.535663604736328, "rewards/real": -0.6549181938171387, "step": 1110 }, { "epoch": 0.36, "learning_rate": 4.892141756548536e-07, "logits/generated": 0.17940345406532288, "logits/real": -0.9497494697570801, "logps/generated": -639.5001220703125, "logps/real": -297.89691162109375, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/generated": -25.884960174560547, "rewards/margins": 26.278423309326172, "rewards/real": 0.39346298575401306, "step": 1120 }, { "epoch": 0.36, "learning_rate": 4.886215479435819e-07, "logits/generated": -0.3005313575267792, "logits/real": -1.163912296295166, "logps/generated": -578.6134033203125, "logps/real": -283.5059814453125, "loss": 0.0517, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -20.6179256439209, "rewards/margins": 21.1088809967041, "rewards/real": 0.49095502495765686, "step": 1130 }, { "epoch": 0.36, "learning_rate": 4.8802892023231e-07, "logits/generated": -0.02864791825413704, "logits/real": -1.1268925666809082, "logps/generated": -725.9310913085938, "logps/real": -330.99298095703125, "loss": 0.0583, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -34.120079040527344, "rewards/margins": 33.20871353149414, "rewards/real": -0.9113671183586121, "step": 1140 }, { "epoch": 0.37, "learning_rate": 4.874362925210383e-07, "logits/generated": 0.1999046355485916, "logits/real": -1.0573365688323975, "logps/generated": -840.8147583007812, "logps/real": -307.5840148925781, "loss": 0.0808, "rewards/accuracies": 1.0, "rewards/generated": -44.21206283569336, "rewards/margins": 41.520259857177734, "rewards/real": -2.691797971725464, "step": 1150 }, { "epoch": 0.37, "learning_rate": 4.868436648097665e-07, "logits/generated": -0.03737213462591171, "logits/real": -1.0409324169158936, "logps/generated": -826.9354248046875, "logps/real": -359.90130615234375, "loss": 0.0481, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -40.422664642333984, "rewards/margins": 38.234439849853516, "rewards/real": -2.188223361968994, "step": 1160 }, { "epoch": 0.37, "learning_rate": 4.862510370984946e-07, "logits/generated": 0.018914643675088882, "logits/real": -1.1064562797546387, "logps/generated": -721.578857421875, "logps/real": -350.8349304199219, "loss": 0.0247, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -33.86790466308594, "rewards/margins": 31.102153778076172, "rewards/real": -2.7657506465911865, "step": 1170 }, { "epoch": 0.38, "learning_rate": 4.856584093872229e-07, "logits/generated": 0.07794220000505447, "logits/real": -1.065049409866333, "logps/generated": -789.3442993164062, "logps/real": -315.46148681640625, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/generated": -39.061622619628906, "rewards/margins": 39.12267303466797, "rewards/real": 0.06105160713195801, "step": 1180 }, { "epoch": 0.38, "learning_rate": 4.850657816759511e-07, "logits/generated": -0.010703866370022297, "logits/real": -1.0528227090835571, "logps/generated": -712.7218017578125, "logps/real": -336.1849060058594, "loss": 0.0817, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.728797912597656, "rewards/margins": 30.812725067138672, "rewards/real": -1.9160690307617188, "step": 1190 }, { "epoch": 0.38, "learning_rate": 4.844731539646794e-07, "logits/generated": -0.15801379084587097, "logits/real": -1.0353472232818604, "logps/generated": -763.3748168945312, "logps/real": -316.615478515625, "loss": 0.0816, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -36.90118408203125, "rewards/margins": 33.61400604248047, "rewards/real": -3.287179470062256, "step": 1200 }, { "epoch": 0.39, "learning_rate": 4.838805262534076e-07, "logits/generated": -0.4400274157524109, "logits/real": -1.2942895889282227, "logps/generated": -637.5701293945312, "logps/real": -310.1935729980469, "loss": 0.1157, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -26.2459659576416, "rewards/margins": 26.2324275970459, "rewards/real": -0.013539028353989124, "step": 1210 }, { "epoch": 0.39, "learning_rate": 4.832878985421358e-07, "logits/generated": -0.20033612847328186, "logits/real": -1.2102962732315063, "logps/generated": -688.7596435546875, "logps/real": -299.0711364746094, "loss": 0.0293, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -29.408504486083984, "rewards/margins": 29.371475219726562, "rewards/real": -0.03703027963638306, "step": 1220 }, { "epoch": 0.39, "learning_rate": 4.82695270830864e-07, "logits/generated": -0.22372718155384064, "logits/real": -1.134982943534851, "logps/generated": -646.0555419921875, "logps/real": -304.77130126953125, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/generated": -24.955896377563477, "rewards/margins": 24.676605224609375, "rewards/real": -0.2792915105819702, "step": 1230 }, { "epoch": 0.4, "learning_rate": 4.821026431195922e-07, "logits/generated": -0.05184303969144821, "logits/real": -1.1724616289138794, "logps/generated": -745.6905517578125, "logps/real": -334.4775390625, "loss": 0.0277, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.423221588134766, "rewards/margins": 33.6823616027832, "rewards/real": 0.2591377794742584, "step": 1240 }, { "epoch": 0.4, "learning_rate": 4.815100154083205e-07, "logits/generated": -0.3657141625881195, "logits/real": -1.26314115524292, "logps/generated": -639.5381469726562, "logps/real": -302.38604736328125, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/generated": -25.064176559448242, "rewards/margins": 26.08724021911621, "rewards/real": 1.0230640172958374, "step": 1250 }, { "epoch": 0.4, "learning_rate": 4.809173876970487e-07, "logits/generated": -0.28293323516845703, "logits/real": -1.287521243095398, "logps/generated": -651.6383056640625, "logps/real": -277.32098388671875, "loss": 0.1504, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.853628158569336, "rewards/margins": 27.138935089111328, "rewards/real": 1.2853089570999146, "step": 1260 }, { "epoch": 0.41, "learning_rate": 4.80324759985777e-07, "logits/generated": -0.1853010356426239, "logits/real": -1.2585564851760864, "logps/generated": -624.6441040039062, "logps/real": -323.127685546875, "loss": 0.0547, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -23.59101676940918, "rewards/margins": 23.93788719177246, "rewards/real": 0.3468722701072693, "step": 1270 }, { "epoch": 0.41, "learning_rate": 4.797321322745052e-07, "logits/generated": -0.3382716476917267, "logits/real": -1.2762130498886108, "logps/generated": -731.6915283203125, "logps/real": -327.10223388671875, "loss": 0.0485, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.39984893798828, "rewards/margins": 30.74454116821289, "rewards/real": -1.655306100845337, "step": 1280 }, { "epoch": 0.41, "learning_rate": 4.791395045632333e-07, "logits/generated": -0.35983893275260925, "logits/real": -1.251419186592102, "logps/generated": -721.2036743164062, "logps/real": -380.6951599121094, "loss": 0.1202, "rewards/accuracies": 0.949999988079071, "rewards/generated": -32.7227668762207, "rewards/margins": 29.393871307373047, "rewards/real": -3.3288989067077637, "step": 1290 }, { "epoch": 0.42, "learning_rate": 4.785468768519616e-07, "logits/generated": -0.5904273986816406, "logits/real": -1.3770813941955566, "logps/generated": -645.2593994140625, "logps/real": -373.2558898925781, "loss": 0.0258, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.56414794921875, "rewards/margins": 24.9666748046875, "rewards/real": -1.5974750518798828, "step": 1300 }, { "epoch": 0.42, "learning_rate": 4.779542491406898e-07, "logits/generated": -0.17467817664146423, "logits/real": -1.0748966932296753, "logps/generated": -705.0093383789062, "logps/real": -353.30194091796875, "loss": 0.0512, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -31.598918914794922, "rewards/margins": 29.074283599853516, "rewards/real": -2.52463436126709, "step": 1310 }, { "epoch": 0.42, "learning_rate": 4.77361621429418e-07, "logits/generated": -0.24216961860656738, "logits/real": -1.2368093729019165, "logps/generated": -689.7742919921875, "logps/real": -341.96343994140625, "loss": 0.0487, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -27.19826889038086, "rewards/margins": 27.094905853271484, "rewards/real": -0.10336218029260635, "step": 1320 }, { "epoch": 0.43, "learning_rate": 4.7676899371814624e-07, "logits/generated": -0.3762677311897278, "logits/real": -1.3373234272003174, "logps/generated": -652.1893310546875, "logps/real": -318.98681640625, "loss": 0.0895, "rewards/accuracies": 1.0, "rewards/generated": -25.372386932373047, "rewards/margins": 26.68527603149414, "rewards/real": 1.3128888607025146, "step": 1330 }, { "epoch": 0.43, "learning_rate": 4.7617636600687443e-07, "logits/generated": -0.11979229748249054, "logits/real": -1.1395213603973389, "logps/generated": -795.4387817382812, "logps/real": -324.91082763671875, "loss": 0.0125, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -39.38142013549805, "rewards/margins": 35.4011344909668, "rewards/real": -3.9802863597869873, "step": 1340 }, { "epoch": 0.43, "learning_rate": 4.755837382956027e-07, "logits/generated": -0.6125169992446899, "logits/real": -1.3682019710540771, "logps/generated": -678.634521484375, "logps/real": -297.8531188964844, "loss": 0.0546, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -31.750164031982422, "rewards/margins": 30.06414222717285, "rewards/real": -1.686022400856018, "step": 1350 }, { "epoch": 0.44, "learning_rate": 4.7499111058433086e-07, "logits/generated": -0.29297947883605957, "logits/real": -1.2235249280929565, "logps/generated": -619.5758056640625, "logps/real": -346.8426513671875, "loss": 0.0664, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.86092758178711, "rewards/margins": 24.205778121948242, "rewards/real": -0.6551482081413269, "step": 1360 }, { "epoch": 0.44, "learning_rate": 4.743984828730591e-07, "logits/generated": -0.22886662185192108, "logits/real": -1.1904137134552002, "logps/generated": -693.4284057617188, "logps/real": -338.90582275390625, "loss": 0.0435, "rewards/accuracies": 1.0, "rewards/generated": -29.037893295288086, "rewards/margins": 28.427536010742188, "rewards/real": -0.6103585958480835, "step": 1370 }, { "epoch": 0.44, "learning_rate": 4.7380585516178735e-07, "logits/generated": -0.3873533010482788, "logits/real": -1.3212558031082153, "logps/generated": -695.8396606445312, "logps/real": -314.5908203125, "loss": 0.0406, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.094036102294922, "rewards/margins": 28.612716674804688, "rewards/real": -0.4813196063041687, "step": 1380 }, { "epoch": 0.44, "learning_rate": 4.7321322745051554e-07, "logits/generated": -0.2663424611091614, "logits/real": -1.234851360321045, "logps/generated": -626.5375366210938, "logps/real": -329.78070068359375, "loss": 0.0486, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.41695213317871, "rewards/margins": 24.246206283569336, "rewards/real": -1.1707462072372437, "step": 1390 }, { "epoch": 0.45, "learning_rate": 4.726205997392438e-07, "logits/generated": -0.27236634492874146, "logits/real": -1.3489251136779785, "logps/generated": -594.3746948242188, "logps/real": -310.15008544921875, "loss": 0.0353, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.29964256286621, "rewards/margins": 21.168689727783203, "rewards/real": -0.1309548020362854, "step": 1400 }, { "epoch": 0.45, "learning_rate": 4.72027972027972e-07, "logits/generated": -0.4870881140232086, "logits/real": -1.3286640644073486, "logps/generated": -613.6137084960938, "logps/real": -301.20849609375, "loss": 0.0798, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -23.393417358398438, "rewards/margins": 24.340110778808594, "rewards/real": 0.9466953277587891, "step": 1410 }, { "epoch": 0.45, "learning_rate": 4.714353443167002e-07, "logits/generated": -0.3089195489883423, "logits/real": -1.2844128608703613, "logps/generated": -629.444580078125, "logps/real": -326.8503112792969, "loss": 0.0602, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.135459899902344, "rewards/margins": 25.159605026245117, "rewards/real": 1.024143099784851, "step": 1420 }, { "epoch": 0.46, "learning_rate": 4.7084271660542845e-07, "logits/generated": 0.06970086693763733, "logits/real": -0.873447060585022, "logps/generated": -689.5084228515625, "logps/real": -334.2491149902344, "loss": 0.0457, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -30.447406768798828, "rewards/margins": 28.454111099243164, "rewards/real": -1.9932889938354492, "step": 1430 }, { "epoch": 0.46, "learning_rate": 4.702500888941567e-07, "logits/generated": -0.25644490122795105, "logits/real": -1.1300890445709229, "logps/generated": -681.3156127929688, "logps/real": -349.94122314453125, "loss": 0.0405, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.21697998046875, "rewards/margins": 28.60993003845215, "rewards/real": 0.3929504156112671, "step": 1440 }, { "epoch": 0.46, "learning_rate": 4.696574611828849e-07, "logits/generated": -0.0076753199100494385, "logits/real": -0.9142985343933105, "logps/generated": -709.6637573242188, "logps/real": -343.50299072265625, "loss": 0.1348, "rewards/accuracies": 0.949999988079071, "rewards/generated": -28.745708465576172, "rewards/margins": 28.527847290039062, "rewards/real": -0.2178615778684616, "step": 1450 }, { "epoch": 0.47, "learning_rate": 4.690648334716131e-07, "logits/generated": -0.10671776533126831, "logits/real": -0.9834194183349609, "logps/generated": -670.0909423828125, "logps/real": -373.78857421875, "loss": 0.0116, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.11911964416504, "rewards/margins": 26.801151275634766, "rewards/real": -0.3179682195186615, "step": 1460 }, { "epoch": 0.47, "learning_rate": 4.6847220576034137e-07, "logits/generated": -0.093577541410923, "logits/real": -0.9135047197341919, "logps/generated": -715.4546508789062, "logps/real": -293.73681640625, "loss": 0.1093, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.328060150146484, "rewards/margins": 29.672901153564453, "rewards/real": 0.34484216570854187, "step": 1470 }, { "epoch": 0.47, "learning_rate": 4.6787957804906955e-07, "logits/generated": 0.07950839400291443, "logits/real": -0.7794169783592224, "logps/generated": -624.20263671875, "logps/real": -323.77630615234375, "loss": 0.0277, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -23.57122039794922, "rewards/margins": 22.5595703125, "rewards/real": -1.0116502046585083, "step": 1480 }, { "epoch": 0.48, "learning_rate": 4.6728695033779774e-07, "logits/generated": 0.12502098083496094, "logits/real": -0.893004298210144, "logps/generated": -721.6754760742188, "logps/real": -351.4068298339844, "loss": 0.0341, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.803447723388672, "rewards/margins": 30.353382110595703, "rewards/real": -1.4500672817230225, "step": 1490 }, { "epoch": 0.48, "learning_rate": 4.66694322626526e-07, "logits/generated": -0.1611909121274948, "logits/real": -1.0857502222061157, "logps/generated": -606.7127685546875, "logps/real": -346.6650085449219, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/generated": -23.76133918762207, "rewards/margins": 23.08817481994629, "rewards/real": -0.6731644868850708, "step": 1500 }, { "epoch": 0.48, "learning_rate": 4.661016949152542e-07, "logits/generated": -0.4079923629760742, "logits/real": -1.1771427392959595, "logps/generated": -712.0586547851562, "logps/real": -318.7583312988281, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/generated": -29.198932647705078, "rewards/margins": 29.653606414794922, "rewards/real": 0.4546758234500885, "step": 1510 }, { "epoch": 0.49, "learning_rate": 4.655090672039824e-07, "logits/generated": -0.006799777038395405, "logits/real": -0.9504464268684387, "logps/generated": -763.8966674804688, "logps/real": -341.6388244628906, "loss": 0.0535, "rewards/accuracies": 0.987500011920929, "rewards/generated": -35.669090270996094, "rewards/margins": 33.23848342895508, "rewards/real": -2.43060564994812, "step": 1520 }, { "epoch": 0.49, "learning_rate": 4.6491643949271066e-07, "logits/generated": -0.04143080860376358, "logits/real": -0.8772333860397339, "logps/generated": -642.1051025390625, "logps/real": -323.8268127441406, "loss": 0.0595, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -26.772869110107422, "rewards/margins": 25.335664749145508, "rewards/real": -1.4372053146362305, "step": 1530 }, { "epoch": 0.49, "learning_rate": 4.6432381178143885e-07, "logits/generated": -0.3368912935256958, "logits/real": -0.9504976272583008, "logps/generated": -715.6137084960938, "logps/real": -327.49212646484375, "loss": 0.0526, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -31.8399715423584, "rewards/margins": 29.859363555908203, "rewards/real": -1.980611801147461, "step": 1540 }, { "epoch": 0.5, "learning_rate": 4.637311840701671e-07, "logits/generated": -0.04745306074619293, "logits/real": -0.7478165626525879, "logps/generated": -680.3505859375, "logps/real": -313.1524963378906, "loss": 0.0477, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -27.10567283630371, "rewards/margins": 25.496183395385742, "rewards/real": -1.609485387802124, "step": 1550 }, { "epoch": 0.5, "learning_rate": 4.6313855635889533e-07, "logits/generated": -0.09471658617258072, "logits/real": -0.9122379422187805, "logps/generated": -711.54248046875, "logps/real": -362.5740966796875, "loss": 0.0266, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.436248779296875, "rewards/margins": 28.634613037109375, "rewards/real": -2.8016300201416016, "step": 1560 }, { "epoch": 0.5, "learning_rate": 4.625459286476235e-07, "logits/generated": -0.5175934433937073, "logits/real": -1.0976009368896484, "logps/generated": -610.1480102539062, "logps/real": -325.7567138671875, "loss": 0.0686, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -23.987730026245117, "rewards/margins": 23.50770378112793, "rewards/real": -0.4800271987915039, "step": 1570 }, { "epoch": 0.51, "learning_rate": 4.6195330093635176e-07, "logits/generated": -0.3180214762687683, "logits/real": -1.0340139865875244, "logps/generated": -640.499755859375, "logps/real": -286.6065368652344, "loss": 0.0269, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -23.94277572631836, "rewards/margins": 24.719507217407227, "rewards/real": 0.776727557182312, "step": 1580 }, { "epoch": 0.51, "learning_rate": 4.6136067322508e-07, "logits/generated": -0.16230185329914093, "logits/real": -1.1436270475387573, "logps/generated": -604.614501953125, "logps/real": -316.1036071777344, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/generated": -22.601945877075195, "rewards/margins": 22.853349685668945, "rewards/real": 0.2514052093029022, "step": 1590 }, { "epoch": 0.51, "learning_rate": 4.607680455138082e-07, "logits/generated": 0.43026676774024963, "logits/real": -0.7559512853622437, "logps/generated": -647.7166748046875, "logps/real": -370.7508850097656, "loss": 0.0879, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -24.863733291625977, "rewards/margins": 23.403057098388672, "rewards/real": -1.4606760740280151, "step": 1600 }, { "epoch": 0.52, "learning_rate": 4.6017541780253643e-07, "logits/generated": 0.9180746078491211, "logits/real": 0.024421293288469315, "logps/generated": -685.8744506835938, "logps/real": -314.4345397949219, "loss": 0.0898, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -29.287607192993164, "rewards/margins": 27.995941162109375, "rewards/real": -1.2916669845581055, "step": 1610 }, { "epoch": 0.52, "learning_rate": 4.595827900912647e-07, "logits/generated": 0.43732696771621704, "logits/real": -0.18413802981376648, "logps/generated": -630.1939086914062, "logps/real": -307.03253173828125, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/generated": -24.569049835205078, "rewards/margins": 23.723299026489258, "rewards/real": -0.8457552194595337, "step": 1620 }, { "epoch": 0.52, "learning_rate": 4.5899016237999286e-07, "logits/generated": 0.6507248282432556, "logits/real": -0.3299483358860016, "logps/generated": -644.2907104492188, "logps/real": -293.15740966796875, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/generated": -27.317459106445312, "rewards/margins": 27.067108154296875, "rewards/real": -0.2503497004508972, "step": 1630 }, { "epoch": 0.52, "learning_rate": 4.583975346687211e-07, "logits/generated": 0.7223843932151794, "logits/real": -0.22026348114013672, "logps/generated": -714.4222412109375, "logps/real": -367.756103515625, "loss": 0.0471, "rewards/accuracies": 0.949999988079071, "rewards/generated": -31.6726016998291, "rewards/margins": 28.926502227783203, "rewards/real": -2.746098756790161, "step": 1640 }, { "epoch": 0.53, "learning_rate": 4.5780490695744935e-07, "logits/generated": 0.7971788644790649, "logits/real": -0.11250700801610947, "logps/generated": -671.2685546875, "logps/real": -360.0363464355469, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/generated": -27.458694458007812, "rewards/margins": 24.904430389404297, "rewards/real": -2.5542635917663574, "step": 1650 }, { "epoch": 0.53, "learning_rate": 4.5721227924617754e-07, "logits/generated": 0.7440794706344604, "logits/real": 0.10373647511005402, "logps/generated": -722.1324462890625, "logps/real": -315.26153564453125, "loss": 0.091, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -31.29463768005371, "rewards/margins": 29.36053466796875, "rewards/real": -1.9341026544570923, "step": 1660 }, { "epoch": 0.53, "learning_rate": 4.566196515349057e-07, "logits/generated": 0.45260196924209595, "logits/real": 0.03100525215268135, "logps/generated": -785.9488525390625, "logps/real": -309.55194091796875, "loss": 0.0448, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -34.245689392089844, "rewards/margins": 33.135684967041016, "rewards/real": -1.1099998950958252, "step": 1670 }, { "epoch": 0.54, "learning_rate": 4.5602702382363397e-07, "logits/generated": 0.7789133787155151, "logits/real": -0.20548442006111145, "logps/generated": -666.3745727539062, "logps/real": -360.34490966796875, "loss": 0.0752, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.628658294677734, "rewards/margins": 27.15573501586914, "rewards/real": -0.4729282259941101, "step": 1680 }, { "epoch": 0.54, "learning_rate": 4.5543439611236216e-07, "logits/generated": 0.797033429145813, "logits/real": 0.16859188675880432, "logps/generated": -727.2579345703125, "logps/real": -318.44439697265625, "loss": 0.0715, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.163982391357422, "rewards/margins": 28.728031158447266, "rewards/real": -2.4359545707702637, "step": 1690 }, { "epoch": 0.54, "learning_rate": 4.548417684010904e-07, "logits/generated": 0.5582669973373413, "logits/real": -0.4732363224029541, "logps/generated": -640.9185791015625, "logps/real": -336.33404541015625, "loss": 0.1114, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -25.40323829650879, "rewards/margins": 25.0753173828125, "rewards/real": -0.3279207646846771, "step": 1700 }, { "epoch": 0.55, "learning_rate": 4.5424914068981864e-07, "logits/generated": 0.6099811792373657, "logits/real": -0.357994019985199, "logps/generated": -672.3972778320312, "logps/real": -352.52252197265625, "loss": 0.0654, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -28.22308921813965, "rewards/margins": 26.43735122680664, "rewards/real": -1.7857351303100586, "step": 1710 }, { "epoch": 0.55, "learning_rate": 4.5365651297854683e-07, "logits/generated": 1.0697330236434937, "logits/real": -0.14076311886310577, "logps/generated": -790.8233642578125, "logps/real": -360.5348205566406, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/generated": -36.21926498413086, "rewards/margins": 33.47108459472656, "rewards/real": -2.7481868267059326, "step": 1720 }, { "epoch": 0.55, "learning_rate": 4.5306388526727507e-07, "logits/generated": 0.7185707688331604, "logits/real": -0.31060856580734253, "logps/generated": -660.9615478515625, "logps/real": -315.27874755859375, "loss": 0.1065, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -26.4785213470459, "rewards/margins": 25.37911033630371, "rewards/real": -1.0994105339050293, "step": 1730 }, { "epoch": 0.56, "learning_rate": 4.524712575560033e-07, "logits/generated": 0.5896469950675964, "logits/real": -0.13658718764781952, "logps/generated": -768.370361328125, "logps/real": -327.138427734375, "loss": 0.0505, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -35.88934326171875, "rewards/margins": 32.99297332763672, "rewards/real": -2.8963723182678223, "step": 1740 }, { "epoch": 0.56, "learning_rate": 4.518786298447315e-07, "logits/generated": 0.5808233618736267, "logits/real": -0.32730236649513245, "logps/generated": -707.2631225585938, "logps/real": -377.5989685058594, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/generated": -33.49466323852539, "rewards/margins": 29.88504981994629, "rewards/real": -3.6096129417419434, "step": 1750 }, { "epoch": 0.56, "learning_rate": 4.5128600213345974e-07, "logits/generated": 0.6635645627975464, "logits/real": 0.02990163303911686, "logps/generated": -744.2517700195312, "logps/real": -344.38763427734375, "loss": 0.1064, "rewards/accuracies": 1.0, "rewards/generated": -35.34321975708008, "rewards/margins": 31.038623809814453, "rewards/real": -4.304598331451416, "step": 1760 }, { "epoch": 0.57, "learning_rate": 4.50693374422188e-07, "logits/generated": 1.0504231452941895, "logits/real": 0.14314612746238708, "logps/generated": -722.0823364257812, "logps/real": -415.1273498535156, "loss": 0.0774, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -35.303855895996094, "rewards/margins": 27.821096420288086, "rewards/real": -7.482762813568115, "step": 1770 }, { "epoch": 0.57, "learning_rate": 4.501007467109162e-07, "logits/generated": 1.0505800247192383, "logits/real": 0.025553371757268906, "logps/generated": -792.7987060546875, "logps/real": -369.75714111328125, "loss": 0.0941, "rewards/accuracies": 0.987500011920929, "rewards/generated": -37.77853775024414, "rewards/margins": 33.75434112548828, "rewards/real": -4.024199485778809, "step": 1780 }, { "epoch": 0.57, "learning_rate": 4.495081189996444e-07, "logits/generated": 0.880537211894989, "logits/real": -0.4084358811378479, "logps/generated": -702.1065673828125, "logps/real": -356.48077392578125, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/generated": -33.2712287902832, "rewards/margins": 30.7994327545166, "rewards/real": -2.47179913520813, "step": 1790 }, { "epoch": 0.58, "learning_rate": 4.4891549128837266e-07, "logits/generated": 0.8617936968803406, "logits/real": -0.29051464796066284, "logps/generated": -713.6237182617188, "logps/real": -321.52752685546875, "loss": 0.0241, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -33.624881744384766, "rewards/margins": 32.551475524902344, "rewards/real": -1.0734100341796875, "step": 1800 }, { "epoch": 0.58, "learning_rate": 4.4832286357710085e-07, "logits/generated": 0.7705877423286438, "logits/real": 0.30394884943962097, "logps/generated": -721.6909790039062, "logps/real": -366.5400390625, "loss": 0.0631, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -33.41383743286133, "rewards/margins": 28.932180404663086, "rewards/real": -4.481656074523926, "step": 1810 }, { "epoch": 0.58, "learning_rate": 4.477302358658291e-07, "logits/generated": 0.5584123134613037, "logits/real": -0.6512025594711304, "logps/generated": -729.8486938476562, "logps/real": -348.49432373046875, "loss": 0.036, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -34.38263702392578, "rewards/margins": 31.932994842529297, "rewards/real": -2.449641466140747, "step": 1820 }, { "epoch": 0.59, "learning_rate": 4.4713760815455733e-07, "logits/generated": 0.2919533848762512, "logits/real": -0.666230320930481, "logps/generated": -740.5325927734375, "logps/real": -324.1904602050781, "loss": 0.0746, "rewards/accuracies": 1.0, "rewards/generated": -35.278297424316406, "rewards/margins": 33.55880355834961, "rewards/real": -1.7194910049438477, "step": 1830 }, { "epoch": 0.59, "learning_rate": 4.465449804432855e-07, "logits/generated": 0.4498261511325836, "logits/real": -0.43754512071609497, "logps/generated": -744.0653076171875, "logps/real": -335.212158203125, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/generated": -35.13162612915039, "rewards/margins": 33.10210418701172, "rewards/real": -2.0295262336730957, "step": 1840 }, { "epoch": 0.59, "learning_rate": 4.459523527320137e-07, "logits/generated": 0.427295982837677, "logits/real": -0.6499985456466675, "logps/generated": -658.7879638671875, "logps/real": -397.44158935546875, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/generated": -29.81673812866211, "rewards/margins": 26.594614028930664, "rewards/real": -3.2221245765686035, "step": 1850 }, { "epoch": 0.6, "learning_rate": 4.4535972502074195e-07, "logits/generated": 0.3401271402835846, "logits/real": -0.9475802183151245, "logps/generated": -682.956298828125, "logps/real": -343.54644775390625, "loss": 0.051, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.154062271118164, "rewards/margins": 29.059768676757812, "rewards/real": -1.0942920446395874, "step": 1860 }, { "epoch": 0.6, "learning_rate": 4.4476709730947014e-07, "logits/generated": 0.48368996381759644, "logits/real": -0.6674381494522095, "logps/generated": -752.8349609375, "logps/real": -310.81005859375, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/generated": -36.16936492919922, "rewards/margins": 33.7068977355957, "rewards/real": -2.4624717235565186, "step": 1870 }, { "epoch": 0.6, "learning_rate": 4.441744695981984e-07, "logits/generated": 0.27173930406570435, "logits/real": -0.8330855369567871, "logps/generated": -731.5062255859375, "logps/real": -392.802001953125, "loss": 0.0939, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -35.108642578125, "rewards/margins": 31.652996063232422, "rewards/real": -3.455641508102417, "step": 1880 }, { "epoch": 0.6, "learning_rate": 4.435818418869266e-07, "logits/generated": 0.5597046613693237, "logits/real": -0.6597913503646851, "logps/generated": -837.8775634765625, "logps/real": -348.21966552734375, "loss": 0.0481, "rewards/accuracies": 0.987500011920929, "rewards/generated": -44.762596130371094, "rewards/margins": 38.68193817138672, "rewards/real": -6.080657005310059, "step": 1890 }, { "epoch": 0.61, "learning_rate": 4.429892141756548e-07, "logits/generated": 0.7637578845024109, "logits/real": -0.36679187417030334, "logps/generated": -837.9700927734375, "logps/real": -364.4372253417969, "loss": 0.0329, "rewards/accuracies": 0.987500011920929, "rewards/generated": -43.84961700439453, "rewards/margins": 36.390464782714844, "rewards/real": -7.459149360656738, "step": 1900 }, { "epoch": 0.61, "learning_rate": 4.4239658646438306e-07, "logits/generated": 0.6158145666122437, "logits/real": -0.3679594099521637, "logps/generated": -796.8351440429688, "logps/real": -405.11151123046875, "loss": 0.0591, "rewards/accuracies": 1.0, "rewards/generated": -40.54536056518555, "rewards/margins": 30.88002586364746, "rewards/real": -9.665339469909668, "step": 1910 }, { "epoch": 0.61, "learning_rate": 4.418039587531113e-07, "logits/generated": 0.5541807413101196, "logits/real": -0.3465508818626404, "logps/generated": -842.4884643554688, "logps/real": -400.547607421875, "loss": 0.05, "rewards/accuracies": 0.987500011920929, "rewards/generated": -43.56201934814453, "rewards/margins": 34.930511474609375, "rewards/real": -8.631510734558105, "step": 1920 }, { "epoch": 0.62, "learning_rate": 4.412113310418395e-07, "logits/generated": 0.4438748359680176, "logits/real": -0.649644136428833, "logps/generated": -714.6018676757812, "logps/real": -334.74505615234375, "loss": 0.0969, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -31.423009872436523, "rewards/margins": 29.62143898010254, "rewards/real": -1.8015722036361694, "step": 1930 }, { "epoch": 0.62, "learning_rate": 4.4061870333056773e-07, "logits/generated": 0.918286144733429, "logits/real": -0.5583127737045288, "logps/generated": -660.2481689453125, "logps/real": -349.63916015625, "loss": 0.0385, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -27.5297794342041, "rewards/margins": 24.28526496887207, "rewards/real": -3.2445099353790283, "step": 1940 }, { "epoch": 0.62, "learning_rate": 4.4002607561929597e-07, "logits/generated": 0.7378710508346558, "logits/real": -0.4875301718711853, "logps/generated": -716.6314697265625, "logps/real": -383.4659729003906, "loss": 0.0606, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -32.32218933105469, "rewards/margins": 29.484619140625, "rewards/real": -2.837568998336792, "step": 1950 }, { "epoch": 0.63, "learning_rate": 4.3943344790802416e-07, "logits/generated": 0.704971432685852, "logits/real": -0.3970826268196106, "logps/generated": -685.1222534179688, "logps/real": -337.703125, "loss": 0.0326, "rewards/accuracies": 1.0, "rewards/generated": -29.617624282836914, "rewards/margins": 27.498523712158203, "rewards/real": -2.1190993785858154, "step": 1960 }, { "epoch": 0.63, "learning_rate": 4.388408201967524e-07, "logits/generated": 0.48584890365600586, "logits/real": -0.5464336276054382, "logps/generated": -721.333740234375, "logps/real": -357.57159423828125, "loss": 0.0161, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -32.480567932128906, "rewards/margins": 28.700443267822266, "rewards/real": -3.7801260948181152, "step": 1970 }, { "epoch": 0.63, "learning_rate": 4.3824819248548064e-07, "logits/generated": 1.1303284168243408, "logits/real": -0.15861235558986664, "logps/generated": -724.344482421875, "logps/real": -378.0018005371094, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/generated": -32.12859344482422, "rewards/margins": 28.6373233795166, "rewards/real": -3.491267681121826, "step": 1980 }, { "epoch": 0.64, "learning_rate": 4.3765556477420883e-07, "logits/generated": 1.001157283782959, "logits/real": -0.1735081970691681, "logps/generated": -642.5946655273438, "logps/real": -329.3343505859375, "loss": 0.0826, "rewards/accuracies": 0.949999988079071, "rewards/generated": -27.2222900390625, "rewards/margins": 25.380970001220703, "rewards/real": -1.8413175344467163, "step": 1990 }, { "epoch": 0.64, "learning_rate": 4.3706293706293707e-07, "logits/generated": 1.2101815938949585, "logits/real": 0.2167239934206009, "logps/generated": -784.0897216796875, "logps/real": -338.1402893066406, "loss": 0.0856, "rewards/accuracies": 0.987500011920929, "rewards/generated": -35.91041946411133, "rewards/margins": 33.334041595458984, "rewards/real": -2.576378345489502, "step": 2000 }, { "epoch": 0.64, "learning_rate": 4.364703093516653e-07, "logits/generated": 0.8496305346488953, "logits/real": -0.08679083734750748, "logps/generated": -722.7340087890625, "logps/real": -333.61529541015625, "loss": 0.041, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.327056884765625, "rewards/margins": 29.975378036499023, "rewards/real": -1.3516753911972046, "step": 2010 }, { "epoch": 0.65, "learning_rate": 4.3587768164039345e-07, "logits/generated": 1.025160312652588, "logits/real": -0.004342697560787201, "logps/generated": -671.9498901367188, "logps/real": -371.9809875488281, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/generated": -28.117258071899414, "rewards/margins": 26.58926773071289, "rewards/real": -1.5279954671859741, "step": 2020 }, { "epoch": 0.65, "learning_rate": 4.352850539291217e-07, "logits/generated": 1.013951063156128, "logits/real": 0.09798892587423325, "logps/generated": -701.6458740234375, "logps/real": -334.0567932128906, "loss": 0.0679, "rewards/accuracies": 1.0, "rewards/generated": -31.478445053100586, "rewards/margins": 28.081411361694336, "rewards/real": -3.3970324993133545, "step": 2030 }, { "epoch": 0.65, "learning_rate": 4.346924262178499e-07, "logits/generated": 0.6137873530387878, "logits/real": -0.20404133200645447, "logps/generated": -681.4293212890625, "logps/real": -321.9293518066406, "loss": 0.0687, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -28.87796401977539, "rewards/margins": 27.810291290283203, "rewards/real": -1.0676777362823486, "step": 2040 }, { "epoch": 0.66, "learning_rate": 4.340997985065781e-07, "logits/generated": 0.9127419590950012, "logits/real": -0.434025377035141, "logps/generated": -730.1388549804688, "logps/real": -357.342529296875, "loss": 0.0274, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -32.81401062011719, "rewards/margins": 31.743839263916016, "rewards/real": -1.0701699256896973, "step": 2050 }, { "epoch": 0.66, "learning_rate": 4.3350717079530637e-07, "logits/generated": 0.720097541809082, "logits/real": -0.33784544467926025, "logps/generated": -650.9515380859375, "logps/real": -381.292724609375, "loss": 0.0321, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.152469635009766, "rewards/margins": 26.854297637939453, "rewards/real": -0.29817166924476624, "step": 2060 }, { "epoch": 0.66, "learning_rate": 4.3291454308403455e-07, "logits/generated": 0.8382455110549927, "logits/real": -0.13121643662452698, "logps/generated": -728.041748046875, "logps/real": -311.09844970703125, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/generated": -31.98345947265625, "rewards/margins": 30.502544403076172, "rewards/real": -1.4809117317199707, "step": 2070 }, { "epoch": 0.67, "learning_rate": 4.323219153727628e-07, "logits/generated": 0.4975649416446686, "logits/real": -0.34417563676834106, "logps/generated": -714.7693481445312, "logps/real": -346.2616271972656, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/generated": -31.47088623046875, "rewards/margins": 30.593393325805664, "rewards/real": -0.8774968385696411, "step": 2080 }, { "epoch": 0.67, "learning_rate": 4.3172928766149104e-07, "logits/generated": 1.0943410396575928, "logits/real": -0.14147847890853882, "logps/generated": -729.1746826171875, "logps/real": -393.4339599609375, "loss": 0.0412, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.71548080444336, "rewards/margins": 30.33559226989746, "rewards/real": -3.3798928260803223, "step": 2090 }, { "epoch": 0.67, "learning_rate": 4.3113665995021923e-07, "logits/generated": 1.0804413557052612, "logits/real": -0.07776399701833725, "logps/generated": -672.5110473632812, "logps/real": -358.855224609375, "loss": 0.0602, "rewards/accuracies": 1.0, "rewards/generated": -29.979761123657227, "rewards/margins": 27.629995346069336, "rewards/real": -2.349764347076416, "step": 2100 }, { "epoch": 0.68, "learning_rate": 4.3054403223894747e-07, "logits/generated": 0.8239561915397644, "logits/real": -0.2564181983470917, "logps/generated": -684.0074462890625, "logps/real": -354.76300048828125, "loss": 0.0756, "rewards/accuracies": 0.949999988079071, "rewards/generated": -29.912456512451172, "rewards/margins": 27.336200714111328, "rewards/real": -2.576260805130005, "step": 2110 }, { "epoch": 0.68, "learning_rate": 4.299514045276757e-07, "logits/generated": 0.6753214597702026, "logits/real": -0.4390442371368408, "logps/generated": -799.2703857421875, "logps/real": -318.34698486328125, "loss": 0.1472, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -39.853477478027344, "rewards/margins": 36.59901428222656, "rewards/real": -3.2544643878936768, "step": 2120 }, { "epoch": 0.68, "learning_rate": 4.293587768164039e-07, "logits/generated": 0.9953800439834595, "logits/real": -0.3538312315940857, "logps/generated": -812.5777587890625, "logps/real": -328.5263671875, "loss": 0.0639, "rewards/accuracies": 0.949999988079071, "rewards/generated": -40.352821350097656, "rewards/margins": 38.133304595947266, "rewards/real": -2.2195210456848145, "step": 2130 }, { "epoch": 0.68, "learning_rate": 4.2876614910513214e-07, "logits/generated": 1.1399072408676147, "logits/real": 0.03536539152264595, "logps/generated": -702.9224853515625, "logps/real": -322.35809326171875, "loss": 0.0495, "rewards/accuracies": 0.987500011920929, "rewards/generated": -35.2816047668457, "rewards/margins": 30.668594360351562, "rewards/real": -4.613009452819824, "step": 2140 }, { "epoch": 0.69, "learning_rate": 4.281735213938604e-07, "logits/generated": 0.9407382011413574, "logits/real": 0.09566624462604523, "logps/generated": -911.5589599609375, "logps/real": -360.679931640625, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/generated": -49.54302215576172, "rewards/margins": 44.427650451660156, "rewards/real": -5.115373134613037, "step": 2150 }, { "epoch": 0.69, "learning_rate": 4.2758089368258857e-07, "logits/generated": 0.7384020090103149, "logits/real": -0.2857319414615631, "logps/generated": -767.8818969726562, "logps/real": -369.85693359375, "loss": 0.1224, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -37.77294921875, "rewards/margins": 35.26585006713867, "rewards/real": -2.5070996284484863, "step": 2160 }, { "epoch": 0.69, "learning_rate": 4.269882659713168e-07, "logits/generated": 0.7593884468078613, "logits/real": -0.10088062286376953, "logps/generated": -803.2346801757812, "logps/real": -424.66497802734375, "loss": 0.0911, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -39.97159957885742, "rewards/margins": 36.61625289916992, "rewards/real": -3.3553497791290283, "step": 2170 }, { "epoch": 0.7, "learning_rate": 4.2639563826004506e-07, "logits/generated": 0.6498333215713501, "logits/real": -0.28292304277420044, "logps/generated": -615.6293334960938, "logps/real": -327.6316223144531, "loss": 0.0275, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -25.123022079467773, "rewards/margins": 23.52663803100586, "rewards/real": -1.5963869094848633, "step": 2180 }, { "epoch": 0.7, "learning_rate": 4.2580301054877325e-07, "logits/generated": 0.5390284657478333, "logits/real": -0.5178315043449402, "logps/generated": -705.2702026367188, "logps/real": -345.2986755371094, "loss": 0.1221, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.884349822998047, "rewards/margins": 29.818450927734375, "rewards/real": -2.0658998489379883, "step": 2190 }, { "epoch": 0.7, "learning_rate": 4.2521038283750143e-07, "logits/generated": 0.13954707980155945, "logits/real": -0.8350412249565125, "logps/generated": -677.1754150390625, "logps/real": -328.5754089355469, "loss": 0.0282, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -27.805150985717773, "rewards/margins": 27.79385757446289, "rewards/real": -0.011295723728835583, "step": 2200 }, { "epoch": 0.71, "learning_rate": 4.246177551262297e-07, "logits/generated": 0.10666545480489731, "logits/real": -0.9430726170539856, "logps/generated": -637.8295288085938, "logps/real": -362.80181884765625, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/generated": -25.40670394897461, "rewards/margins": 25.37242317199707, "rewards/real": -0.03428385406732559, "step": 2210 }, { "epoch": 0.71, "learning_rate": 4.2402512741495787e-07, "logits/generated": 0.26229003071784973, "logits/real": -0.7658149003982544, "logps/generated": -682.8697509765625, "logps/real": -308.9587097167969, "loss": 0.0293, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.598562240600586, "rewards/margins": 27.416921615600586, "rewards/real": -0.18163709342479706, "step": 2220 }, { "epoch": 0.71, "learning_rate": 4.234324997036861e-07, "logits/generated": 0.2817060947418213, "logits/real": -0.8132452964782715, "logps/generated": -684.8417358398438, "logps/real": -323.7658386230469, "loss": 0.0275, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -29.28280258178711, "rewards/margins": 29.19304847717285, "rewards/real": -0.08975468575954437, "step": 2230 }, { "epoch": 0.72, "learning_rate": 4.2283987199241435e-07, "logits/generated": 0.7587065696716309, "logits/real": -0.7801111936569214, "logps/generated": -752.9880981445312, "logps/real": -334.2576599121094, "loss": 0.0486, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.82455825805664, "rewards/margins": 33.066871643066406, "rewards/real": -1.7576910257339478, "step": 2240 }, { "epoch": 0.72, "learning_rate": 4.2224724428114254e-07, "logits/generated": 0.5060659050941467, "logits/real": -0.7594629526138306, "logps/generated": -717.951171875, "logps/real": -326.2234802246094, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/generated": -34.36233139038086, "rewards/margins": 31.187103271484375, "rewards/real": -3.17522931098938, "step": 2250 }, { "epoch": 0.72, "learning_rate": 4.216546165698708e-07, "logits/generated": 0.27015620470046997, "logits/real": -0.9821538925170898, "logps/generated": -744.8015747070312, "logps/real": -362.250244140625, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/generated": -37.133689880371094, "rewards/margins": 34.903221130371094, "rewards/real": -2.2304701805114746, "step": 2260 }, { "epoch": 0.73, "learning_rate": 4.21061988858599e-07, "logits/generated": 0.8057888150215149, "logits/real": -0.6995252370834351, "logps/generated": -900.9762573242188, "logps/real": -348.0375061035156, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/generated": -50.746665954589844, "rewards/margins": 46.1430549621582, "rewards/real": -4.603612899780273, "step": 2270 }, { "epoch": 0.73, "learning_rate": 4.204693611473272e-07, "logits/generated": 0.5594112873077393, "logits/real": -0.858725905418396, "logps/generated": -786.5313720703125, "logps/real": -342.39398193359375, "loss": 0.0575, "rewards/accuracies": 0.987500011920929, "rewards/generated": -39.971519470214844, "rewards/margins": 36.61110305786133, "rewards/real": -3.3604228496551514, "step": 2280 }, { "epoch": 0.73, "learning_rate": 4.1987673343605545e-07, "logits/generated": 0.47553759813308716, "logits/real": -0.9600407481193542, "logps/generated": -716.9024658203125, "logps/real": -329.8050842285156, "loss": 0.0415, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -32.34065628051758, "rewards/margins": 30.928781509399414, "rewards/real": -1.411874771118164, "step": 2290 }, { "epoch": 0.74, "learning_rate": 4.192841057247837e-07, "logits/generated": 0.6827605962753296, "logits/real": -0.6279144883155823, "logps/generated": -748.5231323242188, "logps/real": -326.0926208496094, "loss": 0.0301, "rewards/accuracies": 0.987500011920929, "rewards/generated": -36.044044494628906, "rewards/margins": 32.811458587646484, "rewards/real": -3.232586622238159, "step": 2300 }, { "epoch": 0.74, "learning_rate": 4.186914780135119e-07, "logits/generated": 1.258171796798706, "logits/real": -0.22543036937713623, "logps/generated": -789.295654296875, "logps/real": -404.57769775390625, "loss": 0.0099, "rewards/accuracies": 0.987500011920929, "rewards/generated": -39.3532600402832, "rewards/margins": 34.534461975097656, "rewards/real": -4.8187994956970215, "step": 2310 }, { "epoch": 0.74, "learning_rate": 4.180988503022401e-07, "logits/generated": 0.8405712842941284, "logits/real": -0.3352198600769043, "logps/generated": -863.91064453125, "logps/real": -342.0094909667969, "loss": 0.1225, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -45.92886734008789, "rewards/margins": 42.29278564453125, "rewards/real": -3.636077880859375, "step": 2320 }, { "epoch": 0.75, "learning_rate": 4.1750622259096837e-07, "logits/generated": 0.5765670537948608, "logits/real": -0.5540295839309692, "logps/generated": -705.8466796875, "logps/real": -369.53131103515625, "loss": 0.0957, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -32.74860763549805, "rewards/margins": 28.87831687927246, "rewards/real": -3.8702900409698486, "step": 2330 }, { "epoch": 0.75, "learning_rate": 4.1691359487969656e-07, "logits/generated": 0.7854418158531189, "logits/real": -0.22548596560955048, "logps/generated": -680.0382080078125, "logps/real": -356.9978942871094, "loss": 0.0195, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.84836196899414, "rewards/margins": 27.478572845458984, "rewards/real": -4.369787693023682, "step": 2340 }, { "epoch": 0.75, "learning_rate": 4.163209671684248e-07, "logits/generated": 1.065263271331787, "logits/real": -0.21278850734233856, "logps/generated": -841.5086669921875, "logps/real": -358.0589599609375, "loss": 0.0514, "rewards/accuracies": 0.987500011920929, "rewards/generated": -43.41203689575195, "rewards/margins": 38.31954574584961, "rewards/real": -5.092487335205078, "step": 2350 }, { "epoch": 0.76, "learning_rate": 4.1572833945715304e-07, "logits/generated": 1.261389970779419, "logits/real": -0.06634785234928131, "logps/generated": -812.8255004882812, "logps/real": -410.88409423828125, "loss": 0.088, "rewards/accuracies": 0.987500011920929, "rewards/generated": -43.180484771728516, "rewards/margins": 37.8367919921875, "rewards/real": -5.34368896484375, "step": 2360 }, { "epoch": 0.76, "learning_rate": 4.1513571174588123e-07, "logits/generated": 1.4714341163635254, "logits/real": -0.021521415561437607, "logps/generated": -850.0011596679688, "logps/real": -387.5086364746094, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/generated": -46.83484649658203, "rewards/margins": 40.592525482177734, "rewards/real": -6.242323875427246, "step": 2370 }, { "epoch": 0.76, "learning_rate": 4.145430840346094e-07, "logits/generated": 1.3156936168670654, "logits/real": -0.018222743645310402, "logps/generated": -890.7706298828125, "logps/real": -364.0563049316406, "loss": 0.0503, "rewards/accuracies": 0.987500011920929, "rewards/generated": -51.08381652832031, "rewards/margins": 44.87675094604492, "rewards/real": -6.2070631980896, "step": 2380 }, { "epoch": 0.76, "learning_rate": 4.1395045632333766e-07, "logits/generated": 1.3237955570220947, "logits/real": -0.33364278078079224, "logps/generated": -869.8988037109375, "logps/real": -334.1073913574219, "loss": 0.0599, "rewards/accuracies": 0.987500011920929, "rewards/generated": -46.939788818359375, "rewards/margins": 45.201969146728516, "rewards/real": -1.7378181219100952, "step": 2390 }, { "epoch": 0.77, "learning_rate": 4.1335782861206585e-07, "logits/generated": 1.3701781034469604, "logits/real": -0.269438773393631, "logps/generated": -753.6387329101562, "logps/real": -360.86480712890625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -37.43694305419922, "rewards/margins": 33.881229400634766, "rewards/real": -3.5557167530059814, "step": 2400 }, { "epoch": 0.77, "learning_rate": 4.127652009007941e-07, "logits/generated": 1.4290558099746704, "logits/real": -0.466459184885025, "logps/generated": -799.3381958007812, "logps/real": -360.6759338378906, "loss": 0.0534, "rewards/accuracies": 0.987500011920929, "rewards/generated": -40.645240783691406, "rewards/margins": 38.60773468017578, "rewards/real": -2.0375008583068848, "step": 2410 }, { "epoch": 0.77, "learning_rate": 4.1217257318952233e-07, "logits/generated": 1.358341932296753, "logits/real": -0.27567583322525024, "logps/generated": -753.713623046875, "logps/real": -386.53875732421875, "loss": 0.0153, "rewards/accuracies": 0.987500011920929, "rewards/generated": -37.4615364074707, "rewards/margins": 35.6826171875, "rewards/real": -1.7789156436920166, "step": 2420 }, { "epoch": 0.78, "learning_rate": 4.115799454782505e-07, "logits/generated": 1.1428701877593994, "logits/real": -0.5059648752212524, "logps/generated": -835.0396728515625, "logps/real": -323.19036865234375, "loss": 0.0883, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -45.086402893066406, "rewards/margins": 44.14323806762695, "rewards/real": -0.9431692361831665, "step": 2430 }, { "epoch": 0.78, "learning_rate": 4.1098731776697876e-07, "logits/generated": 1.0485864877700806, "logits/real": -0.4481213688850403, "logps/generated": -812.9012451171875, "logps/real": -368.0522155761719, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/generated": -40.25575637817383, "rewards/margins": 37.742332458496094, "rewards/real": -2.5134170055389404, "step": 2440 }, { "epoch": 0.78, "learning_rate": 4.10394690055707e-07, "logits/generated": 0.8519207239151001, "logits/real": -0.6197376251220703, "logps/generated": -725.4412231445312, "logps/real": -374.92523193359375, "loss": 0.0919, "rewards/accuracies": 0.987500011920929, "rewards/generated": -35.39899826049805, "rewards/margins": 32.154273986816406, "rewards/real": -3.2447314262390137, "step": 2450 }, { "epoch": 0.79, "learning_rate": 4.098020623444352e-07, "logits/generated": 0.6339820623397827, "logits/real": -0.6408648490905762, "logps/generated": -766.2493286132812, "logps/real": -304.57000732421875, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/generated": -38.85112762451172, "rewards/margins": 36.97227096557617, "rewards/real": -1.878852128982544, "step": 2460 }, { "epoch": 0.79, "learning_rate": 4.0920943463316344e-07, "logits/generated": 0.5412781834602356, "logits/real": -0.9522072076797485, "logps/generated": -724.3004150390625, "logps/real": -409.66693115234375, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/generated": -33.71949005126953, "rewards/margins": 31.248050689697266, "rewards/real": -2.47143816947937, "step": 2470 }, { "epoch": 0.79, "learning_rate": 4.086168069218917e-07, "logits/generated": 0.5702400207519531, "logits/real": -0.8005739450454712, "logps/generated": -813.8245849609375, "logps/real": -353.59234619140625, "loss": 0.0578, "rewards/accuracies": 0.987500011920929, "rewards/generated": -42.12620162963867, "rewards/margins": 39.095558166503906, "rewards/real": -3.0306408405303955, "step": 2480 }, { "epoch": 0.8, "learning_rate": 4.0802417921061987e-07, "logits/generated": 0.12249743938446045, "logits/real": -0.9880908131599426, "logps/generated": -701.9227905273438, "logps/real": -305.2627868652344, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/generated": -32.676918029785156, "rewards/margins": 30.052724838256836, "rewards/real": -2.624189853668213, "step": 2490 }, { "epoch": 0.8, "learning_rate": 4.074315514993481e-07, "logits/generated": 0.5771058797836304, "logits/real": -0.8167764544487, "logps/generated": -770.1046142578125, "logps/real": -381.06103515625, "loss": 0.0452, "rewards/accuracies": 0.987500011920929, "rewards/generated": -39.183433532714844, "rewards/margins": 34.773338317871094, "rewards/real": -4.410101413726807, "step": 2500 }, { "epoch": 0.8, "learning_rate": 4.0683892378807635e-07, "logits/generated": 0.7594148516654968, "logits/real": -0.6675012707710266, "logps/generated": -858.6218872070312, "logps/real": -345.3905944824219, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/generated": -43.47161102294922, "rewards/margins": 40.07362365722656, "rewards/real": -3.3979930877685547, "step": 2510 }, { "epoch": 0.81, "learning_rate": 4.0624629607680454e-07, "logits/generated": 0.6793375015258789, "logits/real": -0.8097376823425293, "logps/generated": -740.8778076171875, "logps/real": -375.3699645996094, "loss": 0.0348, "rewards/accuracies": 0.987500011920929, "rewards/generated": -36.646881103515625, "rewards/margins": 33.874732971191406, "rewards/real": -2.772146701812744, "step": 2520 }, { "epoch": 0.81, "learning_rate": 4.056536683655328e-07, "logits/generated": 0.7047010660171509, "logits/real": -0.57855224609375, "logps/generated": -712.8539428710938, "logps/real": -426.24114990234375, "loss": 0.0759, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.26369857788086, "rewards/margins": 29.167776107788086, "rewards/real": -4.095925331115723, "step": 2530 }, { "epoch": 0.81, "learning_rate": 4.05061040654261e-07, "logits/generated": 1.5344918966293335, "logits/real": -0.032828450202941895, "logps/generated": -729.2464599609375, "logps/real": -380.0123291015625, "loss": 0.0481, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -34.09107208251953, "rewards/margins": 30.152652740478516, "rewards/real": -3.9384231567382812, "step": 2540 }, { "epoch": 0.82, "learning_rate": 4.044684129429892e-07, "logits/generated": 1.892467737197876, "logits/real": 0.30082520842552185, "logps/generated": -806.906005859375, "logps/real": -386.27667236328125, "loss": 0.0098, "rewards/accuracies": 0.987500011920929, "rewards/generated": -40.894954681396484, "rewards/margins": 36.069820404052734, "rewards/real": -4.825134754180908, "step": 2550 }, { "epoch": 0.82, "learning_rate": 4.038757852317174e-07, "logits/generated": 1.9323184490203857, "logits/real": 0.17705607414245605, "logps/generated": -850.033203125, "logps/real": -378.65594482421875, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/generated": -44.78071975708008, "rewards/margins": 39.56897735595703, "rewards/real": -5.211737632751465, "step": 2560 }, { "epoch": 0.82, "learning_rate": 4.032831575204456e-07, "logits/generated": 1.6935707330703735, "logits/real": 0.023059988394379616, "logps/generated": -759.7291259765625, "logps/real": -374.7304992675781, "loss": 0.1038, "rewards/accuracies": 0.987500011920929, "rewards/generated": -36.43316650390625, "rewards/margins": 32.626365661621094, "rewards/real": -3.8068041801452637, "step": 2570 }, { "epoch": 0.83, "learning_rate": 4.0269052980917383e-07, "logits/generated": 1.4341130256652832, "logits/real": -0.06971609592437744, "logps/generated": -665.3819580078125, "logps/real": -333.2209777832031, "loss": 0.0796, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.176000595092773, "rewards/margins": 27.55510902404785, "rewards/real": -2.6208901405334473, "step": 2580 }, { "epoch": 0.83, "learning_rate": 4.0209790209790207e-07, "logits/generated": 1.9503005743026733, "logits/real": 0.09237826615571976, "logps/generated": -765.220458984375, "logps/real": -364.03558349609375, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/generated": -35.612098693847656, "rewards/margins": 32.24553298950195, "rewards/real": -3.3665618896484375, "step": 2590 }, { "epoch": 0.83, "learning_rate": 4.0150527438663026e-07, "logits/generated": 0.9755905866622925, "logits/real": -0.44502443075180054, "logps/generated": -657.8660888671875, "logps/real": -296.46661376953125, "loss": 0.1094, "rewards/accuracies": 1.0, "rewards/generated": -27.2720947265625, "rewards/margins": 27.332311630249023, "rewards/real": 0.06021898239850998, "step": 2600 }, { "epoch": 0.84, "learning_rate": 4.009126466753585e-07, "logits/generated": 1.4939401149749756, "logits/real": -0.5174384117126465, "logps/generated": -657.0867309570312, "logps/real": -356.69244384765625, "loss": 0.0858, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.77162742614746, "rewards/margins": 25.447311401367188, "rewards/real": -0.3243153393268585, "step": 2610 }, { "epoch": 0.84, "learning_rate": 4.0032001896408675e-07, "logits/generated": 1.1159507036209106, "logits/real": -0.2947324216365814, "logps/generated": -627.3345947265625, "logps/real": -336.0748291015625, "loss": 0.0606, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.557340621948242, "rewards/margins": 22.41960906982422, "rewards/real": -1.1377298831939697, "step": 2620 }, { "epoch": 0.84, "learning_rate": 3.9972739125281494e-07, "logits/generated": 1.513810396194458, "logits/real": -0.5344418287277222, "logps/generated": -590.8358154296875, "logps/real": -370.3486633300781, "loss": 0.0269, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.85166358947754, "rewards/margins": 20.52071762084961, "rewards/real": -2.330944299697876, "step": 2630 }, { "epoch": 0.84, "learning_rate": 3.991347635415432e-07, "logits/generated": 1.49335777759552, "logits/real": -0.3261922001838684, "logps/generated": -704.072021484375, "logps/real": -277.5023498535156, "loss": 0.0315, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.9993896484375, "rewards/margins": 30.11093521118164, "rewards/real": -0.8884493112564087, "step": 2640 }, { "epoch": 0.85, "learning_rate": 3.985421358302714e-07, "logits/generated": 0.7591944932937622, "logits/real": -0.5074256062507629, "logps/generated": -648.0003662109375, "logps/real": -304.24810791015625, "loss": 0.0761, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -26.947917938232422, "rewards/margins": 25.95284080505371, "rewards/real": -0.995078444480896, "step": 2650 }, { "epoch": 0.85, "learning_rate": 3.979495081189996e-07, "logits/generated": 1.0833790302276611, "logits/real": -0.6860691905021667, "logps/generated": -735.9494018554688, "logps/real": -347.7568359375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -32.68233871459961, "rewards/margins": 30.6114444732666, "rewards/real": -2.0708956718444824, "step": 2660 }, { "epoch": 0.85, "learning_rate": 3.9735688040772785e-07, "logits/generated": 0.9727069139480591, "logits/real": -0.7404571771621704, "logps/generated": -681.7575073242188, "logps/real": -368.0675048828125, "loss": 0.0155, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.452762603759766, "rewards/margins": 25.981693267822266, "rewards/real": -2.47106671333313, "step": 2670 }, { "epoch": 0.86, "learning_rate": 3.967642526964561e-07, "logits/generated": 1.2547314167022705, "logits/real": -0.7258102893829346, "logps/generated": -676.3906860351562, "logps/real": -340.873779296875, "loss": 0.0195, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.694448471069336, "rewards/margins": 25.771648406982422, "rewards/real": -1.9227993488311768, "step": 2680 }, { "epoch": 0.86, "learning_rate": 3.961716249851843e-07, "logits/generated": 0.6079329252243042, "logits/real": -0.9624984860420227, "logps/generated": -601.7574462890625, "logps/real": -360.03955078125, "loss": 0.057, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -22.048744201660156, "rewards/margins": 21.088842391967773, "rewards/real": -0.9599024057388306, "step": 2690 }, { "epoch": 0.86, "learning_rate": 3.955789972739125e-07, "logits/generated": 1.1258697509765625, "logits/real": -0.8072816133499146, "logps/generated": -684.0726928710938, "logps/real": -338.45941162109375, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/generated": -28.91874122619629, "rewards/margins": 27.85943603515625, "rewards/real": -1.059303879737854, "step": 2700 }, { "epoch": 0.87, "learning_rate": 3.9498636956264076e-07, "logits/generated": 1.4965988397598267, "logits/real": -0.3411404490470886, "logps/generated": -697.1265869140625, "logps/real": -352.145751953125, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/generated": -32.76347732543945, "rewards/margins": 29.158267974853516, "rewards/real": -3.6052098274230957, "step": 2710 }, { "epoch": 0.87, "learning_rate": 3.9439374185136895e-07, "logits/generated": 1.4621516466140747, "logits/real": -0.5707725286483765, "logps/generated": -785.8665161132812, "logps/real": -319.3700256347656, "loss": 0.0298, "rewards/accuracies": 0.987500011920929, "rewards/generated": -35.61449432373047, "rewards/margins": 33.8472900390625, "rewards/real": -1.767205834388733, "step": 2720 }, { "epoch": 0.87, "learning_rate": 3.9380111414009714e-07, "logits/generated": 1.1874886751174927, "logits/real": -0.09907079488039017, "logps/generated": -687.0340576171875, "logps/real": -303.25836181640625, "loss": 0.0492, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.032939910888672, "rewards/margins": 27.3311710357666, "rewards/real": -3.7017662525177, "step": 2730 }, { "epoch": 0.88, "learning_rate": 3.932084864288254e-07, "logits/generated": 1.5363190174102783, "logits/real": -0.20464110374450684, "logps/generated": -632.426513671875, "logps/real": -320.273681640625, "loss": 0.1163, "rewards/accuracies": 1.0, "rewards/generated": -26.085735321044922, "rewards/margins": 24.259374618530273, "rewards/real": -1.8263591527938843, "step": 2740 }, { "epoch": 0.88, "learning_rate": 3.9261585871755357e-07, "logits/generated": 1.5121079683303833, "logits/real": -0.11793769896030426, "logps/generated": -633.5842895507812, "logps/real": -350.47589111328125, "loss": 0.0154, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.712158203125, "rewards/margins": 24.083269119262695, "rewards/real": -1.6288902759552002, "step": 2750 }, { "epoch": 0.88, "learning_rate": 3.920232310062818e-07, "logits/generated": 1.3446996212005615, "logits/real": -0.29524847865104675, "logps/generated": -689.9698486328125, "logps/real": -320.5229187011719, "loss": 0.0344, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -30.11871337890625, "rewards/margins": 29.273574829101562, "rewards/real": -0.8451415300369263, "step": 2760 }, { "epoch": 0.89, "learning_rate": 3.9143060329501006e-07, "logits/generated": 1.274344563484192, "logits/real": -0.36024925112724304, "logps/generated": -632.7415161132812, "logps/real": -304.83941650390625, "loss": 0.1162, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -23.04990005493164, "rewards/margins": 23.171749114990234, "rewards/real": 0.12185032665729523, "step": 2770 }, { "epoch": 0.89, "learning_rate": 3.9083797558373825e-07, "logits/generated": 1.2794438600540161, "logits/real": -0.06939432770013809, "logps/generated": -634.8363037109375, "logps/real": -288.30963134765625, "loss": 0.0578, "rewards/accuracies": 0.949999988079071, "rewards/generated": -23.818239212036133, "rewards/margins": 23.314865112304688, "rewards/real": -0.5033752918243408, "step": 2780 }, { "epoch": 0.89, "learning_rate": 3.902453478724665e-07, "logits/generated": 1.4398317337036133, "logits/real": 0.08840557187795639, "logps/generated": -682.88525390625, "logps/real": -367.47454833984375, "loss": 0.0726, "rewards/accuracies": 0.949999988079071, "rewards/generated": -27.898357391357422, "rewards/margins": 26.814105987548828, "rewards/real": -1.0842539072036743, "step": 2790 }, { "epoch": 0.9, "learning_rate": 3.8965272016119473e-07, "logits/generated": 1.6847522258758545, "logits/real": 0.17343257367610931, "logps/generated": -731.3448486328125, "logps/real": -316.51141357421875, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/generated": -30.514354705810547, "rewards/margins": 29.706554412841797, "rewards/real": -0.8077989816665649, "step": 2800 }, { "epoch": 0.9, "learning_rate": 3.890600924499229e-07, "logits/generated": 1.7153816223144531, "logits/real": -0.006909878458827734, "logps/generated": -734.4896240234375, "logps/real": -339.341064453125, "loss": 0.0236, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.376312255859375, "rewards/margins": 31.589298248291016, "rewards/real": -0.7870132923126221, "step": 2810 }, { "epoch": 0.9, "learning_rate": 3.8846746473865116e-07, "logits/generated": 1.4768227338790894, "logits/real": 0.0006786882877349854, "logps/generated": -782.7421875, "logps/real": -317.4217834472656, "loss": 0.0289, "rewards/accuracies": 0.987500011920929, "rewards/generated": -36.7784423828125, "rewards/margins": 35.54150390625, "rewards/real": -1.2369422912597656, "step": 2820 }, { "epoch": 0.91, "learning_rate": 3.878748370273794e-07, "logits/generated": 1.4829686880111694, "logits/real": -0.4692135453224182, "logps/generated": -734.4862060546875, "logps/real": -355.86480712890625, "loss": 0.0521, "rewards/accuracies": 1.0, "rewards/generated": -32.499908447265625, "rewards/margins": 31.743759155273438, "rewards/real": -0.7561527490615845, "step": 2830 }, { "epoch": 0.91, "learning_rate": 3.872822093161076e-07, "logits/generated": 1.4394299983978271, "logits/real": -0.4337243139743805, "logps/generated": -692.165771484375, "logps/real": -344.65228271484375, "loss": 0.06, "rewards/accuracies": 1.0, "rewards/generated": -28.684391021728516, "rewards/margins": 28.18329429626465, "rewards/real": -0.5010913014411926, "step": 2840 }, { "epoch": 0.91, "learning_rate": 3.8668958160483583e-07, "logits/generated": 0.8190478086471558, "logits/real": -0.667715847492218, "logps/generated": -671.9428100585938, "logps/real": -304.90570068359375, "loss": 0.0786, "rewards/accuracies": 1.0, "rewards/generated": -28.43863296508789, "rewards/margins": 29.4898624420166, "rewards/real": 1.051224946975708, "step": 2850 }, { "epoch": 0.92, "learning_rate": 3.860969538935641e-07, "logits/generated": 1.1253232955932617, "logits/real": -0.7709970474243164, "logps/generated": -688.5896606445312, "logps/real": -313.30645751953125, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/generated": -29.60751724243164, "rewards/margins": 30.093318939208984, "rewards/real": 0.48580265045166016, "step": 2860 }, { "epoch": 0.92, "learning_rate": 3.8550432618229226e-07, "logits/generated": 0.8397982716560364, "logits/real": -0.8318503499031067, "logps/generated": -710.3811645507812, "logps/real": -330.5302734375, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/generated": -31.2648983001709, "rewards/margins": 31.341577529907227, "rewards/real": 0.07668063789606094, "step": 2870 }, { "epoch": 0.92, "learning_rate": 3.849116984710205e-07, "logits/generated": 0.9829031825065613, "logits/real": -0.5523896217346191, "logps/generated": -759.4322509765625, "logps/real": -311.7576904296875, "loss": 0.0791, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.34369659423828, "rewards/margins": 34.16350555419922, "rewards/real": -0.18019168078899384, "step": 2880 }, { "epoch": 0.92, "learning_rate": 3.8431907075974875e-07, "logits/generated": 1.3364379405975342, "logits/real": -0.6124747395515442, "logps/generated": -725.3342895507812, "logps/real": -345.75811767578125, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/generated": -33.45618438720703, "rewards/margins": 31.886425018310547, "rewards/real": -1.5697633028030396, "step": 2890 }, { "epoch": 0.93, "learning_rate": 3.8372644304847694e-07, "logits/generated": 1.2526007890701294, "logits/real": -0.4890497326850891, "logps/generated": -765.4427490234375, "logps/real": -321.2254333496094, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/generated": -35.273494720458984, "rewards/margins": 34.765052795410156, "rewards/real": -0.5084399580955505, "step": 2900 }, { "epoch": 0.93, "learning_rate": 3.831338153372051e-07, "logits/generated": 1.3250693082809448, "logits/real": -0.5833691358566284, "logps/generated": -737.9568481445312, "logps/real": -363.9207763671875, "loss": 0.035, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.84966278076172, "rewards/margins": 31.976970672607422, "rewards/real": -1.8726847171783447, "step": 2910 }, { "epoch": 0.93, "learning_rate": 3.8254118762593337e-07, "logits/generated": 1.3177597522735596, "logits/real": -0.6353263258934021, "logps/generated": -697.212890625, "logps/real": -352.11224365234375, "loss": 0.0237, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.049537658691406, "rewards/margins": 31.983068466186523, "rewards/real": -0.06646408140659332, "step": 2920 }, { "epoch": 0.94, "learning_rate": 3.8194855991466156e-07, "logits/generated": 1.6161171197891235, "logits/real": -0.48282140493392944, "logps/generated": -801.6009521484375, "logps/real": -340.4782409667969, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/generated": -39.814510345458984, "rewards/margins": 39.41621780395508, "rewards/real": -0.3982974588871002, "step": 2930 }, { "epoch": 0.94, "learning_rate": 3.813559322033898e-07, "logits/generated": 1.6583149433135986, "logits/real": -0.20981892943382263, "logps/generated": -731.3494873046875, "logps/real": -325.0882873535156, "loss": 0.0484, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -35.697593688964844, "rewards/margins": 33.39555358886719, "rewards/real": -2.3020355701446533, "step": 2940 }, { "epoch": 0.94, "learning_rate": 3.8076330449211804e-07, "logits/generated": 1.1138355731964111, "logits/real": -0.4513324797153473, "logps/generated": -596.7747802734375, "logps/real": -337.89434814453125, "loss": 0.0567, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -25.63961410522461, "rewards/margins": 24.464258193969727, "rewards/real": -1.175354242324829, "step": 2950 }, { "epoch": 0.95, "learning_rate": 3.8017067678084623e-07, "logits/generated": 1.5525901317596436, "logits/real": -0.5297213196754456, "logps/generated": -758.9441528320312, "logps/real": -332.8437805175781, "loss": 0.0674, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.707679748535156, "rewards/margins": 34.92395782470703, "rewards/real": 0.21627798676490784, "step": 2960 }, { "epoch": 0.95, "learning_rate": 3.7957804906957447e-07, "logits/generated": 1.6052268743515015, "logits/real": -0.3243894875049591, "logps/generated": -763.9934692382812, "logps/real": -324.00164794921875, "loss": 0.051, "rewards/accuracies": 0.987500011920929, "rewards/generated": -35.371944427490234, "rewards/margins": 34.579681396484375, "rewards/real": -0.7922636270523071, "step": 2970 }, { "epoch": 0.95, "learning_rate": 3.789854213583027e-07, "logits/generated": 1.6722183227539062, "logits/real": -0.36099866032600403, "logps/generated": -811.2255859375, "logps/real": -328.02349853515625, "loss": 0.0464, "rewards/accuracies": 0.987500011920929, "rewards/generated": -40.00334930419922, "rewards/margins": 38.65221405029297, "rewards/real": -1.3511369228363037, "step": 2980 }, { "epoch": 0.96, "learning_rate": 3.783927936470309e-07, "logits/generated": 1.4396727085113525, "logits/real": -0.5413111448287964, "logps/generated": -665.5133666992188, "logps/real": -355.2272033691406, "loss": 0.1088, "rewards/accuracies": 0.925000011920929, "rewards/generated": -29.053966522216797, "rewards/margins": 28.401805877685547, "rewards/real": -0.6521603465080261, "step": 2990 }, { "epoch": 0.96, "learning_rate": 3.7780016593575914e-07, "logits/generated": 1.3753300905227661, "logits/real": -0.7443369030952454, "logps/generated": -716.5177001953125, "logps/real": -298.4733581542969, "loss": 0.0703, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.979698181152344, "rewards/margins": 32.879981994628906, "rewards/real": -0.09971854090690613, "step": 3000 }, { "epoch": 0.96, "learning_rate": 3.772075382244874e-07, "logits/generated": 1.416666030883789, "logits/real": -0.7260184288024902, "logps/generated": -735.27880859375, "logps/real": -341.00372314453125, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/generated": -35.005699157714844, "rewards/margins": 34.513038635253906, "rewards/real": -0.49265843629837036, "step": 3010 }, { "epoch": 0.97, "learning_rate": 3.766149105132156e-07, "logits/generated": 0.7679153680801392, "logits/real": -1.0455870628356934, "logps/generated": -735.6075439453125, "logps/real": -299.59149169921875, "loss": 0.0286, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.97742462158203, "rewards/margins": 34.56840133666992, "rewards/real": 0.5909751653671265, "step": 3020 }, { "epoch": 0.97, "learning_rate": 3.760222828019438e-07, "logits/generated": 1.5767030715942383, "logits/real": -0.9797650575637817, "logps/generated": -789.6905517578125, "logps/real": -362.8406677246094, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/generated": -38.47359085083008, "rewards/margins": 37.2587890625, "rewards/real": -1.2148017883300781, "step": 3030 }, { "epoch": 0.97, "learning_rate": 3.7542965509067206e-07, "logits/generated": 1.5456877946853638, "logits/real": -0.6443753242492676, "logps/generated": -731.9356689453125, "logps/real": -371.6806945800781, "loss": 0.044, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.98523712158203, "rewards/margins": 33.70503616333008, "rewards/real": -1.2801988124847412, "step": 3040 }, { "epoch": 0.98, "learning_rate": 3.7483702737940025e-07, "logits/generated": 2.2266459465026855, "logits/real": -0.1782468557357788, "logps/generated": -922.0955200195312, "logps/real": -302.81658935546875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/generated": -50.11591339111328, "rewards/margins": 47.3745231628418, "rewards/real": -2.741389751434326, "step": 3050 }, { "epoch": 0.98, "learning_rate": 3.742443996681285e-07, "logits/generated": 2.1318583488464355, "logits/real": -0.209198996424675, "logps/generated": -823.5095825195312, "logps/real": -371.42889404296875, "loss": 0.0581, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -43.204071044921875, "rewards/margins": 38.97028732299805, "rewards/real": -4.233780384063721, "step": 3060 }, { "epoch": 0.98, "learning_rate": 3.7365177195685673e-07, "logits/generated": 1.9918349981307983, "logits/real": -0.06721341609954834, "logps/generated": -746.8140869140625, "logps/real": -359.9891662597656, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/generated": -38.51268005371094, "rewards/margins": 33.767127990722656, "rewards/real": -4.74554443359375, "step": 3070 }, { "epoch": 0.99, "learning_rate": 3.730591442455849e-07, "logits/generated": 2.0346789360046387, "logits/real": 0.10375523567199707, "logps/generated": -743.0701904296875, "logps/real": -356.4525146484375, "loss": 0.0464, "rewards/accuracies": 1.0, "rewards/generated": -37.91783905029297, "rewards/margins": 32.76781463623047, "rewards/real": -5.150022506713867, "step": 3080 }, { "epoch": 0.99, "learning_rate": 3.724665165343131e-07, "logits/generated": 1.9749126434326172, "logits/real": 0.11264216899871826, "logps/generated": -817.4457397460938, "logps/real": -388.92205810546875, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/generated": -44.377166748046875, "rewards/margins": 40.227516174316406, "rewards/real": -4.149655818939209, "step": 3090 }, { "epoch": 0.99, "learning_rate": 3.7187388882304135e-07, "logits/generated": 2.158161163330078, "logits/real": 0.28727996349334717, "logps/generated": -889.4269409179688, "logps/real": -356.7685241699219, "loss": 0.0187, "rewards/accuracies": 0.987500011920929, "rewards/generated": -50.5318603515625, "rewards/margins": 44.726715087890625, "rewards/real": -5.80515193939209, "step": 3100 }, { "epoch": 1.0, "learning_rate": 3.7128126111176954e-07, "logits/generated": 2.0267348289489746, "logits/real": -0.09791239351034164, "logps/generated": -876.57080078125, "logps/real": -396.95074462890625, "loss": 0.0382, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -48.95111846923828, "rewards/margins": 43.48713302612305, "rewards/real": -5.463984966278076, "step": 3110 }, { "epoch": 1.0, "learning_rate": 3.706886334004978e-07, "logits/generated": 1.8139718770980835, "logits/real": -0.5260879993438721, "logps/generated": -770.6980590820312, "logps/real": -346.76629638671875, "loss": 0.0891, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -40.29029846191406, "rewards/margins": 38.30472183227539, "rewards/real": -1.9855766296386719, "step": 3120 }, { "epoch": 1.0, "learning_rate": 3.70096005689226e-07, "logits/generated": 1.0830538272857666, "logits/real": -1.0247769355773926, "logps/generated": -785.00537109375, "logps/real": -329.46380615234375, "loss": 0.0264, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -40.738319396972656, "rewards/margins": 38.62738800048828, "rewards/real": -2.1109251976013184, "step": 3130 }, { "epoch": 1.0, "learning_rate": 3.695033779779542e-07, "logits/generated": 1.399590253829956, "logits/real": -0.8435190320014954, "logps/generated": -834.1033325195312, "logps/real": -364.3261413574219, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -43.888214111328125, "rewards/margins": 41.6268310546875, "rewards/real": -2.261387348175049, "step": 3140 }, { "epoch": 1.01, "learning_rate": 3.6891075026668245e-07, "logits/generated": 1.4242956638336182, "logits/real": -0.9070215225219727, "logps/generated": -738.67138671875, "logps/real": -371.7659912109375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -35.76353073120117, "rewards/margins": 34.30432891845703, "rewards/real": -1.4592043161392212, "step": 3150 }, { "epoch": 1.01, "learning_rate": 3.683181225554107e-07, "logits/generated": 1.7003734111785889, "logits/real": -0.6616460084915161, "logps/generated": -864.3294067382812, "logps/real": -326.4926452636719, "loss": 0.0441, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -46.684593200683594, "rewards/margins": 43.968421936035156, "rewards/real": -2.7161707878112793, "step": 3160 }, { "epoch": 1.01, "learning_rate": 3.677254948441389e-07, "logits/generated": 1.5160343647003174, "logits/real": -0.3163232207298279, "logps/generated": -698.2178955078125, "logps/real": -357.4117736816406, "loss": 0.0113, "rewards/accuracies": 0.987500011920929, "rewards/generated": -35.3349723815918, "rewards/margins": 32.157203674316406, "rewards/real": -3.177769660949707, "step": 3170 }, { "epoch": 1.02, "learning_rate": 3.6713286713286713e-07, "logits/generated": 1.8610050678253174, "logits/real": -0.1374693214893341, "logps/generated": -784.779052734375, "logps/real": -410.440185546875, "loss": 0.0312, "rewards/accuracies": 0.987500011920929, "rewards/generated": -39.53495407104492, "rewards/margins": 34.9903678894043, "rewards/real": -4.54458475112915, "step": 3180 }, { "epoch": 1.02, "learning_rate": 3.6654023942159537e-07, "logits/generated": 1.525282382965088, "logits/real": -0.2642039656639099, "logps/generated": -842.4094848632812, "logps/real": -385.2251892089844, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/generated": -43.943077087402344, "rewards/margins": 39.83400344848633, "rewards/real": -4.109073638916016, "step": 3190 }, { "epoch": 1.02, "learning_rate": 3.6594761171032356e-07, "logits/generated": 1.472769021987915, "logits/real": -0.2506261169910431, "logps/generated": -892.1646728515625, "logps/real": -318.24041748046875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/generated": -48.223716735839844, "rewards/margins": 45.1031379699707, "rewards/real": -3.1205811500549316, "step": 3200 }, { "epoch": 1.03, "learning_rate": 3.653549839990518e-07, "logits/generated": 1.7654889822006226, "logits/real": -0.418379008769989, "logps/generated": -924.9537353515625, "logps/real": -380.7850341796875, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/generated": -51.7067756652832, "rewards/margins": 46.04342269897461, "rewards/real": -5.663352966308594, "step": 3210 }, { "epoch": 1.03, "learning_rate": 3.6476235628778004e-07, "logits/generated": 1.3192102909088135, "logits/real": -0.404385507106781, "logps/generated": -909.3551025390625, "logps/real": -357.1786804199219, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -50.95429229736328, "rewards/margins": 46.896427154541016, "rewards/real": -4.057864665985107, "step": 3220 }, { "epoch": 1.03, "learning_rate": 3.6416972857650823e-07, "logits/generated": 1.3335298299789429, "logits/real": -0.519232451915741, "logps/generated": -801.8142700195312, "logps/real": -407.16845703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -41.18467330932617, "rewards/margins": 36.16065216064453, "rewards/real": -5.024024963378906, "step": 3230 }, { "epoch": 1.04, "learning_rate": 3.6357710086523647e-07, "logits/generated": 1.255618691444397, "logits/real": -0.41836825013160706, "logps/generated": -905.0087890625, "logps/real": -326.54229736328125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -50.212615966796875, "rewards/margins": 46.54174041748047, "rewards/real": -3.6708786487579346, "step": 3240 }, { "epoch": 1.04, "learning_rate": 3.629844731539647e-07, "logits/generated": 1.5446131229400635, "logits/real": -0.5027315020561218, "logps/generated": -786.07666015625, "logps/real": -390.6357421875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -41.219154357910156, "rewards/margins": 36.56498718261719, "rewards/real": -4.654166221618652, "step": 3250 }, { "epoch": 1.04, "learning_rate": 3.6239184544269285e-07, "logits/generated": 1.2802150249481201, "logits/real": -0.33009278774261475, "logps/generated": -786.0957641601562, "logps/real": -380.52276611328125, "loss": 0.0229, "rewards/accuracies": 0.987500011920929, "rewards/generated": -40.477134704589844, "rewards/margins": 36.94097137451172, "rewards/real": -3.536158323287964, "step": 3260 }, { "epoch": 1.05, "learning_rate": 3.617992177314211e-07, "logits/generated": 1.5045629739761353, "logits/real": -0.4740613102912903, "logps/generated": -863.31640625, "logps/real": -358.13055419921875, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/generated": -46.59673309326172, "rewards/margins": 42.50022506713867, "rewards/real": -4.096514701843262, "step": 3270 }, { "epoch": 1.05, "learning_rate": 3.612065900201493e-07, "logits/generated": 1.1638386249542236, "logits/real": -0.5954693555831909, "logps/generated": -802.9481201171875, "logps/real": -337.4355163574219, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/generated": -39.91973876953125, "rewards/margins": 38.482093811035156, "rewards/real": -1.4376416206359863, "step": 3280 }, { "epoch": 1.05, "learning_rate": 3.606139623088775e-07, "logits/generated": 1.0161828994750977, "logits/real": -0.6421123743057251, "logps/generated": -781.8851928710938, "logps/real": -335.606689453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -40.73774337768555, "rewards/margins": 39.00830841064453, "rewards/real": -1.7294337749481201, "step": 3290 }, { "epoch": 1.06, "learning_rate": 3.6002133459760576e-07, "logits/generated": 1.5001237392425537, "logits/real": -0.3688901364803314, "logps/generated": -846.2230224609375, "logps/real": -343.529052734375, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/generated": -45.00919723510742, "rewards/margins": 42.41929244995117, "rewards/real": -2.5899059772491455, "step": 3300 }, { "epoch": 1.06, "learning_rate": 3.5942870688633395e-07, "logits/generated": 2.151215076446533, "logits/real": 0.26790112257003784, "logps/generated": -887.7449340820312, "logps/real": -350.8306884765625, "loss": 0.0405, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -49.190330505371094, "rewards/margins": 43.633689880371094, "rewards/real": -5.556635856628418, "step": 3310 }, { "epoch": 1.06, "learning_rate": 3.588360791750622e-07, "logits/generated": 1.8760957717895508, "logits/real": 0.3259205222129822, "logps/generated": -811.8831176757812, "logps/real": -371.9270324707031, "loss": 0.0171, "rewards/accuracies": 0.987500011920929, "rewards/generated": -43.311912536621094, "rewards/margins": 38.103431701660156, "rewards/real": -5.208489418029785, "step": 3320 }, { "epoch": 1.07, "learning_rate": 3.5824345146379044e-07, "logits/generated": 2.2999815940856934, "logits/real": 0.4950522780418396, "logps/generated": -886.4505004882812, "logps/real": -429.54644775390625, "loss": 0.0435, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -49.098411560058594, "rewards/margins": 40.163063049316406, "rewards/real": -8.935354232788086, "step": 3330 }, { "epoch": 1.07, "learning_rate": 3.576508237525186e-07, "logits/generated": 2.213822841644287, "logits/real": 0.05794317275285721, "logps/generated": -868.7337036132812, "logps/real": -422.2870178222656, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/generated": -47.833824157714844, "rewards/margins": 39.84661865234375, "rewards/real": -7.987205505371094, "step": 3340 }, { "epoch": 1.07, "learning_rate": 3.5705819604124687e-07, "logits/generated": 1.9691988229751587, "logits/real": -0.0420486219227314, "logps/generated": -902.9676513671875, "logps/real": -397.20849609375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/generated": -50.641273498535156, "rewards/margins": 44.71895217895508, "rewards/real": -5.922321319580078, "step": 3350 }, { "epoch": 1.08, "learning_rate": 3.564655683299751e-07, "logits/generated": 1.859004259109497, "logits/real": 0.009420597925782204, "logps/generated": -985.9837036132812, "logps/real": -397.4696350097656, "loss": 0.0063, "rewards/accuracies": 0.987500011920929, "rewards/generated": -57.76782989501953, "rewards/margins": 50.63762664794922, "rewards/real": -7.130200386047363, "step": 3360 }, { "epoch": 1.08, "learning_rate": 3.558729406187033e-07, "logits/generated": 2.1972978115081787, "logits/real": -0.18132592737674713, "logps/generated": -1021.9801025390625, "logps/real": -328.14886474609375, "loss": 0.0137, "rewards/accuracies": 0.987500011920929, "rewards/generated": -59.56706619262695, "rewards/margins": 55.32207107543945, "rewards/real": -4.244994163513184, "step": 3370 }, { "epoch": 1.08, "learning_rate": 3.5528031290743154e-07, "logits/generated": 2.036914348602295, "logits/real": -0.1863768994808197, "logps/generated": -881.8966674804688, "logps/real": -380.7073669433594, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -48.05780792236328, "rewards/margins": 43.82422637939453, "rewards/real": -4.233584403991699, "step": 3380 }, { "epoch": 1.08, "learning_rate": 3.546876851961598e-07, "logits/generated": 1.5535552501678467, "logits/real": -0.3347831964492798, "logps/generated": -863.25, "logps/real": -407.659912109375, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/generated": -49.036827087402344, "rewards/margins": 44.28205490112305, "rewards/real": -4.754773139953613, "step": 3390 }, { "epoch": 1.09, "learning_rate": 3.5409505748488797e-07, "logits/generated": 1.630731225013733, "logits/real": -0.2988981604576111, "logps/generated": -840.0236206054688, "logps/real": -379.4244689941406, "loss": 0.0041, "rewards/accuracies": 0.987500011920929, "rewards/generated": -45.604461669921875, "rewards/margins": 40.27696228027344, "rewards/real": -5.327497959136963, "step": 3400 }, { "epoch": 1.09, "learning_rate": 3.535024297736162e-07, "logits/generated": 1.6541545391082764, "logits/real": -0.15251055359840393, "logps/generated": -982.9104614257812, "logps/real": -316.5390930175781, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/generated": -56.670555114746094, "rewards/margins": 53.13850021362305, "rewards/real": -3.5320611000061035, "step": 3410 }, { "epoch": 1.09, "learning_rate": 3.5290980206234446e-07, "logits/generated": 2.097510814666748, "logits/real": 0.2182501256465912, "logps/generated": -903.2131958007812, "logps/real": -363.33587646484375, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/generated": -50.18659591674805, "rewards/margins": 45.237586975097656, "rewards/real": -4.949007034301758, "step": 3420 }, { "epoch": 1.1, "learning_rate": 3.5231717435107264e-07, "logits/generated": 1.8379976749420166, "logits/real": 0.5325809121131897, "logps/generated": -956.9562377929688, "logps/real": -335.36041259765625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/generated": -55.28343963623047, "rewards/margins": 47.789520263671875, "rewards/real": -7.493921756744385, "step": 3430 }, { "epoch": 1.1, "learning_rate": 3.5172454663980083e-07, "logits/generated": 2.3564677238464355, "logits/real": 0.6742401123046875, "logps/generated": -982.9732666015625, "logps/real": -360.76025390625, "loss": 0.0089, "rewards/accuracies": 0.987500011920929, "rewards/generated": -57.386474609375, "rewards/margins": 51.120643615722656, "rewards/real": -6.2658305168151855, "step": 3440 }, { "epoch": 1.1, "learning_rate": 3.511319189285291e-07, "logits/generated": 2.4261927604675293, "logits/real": 0.7835529446601868, "logps/generated": -946.44775390625, "logps/real": -371.6916809082031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -56.288360595703125, "rewards/margins": 47.98066711425781, "rewards/real": -8.307696342468262, "step": 3450 }, { "epoch": 1.11, "learning_rate": 3.5053929121725726e-07, "logits/generated": 2.521564483642578, "logits/real": 1.0440590381622314, "logps/generated": -937.5362548828125, "logps/real": -361.4960021972656, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -54.28572463989258, "rewards/margins": 45.64744186401367, "rewards/real": -8.638291358947754, "step": 3460 }, { "epoch": 1.11, "learning_rate": 3.499466635059855e-07, "logits/generated": 2.1938862800598145, "logits/real": 0.6073617935180664, "logps/generated": -873.291015625, "logps/real": -421.0392150878906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -49.010807037353516, "rewards/margins": 43.64905548095703, "rewards/real": -5.361753940582275, "step": 3470 }, { "epoch": 1.11, "learning_rate": 3.4935403579471375e-07, "logits/generated": 2.1016104221343994, "logits/real": 0.6839932203292847, "logps/generated": -827.5435791015625, "logps/real": -366.30169677734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -44.64368438720703, "rewards/margins": 39.09661865234375, "rewards/real": -5.547061920166016, "step": 3480 }, { "epoch": 1.12, "learning_rate": 3.4876140808344194e-07, "logits/generated": 2.2634100914001465, "logits/real": 0.384492963552475, "logps/generated": -858.8563232421875, "logps/real": -380.0473937988281, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -45.956275939941406, "rewards/margins": 40.474647521972656, "rewards/real": -5.481632709503174, "step": 3490 }, { "epoch": 1.12, "learning_rate": 3.481687803721702e-07, "logits/generated": 2.3847341537475586, "logits/real": 0.3874856233596802, "logps/generated": -764.6117553710938, "logps/real": -375.92266845703125, "loss": 0.0542, "rewards/accuracies": 0.987500011920929, "rewards/generated": -39.28316116333008, "rewards/margins": 34.778892517089844, "rewards/real": -4.504271507263184, "step": 3500 }, { "epoch": 1.12, "learning_rate": 3.475761526608984e-07, "logits/generated": 2.0984909534454346, "logits/real": 0.2309379279613495, "logps/generated": -733.2493896484375, "logps/real": -395.85394287109375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -34.38832473754883, "rewards/margins": 31.740093231201172, "rewards/real": -2.648231029510498, "step": 3510 }, { "epoch": 1.13, "learning_rate": 3.469835249496266e-07, "logits/generated": 1.9958127737045288, "logits/real": 0.1922609508037567, "logps/generated": -799.5538330078125, "logps/real": -367.2142028808594, "loss": 0.0503, "rewards/accuracies": 0.987500011920929, "rewards/generated": -40.75733184814453, "rewards/margins": 36.59238815307617, "rewards/real": -4.164941310882568, "step": 3520 }, { "epoch": 1.13, "learning_rate": 3.4639089723835485e-07, "logits/generated": 1.7854121923446655, "logits/real": 0.46532678604125977, "logps/generated": -833.0718994140625, "logps/real": -386.58343505859375, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/generated": -42.356361389160156, "rewards/margins": 37.993431091308594, "rewards/real": -4.362931251525879, "step": 3530 }, { "epoch": 1.13, "learning_rate": 3.457982695270831e-07, "logits/generated": 2.039842128753662, "logits/real": 0.41429099440574646, "logps/generated": -839.6603393554688, "logps/real": -404.8365783691406, "loss": 0.006, "rewards/accuracies": 0.987500011920929, "rewards/generated": -44.19861602783203, "rewards/margins": 39.94362258911133, "rewards/real": -4.254998207092285, "step": 3540 }, { "epoch": 1.14, "learning_rate": 3.452056418158113e-07, "logits/generated": 1.9158881902694702, "logits/real": 0.38622647523880005, "logps/generated": -815.9961547851562, "logps/real": -345.8466491699219, "loss": 0.0055, "rewards/accuracies": 0.987500011920929, "rewards/generated": -43.367496490478516, "rewards/margins": 38.660194396972656, "rewards/real": -4.707301139831543, "step": 3550 }, { "epoch": 1.14, "learning_rate": 3.446130141045395e-07, "logits/generated": 2.2518858909606934, "logits/real": 0.7111259698867798, "logps/generated": -833.4215087890625, "logps/real": -402.19537353515625, "loss": 0.0054, "rewards/accuracies": 0.987500011920929, "rewards/generated": -44.07600784301758, "rewards/margins": 38.73112869262695, "rewards/real": -5.344878196716309, "step": 3560 }, { "epoch": 1.14, "learning_rate": 3.4402038639326777e-07, "logits/generated": 2.102323293685913, "logits/real": 0.916674792766571, "logps/generated": -911.861328125, "logps/real": -367.8331604003906, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/generated": -50.8501091003418, "rewards/margins": 44.45732879638672, "rewards/real": -6.392782211303711, "step": 3570 }, { "epoch": 1.15, "learning_rate": 3.4342775868199595e-07, "logits/generated": 2.297792911529541, "logits/real": 0.6382473707199097, "logps/generated": -1008.75146484375, "logps/real": -392.03277587890625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -58.75178146362305, "rewards/margins": 51.95452880859375, "rewards/real": -6.797255516052246, "step": 3580 }, { "epoch": 1.15, "learning_rate": 3.428351309707242e-07, "logits/generated": 2.361895799636841, "logits/real": 0.598645031452179, "logps/generated": -983.6506958007812, "logps/real": -378.78961181640625, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/generated": -58.34507369995117, "rewards/margins": 51.65392303466797, "rewards/real": -6.6911516189575195, "step": 3590 }, { "epoch": 1.15, "learning_rate": 3.4224250325945244e-07, "logits/generated": 2.56547212600708, "logits/real": 0.38831624388694763, "logps/generated": -878.9915161132812, "logps/real": -406.8921813964844, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -48.78410720825195, "rewards/margins": 42.4536247253418, "rewards/real": -6.330479145050049, "step": 3600 }, { "epoch": 1.16, "learning_rate": 3.4164987554818063e-07, "logits/generated": 2.4987618923187256, "logits/real": 0.4154025912284851, "logps/generated": -838.1653442382812, "logps/real": -414.14971923828125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/generated": -44.137351989746094, "rewards/margins": 37.56629943847656, "rewards/real": -6.571053504943848, "step": 3610 }, { "epoch": 1.16, "learning_rate": 3.410572478369088e-07, "logits/generated": 1.8420965671539307, "logits/real": 0.007907414808869362, "logps/generated": -804.8074340820312, "logps/real": -371.8179016113281, "loss": 0.0338, "rewards/accuracies": 0.987500011920929, "rewards/generated": -41.64263153076172, "rewards/margins": 37.161338806152344, "rewards/real": -4.481302261352539, "step": 3620 }, { "epoch": 1.16, "learning_rate": 3.4046462012563706e-07, "logits/generated": 1.755743384361267, "logits/real": 0.14006485044956207, "logps/generated": -842.6290893554688, "logps/real": -353.6880187988281, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -44.62464141845703, "rewards/margins": 39.30078125, "rewards/real": -5.3238630294799805, "step": 3630 }, { "epoch": 1.16, "learning_rate": 3.3987199241436525e-07, "logits/generated": 2.199801445007324, "logits/real": 0.4196700155735016, "logps/generated": -938.73583984375, "logps/real": -372.1141052246094, "loss": 0.0123, "rewards/accuracies": 0.987500011920929, "rewards/generated": -54.06526565551758, "rewards/margins": 48.55379867553711, "rewards/real": -5.511466026306152, "step": 3640 }, { "epoch": 1.17, "learning_rate": 3.392793647030935e-07, "logits/generated": 2.6720054149627686, "logits/real": 0.729618489742279, "logps/generated": -789.3287963867188, "logps/real": -437.05084228515625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/generated": -40.290653228759766, "rewards/margins": 33.130977630615234, "rewards/real": -7.159679412841797, "step": 3650 }, { "epoch": 1.17, "learning_rate": 3.3868673699182173e-07, "logits/generated": 2.8542654514312744, "logits/real": 0.7255024313926697, "logps/generated": -928.93408203125, "logps/real": -410.2960510253906, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -52.122467041015625, "rewards/margins": 44.70793533325195, "rewards/real": -7.414539337158203, "step": 3660 }, { "epoch": 1.17, "learning_rate": 3.380941092805499e-07, "logits/generated": 2.3532071113586426, "logits/real": 0.6757982969284058, "logps/generated": -847.05517578125, "logps/real": -421.7748107910156, "loss": 0.0237, "rewards/accuracies": 0.987500011920929, "rewards/generated": -45.34987258911133, "rewards/margins": 38.570709228515625, "rewards/real": -6.779162406921387, "step": 3670 }, { "epoch": 1.18, "learning_rate": 3.3750148156927816e-07, "logits/generated": 2.545330286026001, "logits/real": 0.812427818775177, "logps/generated": -869.2293701171875, "logps/real": -459.91326904296875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -48.34119415283203, "rewards/margins": 38.871009826660156, "rewards/real": -9.47019100189209, "step": 3680 }, { "epoch": 1.18, "learning_rate": 3.369088538580064e-07, "logits/generated": 2.70619535446167, "logits/real": 0.9279934763908386, "logps/generated": -1001.333984375, "logps/real": -449.50537109375, "loss": 0.005, "rewards/accuracies": 0.987500011920929, "rewards/generated": -58.6970329284668, "rewards/margins": 48.146446228027344, "rewards/real": -10.550590515136719, "step": 3690 }, { "epoch": 1.18, "learning_rate": 3.363162261467346e-07, "logits/generated": 2.323228359222412, "logits/real": 0.5112559199333191, "logps/generated": -924.51708984375, "logps/real": -421.55029296875, "loss": 0.0041, "rewards/accuracies": 0.987500011920929, "rewards/generated": -54.02006149291992, "rewards/margins": 44.135807037353516, "rewards/real": -9.884255409240723, "step": 3700 }, { "epoch": 1.19, "learning_rate": 3.3572359843546283e-07, "logits/generated": 2.655287265777588, "logits/real": 1.0551230907440186, "logps/generated": -939.03125, "logps/real": -447.31439208984375, "loss": 0.0177, "rewards/accuracies": 0.987500011920929, "rewards/generated": -54.758880615234375, "rewards/margins": 43.87160873413086, "rewards/real": -10.887271881103516, "step": 3710 }, { "epoch": 1.19, "learning_rate": 3.351309707241911e-07, "logits/generated": 2.5216193199157715, "logits/real": 0.8036215901374817, "logps/generated": -979.0418090820312, "logps/real": -437.03717041015625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -57.14995193481445, "rewards/margins": 45.73650360107422, "rewards/real": -11.413439750671387, "step": 3720 }, { "epoch": 1.19, "learning_rate": 3.3453834301291927e-07, "logits/generated": 2.5467095375061035, "logits/real": 1.121552586555481, "logps/generated": -893.8170776367188, "logps/real": -416.1778869628906, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/generated": -51.46274948120117, "rewards/margins": 40.75083923339844, "rewards/real": -10.711912155151367, "step": 3730 }, { "epoch": 1.2, "learning_rate": 3.339457153016475e-07, "logits/generated": 2.41868257522583, "logits/real": 0.8248863220214844, "logps/generated": -893.6220703125, "logps/real": -382.8924255371094, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/generated": -51.593109130859375, "rewards/margins": 43.090850830078125, "rewards/real": -8.502254486083984, "step": 3740 }, { "epoch": 1.2, "learning_rate": 3.3335308759037575e-07, "logits/generated": 1.5662766695022583, "logits/real": 0.10898448526859283, "logps/generated": -749.6936645507812, "logps/real": -396.2360534667969, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/generated": -37.53596496582031, "rewards/margins": 30.580371856689453, "rewards/real": -6.955594539642334, "step": 3750 }, { "epoch": 1.2, "learning_rate": 3.3276045987910394e-07, "logits/generated": 1.6707637310028076, "logits/real": 0.6430930495262146, "logps/generated": -810.5835571289062, "logps/real": -361.64154052734375, "loss": 0.0062, "rewards/accuracies": 0.987500011920929, "rewards/generated": -40.466434478759766, "rewards/margins": 33.841209411621094, "rewards/real": -6.625218868255615, "step": 3760 }, { "epoch": 1.21, "learning_rate": 3.321678321678322e-07, "logits/generated": 2.048856019973755, "logits/real": 0.6219021677970886, "logps/generated": -761.279296875, "logps/real": -398.64923095703125, "loss": 0.0076, "rewards/accuracies": 0.987500011920929, "rewards/generated": -37.76602554321289, "rewards/margins": 29.750417709350586, "rewards/real": -8.015604972839355, "step": 3770 }, { "epoch": 1.21, "learning_rate": 3.315752044565604e-07, "logits/generated": 2.1167967319488525, "logits/real": 0.3618480861186981, "logps/generated": -771.1248779296875, "logps/real": -381.4525146484375, "loss": 0.0265, "rewards/accuracies": 0.987500011920929, "rewards/generated": -38.56813430786133, "rewards/margins": 31.915334701538086, "rewards/real": -6.652792453765869, "step": 3780 }, { "epoch": 1.21, "learning_rate": 3.309825767452886e-07, "logits/generated": 1.9735320806503296, "logits/real": 0.5173706412315369, "logps/generated": -823.0919799804688, "logps/real": -388.74090576171875, "loss": 0.04, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -41.97467803955078, "rewards/margins": 34.158851623535156, "rewards/real": -7.815822601318359, "step": 3790 }, { "epoch": 1.22, "learning_rate": 3.303899490340168e-07, "logits/generated": 1.871193289756775, "logits/real": 0.2393765151500702, "logps/generated": -826.5197143554688, "logps/real": -416.1676330566406, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/generated": -41.28820037841797, "rewards/margins": 33.3169059753418, "rewards/real": -7.971290588378906, "step": 3800 }, { "epoch": 1.22, "learning_rate": 3.29797321322745e-07, "logits/generated": 1.6222301721572876, "logits/real": 0.12104681879281998, "logps/generated": -798.4268798828125, "logps/real": -419.88128662109375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -40.26927947998047, "rewards/margins": 34.2071418762207, "rewards/real": -6.062142372131348, "step": 3810 }, { "epoch": 1.22, "learning_rate": 3.2920469361147323e-07, "logits/generated": 1.6478302478790283, "logits/real": 0.234401136636734, "logps/generated": -756.6922607421875, "logps/real": -347.2097473144531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -37.91858673095703, "rewards/margins": 32.255897521972656, "rewards/real": -5.662688732147217, "step": 3820 }, { "epoch": 1.23, "learning_rate": 3.2861206590020147e-07, "logits/generated": 1.8979486227035522, "logits/real": 0.01841040328145027, "logps/generated": -775.7962646484375, "logps/real": -377.652099609375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/generated": -39.21514892578125, "rewards/margins": 33.12729263305664, "rewards/real": -6.0878586769104, "step": 3830 }, { "epoch": 1.23, "learning_rate": 3.2801943818892966e-07, "logits/generated": 1.9201444387435913, "logits/real": -0.07562948018312454, "logps/generated": -859.4422607421875, "logps/real": -426.18402099609375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -45.821563720703125, "rewards/margins": 39.22440719604492, "rewards/real": -6.597151279449463, "step": 3840 }, { "epoch": 1.23, "learning_rate": 3.274268104776579e-07, "logits/generated": 1.7645994424819946, "logits/real": 0.46368569135665894, "logps/generated": -893.466796875, "logps/real": -345.55316162109375, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/generated": -50.000816345214844, "rewards/margins": 43.68457794189453, "rewards/real": -6.316235542297363, "step": 3850 }, { "epoch": 1.24, "learning_rate": 3.2683418276638614e-07, "logits/generated": 1.8924249410629272, "logits/real": 0.426472008228302, "logps/generated": -893.7169189453125, "logps/real": -357.8848571777344, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/generated": -48.02570343017578, "rewards/margins": 41.03560256958008, "rewards/real": -6.990099906921387, "step": 3860 }, { "epoch": 1.24, "learning_rate": 3.2624155505511433e-07, "logits/generated": 1.7478210926055908, "logits/real": 0.3808758854866028, "logps/generated": -865.9788818359375, "logps/real": -384.21490478515625, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/generated": -45.07698440551758, "rewards/margins": 39.27788543701172, "rewards/real": -5.799102783203125, "step": 3870 }, { "epoch": 1.24, "learning_rate": 3.256489273438426e-07, "logits/generated": 1.8142160177230835, "logits/real": -0.05650439113378525, "logps/generated": -852.76416015625, "logps/real": -394.3373718261719, "loss": 0.0087, "rewards/accuracies": 0.987500011920929, "rewards/generated": -46.22393035888672, "rewards/margins": 40.576942443847656, "rewards/real": -5.646985054016113, "step": 3880 }, { "epoch": 1.24, "learning_rate": 3.250562996325708e-07, "logits/generated": 1.7576414346694946, "logits/real": 0.16125845909118652, "logps/generated": -737.7971801757812, "logps/real": -355.10101318359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -37.76227569580078, "rewards/margins": 32.49078369140625, "rewards/real": -5.271491050720215, "step": 3890 }, { "epoch": 1.25, "learning_rate": 3.24463671921299e-07, "logits/generated": 2.224649429321289, "logits/real": 0.034860990941524506, "logps/generated": -835.9309692382812, "logps/real": -386.48394775390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -44.693824768066406, "rewards/margins": 40.14398956298828, "rewards/real": -4.549836158752441, "step": 3900 }, { "epoch": 1.25, "learning_rate": 3.2387104421002725e-07, "logits/generated": 2.1280009746551514, "logits/real": 0.18578016757965088, "logps/generated": -811.9386596679688, "logps/real": -364.31622314453125, "loss": 0.0056, "rewards/accuracies": 0.987500011920929, "rewards/generated": -42.4742317199707, "rewards/margins": 36.29412078857422, "rewards/real": -6.18010950088501, "step": 3910 }, { "epoch": 1.25, "learning_rate": 3.232784164987555e-07, "logits/generated": 2.216989040374756, "logits/real": -0.006085106637328863, "logps/generated": -918.9568481445312, "logps/real": -391.79437255859375, "loss": 0.032, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -52.31040573120117, "rewards/margins": 45.7618293762207, "rewards/real": -6.548575401306152, "step": 3920 }, { "epoch": 1.26, "learning_rate": 3.226857887874837e-07, "logits/generated": 2.0761938095092773, "logits/real": -0.2655293345451355, "logps/generated": -860.9503784179688, "logps/real": -412.0650939941406, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -47.962162017822266, "rewards/margins": 41.897239685058594, "rewards/real": -6.064918518066406, "step": 3930 }, { "epoch": 1.26, "learning_rate": 3.220931610762119e-07, "logits/generated": 2.1194260120391846, "logits/real": 0.04633180424571037, "logps/generated": -938.02099609375, "logps/real": -395.6116638183594, "loss": 0.0114, "rewards/accuracies": 0.987500011920929, "rewards/generated": -53.80571365356445, "rewards/margins": 46.80584716796875, "rewards/real": -6.999871253967285, "step": 3940 }, { "epoch": 1.26, "learning_rate": 3.2150053336494016e-07, "logits/generated": 1.8909871578216553, "logits/real": 0.1479884833097458, "logps/generated": -856.759765625, "logps/real": -369.7672424316406, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -46.83892059326172, "rewards/margins": 40.16466522216797, "rewards/real": -6.674252986907959, "step": 3950 }, { "epoch": 1.27, "learning_rate": 3.2090790565366835e-07, "logits/generated": 1.816232442855835, "logits/real": -0.22243690490722656, "logps/generated": -862.2125244140625, "logps/real": -355.93292236328125, "loss": 0.084, "rewards/accuracies": 0.9375, "rewards/generated": -47.1995735168457, "rewards/margins": 40.92375183105469, "rewards/real": -6.275822162628174, "step": 3960 }, { "epoch": 1.27, "learning_rate": 3.2031527794239654e-07, "logits/generated": 1.9847770929336548, "logits/real": 0.19675599038600922, "logps/generated": -775.3919677734375, "logps/real": -366.6876220703125, "loss": 0.0157, "rewards/accuracies": 0.987500011920929, "rewards/generated": -39.33246994018555, "rewards/margins": 33.03014373779297, "rewards/real": -6.302330493927002, "step": 3970 }, { "epoch": 1.27, "learning_rate": 3.197226502311248e-07, "logits/generated": 2.450059652328491, "logits/real": -0.19365069270133972, "logps/generated": -884.3902587890625, "logps/real": -445.5484313964844, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/generated": -49.08539581298828, "rewards/margins": 43.401023864746094, "rewards/real": -5.684370994567871, "step": 3980 }, { "epoch": 1.28, "learning_rate": 3.1913002251985297e-07, "logits/generated": 1.7655264139175415, "logits/real": -0.32271242141723633, "logps/generated": -781.2020263671875, "logps/real": -373.3470153808594, "loss": 0.0407, "rewards/accuracies": 0.987500011920929, "rewards/generated": -40.973079681396484, "rewards/margins": 35.65218734741211, "rewards/real": -5.320893287658691, "step": 3990 }, { "epoch": 1.28, "learning_rate": 3.185373948085812e-07, "logits/generated": 1.7735016345977783, "logits/real": -0.3135187327861786, "logps/generated": -823.1165161132812, "logps/real": -341.21514892578125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/generated": -42.40126419067383, "rewards/margins": 37.98102569580078, "rewards/real": -4.420238971710205, "step": 4000 }, { "epoch": 1.28, "learning_rate": 3.1794476709730946e-07, "logits/generated": 1.9088976383209229, "logits/real": 0.021071402356028557, "logps/generated": -997.48291015625, "logps/real": -341.7501220703125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -56.8674430847168, "rewards/margins": 51.42482376098633, "rewards/real": -5.44262170791626, "step": 4010 }, { "epoch": 1.29, "learning_rate": 3.1735213938603764e-07, "logits/generated": 1.8926769495010376, "logits/real": -0.1542275995016098, "logps/generated": -903.7127685546875, "logps/real": -340.779296875, "loss": 0.0239, "rewards/accuracies": 0.987500011920929, "rewards/generated": -49.57229232788086, "rewards/margins": 45.800968170166016, "rewards/real": -3.771319627761841, "step": 4020 }, { "epoch": 1.29, "learning_rate": 3.167595116747659e-07, "logits/generated": 1.7338800430297852, "logits/real": -0.2945229113101959, "logps/generated": -908.7874755859375, "logps/real": -348.8018493652344, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/generated": -51.78312301635742, "rewards/margins": 46.67133331298828, "rewards/real": -5.111795425415039, "step": 4030 }, { "epoch": 1.29, "learning_rate": 3.1616688396349413e-07, "logits/generated": 2.5217385292053223, "logits/real": 0.2091987580060959, "logps/generated": -880.8609619140625, "logps/real": -398.03302001953125, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/generated": -49.21094512939453, "rewards/margins": 42.1538200378418, "rewards/real": -7.057119846343994, "step": 4040 }, { "epoch": 1.3, "learning_rate": 3.155742562522223e-07, "logits/generated": 2.8754658699035645, "logits/real": 0.4876670241355896, "logps/generated": -896.7193603515625, "logps/real": -405.14263916015625, "loss": 0.018, "rewards/accuracies": 0.987500011920929, "rewards/generated": -49.94514465332031, "rewards/margins": 40.763938903808594, "rewards/real": -9.181201934814453, "step": 4050 }, { "epoch": 1.3, "learning_rate": 3.1498162854095056e-07, "logits/generated": 2.544055461883545, "logits/real": 0.5535674095153809, "logps/generated": -868.2188720703125, "logps/real": -442.862548828125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -47.983524322509766, "rewards/margins": 35.19925308227539, "rewards/real": -12.784273147583008, "step": 4060 }, { "epoch": 1.3, "learning_rate": 3.143890008296788e-07, "logits/generated": 2.4933745861053467, "logits/real": 0.6904267072677612, "logps/generated": -949.9890747070312, "logps/real": -412.2945251464844, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -55.27671432495117, "rewards/margins": 45.037681579589844, "rewards/real": -10.239030838012695, "step": 4070 }, { "epoch": 1.31, "learning_rate": 3.13796373118407e-07, "logits/generated": 2.221097469329834, "logits/real": 0.22244521975517273, "logps/generated": -948.2183837890625, "logps/real": -422.76214599609375, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/generated": -54.98572540283203, "rewards/margins": 45.12064743041992, "rewards/real": -9.865074157714844, "step": 4080 }, { "epoch": 1.31, "learning_rate": 3.1320374540713523e-07, "logits/generated": 2.7171072959899902, "logits/real": 0.5666571855545044, "logps/generated": -970.6345825195312, "logps/real": -423.0457458496094, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -57.169898986816406, "rewards/margins": 44.74327850341797, "rewards/real": -12.426626205444336, "step": 4090 }, { "epoch": 1.31, "learning_rate": 3.126111176958635e-07, "logits/generated": 2.652374744415283, "logits/real": 0.2705211639404297, "logps/generated": -896.6983642578125, "logps/real": -374.56097412109375, "loss": 0.0447, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -50.743526458740234, "rewards/margins": 43.60712432861328, "rewards/real": -7.1364030838012695, "step": 4100 }, { "epoch": 1.32, "learning_rate": 3.1201848998459166e-07, "logits/generated": 2.483314037322998, "logits/real": 0.3021407723426819, "logps/generated": -808.2822875976562, "logps/real": -410.1495666503906, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/generated": -43.35593795776367, "rewards/margins": 38.681724548339844, "rewards/real": -4.674206256866455, "step": 4110 }, { "epoch": 1.32, "learning_rate": 3.114258622733199e-07, "logits/generated": 1.7732019424438477, "logits/real": -0.21743369102478027, "logps/generated": -891.5406494140625, "logps/real": -392.1903991699219, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/generated": -47.2230224609375, "rewards/margins": 42.33424758911133, "rewards/real": -4.888775825500488, "step": 4120 }, { "epoch": 1.32, "learning_rate": 3.1083323456204815e-07, "logits/generated": 1.927710771560669, "logits/real": -0.045561954379081726, "logps/generated": -904.3961791992188, "logps/real": -359.3899841308594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -49.64174270629883, "rewards/margins": 43.88774871826172, "rewards/real": -5.753993511199951, "step": 4130 }, { "epoch": 1.32, "learning_rate": 3.1024060685077634e-07, "logits/generated": 2.218238353729248, "logits/real": 0.02407793700695038, "logps/generated": -887.27880859375, "logps/real": -433.8273010253906, "loss": 0.0059, "rewards/accuracies": 0.987500011920929, "rewards/generated": -50.3277587890625, "rewards/margins": 43.18590545654297, "rewards/real": -7.141855716705322, "step": 4140 }, { "epoch": 1.33, "learning_rate": 3.096479791395045e-07, "logits/generated": 2.322061061859131, "logits/real": 0.03750114515423775, "logps/generated": -961.4918212890625, "logps/real": -388.9190979003906, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -56.68904495239258, "rewards/margins": 49.598663330078125, "rewards/real": -7.090386867523193, "step": 4150 }, { "epoch": 1.33, "learning_rate": 3.0905535142823277e-07, "logits/generated": 1.7675060033798218, "logits/real": 0.5792126059532166, "logps/generated": -931.26806640625, "logps/real": -415.6739196777344, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -53.85729217529297, "rewards/margins": 45.11085510253906, "rewards/real": -8.74643611907959, "step": 4160 }, { "epoch": 1.33, "learning_rate": 3.0846272371696095e-07, "logits/generated": 2.3035531044006348, "logits/real": 0.19912122189998627, "logps/generated": -935.2286376953125, "logps/real": -372.4601135253906, "loss": 0.01, "rewards/accuracies": 0.987500011920929, "rewards/generated": -54.5814094543457, "rewards/margins": 46.517608642578125, "rewards/real": -8.063804626464844, "step": 4170 }, { "epoch": 1.34, "learning_rate": 3.078700960056892e-07, "logits/generated": 1.9163198471069336, "logits/real": 0.3440563678741455, "logps/generated": -896.0811767578125, "logps/real": -387.7689208984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -50.69479751586914, "rewards/margins": 42.404197692871094, "rewards/real": -8.29059886932373, "step": 4180 }, { "epoch": 1.34, "learning_rate": 3.0727746829441744e-07, "logits/generated": 2.886960029602051, "logits/real": 0.5897636413574219, "logps/generated": -871.70068359375, "logps/real": -396.7232971191406, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/generated": -49.031253814697266, "rewards/margins": 40.121009826660156, "rewards/real": -8.910244941711426, "step": 4190 }, { "epoch": 1.34, "learning_rate": 3.0668484058314563e-07, "logits/generated": 1.759080171585083, "logits/real": 0.4325089454650879, "logps/generated": -970.3538818359375, "logps/real": -353.41009521484375, "loss": 0.0364, "rewards/accuracies": 0.987500011920929, "rewards/generated": -54.57085037231445, "rewards/margins": 47.87283706665039, "rewards/real": -6.698004722595215, "step": 4200 }, { "epoch": 1.35, "learning_rate": 3.0609221287187387e-07, "logits/generated": 1.72823965549469, "logits/real": 0.41172105073928833, "logps/generated": -840.40625, "logps/real": -365.85968017578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -46.86371612548828, "rewards/margins": 40.378700256347656, "rewards/real": -6.485014915466309, "step": 4210 }, { "epoch": 1.35, "learning_rate": 3.054995851606021e-07, "logits/generated": 2.494736909866333, "logits/real": 0.6513184309005737, "logps/generated": -943.0419921875, "logps/real": -365.2388916015625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -54.51377487182617, "rewards/margins": 47.15017318725586, "rewards/real": -7.363605499267578, "step": 4220 }, { "epoch": 1.35, "learning_rate": 3.049069574493303e-07, "logits/generated": 2.206766128540039, "logits/real": 0.5627135038375854, "logps/generated": -907.9157104492188, "logps/real": -370.6760559082031, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -51.44373321533203, "rewards/margins": 44.14038848876953, "rewards/real": -7.303345680236816, "step": 4230 }, { "epoch": 1.36, "learning_rate": 3.0431432973805854e-07, "logits/generated": 2.555173397064209, "logits/real": 0.41458195447921753, "logps/generated": -791.3504638671875, "logps/real": -386.2882385253906, "loss": 0.0387, "rewards/accuracies": 0.987500011920929, "rewards/generated": -43.68230438232422, "rewards/margins": 38.028541564941406, "rewards/real": -5.653756141662598, "step": 4240 }, { "epoch": 1.36, "learning_rate": 3.037217020267868e-07, "logits/generated": 2.225285291671753, "logits/real": 0.515532374382019, "logps/generated": -836.8493041992188, "logps/real": -346.5246276855469, "loss": 0.0139, "rewards/accuracies": 0.987500011920929, "rewards/generated": -45.82666778564453, "rewards/margins": 38.41389846801758, "rewards/real": -7.4127702713012695, "step": 4250 }, { "epoch": 1.36, "learning_rate": 3.0312907431551497e-07, "logits/generated": 2.0543510913848877, "logits/real": 0.785476565361023, "logps/generated": -788.3221435546875, "logps/real": -386.2995300292969, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -42.3862190246582, "rewards/margins": 36.74425506591797, "rewards/real": -5.641963958740234, "step": 4260 }, { "epoch": 1.37, "learning_rate": 3.025364466042432e-07, "logits/generated": 2.4212594032287598, "logits/real": 0.8606536984443665, "logps/generated": -940.974609375, "logps/real": -378.3146667480469, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/generated": -54.0003776550293, "rewards/margins": 47.51909637451172, "rewards/real": -6.48128604888916, "step": 4270 }, { "epoch": 1.37, "learning_rate": 3.0194381889297146e-07, "logits/generated": 2.661616802215576, "logits/real": 0.6014574766159058, "logps/generated": -890.6624755859375, "logps/real": -369.1997985839844, "loss": 0.0148, "rewards/accuracies": 0.987500011920929, "rewards/generated": -50.58171844482422, "rewards/margins": 45.810325622558594, "rewards/real": -4.771393775939941, "step": 4280 }, { "epoch": 1.37, "learning_rate": 3.0135119118169965e-07, "logits/generated": 2.5344526767730713, "logits/real": 0.4575222134590149, "logps/generated": -839.5372924804688, "logps/real": -391.0158996582031, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/generated": -43.62260437011719, "rewards/margins": 40.09632110595703, "rewards/real": -3.5262866020202637, "step": 4290 }, { "epoch": 1.38, "learning_rate": 3.007585634704279e-07, "logits/generated": 2.60728120803833, "logits/real": 0.9728862643241882, "logps/generated": -790.5014038085938, "logps/real": -346.76055908203125, "loss": 0.0126, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -41.20235061645508, "rewards/margins": 36.835411071777344, "rewards/real": -4.366939544677734, "step": 4300 }, { "epoch": 1.38, "learning_rate": 3.0016593575915613e-07, "logits/generated": 1.9322798252105713, "logits/real": 0.4720967411994934, "logps/generated": -877.2966918945312, "logps/real": -434.5298767089844, "loss": 0.0041, "rewards/accuracies": 0.987500011920929, "rewards/generated": -47.95698165893555, "rewards/margins": 43.21381378173828, "rewards/real": -4.743161201477051, "step": 4310 }, { "epoch": 1.38, "learning_rate": 2.995733080478843e-07, "logits/generated": 2.8624980449676514, "logits/real": 0.5967206954956055, "logps/generated": -826.7452392578125, "logps/real": -389.9955139160156, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/generated": -44.31755828857422, "rewards/margins": 40.05508041381836, "rewards/real": -4.262475967407227, "step": 4320 }, { "epoch": 1.39, "learning_rate": 2.989806803366125e-07, "logits/generated": 2.60481595993042, "logits/real": 1.306947112083435, "logps/generated": -876.9793701171875, "logps/real": -359.3258361816406, "loss": 0.0101, "rewards/accuracies": 0.987500011920929, "rewards/generated": -48.423545837402344, "rewards/margins": 44.53766632080078, "rewards/real": -3.885880947113037, "step": 4330 }, { "epoch": 1.39, "learning_rate": 2.9838805262534075e-07, "logits/generated": 2.8550238609313965, "logits/real": 1.2833164930343628, "logps/generated": -824.1707153320312, "logps/real": -357.21307373046875, "loss": 0.0177, "rewards/accuracies": 0.987500011920929, "rewards/generated": -46.847599029541016, "rewards/margins": 41.7922248840332, "rewards/real": -5.055374622344971, "step": 4340 }, { "epoch": 1.39, "learning_rate": 2.9779542491406894e-07, "logits/generated": 2.838146924972534, "logits/real": 1.0210936069488525, "logps/generated": -922.9793701171875, "logps/real": -339.36236572265625, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/generated": -53.317161560058594, "rewards/margins": 47.95136260986328, "rewards/real": -5.365798473358154, "step": 4350 }, { "epoch": 1.4, "learning_rate": 2.972027972027972e-07, "logits/generated": 2.001512050628662, "logits/real": 1.1333067417144775, "logps/generated": -851.7052612304688, "logps/real": -407.76678466796875, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/generated": -47.03935241699219, "rewards/margins": 39.51409149169922, "rewards/real": -7.5252580642700195, "step": 4360 }, { "epoch": 1.4, "learning_rate": 2.966101694915254e-07, "logits/generated": 2.5771706104278564, "logits/real": 1.2043229341506958, "logps/generated": -932.60302734375, "logps/real": -404.4564514160156, "loss": 0.0726, "rewards/accuracies": 0.987500011920929, "rewards/generated": -53.90533447265625, "rewards/margins": 46.47903823852539, "rewards/real": -7.42629861831665, "step": 4370 }, { "epoch": 1.4, "learning_rate": 2.960175417802536e-07, "logits/generated": 2.218547821044922, "logits/real": 0.9215396046638489, "logps/generated": -898.2188720703125, "logps/real": -358.0943603515625, "loss": 0.0122, "rewards/accuracies": 0.987500011920929, "rewards/generated": -48.935184478759766, "rewards/margins": 44.388031005859375, "rewards/real": -4.547152996063232, "step": 4380 }, { "epoch": 1.4, "learning_rate": 2.9542491406898185e-07, "logits/generated": 2.2845664024353027, "logits/real": 0.8632771372795105, "logps/generated": -885.0338134765625, "logps/real": -379.1771545410156, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/generated": -49.426361083984375, "rewards/margins": 44.742576599121094, "rewards/real": -4.683784008026123, "step": 4390 }, { "epoch": 1.41, "learning_rate": 2.948322863577101e-07, "logits/generated": 2.0430283546447754, "logits/real": 0.31897956132888794, "logps/generated": -784.330078125, "logps/real": -374.98016357421875, "loss": 0.0268, "rewards/accuracies": 0.987500011920929, "rewards/generated": -41.49010467529297, "rewards/margins": 37.16717529296875, "rewards/real": -4.322933673858643, "step": 4400 }, { "epoch": 1.41, "learning_rate": 2.942396586464383e-07, "logits/generated": 2.0797367095947266, "logits/real": 0.5651417970657349, "logps/generated": -831.4734497070312, "logps/real": -364.90423583984375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -44.593421936035156, "rewards/margins": 40.9267692565918, "rewards/real": -3.666652202606201, "step": 4410 }, { "epoch": 1.41, "learning_rate": 2.936470309351665e-07, "logits/generated": 2.346694231033325, "logits/real": 0.7109388709068298, "logps/generated": -794.2559204101562, "logps/real": -349.0956726074219, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/generated": -42.08417892456055, "rewards/margins": 37.98200225830078, "rewards/real": -4.102175712585449, "step": 4420 }, { "epoch": 1.42, "learning_rate": 2.9305440322389477e-07, "logits/generated": 2.7935266494750977, "logits/real": 0.626765787601471, "logps/generated": -846.2515869140625, "logps/real": -372.41424560546875, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/generated": -47.647586822509766, "rewards/margins": 43.59477996826172, "rewards/real": -4.052794456481934, "step": 4430 }, { "epoch": 1.42, "learning_rate": 2.9246177551262296e-07, "logits/generated": 2.696134090423584, "logits/real": 0.9759427905082703, "logps/generated": -878.1096801757812, "logps/real": -366.1974182128906, "loss": 0.0086, "rewards/accuracies": 0.987500011920929, "rewards/generated": -49.2159538269043, "rewards/margins": 42.51264953613281, "rewards/real": -6.70331335067749, "step": 4440 }, { "epoch": 1.42, "learning_rate": 2.918691478013512e-07, "logits/generated": 2.3423283100128174, "logits/real": 0.5175257921218872, "logps/generated": -782.8167724609375, "logps/real": -340.42901611328125, "loss": 0.0269, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -41.3431396484375, "rewards/margins": 37.00103759765625, "rewards/real": -4.342096328735352, "step": 4450 }, { "epoch": 1.43, "learning_rate": 2.9127652009007944e-07, "logits/generated": 2.778712749481201, "logits/real": 0.47254347801208496, "logps/generated": -926.4280395507812, "logps/real": -344.14739990234375, "loss": 0.0356, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -50.66118240356445, "rewards/margins": 47.607582092285156, "rewards/real": -3.053595542907715, "step": 4460 }, { "epoch": 1.43, "learning_rate": 2.9068389237880763e-07, "logits/generated": 2.97314453125, "logits/real": 0.9956085085868835, "logps/generated": -832.6083984375, "logps/real": -341.3013000488281, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -46.001556396484375, "rewards/margins": 42.96271514892578, "rewards/real": -3.038844347000122, "step": 4470 }, { "epoch": 1.43, "learning_rate": 2.9009126466753587e-07, "logits/generated": 2.8147237300872803, "logits/real": 0.8581963777542114, "logps/generated": -854.1736450195312, "logps/real": -357.0933532714844, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -45.780052185058594, "rewards/margins": 42.75517654418945, "rewards/real": -3.0248818397521973, "step": 4480 }, { "epoch": 1.44, "learning_rate": 2.8949863695626406e-07, "logits/generated": 2.456997871398926, "logits/real": 0.7250876426696777, "logps/generated": -851.84814453125, "logps/real": -371.81243896484375, "loss": 0.0054, "rewards/accuracies": 0.987500011920929, "rewards/generated": -46.140907287597656, "rewards/margins": 41.824790954589844, "rewards/real": -4.316125392913818, "step": 4490 }, { "epoch": 1.44, "learning_rate": 2.889060092449923e-07, "logits/generated": 2.994431734085083, "logits/real": 1.062474012374878, "logps/generated": -885.6672973632812, "logps/real": -337.63092041015625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -50.32817840576172, "rewards/margins": 46.21593475341797, "rewards/real": -4.112240314483643, "step": 4500 }, { "epoch": 1.44, "learning_rate": 2.883133815337205e-07, "logits/generated": 2.706831455230713, "logits/real": 1.0632785558700562, "logps/generated": -996.8551635742188, "logps/real": -380.18408203125, "loss": 0.0069, "rewards/accuracies": 0.987500011920929, "rewards/generated": -56.95098114013672, "rewards/margins": 50.92827224731445, "rewards/real": -6.022718906402588, "step": 4510 }, { "epoch": 1.45, "learning_rate": 2.877207538224487e-07, "logits/generated": 3.2903552055358887, "logits/real": 1.3321194648742676, "logps/generated": -977.85205078125, "logps/real": -375.55126953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -56.94231033325195, "rewards/margins": 50.63762664794922, "rewards/real": -6.304681301116943, "step": 4520 }, { "epoch": 1.45, "learning_rate": 2.871281261111769e-07, "logits/generated": 3.114874839782715, "logits/real": 0.9941811561584473, "logps/generated": -954.8370971679688, "logps/real": -366.42022705078125, "loss": 0.0116, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -56.53069305419922, "rewards/margins": 50.442726135253906, "rewards/real": -6.0879669189453125, "step": 4530 }, { "epoch": 1.45, "learning_rate": 2.8653549839990516e-07, "logits/generated": 1.7220100164413452, "logits/real": 0.6936120986938477, "logps/generated": -793.5621948242188, "logps/real": -320.93280029296875, "loss": 0.0838, "rewards/accuracies": 1.0, "rewards/generated": -39.636451721191406, "rewards/margins": 36.73711395263672, "rewards/real": -2.8993372917175293, "step": 4540 }, { "epoch": 1.46, "learning_rate": 2.8594287068863335e-07, "logits/generated": 1.7378209829330444, "logits/real": 0.3296111226081848, "logps/generated": -746.2716674804688, "logps/real": -364.6012878417969, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/generated": -35.58428192138672, "rewards/margins": 34.59697723388672, "rewards/real": -0.9873050451278687, "step": 4550 }, { "epoch": 1.46, "learning_rate": 2.853502429773616e-07, "logits/generated": 2.4758598804473877, "logits/real": 0.46579688787460327, "logps/generated": -721.5530395507812, "logps/real": -349.1888427734375, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/generated": -35.16068649291992, "rewards/margins": 33.51179885864258, "rewards/real": -1.648890733718872, "step": 4560 }, { "epoch": 1.46, "learning_rate": 2.8475761526608984e-07, "logits/generated": 2.2155046463012695, "logits/real": 0.7471126317977905, "logps/generated": -775.1942138671875, "logps/real": -339.3021545410156, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/generated": -37.29993438720703, "rewards/margins": 34.87761688232422, "rewards/real": -2.422313690185547, "step": 4570 }, { "epoch": 1.47, "learning_rate": 2.84164987554818e-07, "logits/generated": 1.954671859741211, "logits/real": 0.5207287073135376, "logps/generated": -704.0051879882812, "logps/real": -368.79150390625, "loss": 0.0707, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -32.33496856689453, "rewards/margins": 28.184839248657227, "rewards/real": -4.1501288414001465, "step": 4580 }, { "epoch": 1.47, "learning_rate": 2.8357235984354627e-07, "logits/generated": 2.2055437564849854, "logits/real": 1.2804126739501953, "logps/generated": -764.0189819335938, "logps/real": -384.3921813964844, "loss": 0.0198, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -36.48469924926758, "rewards/margins": 32.02397918701172, "rewards/real": -4.460718631744385, "step": 4590 }, { "epoch": 1.47, "learning_rate": 2.829797321322745e-07, "logits/generated": 2.4263312816619873, "logits/real": 1.5482428073883057, "logps/generated": -848.1448974609375, "logps/real": -422.74664306640625, "loss": 0.0116, "rewards/accuracies": 0.987500011920929, "rewards/generated": -43.13447570800781, "rewards/margins": 35.92151641845703, "rewards/real": -7.212961673736572, "step": 4600 }, { "epoch": 1.48, "learning_rate": 2.823871044210027e-07, "logits/generated": 1.9039783477783203, "logits/real": 1.3764702081680298, "logps/generated": -810.2239990234375, "logps/real": -426.0375061035156, "loss": 0.0349, "rewards/accuracies": 1.0, "rewards/generated": -42.02518081665039, "rewards/margins": 35.242366790771484, "rewards/real": -6.782809257507324, "step": 4610 }, { "epoch": 1.48, "learning_rate": 2.8179447670973094e-07, "logits/generated": 2.3811886310577393, "logits/real": 1.1642677783966064, "logps/generated": -771.0386352539062, "logps/real": -392.75433349609375, "loss": 0.0066, "rewards/accuracies": 0.987500011920929, "rewards/generated": -39.267356872558594, "rewards/margins": 32.4055290222168, "rewards/real": -6.8618292808532715, "step": 4620 }, { "epoch": 1.48, "learning_rate": 2.812018489984592e-07, "logits/generated": 2.4021406173706055, "logits/real": 1.3993284702301025, "logps/generated": -901.5540771484375, "logps/real": -397.1238098144531, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -48.778297424316406, "rewards/margins": 42.13416290283203, "rewards/real": -6.644139289855957, "step": 4630 }, { "epoch": 1.48, "learning_rate": 2.8060922128718737e-07, "logits/generated": 2.892127513885498, "logits/real": 1.1835445165634155, "logps/generated": -836.6994018554688, "logps/real": -339.24517822265625, "loss": 0.0494, "rewards/accuracies": 1.0, "rewards/generated": -44.078670501708984, "rewards/margins": 38.7639045715332, "rewards/real": -5.314770221710205, "step": 4640 }, { "epoch": 1.49, "learning_rate": 2.800165935759156e-07, "logits/generated": 2.559720516204834, "logits/real": 0.9920206069946289, "logps/generated": -721.725830078125, "logps/real": -372.3648376464844, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -35.085899353027344, "rewards/margins": 31.946239471435547, "rewards/real": -3.1396594047546387, "step": 4650 }, { "epoch": 1.49, "learning_rate": 2.7942396586464385e-07, "logits/generated": 2.060070037841797, "logits/real": 0.9687484502792358, "logps/generated": -760.3370361328125, "logps/real": -395.9288635253906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -37.018699645996094, "rewards/margins": 32.35004425048828, "rewards/real": -4.6686577796936035, "step": 4660 }, { "epoch": 1.49, "learning_rate": 2.7883133815337204e-07, "logits/generated": 2.6550605297088623, "logits/real": 1.1643413305282593, "logps/generated": -787.5021362304688, "logps/real": -328.51751708984375, "loss": 0.0229, "rewards/accuracies": 0.987500011920929, "rewards/generated": -41.66749954223633, "rewards/margins": 36.329803466796875, "rewards/real": -5.337691783905029, "step": 4670 }, { "epoch": 1.5, "learning_rate": 2.7823871044210023e-07, "logits/generated": 3.0104825496673584, "logits/real": 1.215192437171936, "logps/generated": -844.298828125, "logps/real": -346.3050842285156, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -43.288108825683594, "rewards/margins": 39.522438049316406, "rewards/real": -3.7656726837158203, "step": 4680 }, { "epoch": 1.5, "learning_rate": 2.776460827308285e-07, "logits/generated": 2.614835262298584, "logits/real": 1.3394081592559814, "logps/generated": -826.7473754882812, "logps/real": -364.2169189453125, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/generated": -42.0161018371582, "rewards/margins": 37.3761100769043, "rewards/real": -4.63999080657959, "step": 4690 }, { "epoch": 1.5, "learning_rate": 2.7705345501955666e-07, "logits/generated": 2.882535696029663, "logits/real": 1.3634650707244873, "logps/generated": -787.8270263671875, "logps/real": -324.0661926269531, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/generated": -39.588706970214844, "rewards/margins": 35.58531188964844, "rewards/real": -4.003393173217773, "step": 4700 }, { "epoch": 1.51, "learning_rate": 2.764608273082849e-07, "logits/generated": 2.41412615776062, "logits/real": 0.9196175336837769, "logps/generated": -770.9818725585938, "logps/real": -370.63372802734375, "loss": 0.0058, "rewards/accuracies": 0.987500011920929, "rewards/generated": -39.07651901245117, "rewards/margins": 35.234256744384766, "rewards/real": -3.842259645462036, "step": 4710 }, { "epoch": 1.51, "learning_rate": 2.7586819959701315e-07, "logits/generated": 2.0489985942840576, "logits/real": 0.5775309801101685, "logps/generated": -863.5361328125, "logps/real": -341.0457458496094, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/generated": -46.23897933959961, "rewards/margins": 41.55213165283203, "rewards/real": -4.686847686767578, "step": 4720 }, { "epoch": 1.51, "learning_rate": 2.7527557188574134e-07, "logits/generated": 2.411741018295288, "logits/real": 0.46629491448402405, "logps/generated": -797.5379028320312, "logps/real": -374.79754638671875, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/generated": -42.71403884887695, "rewards/margins": 36.982208251953125, "rewards/real": -5.731827735900879, "step": 4730 }, { "epoch": 1.52, "learning_rate": 2.746829441744696e-07, "logits/generated": 2.5271687507629395, "logits/real": 0.4328778386116028, "logps/generated": -859.7745971679688, "logps/real": -399.00592041015625, "loss": 0.0371, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -46.51274490356445, "rewards/margins": 40.866783142089844, "rewards/real": -5.645957946777344, "step": 4740 }, { "epoch": 1.52, "learning_rate": 2.740903164631978e-07, "logits/generated": 1.8134750127792358, "logits/real": 0.6105095148086548, "logps/generated": -856.2125244140625, "logps/real": -349.9185791015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -46.258323669433594, "rewards/margins": 41.7020378112793, "rewards/real": -4.556281566619873, "step": 4750 }, { "epoch": 1.52, "learning_rate": 2.73497688751926e-07, "logits/generated": 2.384493827819824, "logits/real": 0.6340434551239014, "logps/generated": -861.9421997070312, "logps/real": -365.2822265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -46.07266616821289, "rewards/margins": 39.855445861816406, "rewards/real": -6.217224597930908, "step": 4760 }, { "epoch": 1.53, "learning_rate": 2.7290506104065425e-07, "logits/generated": 2.6208534240722656, "logits/real": 0.19273574650287628, "logps/generated": -884.6500244140625, "logps/real": -394.6060485839844, "loss": 0.0057, "rewards/accuracies": 0.987500011920929, "rewards/generated": -49.60136032104492, "rewards/margins": 43.45311737060547, "rewards/real": -6.1482391357421875, "step": 4770 }, { "epoch": 1.53, "learning_rate": 2.723124333293825e-07, "logits/generated": 2.2919468879699707, "logits/real": 0.43106716871261597, "logps/generated": -886.1224365234375, "logps/real": -372.5547790527344, "loss": 0.0165, "rewards/accuracies": 0.987500011920929, "rewards/generated": -47.8830680847168, "rewards/margins": 42.46996307373047, "rewards/real": -5.413102149963379, "step": 4780 }, { "epoch": 1.53, "learning_rate": 2.717198056181107e-07, "logits/generated": 1.85759699344635, "logits/real": 0.4433859884738922, "logps/generated": -809.621826171875, "logps/real": -347.8978271484375, "loss": 0.0598, "rewards/accuracies": 0.987500011920929, "rewards/generated": -42.997596740722656, "rewards/margins": 37.66872024536133, "rewards/real": -5.328876972198486, "step": 4790 }, { "epoch": 1.54, "learning_rate": 2.711271779068389e-07, "logits/generated": 1.4434945583343506, "logits/real": 0.22783203423023224, "logps/generated": -832.7960815429688, "logps/real": -364.6251525878906, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -42.317222595214844, "rewards/margins": 36.227169036865234, "rewards/real": -6.0900492668151855, "step": 4800 }, { "epoch": 1.54, "learning_rate": 2.7053455019556716e-07, "logits/generated": 1.6734075546264648, "logits/real": 0.511894702911377, "logps/generated": -785.6248779296875, "logps/real": -377.2621154785156, "loss": 0.0348, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -40.16128158569336, "rewards/margins": 33.68966293334961, "rewards/real": -6.47162389755249, "step": 4810 }, { "epoch": 1.54, "learning_rate": 2.6994192248429535e-07, "logits/generated": 2.0791707038879395, "logits/real": 0.5065538287162781, "logps/generated": -778.4967651367188, "logps/real": -372.39385986328125, "loss": 0.0263, "rewards/accuracies": 0.987500011920929, "rewards/generated": -39.8204460144043, "rewards/margins": 34.27782440185547, "rewards/real": -5.5426201820373535, "step": 4820 }, { "epoch": 1.55, "learning_rate": 2.693492947730236e-07, "logits/generated": 1.8600364923477173, "logits/real": 0.7488612532615662, "logps/generated": -782.809814453125, "logps/real": -328.5298767089844, "loss": 0.0187, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -39.31589126586914, "rewards/margins": 34.81025314331055, "rewards/real": -4.505640983581543, "step": 4830 }, { "epoch": 1.55, "learning_rate": 2.6875666706175184e-07, "logits/generated": 1.9832321405410767, "logits/real": 0.5802012085914612, "logps/generated": -748.4049072265625, "logps/real": -365.29498291015625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -38.709922790527344, "rewards/margins": 34.65495681762695, "rewards/real": -4.0549702644348145, "step": 4840 }, { "epoch": 1.55, "learning_rate": 2.6816403935048e-07, "logits/generated": 2.265695810317993, "logits/real": 0.7832272052764893, "logps/generated": -808.9696044921875, "logps/real": -328.1351318359375, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/generated": -40.70402908325195, "rewards/margins": 36.078895568847656, "rewards/real": -4.625133037567139, "step": 4850 }, { "epoch": 1.56, "learning_rate": 2.675714116392082e-07, "logits/generated": 2.306931972503662, "logits/real": 0.560020923614502, "logps/generated": -856.3054809570312, "logps/real": -397.19207763671875, "loss": 0.0086, "rewards/accuracies": 0.987500011920929, "rewards/generated": -44.37586212158203, "rewards/margins": 38.50695037841797, "rewards/real": -5.868910789489746, "step": 4860 }, { "epoch": 1.56, "learning_rate": 2.6697878392793646e-07, "logits/generated": 2.0997326374053955, "logits/real": 0.7822288274765015, "logps/generated": -816.8917236328125, "logps/real": -396.8566589355469, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/generated": -42.01362991333008, "rewards/margins": 37.04615020751953, "rewards/real": -4.96747350692749, "step": 4870 }, { "epoch": 1.56, "learning_rate": 2.6638615621666465e-07, "logits/generated": 2.201195478439331, "logits/real": 0.5996983647346497, "logps/generated": -790.5069580078125, "logps/real": -410.57958984375, "loss": 0.0203, "rewards/accuracies": 0.987500011920929, "rewards/generated": -40.32919692993164, "rewards/margins": 34.35948181152344, "rewards/real": -5.969719886779785, "step": 4880 }, { "epoch": 1.56, "learning_rate": 2.657935285053929e-07, "logits/generated": 2.2264912128448486, "logits/real": 0.8718013763427734, "logps/generated": -841.9896240234375, "logps/real": -379.1394348144531, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -43.1845703125, "rewards/margins": 37.3220329284668, "rewards/real": -5.862529754638672, "step": 4890 }, { "epoch": 1.57, "learning_rate": 2.6520090079412113e-07, "logits/generated": 2.2339439392089844, "logits/real": 1.0355908870697021, "logps/generated": -817.2131958007812, "logps/real": -383.4435119628906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -42.284645080566406, "rewards/margins": 36.35279083251953, "rewards/real": -5.931859970092773, "step": 4900 }, { "epoch": 1.57, "learning_rate": 2.646082730828493e-07, "logits/generated": 2.570598602294922, "logits/real": 1.2461090087890625, "logps/generated": -821.4034423828125, "logps/real": -356.2522277832031, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/generated": -43.50129318237305, "rewards/margins": 36.500614166259766, "rewards/real": -7.000680446624756, "step": 4910 }, { "epoch": 1.57, "learning_rate": 2.6401564537157756e-07, "logits/generated": 1.7966744899749756, "logits/real": 1.0802150964736938, "logps/generated": -777.5578002929688, "logps/real": -347.0182189941406, "loss": 0.0423, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -39.69795227050781, "rewards/margins": 35.00285720825195, "rewards/real": -4.695092678070068, "step": 4920 }, { "epoch": 1.58, "learning_rate": 2.634230176603058e-07, "logits/generated": 2.008174180984497, "logits/real": 0.7534879446029663, "logps/generated": -756.8148193359375, "logps/real": -359.62908935546875, "loss": 0.0446, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -37.1409912109375, "rewards/margins": 32.44538497924805, "rewards/real": -4.69560432434082, "step": 4930 }, { "epoch": 1.58, "learning_rate": 2.62830389949034e-07, "logits/generated": 2.101663112640381, "logits/real": 0.6530848741531372, "logps/generated": -819.5115356445312, "logps/real": -343.81329345703125, "loss": 0.0513, "rewards/accuracies": 1.0, "rewards/generated": -41.89363479614258, "rewards/margins": 36.44382095336914, "rewards/real": -5.449813365936279, "step": 4940 }, { "epoch": 1.58, "learning_rate": 2.6223776223776223e-07, "logits/generated": 2.007061004638672, "logits/real": 0.525862991809845, "logps/generated": -821.1007080078125, "logps/real": -413.4480895996094, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -41.377464294433594, "rewards/margins": 35.79021453857422, "rewards/real": -5.587252616882324, "step": 4950 }, { "epoch": 1.59, "learning_rate": 2.616451345264905e-07, "logits/generated": 2.005887746810913, "logits/real": 0.49662095308303833, "logps/generated": -714.2962036132812, "logps/real": -372.1548767089844, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -34.395286560058594, "rewards/margins": 30.319265365600586, "rewards/real": -4.076025485992432, "step": 4960 }, { "epoch": 1.59, "learning_rate": 2.6105250681521866e-07, "logits/generated": 2.218632221221924, "logits/real": 0.10305402427911758, "logps/generated": -749.445556640625, "logps/real": -393.5296325683594, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/generated": -36.19743347167969, "rewards/margins": 31.335338592529297, "rewards/real": -4.862092018127441, "step": 4970 }, { "epoch": 1.59, "learning_rate": 2.604598791039469e-07, "logits/generated": 1.9723600149154663, "logits/real": 0.46274954080581665, "logps/generated": -747.9332275390625, "logps/real": -446.12237548828125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -35.806602478027344, "rewards/margins": 30.62856674194336, "rewards/real": -5.178038597106934, "step": 4980 }, { "epoch": 1.6, "learning_rate": 2.5986725139267515e-07, "logits/generated": 2.192283868789673, "logits/real": 0.6728182435035706, "logps/generated": -806.6732788085938, "logps/real": -330.36962890625, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/generated": -40.09038543701172, "rewards/margins": 36.229026794433594, "rewards/real": -3.861351490020752, "step": 4990 }, { "epoch": 1.6, "learning_rate": 2.5927462368140334e-07, "logits/generated": 2.228334665298462, "logits/real": 0.18269702792167664, "logps/generated": -713.8773803710938, "logps/real": -395.35455322265625, "loss": 0.0119, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -32.97146224975586, "rewards/margins": 28.83901023864746, "rewards/real": -4.132456302642822, "step": 5000 }, { "epoch": 1.6, "learning_rate": 2.586819959701316e-07, "logits/generated": 1.5860804319381714, "logits/real": 0.3480927646160126, "logps/generated": -733.3760986328125, "logps/real": -387.63385009765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -35.62555694580078, "rewards/margins": 32.33049392700195, "rewards/real": -3.2950634956359863, "step": 5010 }, { "epoch": 1.61, "learning_rate": 2.580893682588598e-07, "logits/generated": 1.6055123805999756, "logits/real": 0.15970836579799652, "logps/generated": -839.0968017578125, "logps/real": -360.4985046386719, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/generated": -42.22630310058594, "rewards/margins": 37.99366760253906, "rewards/real": -4.232638359069824, "step": 5020 }, { "epoch": 1.61, "learning_rate": 2.57496740547588e-07, "logits/generated": 1.7855665683746338, "logits/real": 0.1675606667995453, "logps/generated": -744.6808471679688, "logps/real": -343.895751953125, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/generated": -35.100830078125, "rewards/margins": 32.6957893371582, "rewards/real": -2.405043840408325, "step": 5030 }, { "epoch": 1.61, "learning_rate": 2.569041128363162e-07, "logits/generated": 1.64817214012146, "logits/real": 0.07610142976045609, "logps/generated": -732.545654296875, "logps/real": -350.29962158203125, "loss": 0.003, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.60157012939453, "rewards/margins": 32.50680160522461, "rewards/real": -2.094759941101074, "step": 5040 }, { "epoch": 1.62, "learning_rate": 2.563114851250444e-07, "logits/generated": 1.7408607006072998, "logits/real": 0.3170376420021057, "logps/generated": -762.3482055664062, "logps/real": -335.9871826171875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -37.032527923583984, "rewards/margins": 34.986961364746094, "rewards/real": -2.045567512512207, "step": 5050 }, { "epoch": 1.62, "learning_rate": 2.5571885741377263e-07, "logits/generated": 2.073035717010498, "logits/real": 0.3458942770957947, "logps/generated": -802.7797241210938, "logps/real": -336.88470458984375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -40.494102478027344, "rewards/margins": 37.79814147949219, "rewards/real": -2.6959633827209473, "step": 5060 }, { "epoch": 1.62, "learning_rate": 2.5512622970250087e-07, "logits/generated": 2.13322114944458, "logits/real": 0.41790610551834106, "logps/generated": -866.7073974609375, "logps/real": -332.11383056640625, "loss": 0.005, "rewards/accuracies": 0.987500011920929, "rewards/generated": -43.577178955078125, "rewards/margins": 40.421714782714844, "rewards/real": -3.155463457107544, "step": 5070 }, { "epoch": 1.63, "learning_rate": 2.5453360199122906e-07, "logits/generated": 2.54278826713562, "logits/real": 0.6949285268783569, "logps/generated": -735.1064453125, "logps/real": -364.2638244628906, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/generated": -35.53596115112305, "rewards/margins": 31.807302474975586, "rewards/real": -3.7286624908447266, "step": 5080 }, { "epoch": 1.63, "learning_rate": 2.539409742799573e-07, "logits/generated": 2.21581768989563, "logits/real": 0.4537831246852875, "logps/generated": -821.2398681640625, "logps/real": -365.47125244140625, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/generated": -40.993080139160156, "rewards/margins": 36.99878692626953, "rewards/real": -3.994288921356201, "step": 5090 }, { "epoch": 1.63, "learning_rate": 2.5334834656868554e-07, "logits/generated": 2.1372594833374023, "logits/real": 0.611926257610321, "logps/generated": -754.3529052734375, "logps/real": -342.5974426269531, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/generated": -37.834938049316406, "rewards/margins": 32.91936492919922, "rewards/real": -4.915571689605713, "step": 5100 }, { "epoch": 1.64, "learning_rate": 2.5275571885741373e-07, "logits/generated": 1.6273130178451538, "logits/real": 0.826530933380127, "logps/generated": -734.1217651367188, "logps/real": -382.6690368652344, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -36.935123443603516, "rewards/margins": 33.39425277709961, "rewards/real": -3.540864944458008, "step": 5110 }, { "epoch": 1.64, "learning_rate": 2.52163091146142e-07, "logits/generated": 2.3816351890563965, "logits/real": 0.943571925163269, "logps/generated": -843.9786376953125, "logps/real": -411.3369140625, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/generated": -44.03273010253906, "rewards/margins": 38.141441345214844, "rewards/real": -5.891286849975586, "step": 5120 }, { "epoch": 1.64, "learning_rate": 2.515704634348702e-07, "logits/generated": 2.5418403148651123, "logits/real": 1.0677857398986816, "logps/generated": -848.6131591796875, "logps/real": -366.3487548828125, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/generated": -45.4301643371582, "rewards/margins": 39.80161666870117, "rewards/real": -5.628545761108398, "step": 5130 }, { "epoch": 1.64, "learning_rate": 2.509778357235984e-07, "logits/generated": 1.6727020740509033, "logits/real": 0.3576027750968933, "logps/generated": -851.21728515625, "logps/real": -326.9202880859375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/generated": -43.89760208129883, "rewards/margins": 40.6236457824707, "rewards/real": -3.273960828781128, "step": 5140 }, { "epoch": 1.65, "learning_rate": 2.5038520801232665e-07, "logits/generated": 2.0805184841156006, "logits/real": 0.1397530734539032, "logps/generated": -761.8963012695312, "logps/real": -356.8018493652344, "loss": 0.0084, "rewards/accuracies": 0.987500011920929, "rewards/generated": -38.69506072998047, "rewards/margins": 36.885677337646484, "rewards/real": -1.8093852996826172, "step": 5150 }, { "epoch": 1.65, "learning_rate": 2.497925803010549e-07, "logits/generated": 1.626690149307251, "logits/real": -0.054851166903972626, "logps/generated": -776.6763305664062, "logps/real": -346.9750061035156, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/generated": -38.96327209472656, "rewards/margins": 36.2351188659668, "rewards/real": -2.7281482219696045, "step": 5160 }, { "epoch": 1.65, "learning_rate": 2.491999525897831e-07, "logits/generated": 1.5822933912277222, "logits/real": -0.24726232886314392, "logps/generated": -781.2418212890625, "logps/real": -323.7926330566406, "loss": 0.0177, "rewards/accuracies": 0.987500011920929, "rewards/generated": -37.93315887451172, "rewards/margins": 36.57786560058594, "rewards/real": -1.3552961349487305, "step": 5170 }, { "epoch": 1.66, "learning_rate": 2.486073248785113e-07, "logits/generated": 1.8860301971435547, "logits/real": 0.6691206693649292, "logps/generated": -794.41845703125, "logps/real": -324.8614807128906, "loss": 0.0588, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -40.171295166015625, "rewards/margins": 36.86614227294922, "rewards/real": -3.3051559925079346, "step": 5180 }, { "epoch": 1.66, "learning_rate": 2.480146971672395e-07, "logits/generated": 2.0925936698913574, "logits/real": 0.9095395803451538, "logps/generated": -869.6298828125, "logps/real": -373.828369140625, "loss": 0.0081, "rewards/accuracies": 0.987500011920929, "rewards/generated": -46.91425323486328, "rewards/margins": 40.90207290649414, "rewards/real": -6.012181758880615, "step": 5190 }, { "epoch": 1.66, "learning_rate": 2.4742206945596775e-07, "logits/generated": 2.7456932067871094, "logits/real": 0.4985496401786804, "logps/generated": -859.5794067382812, "logps/real": -390.0246276855469, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -46.29896926879883, "rewards/margins": 40.370887756347656, "rewards/real": -5.92807674407959, "step": 5200 }, { "epoch": 1.67, "learning_rate": 2.46829441744696e-07, "logits/generated": 2.1732583045959473, "logits/real": 0.4838520586490631, "logps/generated": -844.9871215820312, "logps/real": -369.20745849609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -45.23528289794922, "rewards/margins": 39.98804473876953, "rewards/real": -5.247241020202637, "step": 5210 }, { "epoch": 1.67, "learning_rate": 2.462368140334242e-07, "logits/generated": 2.544534206390381, "logits/real": 0.5111418962478638, "logps/generated": -835.80712890625, "logps/real": -378.8222961425781, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -44.27240753173828, "rewards/margins": 39.94426727294922, "rewards/real": -4.3281450271606445, "step": 5220 }, { "epoch": 1.67, "learning_rate": 2.456441863221524e-07, "logits/generated": 2.4491024017333984, "logits/real": 0.6502278447151184, "logps/generated": -811.8794555664062, "logps/real": -355.411376953125, "loss": 0.0431, "rewards/accuracies": 0.987500011920929, "rewards/generated": -42.56116485595703, "rewards/margins": 37.1070671081543, "rewards/real": -5.454099655151367, "step": 5230 }, { "epoch": 1.68, "learning_rate": 2.4505155861088067e-07, "logits/generated": 2.266544818878174, "logits/real": 0.6723452806472778, "logps/generated": -750.0426025390625, "logps/real": -373.60003662109375, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/generated": -37.43736267089844, "rewards/margins": 31.519084930419922, "rewards/real": -5.918280601501465, "step": 5240 }, { "epoch": 1.68, "learning_rate": 2.4445893089960885e-07, "logits/generated": 2.5079264640808105, "logits/real": 0.6131478548049927, "logps/generated": -853.6188354492188, "logps/real": -425.047607421875, "loss": 0.0194, "rewards/accuracies": 0.987500011920929, "rewards/generated": -44.624473571777344, "rewards/margins": 39.093013763427734, "rewards/real": -5.53145170211792, "step": 5250 }, { "epoch": 1.68, "learning_rate": 2.4386630318833704e-07, "logits/generated": 2.1609644889831543, "logits/real": 0.6694498062133789, "logps/generated": -731.3389892578125, "logps/real": -364.8908386230469, "loss": 0.0027, "rewards/accuracies": 0.987500011920929, "rewards/generated": -36.30664825439453, "rewards/margins": 31.676645278930664, "rewards/real": -4.630007266998291, "step": 5260 }, { "epoch": 1.69, "learning_rate": 2.432736754770653e-07, "logits/generated": 2.7540597915649414, "logits/real": 0.8866742253303528, "logps/generated": -916.9837646484375, "logps/real": -382.40728759765625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -48.979942321777344, "rewards/margins": 43.1822509765625, "rewards/real": -5.797692775726318, "step": 5270 }, { "epoch": 1.69, "learning_rate": 2.4268104776579353e-07, "logits/generated": 1.9669885635375977, "logits/real": 0.5683959722518921, "logps/generated": -699.2192993164062, "logps/real": -381.6709289550781, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/generated": -33.899147033691406, "rewards/margins": 28.7582950592041, "rewards/real": -5.140851020812988, "step": 5280 }, { "epoch": 1.69, "learning_rate": 2.420884200545217e-07, "logits/generated": 1.9316256046295166, "logits/real": 0.5754297375679016, "logps/generated": -809.5108642578125, "logps/real": -327.8341369628906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -40.91669464111328, "rewards/margins": 37.33782196044922, "rewards/real": -3.5788674354553223, "step": 5290 }, { "epoch": 1.7, "learning_rate": 2.4149579234324996e-07, "logits/generated": 2.126364231109619, "logits/real": 0.44575291872024536, "logps/generated": -816.7424926757812, "logps/real": -332.91131591796875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/generated": -42.222469329833984, "rewards/margins": 38.331886291503906, "rewards/real": -3.890582323074341, "step": 5300 }, { "epoch": 1.7, "learning_rate": 2.409031646319782e-07, "logits/generated": 2.043090343475342, "logits/real": 0.42176419496536255, "logps/generated": -790.253662109375, "logps/real": -346.35931396484375, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/generated": -38.56426239013672, "rewards/margins": 35.83377456665039, "rewards/real": -2.7304892539978027, "step": 5310 }, { "epoch": 1.7, "learning_rate": 2.403105369207064e-07, "logits/generated": 2.138488531112671, "logits/real": 0.9766354560852051, "logps/generated": -814.6067504882812, "logps/real": -329.17364501953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -41.24921798706055, "rewards/margins": 37.05216598510742, "rewards/real": -4.197047710418701, "step": 5320 }, { "epoch": 1.71, "learning_rate": 2.3971790920943463e-07, "logits/generated": 2.2863852977752686, "logits/real": 0.5989899635314941, "logps/generated": -814.4603271484375, "logps/real": -361.1416015625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -40.803672790527344, "rewards/margins": 36.39472961425781, "rewards/real": -4.408946514129639, "step": 5330 }, { "epoch": 1.71, "learning_rate": 2.3912528149816287e-07, "logits/generated": 2.3674325942993164, "logits/real": 0.9967582821846008, "logps/generated": -805.5167846679688, "logps/real": -324.6876525878906, "loss": 0.0161, "rewards/accuracies": 0.987500011920929, "rewards/generated": -42.41180419921875, "rewards/margins": 38.46437072753906, "rewards/real": -3.9474329948425293, "step": 5340 }, { "epoch": 1.71, "learning_rate": 2.3853265378689106e-07, "logits/generated": 2.268728017807007, "logits/real": 0.6285626292228699, "logps/generated": -808.87109375, "logps/real": -392.3074035644531, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/generated": -41.41926193237305, "rewards/margins": 37.2325553894043, "rewards/real": -4.186707973480225, "step": 5350 }, { "epoch": 1.72, "learning_rate": 2.3794002607561928e-07, "logits/generated": 2.5093436241149902, "logits/real": 0.8249126672744751, "logps/generated": -838.4953002929688, "logps/real": -426.6451721191406, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/generated": -43.6255989074707, "rewards/margins": 39.069976806640625, "rewards/real": -4.555621147155762, "step": 5360 }, { "epoch": 1.72, "learning_rate": 2.373473983643475e-07, "logits/generated": 2.685079336166382, "logits/real": 1.4434565305709839, "logps/generated": -856.2312622070312, "logps/real": -302.8211669921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -47.4712028503418, "rewards/margins": 42.512794494628906, "rewards/real": -4.958414554595947, "step": 5370 }, { "epoch": 1.72, "learning_rate": 2.3675477065307573e-07, "logits/generated": 2.7511277198791504, "logits/real": 0.9180728793144226, "logps/generated": -831.4625854492188, "logps/real": -413.7120056152344, "loss": 0.0082, "rewards/accuracies": 0.987500011920929, "rewards/generated": -44.00263977050781, "rewards/margins": 38.33123016357422, "rewards/real": -5.67141056060791, "step": 5380 }, { "epoch": 1.72, "learning_rate": 2.3616214294180395e-07, "logits/generated": 3.376774549484253, "logits/real": 1.1215420961380005, "logps/generated": -803.61572265625, "logps/real": -338.5130615234375, "loss": 0.0167, "rewards/accuracies": 0.987500011920929, "rewards/generated": -41.036102294921875, "rewards/margins": 38.5570068359375, "rewards/real": -2.479102611541748, "step": 5390 }, { "epoch": 1.73, "learning_rate": 2.3556951523053216e-07, "logits/generated": 3.0865302085876465, "logits/real": 1.2384517192840576, "logps/generated": -826.81982421875, "logps/real": -316.6307067871094, "loss": 0.0232, "rewards/accuracies": 0.987500011920929, "rewards/generated": -42.875328063964844, "rewards/margins": 39.101463317871094, "rewards/real": -3.7738614082336426, "step": 5400 }, { "epoch": 1.73, "learning_rate": 2.349768875192604e-07, "logits/generated": 3.274893283843994, "logits/real": 1.1233675479888916, "logps/generated": -907.5140380859375, "logps/real": -352.7439270019531, "loss": 0.0082, "rewards/accuracies": 0.987500011920929, "rewards/generated": -50.2931022644043, "rewards/margins": 46.628196716308594, "rewards/real": -3.6648964881896973, "step": 5410 }, { "epoch": 1.73, "learning_rate": 2.3438425980798862e-07, "logits/generated": 3.189450740814209, "logits/real": 1.2092788219451904, "logps/generated": -851.10205078125, "logps/real": -388.83795166015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -45.47392654418945, "rewards/margins": 41.89067840576172, "rewards/real": -3.583237886428833, "step": 5420 }, { "epoch": 1.74, "learning_rate": 2.3379163209671684e-07, "logits/generated": 2.926393985748291, "logits/real": 1.3891630172729492, "logps/generated": -914.0875244140625, "logps/real": -353.0323791503906, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -48.88922882080078, "rewards/margins": 45.91227340698242, "rewards/real": -2.976952314376831, "step": 5430 }, { "epoch": 1.74, "learning_rate": 2.3319900438544505e-07, "logits/generated": 2.6641769409179688, "logits/real": 1.5170855522155762, "logps/generated": -818.1910400390625, "logps/real": -356.567138671875, "loss": 0.0164, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -42.017494201660156, "rewards/margins": 37.39936828613281, "rewards/real": -4.618124961853027, "step": 5440 }, { "epoch": 1.74, "learning_rate": 2.3260637667417327e-07, "logits/generated": 3.466313600540161, "logits/real": 1.1704745292663574, "logps/generated": -768.80810546875, "logps/real": -351.5523681640625, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/generated": -41.95710372924805, "rewards/margins": 36.66460418701172, "rewards/real": -5.2924981117248535, "step": 5450 }, { "epoch": 1.75, "learning_rate": 2.3201374896290148e-07, "logits/generated": 2.5862226486206055, "logits/real": 0.6317356824874878, "logps/generated": -752.8883666992188, "logps/real": -329.9690856933594, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/generated": -36.86394500732422, "rewards/margins": 34.974632263183594, "rewards/real": -1.889310598373413, "step": 5460 }, { "epoch": 1.75, "learning_rate": 2.3142112125162973e-07, "logits/generated": 2.7336578369140625, "logits/real": 0.7334250211715698, "logps/generated": -734.8052978515625, "logps/real": -357.79412841796875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/generated": -35.966670989990234, "rewards/margins": 33.897769927978516, "rewards/real": -2.068906545639038, "step": 5470 }, { "epoch": 1.75, "learning_rate": 2.3082849354035794e-07, "logits/generated": 3.1192550659179688, "logits/real": 0.8802781105041504, "logps/generated": -740.2861328125, "logps/real": -380.763427734375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -37.469337463378906, "rewards/margins": 35.78041076660156, "rewards/real": -1.6889280080795288, "step": 5480 }, { "epoch": 1.76, "learning_rate": 2.3023586582908616e-07, "logits/generated": 2.9946959018707275, "logits/real": 1.2346898317337036, "logps/generated": -813.5284423828125, "logps/real": -338.32196044921875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -41.38557815551758, "rewards/margins": 39.37213897705078, "rewards/real": -2.0134406089782715, "step": 5490 }, { "epoch": 1.76, "learning_rate": 2.296432381178144e-07, "logits/generated": 2.4427175521850586, "logits/real": 1.0994888544082642, "logps/generated": -835.2789306640625, "logps/real": -315.66058349609375, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/generated": -43.05907440185547, "rewards/margins": 40.51990509033203, "rewards/real": -2.5391736030578613, "step": 5500 }, { "epoch": 1.76, "learning_rate": 2.2905061040654261e-07, "logits/generated": 2.4791624546051025, "logits/real": 0.6451729536056519, "logps/generated": -777.4930419921875, "logps/real": -379.1453857421875, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/generated": -38.13090133666992, "rewards/margins": 36.711158752441406, "rewards/real": -1.4197418689727783, "step": 5510 }, { "epoch": 1.77, "learning_rate": 2.284579826952708e-07, "logits/generated": 2.8448033332824707, "logits/real": 0.7822138071060181, "logps/generated": -798.93115234375, "logps/real": -339.82275390625, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/generated": -40.21331024169922, "rewards/margins": 38.072349548339844, "rewards/real": -2.1409621238708496, "step": 5520 }, { "epoch": 1.77, "learning_rate": 2.2786535498399902e-07, "logits/generated": 3.2314224243164062, "logits/real": 1.2801387310028076, "logps/generated": -813.0155029296875, "logps/real": -379.53485107421875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/generated": -42.63431167602539, "rewards/margins": 39.404544830322266, "rewards/real": -3.2297680377960205, "step": 5530 }, { "epoch": 1.77, "learning_rate": 2.2727272727272726e-07, "logits/generated": 3.7130725383758545, "logits/real": 1.5691579580307007, "logps/generated": -829.2418823242188, "logps/real": -378.4855651855469, "loss": 0.0273, "rewards/accuracies": 0.987500011920929, "rewards/generated": -43.16777801513672, "rewards/margins": 38.785911560058594, "rewards/real": -4.38187313079834, "step": 5540 }, { "epoch": 1.78, "learning_rate": 2.2668009956145548e-07, "logits/generated": 3.3218631744384766, "logits/real": 2.25030517578125, "logps/generated": -856.4890747070312, "logps/real": -371.36651611328125, "loss": 0.0578, "rewards/accuracies": 0.987500011920929, "rewards/generated": -46.51011657714844, "rewards/margins": 40.797462463378906, "rewards/real": -5.712653160095215, "step": 5550 }, { "epoch": 1.78, "learning_rate": 2.260874718501837e-07, "logits/generated": 3.470602035522461, "logits/real": 1.9761238098144531, "logps/generated": -806.2717895507812, "logps/real": -401.69171142578125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/generated": -42.60056686401367, "rewards/margins": 37.1119270324707, "rewards/real": -5.488642692565918, "step": 5560 }, { "epoch": 1.78, "learning_rate": 2.2549484413891193e-07, "logits/generated": 3.0738184452056885, "logits/real": 2.1658029556274414, "logps/generated": -887.8929443359375, "logps/real": -368.98480224609375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -48.385704040527344, "rewards/margins": 43.07398223876953, "rewards/real": -5.311723709106445, "step": 5570 }, { "epoch": 1.79, "learning_rate": 2.2490221642764015e-07, "logits/generated": 3.19155216217041, "logits/real": 2.1180126667022705, "logps/generated": -903.9332885742188, "logps/real": -400.9152526855469, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/generated": -49.44337844848633, "rewards/margins": 44.32373809814453, "rewards/real": -5.119642734527588, "step": 5580 }, { "epoch": 1.79, "learning_rate": 2.2430958871636836e-07, "logits/generated": 3.5626659393310547, "logits/real": 2.015824794769287, "logps/generated": -892.6324462890625, "logps/real": -367.1024475097656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -50.268959045410156, "rewards/margins": 44.50008010864258, "rewards/real": -5.7688775062561035, "step": 5590 }, { "epoch": 1.79, "learning_rate": 2.237169610050966e-07, "logits/generated": 3.5565497875213623, "logits/real": 2.07562255859375, "logps/generated": -913.05224609375, "logps/real": -386.5362243652344, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/generated": -52.48082733154297, "rewards/margins": 46.69334030151367, "rewards/real": -5.787491321563721, "step": 5600 }, { "epoch": 1.8, "learning_rate": 2.231243332938248e-07, "logits/generated": 3.433581829071045, "logits/real": 1.7816625833511353, "logps/generated": -827.8282470703125, "logps/real": -381.58648681640625, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -45.09467315673828, "rewards/margins": 39.211341857910156, "rewards/real": -5.883331298828125, "step": 5610 }, { "epoch": 1.8, "learning_rate": 2.22531705582553e-07, "logits/generated": 3.4961745738983154, "logits/real": 2.1615192890167236, "logps/generated": -926.9774169921875, "logps/real": -351.229736328125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -54.567840576171875, "rewards/margins": 48.61988830566406, "rewards/real": -5.9479498863220215, "step": 5620 }, { "epoch": 1.8, "learning_rate": 2.2193907787128125e-07, "logits/generated": 3.8028595447540283, "logits/real": 1.9373070001602173, "logps/generated": -887.8173828125, "logps/real": -427.7560119628906, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -49.20186233520508, "rewards/margins": 42.82405471801758, "rewards/real": -6.377808094024658, "step": 5630 }, { "epoch": 1.8, "learning_rate": 2.2134645016000947e-07, "logits/generated": 3.5432114601135254, "logits/real": 2.064028739929199, "logps/generated": -952.5003662109375, "logps/real": -382.4255065917969, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/generated": -55.820465087890625, "rewards/margins": 48.72205352783203, "rewards/real": -7.09841251373291, "step": 5640 }, { "epoch": 1.81, "learning_rate": 2.2075382244873768e-07, "logits/generated": 3.588714122772217, "logits/real": 2.4020583629608154, "logps/generated": -1001.1809692382812, "logps/real": -391.5857849121094, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -59.59962844848633, "rewards/margins": 52.501380920410156, "rewards/real": -7.0982489585876465, "step": 5650 }, { "epoch": 1.81, "learning_rate": 2.2016119473746592e-07, "logits/generated": 3.438514232635498, "logits/real": 1.7384579181671143, "logps/generated": -796.9383544921875, "logps/real": -426.78216552734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -43.00736999511719, "rewards/margins": 36.70927810668945, "rewards/real": -6.298093318939209, "step": 5660 }, { "epoch": 1.81, "learning_rate": 2.1956856702619414e-07, "logits/generated": 3.048379421234131, "logits/real": 1.5780932903289795, "logps/generated": -797.85595703125, "logps/real": -373.5823669433594, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/generated": -42.487648010253906, "rewards/margins": 37.081180572509766, "rewards/real": -5.406468391418457, "step": 5670 }, { "epoch": 1.82, "learning_rate": 2.1897593931492236e-07, "logits/generated": 3.206472396850586, "logits/real": 1.849116563796997, "logps/generated": -904.5208129882812, "logps/real": -415.0508728027344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -50.7608528137207, "rewards/margins": 44.02716827392578, "rewards/real": -6.7336859703063965, "step": 5680 }, { "epoch": 1.82, "learning_rate": 2.183833116036506e-07, "logits/generated": 3.2869651317596436, "logits/real": 2.0496938228607178, "logps/generated": -878.7900390625, "logps/real": -352.3899841308594, "loss": 0.0184, "rewards/accuracies": 0.987500011920929, "rewards/generated": -48.67104721069336, "rewards/margins": 43.60902786254883, "rewards/real": -5.062024116516113, "step": 5690 }, { "epoch": 1.82, "learning_rate": 2.1779068389237879e-07, "logits/generated": 3.1695003509521484, "logits/real": 1.8025085926055908, "logps/generated": -850.7145385742188, "logps/real": -353.2436828613281, "loss": 0.0114, "rewards/accuracies": 0.987500011920929, "rewards/generated": -45.12095260620117, "rewards/margins": 40.61774444580078, "rewards/real": -4.503202438354492, "step": 5700 }, { "epoch": 1.83, "learning_rate": 2.17198056181107e-07, "logits/generated": 3.089661121368408, "logits/real": 2.070514678955078, "logps/generated": -927.8453369140625, "logps/real": -398.13427734375, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/generated": -51.43744659423828, "rewards/margins": 46.39044952392578, "rewards/real": -5.046995162963867, "step": 5710 }, { "epoch": 1.83, "learning_rate": 2.1660542846983524e-07, "logits/generated": 3.114896535873413, "logits/real": 1.9745397567749023, "logps/generated": -781.8901977539062, "logps/real": -356.8275451660156, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -42.2356071472168, "rewards/margins": 37.365867614746094, "rewards/real": -4.8697357177734375, "step": 5720 }, { "epoch": 1.83, "learning_rate": 2.1601280075856346e-07, "logits/generated": 3.693910598754883, "logits/real": 2.225154161453247, "logps/generated": -930.9620971679688, "logps/real": -383.722412109375, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/generated": -52.1117057800293, "rewards/margins": 46.126792907714844, "rewards/real": -5.984915256500244, "step": 5730 }, { "epoch": 1.84, "learning_rate": 2.1542017304729167e-07, "logits/generated": 3.4360313415527344, "logits/real": 2.171807050704956, "logps/generated": -856.5650634765625, "logps/real": -377.8895263671875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -46.9614372253418, "rewards/margins": 41.58063507080078, "rewards/real": -5.380807399749756, "step": 5740 }, { "epoch": 1.84, "learning_rate": 2.1482754533601992e-07, "logits/generated": 3.5638012886047363, "logits/real": 1.7955198287963867, "logps/generated": -908.9310302734375, "logps/real": -368.2813415527344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -50.34418487548828, "rewards/margins": 46.025291442871094, "rewards/real": -4.318894863128662, "step": 5750 }, { "epoch": 1.84, "learning_rate": 2.1423491762474813e-07, "logits/generated": 3.2547402381896973, "logits/real": 2.044173240661621, "logps/generated": -988.8541870117188, "logps/real": -356.18853759765625, "loss": 0.0124, "rewards/accuracies": 0.987500011920929, "rewards/generated": -57.65973663330078, "rewards/margins": 52.295738220214844, "rewards/real": -5.363998889923096, "step": 5760 }, { "epoch": 1.85, "learning_rate": 2.1364228991347635e-07, "logits/generated": 3.638179302215576, "logits/real": 1.7333250045776367, "logps/generated": -824.5289306640625, "logps/real": -428.39642333984375, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/generated": -43.69512176513672, "rewards/margins": 38.19672393798828, "rewards/real": -5.498401165008545, "step": 5770 }, { "epoch": 1.85, "learning_rate": 2.130496622022046e-07, "logits/generated": 3.4846606254577637, "logits/real": 1.9787170886993408, "logps/generated": -798.28369140625, "logps/real": -341.7613220214844, "loss": 0.0505, "rewards/accuracies": 0.987500011920929, "rewards/generated": -42.80746841430664, "rewards/margins": 38.155216217041016, "rewards/real": -4.652247428894043, "step": 5780 }, { "epoch": 1.85, "learning_rate": 2.1245703449093278e-07, "logits/generated": 3.268078327178955, "logits/real": 2.0589845180511475, "logps/generated": -847.8894653320312, "logps/real": -384.1971740722656, "loss": 0.0637, "rewards/accuracies": 1.0, "rewards/generated": -45.46085739135742, "rewards/margins": 39.66806411743164, "rewards/real": -5.792795181274414, "step": 5790 }, { "epoch": 1.86, "learning_rate": 2.11864406779661e-07, "logits/generated": 3.5666251182556152, "logits/real": 2.2275381088256836, "logps/generated": -968.0863037109375, "logps/real": -347.9147033691406, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/generated": -55.46030807495117, "rewards/margins": 48.82093048095703, "rewards/real": -6.639379978179932, "step": 5800 }, { "epoch": 1.86, "learning_rate": 2.1127177906838923e-07, "logits/generated": 3.158949375152588, "logits/real": 2.068129062652588, "logps/generated": -872.3759765625, "logps/real": -394.13427734375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -47.67719268798828, "rewards/margins": 41.926361083984375, "rewards/real": -5.750826835632324, "step": 5810 }, { "epoch": 1.86, "learning_rate": 2.1067915135711745e-07, "logits/generated": 3.339181900024414, "logits/real": 2.1461918354034424, "logps/generated": -872.42578125, "logps/real": -356.68890380859375, "loss": 0.0065, "rewards/accuracies": 0.987500011920929, "rewards/generated": -48.21758270263672, "rewards/margins": 41.57461929321289, "rewards/real": -6.6429643630981445, "step": 5820 }, { "epoch": 1.87, "learning_rate": 2.1008652364584567e-07, "logits/generated": 2.7967050075531006, "logits/real": 2.063382625579834, "logps/generated": -861.4765625, "logps/real": -397.9095764160156, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/generated": -47.75603103637695, "rewards/margins": 40.92485809326172, "rewards/real": -6.831167697906494, "step": 5830 }, { "epoch": 1.87, "learning_rate": 2.0949389593457388e-07, "logits/generated": 3.5979182720184326, "logits/real": 2.320913791656494, "logps/generated": -890.7649536132812, "logps/real": -396.95733642578125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -50.28728485107422, "rewards/margins": 42.32263946533203, "rewards/real": -7.964641571044922, "step": 5840 }, { "epoch": 1.87, "learning_rate": 2.0890126822330212e-07, "logits/generated": 3.5564308166503906, "logits/real": 2.129290819168091, "logps/generated": -887.2586669921875, "logps/real": -397.1784973144531, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/generated": -48.73828887939453, "rewards/margins": 40.97037887573242, "rewards/real": -7.76791524887085, "step": 5850 }, { "epoch": 1.88, "learning_rate": 2.0830864051203034e-07, "logits/generated": 3.1811208724975586, "logits/real": 2.288074016571045, "logps/generated": -960.1119995117188, "logps/real": -390.6427917480469, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/generated": -54.5243034362793, "rewards/margins": 47.607810974121094, "rewards/real": -6.916497230529785, "step": 5860 }, { "epoch": 1.88, "learning_rate": 2.0771601280075855e-07, "logits/generated": 3.8049683570861816, "logits/real": 2.2913711071014404, "logps/generated": -888.8303833007812, "logps/real": -407.41241455078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -50.86738204956055, "rewards/margins": 42.75567626953125, "rewards/real": -8.111705780029297, "step": 5870 }, { "epoch": 1.88, "learning_rate": 2.0712338508948677e-07, "logits/generated": 3.279576539993286, "logits/real": 2.593757152557373, "logps/generated": -971.8060302734375, "logps/real": -401.72796630859375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/generated": -56.71451950073242, "rewards/margins": 48.520233154296875, "rewards/real": -8.194283485412598, "step": 5880 }, { "epoch": 1.88, "learning_rate": 2.0653075737821498e-07, "logits/generated": 3.639120578765869, "logits/real": 2.5512237548828125, "logps/generated": -865.2919921875, "logps/real": -404.64776611328125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -48.085731506347656, "rewards/margins": 40.82741165161133, "rewards/real": -7.25832462310791, "step": 5890 }, { "epoch": 1.89, "learning_rate": 2.059381296669432e-07, "logits/generated": 3.123408079147339, "logits/real": 2.0407650470733643, "logps/generated": -863.9957275390625, "logps/real": -385.7313537597656, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/generated": -48.116233825683594, "rewards/margins": 40.49714660644531, "rewards/real": -7.6190900802612305, "step": 5900 }, { "epoch": 1.89, "learning_rate": 2.0534550195567144e-07, "logits/generated": 3.9081223011016846, "logits/real": 2.101438045501709, "logps/generated": -935.6234130859375, "logps/real": -417.54400634765625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/generated": -54.65632247924805, "rewards/margins": 47.10184097290039, "rewards/real": -7.554482936859131, "step": 5910 }, { "epoch": 1.89, "learning_rate": 2.0475287424439966e-07, "logits/generated": 4.165920257568359, "logits/real": 2.4171690940856934, "logps/generated": -1005.3514404296875, "logps/real": -408.0506896972656, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -59.3927116394043, "rewards/margins": 50.65937805175781, "rewards/real": -8.73333740234375, "step": 5920 }, { "epoch": 1.9, "learning_rate": 2.0416024653312787e-07, "logits/generated": 3.847409725189209, "logits/real": 2.7022252082824707, "logps/generated": -952.6790161132812, "logps/real": -365.72772216796875, "loss": 0.0046, "rewards/accuracies": 0.987500011920929, "rewards/generated": -56.093994140625, "rewards/margins": 48.033348083496094, "rewards/real": -8.06064224243164, "step": 5930 }, { "epoch": 1.9, "learning_rate": 2.0356761882185611e-07, "logits/generated": 3.6713790893554688, "logits/real": 1.9362661838531494, "logps/generated": -941.9142456054688, "logps/real": -410.051513671875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/generated": -56.05419921875, "rewards/margins": 48.42200469970703, "rewards/real": -7.632190704345703, "step": 5940 }, { "epoch": 1.9, "learning_rate": 2.0297499111058433e-07, "logits/generated": 3.6606521606445312, "logits/real": 1.7234245538711548, "logps/generated": -911.0177001953125, "logps/real": -489.6560974121094, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/generated": -52.9130973815918, "rewards/margins": 44.15934371948242, "rewards/real": -8.753759384155273, "step": 5950 }, { "epoch": 1.91, "learning_rate": 2.0238236339931255e-07, "logits/generated": 4.219864845275879, "logits/real": 2.23287034034729, "logps/generated": -1063.9403076171875, "logps/real": -397.0310974121094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -65.46912384033203, "rewards/margins": 56.39514923095703, "rewards/real": -9.073973655700684, "step": 5960 }, { "epoch": 1.91, "learning_rate": 2.0178973568804076e-07, "logits/generated": 3.6329212188720703, "logits/real": 2.1656136512756348, "logps/generated": -952.6925659179688, "logps/real": -447.70623779296875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -57.937950134277344, "rewards/margins": 49.494651794433594, "rewards/real": -8.443305015563965, "step": 5970 }, { "epoch": 1.91, "learning_rate": 2.0119710797676898e-07, "logits/generated": 3.5467257499694824, "logits/real": 2.4322285652160645, "logps/generated": -973.0812377929688, "logps/real": -360.7705993652344, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -59.72382354736328, "rewards/margins": 50.79979705810547, "rewards/real": -8.924032211303711, "step": 5980 }, { "epoch": 1.92, "learning_rate": 2.006044802654972e-07, "logits/generated": 3.495687961578369, "logits/real": 2.4661941528320312, "logps/generated": -958.0787353515625, "logps/real": -354.7956848144531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -58.43269729614258, "rewards/margins": 50.86808776855469, "rewards/real": -7.5646071434021, "step": 5990 }, { "epoch": 1.92, "learning_rate": 2.0001185255422543e-07, "logits/generated": 3.7011466026306152, "logits/real": 2.2297897338867188, "logps/generated": -946.3480224609375, "logps/real": -385.870361328125, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/generated": -57.50101852416992, "rewards/margins": 50.18718338012695, "rewards/real": -7.313836574554443, "step": 6000 }, { "epoch": 1.92, "learning_rate": 1.9941922484295365e-07, "logits/generated": 3.404715061187744, "logits/real": 2.819580554962158, "logps/generated": -985.5870361328125, "logps/real": -360.0356140136719, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -59.78424072265625, "rewards/margins": 52.910499572753906, "rewards/real": -6.873734951019287, "step": 6010 }, { "epoch": 1.93, "learning_rate": 1.9882659713168186e-07, "logits/generated": 3.6169943809509277, "logits/real": 2.3183822631835938, "logps/generated": -869.0330810546875, "logps/real": -422.5990295410156, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -51.07073211669922, "rewards/margins": 43.21461486816406, "rewards/real": -7.856122016906738, "step": 6020 }, { "epoch": 1.93, "learning_rate": 1.982339694204101e-07, "logits/generated": 3.7115769386291504, "logits/real": 1.8723506927490234, "logps/generated": -1036.148681640625, "logps/real": -418.6692810058594, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/generated": -63.597801208496094, "rewards/margins": 56.10791015625, "rewards/real": -7.48989725112915, "step": 6030 }, { "epoch": 1.93, "learning_rate": 1.9764134170913832e-07, "logits/generated": 3.66517972946167, "logits/real": 1.5840797424316406, "logps/generated": -1040.49267578125, "logps/real": -388.66314697265625, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/generated": -63.50476837158203, "rewards/margins": 57.59077072143555, "rewards/real": -5.913995265960693, "step": 6040 }, { "epoch": 1.94, "learning_rate": 1.9704871399786654e-07, "logits/generated": 4.248284816741943, "logits/real": 2.633690357208252, "logps/generated": -1084.5806884765625, "logps/real": -372.9784240722656, "loss": 0.0316, "rewards/accuracies": 0.987500011920929, "rewards/generated": -67.48811340332031, "rewards/margins": 59.094947814941406, "rewards/real": -8.393167495727539, "step": 6050 }, { "epoch": 1.94, "learning_rate": 1.9645608628659475e-07, "logits/generated": 3.789734363555908, "logits/real": 2.168854236602783, "logps/generated": -1045.887451171875, "logps/real": -426.54022216796875, "loss": 0.0474, "rewards/accuracies": 0.987500011920929, "rewards/generated": -63.383766174316406, "rewards/margins": 56.191429138183594, "rewards/real": -7.19232702255249, "step": 6060 }, { "epoch": 1.94, "learning_rate": 1.9586345857532297e-07, "logits/generated": 3.780759334564209, "logits/real": 2.035515308380127, "logps/generated": -850.5437622070312, "logps/real": -390.14569091796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -47.13395690917969, "rewards/margins": 41.21845245361328, "rewards/real": -5.915501117706299, "step": 6070 }, { "epoch": 1.95, "learning_rate": 1.9527083086405118e-07, "logits/generated": 3.0783743858337402, "logits/real": 1.924883246421814, "logps/generated": -857.7428588867188, "logps/real": -336.7803649902344, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/generated": -47.51791000366211, "rewards/margins": 42.42029571533203, "rewards/real": -5.097611427307129, "step": 6080 }, { "epoch": 1.95, "learning_rate": 1.9467820315277943e-07, "logits/generated": 3.5931499004364014, "logits/real": 1.9940860271453857, "logps/generated": -846.85205078125, "logps/real": -376.54876708984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -46.58185577392578, "rewards/margins": 40.98250198364258, "rewards/real": -5.599356651306152, "step": 6090 }, { "epoch": 1.95, "learning_rate": 1.9408557544150764e-07, "logits/generated": 3.1684060096740723, "logits/real": 1.8693767786026, "logps/generated": -842.78271484375, "logps/real": -347.5132751464844, "loss": 0.0186, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -47.030582427978516, "rewards/margins": 42.137516021728516, "rewards/real": -4.893065452575684, "step": 6100 }, { "epoch": 1.96, "learning_rate": 1.9349294773023586e-07, "logits/generated": 3.25065279006958, "logits/real": 1.6499922275543213, "logps/generated": -870.6170043945312, "logps/real": -388.0818786621094, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -48.79248046875, "rewards/margins": 43.6743049621582, "rewards/real": -5.118174076080322, "step": 6110 }, { "epoch": 1.96, "learning_rate": 1.929003200189641e-07, "logits/generated": 3.3154430389404297, "logits/real": 1.2816754579544067, "logps/generated": -726.7086181640625, "logps/real": -455.2189025878906, "loss": 0.0198, "rewards/accuracies": 0.987500011920929, "rewards/generated": -38.447166442871094, "rewards/margins": 31.914520263671875, "rewards/real": -6.532646179199219, "step": 6120 }, { "epoch": 1.96, "learning_rate": 1.9230769230769231e-07, "logits/generated": 3.1148016452789307, "logits/real": 1.5517610311508179, "logps/generated": -815.5079956054688, "logps/real": -337.00311279296875, "loss": 0.0109, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -43.745155334472656, "rewards/margins": 39.53904342651367, "rewards/real": -4.206110000610352, "step": 6130 }, { "epoch": 1.96, "learning_rate": 1.9171506459642053e-07, "logits/generated": 3.7183945178985596, "logits/real": 1.6947906017303467, "logps/generated": -754.2486572265625, "logps/real": -365.54620361328125, "loss": 0.0406, "rewards/accuracies": 0.987500011920929, "rewards/generated": -40.77758026123047, "rewards/margins": 35.4954948425293, "rewards/real": -5.2820868492126465, "step": 6140 }, { "epoch": 1.97, "learning_rate": 1.9112243688514872e-07, "logits/generated": 3.4877898693084717, "logits/real": 2.0037782192230225, "logps/generated": -820.0745239257812, "logps/real": -384.6826171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -45.665775299072266, "rewards/margins": 40.5218391418457, "rewards/real": -5.143938064575195, "step": 6150 }, { "epoch": 1.97, "learning_rate": 1.9052980917387696e-07, "logits/generated": 3.5603244304656982, "logits/real": 2.0485646724700928, "logps/generated": -848.9260864257812, "logps/real": -382.2392272949219, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -46.82533645629883, "rewards/margins": 41.54307556152344, "rewards/real": -5.282263278961182, "step": 6160 }, { "epoch": 1.97, "learning_rate": 1.8993718146260517e-07, "logits/generated": 3.8129520416259766, "logits/real": 1.7690954208374023, "logps/generated": -838.8488159179688, "logps/real": -382.4895324707031, "loss": 0.0386, "rewards/accuracies": 0.987500011920929, "rewards/generated": -44.65959930419922, "rewards/margins": 39.390281677246094, "rewards/real": -5.269317626953125, "step": 6170 }, { "epoch": 1.98, "learning_rate": 1.893445537513334e-07, "logits/generated": 3.376861572265625, "logits/real": 1.9085153341293335, "logps/generated": -936.9320068359375, "logps/real": -353.51983642578125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -53.39668655395508, "rewards/margins": 48.76415252685547, "rewards/real": -4.632534503936768, "step": 6180 }, { "epoch": 1.98, "learning_rate": 1.8875192604006163e-07, "logits/generated": 3.5113492012023926, "logits/real": 1.8916263580322266, "logps/generated": -856.7068481445312, "logps/real": -403.841552734375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -45.738426208496094, "rewards/margins": 40.86699676513672, "rewards/real": -4.871423244476318, "step": 6190 }, { "epoch": 1.98, "learning_rate": 1.8815929832878985e-07, "logits/generated": 3.886976718902588, "logits/real": 1.7546895742416382, "logps/generated": -941.3308715820312, "logps/real": -387.34759521484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -52.989166259765625, "rewards/margins": 47.38789367675781, "rewards/real": -5.601273536682129, "step": 6200 }, { "epoch": 1.99, "learning_rate": 1.8756667061751806e-07, "logits/generated": 3.5155677795410156, "logits/real": 1.8104327917099, "logps/generated": -875.697265625, "logps/real": -374.0039367675781, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/generated": -47.55449295043945, "rewards/margins": 43.92311477661133, "rewards/real": -3.6313769817352295, "step": 6210 }, { "epoch": 1.99, "learning_rate": 1.869740429062463e-07, "logits/generated": 3.1275806427001953, "logits/real": 1.9204256534576416, "logps/generated": -919.3878173828125, "logps/real": -360.70538330078125, "loss": 0.046, "rewards/accuracies": 0.987500011920929, "rewards/generated": -49.14870834350586, "rewards/margins": 45.66701126098633, "rewards/real": -3.4816970825195312, "step": 6220 }, { "epoch": 1.99, "learning_rate": 1.863814151949745e-07, "logits/generated": 3.363603115081787, "logits/real": 1.4255322217941284, "logps/generated": -873.4091796875, "logps/real": -364.060546875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/generated": -46.276676177978516, "rewards/margins": 42.56911087036133, "rewards/real": -3.70756459236145, "step": 6230 }, { "epoch": 2.0, "learning_rate": 1.857887874837027e-07, "logits/generated": 3.5246219635009766, "logits/real": 2.0341663360595703, "logps/generated": -902.45849609375, "logps/real": -346.0574951171875, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/generated": -49.6707649230957, "rewards/margins": 45.00032424926758, "rewards/real": -4.670444011688232, "step": 6240 }, { "epoch": 2.0, "learning_rate": 1.8519615977243095e-07, "logits/generated": 3.884793519973755, "logits/real": 1.5322035551071167, "logps/generated": -897.5712890625, "logps/real": -401.52740478515625, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -48.86095428466797, "rewards/margins": 43.582603454589844, "rewards/real": -5.278354644775391, "step": 6250 }, { "epoch": 2.0, "learning_rate": 1.8460353206115917e-07, "logits/generated": 3.5952534675598145, "logits/real": 1.9434171915054321, "logps/generated": -908.01220703125, "logps/real": -356.5626525878906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -51.807945251464844, "rewards/margins": 47.32154083251953, "rewards/real": -4.486405372619629, "step": 6260 }, { "epoch": 2.01, "learning_rate": 1.8401090434988738e-07, "logits/generated": 3.4772255420684814, "logits/real": 1.803269624710083, "logps/generated": -894.8136596679688, "logps/real": -383.3649597167969, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -49.74882507324219, "rewards/margins": 44.56361389160156, "rewards/real": -5.185210227966309, "step": 6270 }, { "epoch": 2.01, "learning_rate": 1.8341827663861562e-07, "logits/generated": 4.13782262802124, "logits/real": 1.8974603414535522, "logps/generated": -891.482421875, "logps/real": -354.6378479003906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -49.27983856201172, "rewards/margins": 44.55103302001953, "rewards/real": -4.7288079261779785, "step": 6280 }, { "epoch": 2.01, "learning_rate": 1.8282564892734384e-07, "logits/generated": 3.494687557220459, "logits/real": 1.7981832027435303, "logps/generated": -840.1969604492188, "logps/real": -391.729248046875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -44.981666564941406, "rewards/margins": 39.8217658996582, "rewards/real": -5.159902095794678, "step": 6290 }, { "epoch": 2.02, "learning_rate": 1.8223302121607205e-07, "logits/generated": 3.345310688018799, "logits/real": 1.3194358348846436, "logps/generated": -789.7905883789062, "logps/real": -376.018798828125, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/generated": -41.84284210205078, "rewards/margins": 37.92809295654297, "rewards/real": -3.91474986076355, "step": 6300 }, { "epoch": 2.02, "learning_rate": 1.816403935048003e-07, "logits/generated": 3.058318614959717, "logits/real": 1.6646451950073242, "logps/generated": -938.5059814453125, "logps/real": -379.447021484375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -52.523712158203125, "rewards/margins": 47.76115036010742, "rewards/real": -4.7625603675842285, "step": 6310 }, { "epoch": 2.02, "learning_rate": 1.8104776579352849e-07, "logits/generated": 3.089402198791504, "logits/real": 1.5612413883209229, "logps/generated": -831.9298095703125, "logps/real": -374.03814697265625, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/generated": -44.23519515991211, "rewards/margins": 39.55861282348633, "rewards/real": -4.676581859588623, "step": 6320 }, { "epoch": 2.03, "learning_rate": 1.804551380822567e-07, "logits/generated": 3.766859769821167, "logits/real": 1.84218430519104, "logps/generated": -907.6774291992188, "logps/real": -392.12603759765625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -50.28938293457031, "rewards/margins": 46.311309814453125, "rewards/real": -3.978079319000244, "step": 6330 }, { "epoch": 2.03, "learning_rate": 1.7986251037098494e-07, "logits/generated": 3.7131881713867188, "logits/real": 1.8214771747589111, "logps/generated": -884.7658081054688, "logps/real": -373.6928405761719, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -48.31201934814453, "rewards/margins": 43.099334716796875, "rewards/real": -5.2126851081848145, "step": 6340 }, { "epoch": 2.03, "learning_rate": 1.7926988265971316e-07, "logits/generated": 3.696737289428711, "logits/real": 1.6888158321380615, "logps/generated": -837.7440185546875, "logps/real": -367.4432067871094, "loss": 0.0061, "rewards/accuracies": 0.987500011920929, "rewards/generated": -45.287147521972656, "rewards/margins": 41.43773651123047, "rewards/real": -3.8494114875793457, "step": 6350 }, { "epoch": 2.04, "learning_rate": 1.7867725494844137e-07, "logits/generated": 3.6794025897979736, "logits/real": 1.402541995048523, "logps/generated": -898.3410034179688, "logps/real": -381.45672607421875, "loss": 0.0068, "rewards/accuracies": 0.987500011920929, "rewards/generated": -49.66602325439453, "rewards/margins": 44.5916633605957, "rewards/real": -5.074358940124512, "step": 6360 }, { "epoch": 2.04, "learning_rate": 1.7808462723716962e-07, "logits/generated": 3.880311965942383, "logits/real": 2.079180955886841, "logps/generated": -954.888671875, "logps/real": -398.05291748046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -54.07482147216797, "rewards/margins": 48.75034713745117, "rewards/real": -5.324476718902588, "step": 6370 }, { "epoch": 2.04, "learning_rate": 1.7749199952589783e-07, "logits/generated": 3.0106310844421387, "logits/real": 1.792249083518982, "logps/generated": -884.6341552734375, "logps/real": -361.8485412597656, "loss": 0.0178, "rewards/accuracies": 0.987500011920929, "rewards/generated": -49.628013610839844, "rewards/margins": 44.123741149902344, "rewards/real": -5.504273891448975, "step": 6380 }, { "epoch": 2.04, "learning_rate": 1.7689937181462605e-07, "logits/generated": 3.680366039276123, "logits/real": 1.9910833835601807, "logps/generated": -1001.8021240234375, "logps/real": -351.34906005859375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -58.97861862182617, "rewards/margins": 53.63355255126953, "rewards/real": -5.345067501068115, "step": 6390 }, { "epoch": 2.05, "learning_rate": 1.763067441033543e-07, "logits/generated": 3.5425190925598145, "logits/real": 1.4850716590881348, "logps/generated": -905.5157470703125, "logps/real": -405.67022705078125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -51.5119743347168, "rewards/margins": 46.31713104248047, "rewards/real": -5.194847106933594, "step": 6400 }, { "epoch": 2.05, "learning_rate": 1.7571411639208248e-07, "logits/generated": 3.1999075412750244, "logits/real": 1.6482913494110107, "logps/generated": -807.9815673828125, "logps/real": -366.31097412109375, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -43.2049560546875, "rewards/margins": 38.5529670715332, "rewards/real": -4.6519880294799805, "step": 6410 }, { "epoch": 2.05, "learning_rate": 1.751214886808107e-07, "logits/generated": 3.399055004119873, "logits/real": 1.9470268487930298, "logps/generated": -835.4879150390625, "logps/real": -331.04522705078125, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -45.92859649658203, "rewards/margins": 41.39247512817383, "rewards/real": -4.536115646362305, "step": 6420 }, { "epoch": 2.06, "learning_rate": 1.7452886096953893e-07, "logits/generated": 3.8540852069854736, "logits/real": 1.836313009262085, "logps/generated": -970.19287109375, "logps/real": -364.0809631347656, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -56.99243927001953, "rewards/margins": 50.86602020263672, "rewards/real": -6.126420021057129, "step": 6430 }, { "epoch": 2.06, "learning_rate": 1.7393623325826715e-07, "logits/generated": 3.5358593463897705, "logits/real": 1.6226106882095337, "logps/generated": -841.9315185546875, "logps/real": -388.89984130859375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -46.219757080078125, "rewards/margins": 40.3925666809082, "rewards/real": -5.827186107635498, "step": 6440 }, { "epoch": 2.06, "learning_rate": 1.7334360554699537e-07, "logits/generated": 3.1885433197021484, "logits/real": 2.0745949745178223, "logps/generated": -913.2047119140625, "logps/real": -387.1131896972656, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -53.641197204589844, "rewards/margins": 46.503761291503906, "rewards/real": -7.1374335289001465, "step": 6450 }, { "epoch": 2.07, "learning_rate": 1.7275097783572358e-07, "logits/generated": 3.553014039993286, "logits/real": 1.905940294265747, "logps/generated": -875.4393310546875, "logps/real": -395.08892822265625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -47.743553161621094, "rewards/margins": 41.1551399230957, "rewards/real": -6.588418006896973, "step": 6460 }, { "epoch": 2.07, "learning_rate": 1.7215835012445182e-07, "logits/generated": 3.791322708129883, "logits/real": 2.0190250873565674, "logps/generated": -947.0153198242188, "logps/real": -372.3125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -54.31102752685547, "rewards/margins": 47.426300048828125, "rewards/real": -6.884730339050293, "step": 6470 }, { "epoch": 2.07, "learning_rate": 1.7156572241318004e-07, "logits/generated": 3.7165019512176514, "logits/real": 2.2466185092926025, "logps/generated": -896.0616455078125, "logps/real": -396.95770263671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -49.876625061035156, "rewards/margins": 41.883201599121094, "rewards/real": -7.9934258460998535, "step": 6480 }, { "epoch": 2.08, "learning_rate": 1.7097309470190825e-07, "logits/generated": 3.761566638946533, "logits/real": 1.9355154037475586, "logps/generated": -883.3474731445312, "logps/real": -397.15765380859375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -51.003173828125, "rewards/margins": 44.23944854736328, "rewards/real": -6.763733863830566, "step": 6490 }, { "epoch": 2.08, "learning_rate": 1.7038046699063647e-07, "logits/generated": 3.918832302093506, "logits/real": 2.098384380340576, "logps/generated": -1068.8323974609375, "logps/real": -377.49884033203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -63.992462158203125, "rewards/margins": 57.11040115356445, "rewards/real": -6.882063865661621, "step": 6500 }, { "epoch": 2.08, "learning_rate": 1.6978783927936468e-07, "logits/generated": 3.3875460624694824, "logits/real": 2.0191025733947754, "logps/generated": -844.9122314453125, "logps/real": -389.9608459472656, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -47.293983459472656, "rewards/margins": 40.667396545410156, "rewards/real": -6.626590728759766, "step": 6510 }, { "epoch": 2.09, "learning_rate": 1.691952115680929e-07, "logits/generated": 3.16455340385437, "logits/real": 2.0957303047180176, "logps/generated": -943.8391723632812, "logps/real": -383.1474914550781, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -54.61397171020508, "rewards/margins": 48.13213348388672, "rewards/real": -6.481833457946777, "step": 6520 }, { "epoch": 2.09, "learning_rate": 1.6860258385682114e-07, "logits/generated": 3.742846727371216, "logits/real": 1.8941633701324463, "logps/generated": -833.7000122070312, "logps/real": -375.1758117675781, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -46.05198287963867, "rewards/margins": 40.61438751220703, "rewards/real": -5.437596797943115, "step": 6530 }, { "epoch": 2.09, "learning_rate": 1.6800995614554936e-07, "logits/generated": 3.6632461547851562, "logits/real": 2.2125802040100098, "logps/generated": -909.6021728515625, "logps/real": -375.26568603515625, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -49.762611389160156, "rewards/margins": 43.21505355834961, "rewards/real": -6.547552585601807, "step": 6540 }, { "epoch": 2.1, "learning_rate": 1.6741732843427757e-07, "logits/generated": 3.6171021461486816, "logits/real": 2.102571487426758, "logps/generated": -892.7225341796875, "logps/real": -381.14483642578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -49.49254608154297, "rewards/margins": 42.738670349121094, "rewards/real": -6.753874778747559, "step": 6550 }, { "epoch": 2.1, "learning_rate": 1.6682470072300581e-07, "logits/generated": 3.3604187965393066, "logits/real": 2.235856294631958, "logps/generated": -941.2318115234375, "logps/real": -373.9045104980469, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -52.93433380126953, "rewards/margins": 46.550376892089844, "rewards/real": -6.383957386016846, "step": 6560 }, { "epoch": 2.1, "learning_rate": 1.6623207301173403e-07, "logits/generated": 3.521977663040161, "logits/real": 2.065173625946045, "logps/generated": -878.505859375, "logps/real": -378.19122314453125, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -50.58102798461914, "rewards/margins": 43.341697692871094, "rewards/real": -7.2393341064453125, "step": 6570 }, { "epoch": 2.11, "learning_rate": 1.6563944530046224e-07, "logits/generated": 3.439244508743286, "logits/real": 2.1543192863464355, "logps/generated": -927.3391723632812, "logps/real": -383.65509033203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -52.10762405395508, "rewards/margins": 44.56721496582031, "rewards/real": -7.540410041809082, "step": 6580 }, { "epoch": 2.11, "learning_rate": 1.6504681758919046e-07, "logits/generated": 3.5469043254852295, "logits/real": 1.8698736429214478, "logps/generated": -837.1285400390625, "logps/real": -431.6897888183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -44.68436813354492, "rewards/margins": 38.102054595947266, "rewards/real": -6.58231258392334, "step": 6590 }, { "epoch": 2.11, "learning_rate": 1.6445418987791868e-07, "logits/generated": 3.2622406482696533, "logits/real": 1.9931650161743164, "logps/generated": -912.216796875, "logps/real": -397.71429443359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -51.05770492553711, "rewards/margins": 44.12523651123047, "rewards/real": -6.93247127532959, "step": 6600 }, { "epoch": 2.12, "learning_rate": 1.638615621666469e-07, "logits/generated": 3.3315768241882324, "logits/real": 2.002892017364502, "logps/generated": -1019.22021484375, "logps/real": -371.0779724121094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -60.481849670410156, "rewards/margins": 53.9934196472168, "rewards/real": -6.488433837890625, "step": 6610 }, { "epoch": 2.12, "learning_rate": 1.6326893445537513e-07, "logits/generated": 3.30401611328125, "logits/real": 2.0205698013305664, "logps/generated": -857.5738525390625, "logps/real": -406.16546630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -46.526493072509766, "rewards/margins": 40.06731414794922, "rewards/real": -6.459181308746338, "step": 6620 }, { "epoch": 2.12, "learning_rate": 1.6267630674410335e-07, "logits/generated": 3.6951255798339844, "logits/real": 1.9617116451263428, "logps/generated": -867.1686401367188, "logps/real": -386.513427734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -48.22928237915039, "rewards/margins": 41.44636535644531, "rewards/real": -6.782916069030762, "step": 6630 }, { "epoch": 2.12, "learning_rate": 1.6208367903283156e-07, "logits/generated": 3.649329662322998, "logits/real": 2.1086161136627197, "logps/generated": -877.0367431640625, "logps/real": -378.9393310546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -48.91767883300781, "rewards/margins": 41.61117172241211, "rewards/real": -7.306508541107178, "step": 6640 }, { "epoch": 2.13, "learning_rate": 1.614910513215598e-07, "logits/generated": 3.661804676055908, "logits/real": 2.4163119792938232, "logps/generated": -1004.2562255859375, "logps/real": -388.51336669921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -58.64870071411133, "rewards/margins": 51.82874298095703, "rewards/real": -6.819955348968506, "step": 6650 }, { "epoch": 2.13, "learning_rate": 1.6089842361028802e-07, "logits/generated": 3.2976551055908203, "logits/real": 1.8500537872314453, "logps/generated": -934.6765747070312, "logps/real": -383.8113708496094, "loss": 0.0176, "rewards/accuracies": 0.987500011920929, "rewards/generated": -54.98523712158203, "rewards/margins": 48.496063232421875, "rewards/real": -6.489166259765625, "step": 6660 }, { "epoch": 2.13, "learning_rate": 1.6030579589901624e-07, "logits/generated": 3.948852062225342, "logits/real": 2.137843370437622, "logps/generated": -885.4298706054688, "logps/real": -369.081787109375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -50.035858154296875, "rewards/margins": 43.878318786621094, "rewards/real": -6.157540798187256, "step": 6670 }, { "epoch": 2.14, "learning_rate": 1.5971316818774445e-07, "logits/generated": 3.7219607830047607, "logits/real": 1.7658170461654663, "logps/generated": -920.0833129882812, "logps/real": -419.18682861328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -53.35789108276367, "rewards/margins": 45.76387405395508, "rewards/real": -7.594014644622803, "step": 6680 }, { "epoch": 2.14, "learning_rate": 1.5912054047647267e-07, "logits/generated": 3.4460177421569824, "logits/real": 2.163835048675537, "logps/generated": -941.6736450195312, "logps/real": -407.65576171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -54.060951232910156, "rewards/margins": 46.508575439453125, "rewards/real": -7.552375793457031, "step": 6690 }, { "epoch": 2.14, "learning_rate": 1.5852791276520088e-07, "logits/generated": 4.178693771362305, "logits/real": 1.8809093236923218, "logps/generated": -841.2927856445312, "logps/real": -398.60443115234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -46.83857345581055, "rewards/margins": 39.39279556274414, "rewards/real": -7.445780277252197, "step": 6700 }, { "epoch": 2.15, "learning_rate": 1.5793528505392912e-07, "logits/generated": 3.8482487201690674, "logits/real": 2.120398998260498, "logps/generated": -881.3566284179688, "logps/real": -398.9073791503906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -49.39754867553711, "rewards/margins": 43.08720779418945, "rewards/real": -6.310345649719238, "step": 6710 }, { "epoch": 2.15, "learning_rate": 1.5734265734265734e-07, "logits/generated": 3.056668758392334, "logits/real": 1.9427289962768555, "logps/generated": -944.3453979492188, "logps/real": -381.8882141113281, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -53.652366638183594, "rewards/margins": 46.86200714111328, "rewards/real": -6.7903642654418945, "step": 6720 }, { "epoch": 2.15, "learning_rate": 1.5675002963138556e-07, "logits/generated": 3.5041098594665527, "logits/real": 2.2960071563720703, "logps/generated": -884.7781982421875, "logps/real": -422.67413330078125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/generated": -49.832481384277344, "rewards/margins": 42.40620803833008, "rewards/real": -7.4262824058532715, "step": 6730 }, { "epoch": 2.16, "learning_rate": 1.561574019201138e-07, "logits/generated": 3.994569778442383, "logits/real": 2.588862895965576, "logps/generated": -1020.2581176757812, "logps/real": -417.8421936035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -60.7502326965332, "rewards/margins": 50.90573501586914, "rewards/real": -9.844499588012695, "step": 6740 }, { "epoch": 2.16, "learning_rate": 1.55564774208842e-07, "logits/generated": 3.1953651905059814, "logits/real": 2.4260640144348145, "logps/generated": -947.6217041015625, "logps/real": -375.6593933105469, "loss": 0.0046, "rewards/accuracies": 0.987500011920929, "rewards/generated": -55.07343673706055, "rewards/margins": 46.666297912597656, "rewards/real": -8.407140731811523, "step": 6750 }, { "epoch": 2.16, "learning_rate": 1.5497214649757023e-07, "logits/generated": 4.424673557281494, "logits/real": 2.2033803462982178, "logps/generated": -939.3943481445312, "logps/real": -373.49371337890625, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -54.96144485473633, "rewards/margins": 47.3978385925293, "rewards/real": -7.56360387802124, "step": 6760 }, { "epoch": 2.17, "learning_rate": 1.5437951878629842e-07, "logits/generated": 3.748819351196289, "logits/real": 2.647487163543701, "logps/generated": -920.8893432617188, "logps/real": -416.6439514160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -51.697792053222656, "rewards/margins": 43.080196380615234, "rewards/real": -8.617592811584473, "step": 6770 }, { "epoch": 2.17, "learning_rate": 1.5378689107502666e-07, "logits/generated": 4.316756725311279, "logits/real": 2.1666080951690674, "logps/generated": -896.6207885742188, "logps/real": -413.3868103027344, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -51.75537109375, "rewards/margins": 43.20842742919922, "rewards/real": -8.546947479248047, "step": 6780 }, { "epoch": 2.17, "learning_rate": 1.5319426336375487e-07, "logits/generated": 3.8626246452331543, "logits/real": 1.7933346033096313, "logps/generated": -931.4691162109375, "logps/real": -358.85797119140625, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/generated": -53.052490234375, "rewards/margins": 47.16486358642578, "rewards/real": -5.887631416320801, "step": 6790 }, { "epoch": 2.18, "learning_rate": 1.526016356524831e-07, "logits/generated": 4.089632034301758, "logits/real": 1.6373741626739502, "logps/generated": -954.58544921875, "logps/real": -388.57720947265625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -55.31062698364258, "rewards/margins": 49.37091064453125, "rewards/real": -5.939715385437012, "step": 6800 }, { "epoch": 2.18, "learning_rate": 1.5200900794121133e-07, "logits/generated": 3.2313010692596436, "logits/real": 1.5612775087356567, "logps/generated": -862.0437622070312, "logps/real": -378.39471435546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -47.85914611816406, "rewards/margins": 41.0986213684082, "rewards/real": -6.760532379150391, "step": 6810 }, { "epoch": 2.18, "learning_rate": 1.5141638022993955e-07, "logits/generated": 3.2439932823181152, "logits/real": 2.2634949684143066, "logps/generated": -932.8312377929688, "logps/real": -362.20806884765625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -52.68410110473633, "rewards/margins": 46.85740661621094, "rewards/real": -5.826689720153809, "step": 6820 }, { "epoch": 2.19, "learning_rate": 1.5082375251866776e-07, "logits/generated": 3.551748275756836, "logits/real": 1.9099966287612915, "logps/generated": -870.3192138671875, "logps/real": -412.82757568359375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -49.41729736328125, "rewards/margins": 42.34073257446289, "rewards/real": -7.076570987701416, "step": 6830 }, { "epoch": 2.19, "learning_rate": 1.50231124807396e-07, "logits/generated": 3.5488381385803223, "logits/real": 2.0896248817443848, "logps/generated": -907.7509765625, "logps/real": -351.4305114746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -51.6164665222168, "rewards/margins": 46.79435729980469, "rewards/real": -4.822107791900635, "step": 6840 }, { "epoch": 2.19, "learning_rate": 1.496384970961242e-07, "logits/generated": 3.959463119506836, "logits/real": 1.8525702953338623, "logps/generated": -875.3648681640625, "logps/real": -353.7754821777344, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/generated": -49.686405181884766, "rewards/margins": 44.30531692504883, "rewards/real": -5.381083965301514, "step": 6850 }, { "epoch": 2.2, "learning_rate": 1.490458693848524e-07, "logits/generated": 3.450671434402466, "logits/real": 1.645078420639038, "logps/generated": -813.9600830078125, "logps/real": -415.3131408691406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -42.97637176513672, "rewards/margins": 36.70772933959961, "rewards/real": -6.268640995025635, "step": 6860 }, { "epoch": 2.2, "learning_rate": 1.4845324167358065e-07, "logits/generated": 3.608870267868042, "logits/real": 1.752415418624878, "logps/generated": -844.51953125, "logps/real": -392.44439697265625, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -47.45626449584961, "rewards/margins": 41.60123825073242, "rewards/real": -5.8550310134887695, "step": 6870 }, { "epoch": 2.2, "learning_rate": 1.4786061396230887e-07, "logits/generated": 3.241227626800537, "logits/real": 1.8304872512817383, "logps/generated": -965.0771484375, "logps/real": -389.36749267578125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -56.4457893371582, "rewards/margins": 50.60601043701172, "rewards/real": -5.839773654937744, "step": 6880 }, { "epoch": 2.2, "learning_rate": 1.4726798625103708e-07, "logits/generated": 2.908162832260132, "logits/real": 1.5250742435455322, "logps/generated": -892.7222900390625, "logps/real": -405.5044860839844, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/generated": -49.53814697265625, "rewards/margins": 43.983612060546875, "rewards/real": -5.554529666900635, "step": 6890 }, { "epoch": 2.21, "learning_rate": 1.4667535853976532e-07, "logits/generated": 3.5540802478790283, "logits/real": 1.816138505935669, "logps/generated": -1006.95654296875, "logps/real": -356.22442626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -56.956817626953125, "rewards/margins": 52.82642364501953, "rewards/real": -4.13038969039917, "step": 6900 }, { "epoch": 2.21, "learning_rate": 1.4608273082849354e-07, "logits/generated": 2.8206403255462646, "logits/real": 1.9676635265350342, "logps/generated": -758.2788696289062, "logps/real": -382.80731201171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -41.22502899169922, "rewards/margins": 36.453529357910156, "rewards/real": -4.771500587463379, "step": 6910 }, { "epoch": 2.21, "learning_rate": 1.4549010311722175e-07, "logits/generated": 3.226039409637451, "logits/real": 1.8796745538711548, "logps/generated": -891.7478637695312, "logps/real": -342.97296142578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -50.727088928222656, "rewards/margins": 46.718544006347656, "rewards/real": -4.008549213409424, "step": 6920 }, { "epoch": 2.22, "learning_rate": 1.4489747540595e-07, "logits/generated": 3.080256938934326, "logits/real": 1.8325796127319336, "logps/generated": -817.8787231445312, "logps/real": -392.36981201171875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -43.375423431396484, "rewards/margins": 38.49822235107422, "rewards/real": -4.8772101402282715, "step": 6930 }, { "epoch": 2.22, "learning_rate": 1.4430484769467818e-07, "logits/generated": 2.9723448753356934, "logits/real": 1.710695505142212, "logps/generated": -892.23828125, "logps/real": -414.94207763671875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -48.01591873168945, "rewards/margins": 42.979949951171875, "rewards/real": -5.035966396331787, "step": 6940 }, { "epoch": 2.22, "learning_rate": 1.437122199834064e-07, "logits/generated": 3.3378586769104004, "logits/real": 1.6810868978500366, "logps/generated": -873.9470825195312, "logps/real": -399.42779541015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -48.72085952758789, "rewards/margins": 43.833003997802734, "rewards/real": -4.887852668762207, "step": 6950 }, { "epoch": 2.23, "learning_rate": 1.4311959227213464e-07, "logits/generated": 3.7277050018310547, "logits/real": 1.8600651025772095, "logps/generated": -961.6746215820312, "logps/real": -376.4801025390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -54.674163818359375, "rewards/margins": 48.67682647705078, "rewards/real": -5.997340202331543, "step": 6960 }, { "epoch": 2.23, "learning_rate": 1.4252696456086286e-07, "logits/generated": 3.812844753265381, "logits/real": 2.0348525047302246, "logps/generated": -929.8878784179688, "logps/real": -373.9241943359375, "loss": 0.0043, "rewards/accuracies": 0.987500011920929, "rewards/generated": -53.372467041015625, "rewards/margins": 46.978092193603516, "rewards/real": -6.394378662109375, "step": 6970 }, { "epoch": 2.23, "learning_rate": 1.4193433684959107e-07, "logits/generated": 3.270030975341797, "logits/real": 2.5010368824005127, "logps/generated": -923.5457763671875, "logps/real": -381.654541015625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -53.984703063964844, "rewards/margins": 46.77741241455078, "rewards/real": -7.2072906494140625, "step": 6980 }, { "epoch": 2.24, "learning_rate": 1.4134170913831931e-07, "logits/generated": 3.9600830078125, "logits/real": 2.124016284942627, "logps/generated": -926.6437377929688, "logps/real": -432.20513916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -54.845184326171875, "rewards/margins": 47.35279083251953, "rewards/real": -7.492398262023926, "step": 6990 }, { "epoch": 2.24, "learning_rate": 1.4074908142704753e-07, "logits/generated": 3.5825181007385254, "logits/real": 2.1191952228546143, "logps/generated": -930.9474487304688, "logps/real": -372.9541931152344, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -51.7147331237793, "rewards/margins": 45.40174865722656, "rewards/real": -6.312982559204102, "step": 7000 }, { "epoch": 2.24, "learning_rate": 1.4015645371577575e-07, "logits/generated": 3.6363234519958496, "logits/real": 1.4622042179107666, "logps/generated": -846.0232543945312, "logps/real": -373.27886962890625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/generated": -46.39105987548828, "rewards/margins": 40.55199432373047, "rewards/real": -5.839066505432129, "step": 7010 }, { "epoch": 2.25, "learning_rate": 1.39563826004504e-07, "logits/generated": 3.2572197914123535, "logits/real": 1.710909128189087, "logps/generated": -842.8611450195312, "logps/real": -403.93096923828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -46.79165267944336, "rewards/margins": 40.92638397216797, "rewards/real": -5.865267276763916, "step": 7020 }, { "epoch": 2.25, "learning_rate": 1.3897119829323218e-07, "logits/generated": 3.1691536903381348, "logits/real": 2.1280269622802734, "logps/generated": -882.4940185546875, "logps/real": -374.7820739746094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -49.72075653076172, "rewards/margins": 44.30762481689453, "rewards/real": -5.4131364822387695, "step": 7030 }, { "epoch": 2.25, "learning_rate": 1.383785705819604e-07, "logits/generated": 3.2503700256347656, "logits/real": 2.0797364711761475, "logps/generated": -1018.7307739257812, "logps/real": -360.0143127441406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -60.452354431152344, "rewards/margins": 53.7127571105957, "rewards/real": -6.739597320556641, "step": 7040 }, { "epoch": 2.26, "learning_rate": 1.3778594287068863e-07, "logits/generated": 3.8283183574676514, "logits/real": 2.013394832611084, "logps/generated": -960.9522705078125, "logps/real": -442.35516357421875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -54.580238342285156, "rewards/margins": 46.32996368408203, "rewards/real": -8.25028133392334, "step": 7050 }, { "epoch": 2.26, "learning_rate": 1.3719331515941685e-07, "logits/generated": 3.9131228923797607, "logits/real": 1.9845058917999268, "logps/generated": -936.5426635742188, "logps/real": -378.13275146484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -54.35515213012695, "rewards/margins": 47.764652252197266, "rewards/real": -6.5905046463012695, "step": 7060 }, { "epoch": 2.26, "learning_rate": 1.3660068744814506e-07, "logits/generated": 3.6134800910949707, "logits/real": 2.446157932281494, "logps/generated": -1077.667236328125, "logps/real": -341.0533447265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -63.74230194091797, "rewards/margins": 57.78644561767578, "rewards/real": -5.955852508544922, "step": 7070 }, { "epoch": 2.27, "learning_rate": 1.3600805973687328e-07, "logits/generated": 3.8961777687072754, "logits/real": 2.213057041168213, "logps/generated": -944.3114013671875, "logps/real": -347.3383483886719, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -54.40936279296875, "rewards/margins": 47.75571823120117, "rewards/real": -6.653644561767578, "step": 7080 }, { "epoch": 2.27, "learning_rate": 1.3541543202560152e-07, "logits/generated": 3.896191120147705, "logits/real": 1.3951739072799683, "logps/generated": -864.2570190429688, "logps/real": -421.14453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -47.61088943481445, "rewards/margins": 40.27512741088867, "rewards/real": -7.335763454437256, "step": 7090 }, { "epoch": 2.27, "learning_rate": 1.3482280431432974e-07, "logits/generated": 3.3745689392089844, "logits/real": 1.9235150814056396, "logps/generated": -1001.2970581054688, "logps/real": -392.7232971191406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -60.11412811279297, "rewards/margins": 52.768280029296875, "rewards/real": -7.345845699310303, "step": 7100 }, { "epoch": 2.28, "learning_rate": 1.3423017660305795e-07, "logits/generated": 3.599738359451294, "logits/real": 2.035776376724243, "logps/generated": -967.9103393554688, "logps/real": -403.059326171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -56.32331085205078, "rewards/margins": 48.5847053527832, "rewards/real": -7.738605499267578, "step": 7110 }, { "epoch": 2.28, "learning_rate": 1.3363754889178617e-07, "logits/generated": 4.224157810211182, "logits/real": 1.5589847564697266, "logps/generated": -872.8209838867188, "logps/real": -383.27264404296875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -48.575279235839844, "rewards/margins": 42.46464157104492, "rewards/real": -6.110633850097656, "step": 7120 }, { "epoch": 2.28, "learning_rate": 1.3304492118051438e-07, "logits/generated": 3.3103976249694824, "logits/real": 2.0746779441833496, "logps/generated": -998.7860107421875, "logps/real": -399.69171142578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -58.93140411376953, "rewards/margins": 52.75010299682617, "rewards/real": -6.181303024291992, "step": 7130 }, { "epoch": 2.28, "learning_rate": 1.324522934692426e-07, "logits/generated": 3.538538694381714, "logits/real": 1.8912137746810913, "logps/generated": -910.70263671875, "logps/real": -382.2970275878906, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -52.28912353515625, "rewards/margins": 45.06175231933594, "rewards/real": -7.2273664474487305, "step": 7140 }, { "epoch": 2.29, "learning_rate": 1.3185966575797084e-07, "logits/generated": 4.153119087219238, "logits/real": 2.034271240234375, "logps/generated": -919.7184448242188, "logps/real": -432.1029357910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -53.59479522705078, "rewards/margins": 45.460472106933594, "rewards/real": -8.134321212768555, "step": 7150 }, { "epoch": 2.29, "learning_rate": 1.3126703804669906e-07, "logits/generated": 3.6166234016418457, "logits/real": 2.463695526123047, "logps/generated": -902.58642578125, "logps/real": -394.44354248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -54.12298583984375, "rewards/margins": 45.50529861450195, "rewards/real": -8.617691993713379, "step": 7160 }, { "epoch": 2.29, "learning_rate": 1.3067441033542727e-07, "logits/generated": 4.045973300933838, "logits/real": 1.654720664024353, "logps/generated": -943.9410400390625, "logps/real": -402.5132751464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -54.6247444152832, "rewards/margins": 46.62574005126953, "rewards/real": -7.999002933502197, "step": 7170 }, { "epoch": 2.3, "learning_rate": 1.3008178262415551e-07, "logits/generated": 3.2982230186462402, "logits/real": 2.241882801055908, "logps/generated": -888.5969848632812, "logps/real": -421.61328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -50.42265319824219, "rewards/margins": 43.38030242919922, "rewards/real": -7.042349338531494, "step": 7180 }, { "epoch": 2.3, "learning_rate": 1.2948915491288373e-07, "logits/generated": 3.672785997390747, "logits/real": 1.6770737171173096, "logps/generated": -922.3863525390625, "logps/real": -394.39410400390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -53.93218231201172, "rewards/margins": 46.440086364746094, "rewards/real": -7.492100715637207, "step": 7190 }, { "epoch": 2.3, "learning_rate": 1.2889652720161194e-07, "logits/generated": 3.482133388519287, "logits/real": 2.195340633392334, "logps/generated": -903.5734252929688, "logps/real": -406.4979248046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -52.43483352661133, "rewards/margins": 45.251285552978516, "rewards/real": -7.183550834655762, "step": 7200 }, { "epoch": 2.31, "learning_rate": 1.2830389949034016e-07, "logits/generated": 3.6708450317382812, "logits/real": 2.3602938652038574, "logps/generated": -915.2332763671875, "logps/real": -389.45269775390625, "loss": 0.0069, "rewards/accuracies": 0.987500011920929, "rewards/generated": -54.72968292236328, "rewards/margins": 46.505043029785156, "rewards/real": -8.224641799926758, "step": 7210 }, { "epoch": 2.31, "learning_rate": 1.2771127177906838e-07, "logits/generated": 4.2751030921936035, "logits/real": 2.1040592193603516, "logps/generated": -1038.5245361328125, "logps/real": -437.68536376953125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -64.43311309814453, "rewards/margins": 55.898765563964844, "rewards/real": -8.534345626831055, "step": 7220 }, { "epoch": 2.31, "learning_rate": 1.271186440677966e-07, "logits/generated": 3.725090503692627, "logits/real": 2.492666244506836, "logps/generated": -1149.4708251953125, "logps/real": -439.4458923339844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.71576690673828, "rewards/margins": 62.1614990234375, "rewards/real": -9.554258346557617, "step": 7230 }, { "epoch": 2.32, "learning_rate": 1.2652601635652483e-07, "logits/generated": 4.100815296173096, "logits/real": 2.0770721435546875, "logps/generated": -1117.468017578125, "logps/real": -454.64239501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.69239807128906, "rewards/margins": 61.77238082885742, "rewards/real": -8.920014381408691, "step": 7240 }, { "epoch": 2.32, "learning_rate": 1.2593338864525305e-07, "logits/generated": 3.8950488567352295, "logits/real": 1.7128747701644897, "logps/generated": -866.4058837890625, "logps/real": -407.7037048339844, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/generated": -49.534793853759766, "rewards/margins": 43.238182067871094, "rewards/real": -6.296614646911621, "step": 7250 }, { "epoch": 2.32, "learning_rate": 1.2534076093398126e-07, "logits/generated": 3.5611443519592285, "logits/real": 1.8351194858551025, "logps/generated": -830.7393798828125, "logps/real": -351.31988525390625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -46.701263427734375, "rewards/margins": 41.20185470581055, "rewards/real": -5.499411582946777, "step": 7260 }, { "epoch": 2.33, "learning_rate": 1.247481332227095e-07, "logits/generated": 3.4794883728027344, "logits/real": 1.7529456615447998, "logps/generated": -1048.803955078125, "logps/real": -391.748779296875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/generated": -61.122528076171875, "rewards/margins": 54.686805725097656, "rewards/real": -6.435726165771484, "step": 7270 }, { "epoch": 2.33, "learning_rate": 1.241555055114377e-07, "logits/generated": 3.7687296867370605, "logits/real": 1.7407106161117554, "logps/generated": -948.267578125, "logps/real": -408.15948486328125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -54.70806884765625, "rewards/margins": 49.158287048339844, "rewards/real": -5.549779415130615, "step": 7280 }, { "epoch": 2.33, "learning_rate": 1.2356287780016594e-07, "logits/generated": 4.017646789550781, "logits/real": 1.896411657333374, "logps/generated": -1000.8963012695312, "logps/real": -381.00531005859375, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/generated": -61.927223205566406, "rewards/margins": 55.34486770629883, "rewards/real": -6.5823540687561035, "step": 7290 }, { "epoch": 2.34, "learning_rate": 1.2297025008889415e-07, "logits/generated": 3.857715606689453, "logits/real": 1.914616346359253, "logps/generated": -916.0032958984375, "logps/real": -407.76373291015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -54.48760223388672, "rewards/margins": 48.77091598510742, "rewards/real": -5.716687202453613, "step": 7300 }, { "epoch": 2.34, "learning_rate": 1.2237762237762237e-07, "logits/generated": 3.556292772293091, "logits/real": 1.2310831546783447, "logps/generated": -949.7063598632812, "logps/real": -416.2879333496094, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -56.6829833984375, "rewards/margins": 50.5594482421875, "rewards/real": -6.12353515625, "step": 7310 }, { "epoch": 2.34, "learning_rate": 1.2178499466635058e-07, "logits/generated": 3.877579927444458, "logits/real": 2.2572357654571533, "logps/generated": -892.0955810546875, "logps/real": -380.8299865722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -53.075401306152344, "rewards/margins": 46.75433349609375, "rewards/real": -6.321071147918701, "step": 7320 }, { "epoch": 2.35, "learning_rate": 1.2119236695507882e-07, "logits/generated": 3.684466600418091, "logits/real": 1.6927614212036133, "logps/generated": -926.1092529296875, "logps/real": -363.82403564453125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/generated": -54.74932098388672, "rewards/margins": 49.10929870605469, "rewards/real": -5.640023231506348, "step": 7330 }, { "epoch": 2.35, "learning_rate": 1.2059973924380704e-07, "logits/generated": 4.153500556945801, "logits/real": 1.8311598300933838, "logps/generated": -945.724609375, "logps/real": -385.7518005371094, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -54.987701416015625, "rewards/margins": 49.47273254394531, "rewards/real": -5.514973163604736, "step": 7340 }, { "epoch": 2.35, "learning_rate": 1.2000711153253525e-07, "logits/generated": 3.6209239959716797, "logits/real": 1.6371666193008423, "logps/generated": -824.1793212890625, "logps/real": -388.419189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -45.33180236816406, "rewards/margins": 40.07854461669922, "rewards/real": -5.25325870513916, "step": 7350 }, { "epoch": 2.36, "learning_rate": 1.1941448382126347e-07, "logits/generated": 3.3920371532440186, "logits/real": 1.992281198501587, "logps/generated": -894.01025390625, "logps/real": -359.991943359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -52.777183532714844, "rewards/margins": 48.64227294921875, "rewards/real": -4.134913444519043, "step": 7360 }, { "epoch": 2.36, "learning_rate": 1.188218561099917e-07, "logits/generated": 3.483046293258667, "logits/real": 1.4694679975509644, "logps/generated": -884.4298706054688, "logps/real": -386.8287658691406, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -52.43204879760742, "rewards/margins": 46.55149459838867, "rewards/real": -5.880550384521484, "step": 7370 }, { "epoch": 2.36, "learning_rate": 1.1822922839871991e-07, "logits/generated": 3.7342770099639893, "logits/real": 1.8004376888275146, "logps/generated": -912.0466918945312, "logps/real": -356.5923156738281, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -53.13550567626953, "rewards/margins": 48.5265007019043, "rewards/real": -4.60900354385376, "step": 7380 }, { "epoch": 2.36, "learning_rate": 1.1763660068744814e-07, "logits/generated": 3.6718952655792236, "logits/real": 1.7568000555038452, "logps/generated": -857.4403076171875, "logps/real": -346.9060363769531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -49.09733963012695, "rewards/margins": 44.313941955566406, "rewards/real": -4.7833943367004395, "step": 7390 }, { "epoch": 2.37, "learning_rate": 1.1704397297617637e-07, "logits/generated": 3.6978485584259033, "logits/real": 1.942521333694458, "logps/generated": -943.5989990234375, "logps/real": -414.44232177734375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -55.790122985839844, "rewards/margins": 49.674232482910156, "rewards/real": -6.115891933441162, "step": 7400 }, { "epoch": 2.37, "learning_rate": 1.1645134526490457e-07, "logits/generated": 3.929466724395752, "logits/real": 1.9601457118988037, "logps/generated": -913.3077392578125, "logps/real": -395.75665283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -52.7274169921875, "rewards/margins": 47.70978927612305, "rewards/real": -5.017629146575928, "step": 7410 }, { "epoch": 2.37, "learning_rate": 1.158587175536328e-07, "logits/generated": 3.940483570098877, "logits/real": 1.827901840209961, "logps/generated": -916.2366943359375, "logps/real": -409.4593200683594, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -53.1384391784668, "rewards/margins": 46.64787673950195, "rewards/real": -6.490558624267578, "step": 7420 }, { "epoch": 2.38, "learning_rate": 1.1526608984236103e-07, "logits/generated": 3.3632397651672363, "logits/real": 2.0485453605651855, "logps/generated": -988.9066162109375, "logps/real": -379.1597595214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -59.874603271484375, "rewards/margins": 54.2638053894043, "rewards/real": -5.610796928405762, "step": 7430 }, { "epoch": 2.38, "learning_rate": 1.1467346213108925e-07, "logits/generated": 3.9321389198303223, "logits/real": 1.7708828449249268, "logps/generated": -944.1676635742188, "logps/real": -427.54498291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -55.71954345703125, "rewards/margins": 50.34968948364258, "rewards/real": -5.369858264923096, "step": 7440 }, { "epoch": 2.38, "learning_rate": 1.1408083441981746e-07, "logits/generated": 3.4171805381774902, "logits/real": 1.8439222574234009, "logps/generated": -954.8546752929688, "logps/real": -375.6050720214844, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -56.99078369140625, "rewards/margins": 52.356239318847656, "rewards/real": -4.634544849395752, "step": 7450 }, { "epoch": 2.39, "learning_rate": 1.1348820670854568e-07, "logits/generated": 3.6472580432891846, "logits/real": 2.0352094173431396, "logps/generated": -938.6751098632812, "logps/real": -366.61639404296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -55.14714813232422, "rewards/margins": 49.33271408081055, "rewards/real": -5.814435005187988, "step": 7460 }, { "epoch": 2.39, "learning_rate": 1.128955789972739e-07, "logits/generated": 3.1472151279449463, "logits/real": 1.9693548679351807, "logps/generated": -1045.47802734375, "logps/real": -394.6998596191406, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -64.78861236572266, "rewards/margins": 59.74689865112305, "rewards/real": -5.041702747344971, "step": 7470 }, { "epoch": 2.39, "learning_rate": 1.1230295128600213e-07, "logits/generated": 4.379024505615234, "logits/real": 2.2068142890930176, "logps/generated": -1073.003662109375, "logps/real": -342.42840576171875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -64.67329406738281, "rewards/margins": 59.653419494628906, "rewards/real": -5.019867897033691, "step": 7480 }, { "epoch": 2.4, "learning_rate": 1.1171032357473035e-07, "logits/generated": 4.089856147766113, "logits/real": 2.041640281677246, "logps/generated": -895.4403076171875, "logps/real": -362.87530517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -52.54082107543945, "rewards/margins": 46.44224166870117, "rewards/real": -6.09857702255249, "step": 7490 }, { "epoch": 2.4, "learning_rate": 1.1111769586345857e-07, "logits/generated": 3.8495888710021973, "logits/real": 1.9811060428619385, "logps/generated": -947.15576171875, "logps/real": -374.90264892578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -56.577735900878906, "rewards/margins": 50.850772857666016, "rewards/real": -5.726959705352783, "step": 7500 }, { "epoch": 2.4, "learning_rate": 1.105250681521868e-07, "logits/generated": 3.28678822517395, "logits/real": 2.0364482402801514, "logps/generated": -970.5263671875, "logps/real": -397.9372863769531, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -58.57470703125, "rewards/margins": 52.99140548706055, "rewards/real": -5.583299160003662, "step": 7510 }, { "epoch": 2.41, "learning_rate": 1.0993244044091501e-07, "logits/generated": 3.6762795448303223, "logits/real": 1.876082420349121, "logps/generated": -1004.3568115234375, "logps/real": -333.42950439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -61.16901397705078, "rewards/margins": 55.99580764770508, "rewards/real": -5.173208713531494, "step": 7520 }, { "epoch": 2.41, "learning_rate": 1.0933981272964324e-07, "logits/generated": 3.516345977783203, "logits/real": 1.7883695363998413, "logps/generated": -1011.6312255859375, "logps/real": -423.02978515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -62.28319549560547, "rewards/margins": 56.01143264770508, "rewards/real": -6.271761894226074, "step": 7530 }, { "epoch": 2.41, "learning_rate": 1.0874718501837145e-07, "logits/generated": 3.5252366065979004, "logits/real": 2.0527803897857666, "logps/generated": -932.23974609375, "logps/real": -385.8324279785156, "loss": 0.0045, "rewards/accuracies": 0.987500011920929, "rewards/generated": -54.8437614440918, "rewards/margins": 49.315486907958984, "rewards/real": -5.528271198272705, "step": 7540 }, { "epoch": 2.42, "learning_rate": 1.0815455730709967e-07, "logits/generated": 2.9808735847473145, "logits/real": 1.8349164724349976, "logps/generated": -999.6062622070312, "logps/real": -376.09893798828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -59.0669059753418, "rewards/margins": 53.1815299987793, "rewards/real": -5.885369300842285, "step": 7550 }, { "epoch": 2.42, "learning_rate": 1.075619295958279e-07, "logits/generated": 3.4894936084747314, "logits/real": 2.2562403678894043, "logps/generated": -1022.0750122070312, "logps/real": -373.09075927734375, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/generated": -60.48945236206055, "rewards/margins": 55.06415939331055, "rewards/real": -5.42529821395874, "step": 7560 }, { "epoch": 2.42, "learning_rate": 1.0696930188455613e-07, "logits/generated": 3.5014560222625732, "logits/real": 2.2231686115264893, "logps/generated": -1004.1393432617188, "logps/real": -351.8412170410156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -58.90232467651367, "rewards/margins": 53.25592041015625, "rewards/real": -5.646405220031738, "step": 7570 }, { "epoch": 2.43, "learning_rate": 1.0637667417328434e-07, "logits/generated": 4.085561275482178, "logits/real": 2.251117467880249, "logps/generated": -879.97216796875, "logps/real": -404.2364807128906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -50.447505950927734, "rewards/margins": 43.98231506347656, "rewards/real": -6.465188503265381, "step": 7580 }, { "epoch": 2.43, "learning_rate": 1.0578404646201256e-07, "logits/generated": 3.8189921379089355, "logits/real": 2.151841640472412, "logps/generated": -995.25927734375, "logps/real": -385.592041015625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -58.707427978515625, "rewards/margins": 53.73567581176758, "rewards/real": -4.97175931930542, "step": 7590 }, { "epoch": 2.43, "learning_rate": 1.0519141875074079e-07, "logits/generated": 3.7049126625061035, "logits/real": 1.8221962451934814, "logps/generated": -974.4742431640625, "logps/real": -368.1142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -58.956634521484375, "rewards/margins": 53.39918899536133, "rewards/real": -5.55744743347168, "step": 7600 }, { "epoch": 2.44, "learning_rate": 1.04598791039469e-07, "logits/generated": 4.49704647064209, "logits/real": 2.193587303161621, "logps/generated": -927.2984619140625, "logps/real": -375.15283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -54.1307258605957, "rewards/margins": 48.12715148925781, "rewards/real": -6.003569602966309, "step": 7610 }, { "epoch": 2.44, "learning_rate": 1.0400616332819723e-07, "logits/generated": 3.3553459644317627, "logits/real": 1.991625189781189, "logps/generated": -951.2344970703125, "logps/real": -379.1512756347656, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -57.45017623901367, "rewards/margins": 51.63641357421875, "rewards/real": -5.8137640953063965, "step": 7620 }, { "epoch": 2.44, "learning_rate": 1.0341353561692543e-07, "logits/generated": 3.7080891132354736, "logits/real": 1.968544602394104, "logps/generated": -979.7561645507812, "logps/real": -386.4315490722656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -58.76336669921875, "rewards/margins": 52.9942626953125, "rewards/real": -5.769102096557617, "step": 7630 }, { "epoch": 2.44, "learning_rate": 1.0282090790565366e-07, "logits/generated": 3.7121453285217285, "logits/real": 2.1421244144439697, "logps/generated": -1032.6112060546875, "logps/real": -422.10003662109375, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -62.85908889770508, "rewards/margins": 55.61089324951172, "rewards/real": -7.248189449310303, "step": 7640 }, { "epoch": 2.45, "learning_rate": 1.0222828019438189e-07, "logits/generated": 4.168065071105957, "logits/real": 1.7530624866485596, "logps/generated": -1050.153564453125, "logps/real": -412.28839111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -63.661293029785156, "rewards/margins": 57.603965759277344, "rewards/real": -6.057323932647705, "step": 7650 }, { "epoch": 2.45, "learning_rate": 1.016356524831101e-07, "logits/generated": 4.1445746421813965, "logits/real": 2.0660576820373535, "logps/generated": -936.4821166992188, "logps/real": -324.1432800292969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -54.63880157470703, "rewards/margins": 49.89611053466797, "rewards/real": -4.742682933807373, "step": 7660 }, { "epoch": 2.45, "learning_rate": 1.0104302477183832e-07, "logits/generated": 3.9583544731140137, "logits/real": 2.2555437088012695, "logps/generated": -877.7376098632812, "logps/real": -390.02728271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -50.91754913330078, "rewards/margins": 44.96452713012695, "rewards/real": -5.9530229568481445, "step": 7670 }, { "epoch": 2.46, "learning_rate": 1.0045039706056655e-07, "logits/generated": 3.6879119873046875, "logits/real": 2.3297011852264404, "logps/generated": -987.4342041015625, "logps/real": -383.0220031738281, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/generated": -58.22246170043945, "rewards/margins": 52.973426818847656, "rewards/real": -5.249035358428955, "step": 7680 }, { "epoch": 2.46, "learning_rate": 9.985776934929476e-08, "logits/generated": 4.630789756774902, "logits/real": 2.2359023094177246, "logps/generated": -1017.9152221679688, "logps/real": -372.4312438964844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -62.529258728027344, "rewards/margins": 56.240379333496094, "rewards/real": -6.288876056671143, "step": 7690 }, { "epoch": 2.46, "learning_rate": 9.926514163802299e-08, "logits/generated": 3.859633684158325, "logits/real": 2.0333054065704346, "logps/generated": -913.1114501953125, "logps/real": -380.2625732421875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -56.29637908935547, "rewards/margins": 51.327125549316406, "rewards/real": -4.969260215759277, "step": 7700 }, { "epoch": 2.47, "learning_rate": 9.867251392675122e-08, "logits/generated": 3.7123591899871826, "logits/real": 2.197134017944336, "logps/generated": -1053.156982421875, "logps/real": -429.6893005371094, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/generated": -64.27241516113281, "rewards/margins": 56.07954788208008, "rewards/real": -8.192865371704102, "step": 7710 }, { "epoch": 2.47, "learning_rate": 9.807988621547942e-08, "logits/generated": 4.180248260498047, "logits/real": 2.1022727489471436, "logps/generated": -951.1502075195312, "logps/real": -434.37078857421875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -58.84419631958008, "rewards/margins": 49.985015869140625, "rewards/real": -8.859179496765137, "step": 7720 }, { "epoch": 2.47, "learning_rate": 9.748725850420765e-08, "logits/generated": 3.679893970489502, "logits/real": 2.0825464725494385, "logps/generated": -1107.1378173828125, "logps/real": -381.36688232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -68.13761901855469, "rewards/margins": 61.41798782348633, "rewards/real": -6.719626426696777, "step": 7730 }, { "epoch": 2.48, "learning_rate": 9.689463079293588e-08, "logits/generated": 3.6188766956329346, "logits/real": 2.2197442054748535, "logps/generated": -897.60888671875, "logps/real": -392.6204528808594, "loss": 0.0066, "rewards/accuracies": 0.987500011920929, "rewards/generated": -52.57334518432617, "rewards/margins": 43.87472915649414, "rewards/real": -8.698624610900879, "step": 7740 }, { "epoch": 2.48, "learning_rate": 9.63020030816641e-08, "logits/generated": 3.9322972297668457, "logits/real": 2.2569358348846436, "logps/generated": -1035.963623046875, "logps/real": -410.9815979003906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -63.803489685058594, "rewards/margins": 55.50420379638672, "rewards/real": -8.299293518066406, "step": 7750 }, { "epoch": 2.48, "learning_rate": 9.570937537039231e-08, "logits/generated": 4.188558578491211, "logits/real": 2.2345645427703857, "logps/generated": -1055.390380859375, "logps/real": -375.19586181640625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -65.69261932373047, "rewards/margins": 57.92228317260742, "rewards/real": -7.770342826843262, "step": 7760 }, { "epoch": 2.49, "learning_rate": 9.511674765912053e-08, "logits/generated": 3.623384952545166, "logits/real": 2.1524815559387207, "logps/generated": -1005.8858642578125, "logps/real": -343.5038146972656, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -61.33906936645508, "rewards/margins": 55.49065017700195, "rewards/real": -5.848414897918701, "step": 7770 }, { "epoch": 2.49, "learning_rate": 9.452411994784876e-08, "logits/generated": 4.0338263511657715, "logits/real": 2.1665244102478027, "logps/generated": -975.5823364257812, "logps/real": -374.0013427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -58.512290954589844, "rewards/margins": 52.536277770996094, "rewards/real": -5.976012229919434, "step": 7780 }, { "epoch": 2.49, "learning_rate": 9.393149223657698e-08, "logits/generated": 4.073245048522949, "logits/real": 1.9686634540557861, "logps/generated": -992.0164794921875, "logps/real": -363.16754150390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -59.51947021484375, "rewards/margins": 53.338470458984375, "rewards/real": -6.180994987487793, "step": 7790 }, { "epoch": 2.5, "learning_rate": 9.33388645253052e-08, "logits/generated": 4.028234481811523, "logits/real": 2.0553243160247803, "logps/generated": -1040.82470703125, "logps/real": -382.91851806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -63.280296325683594, "rewards/margins": 55.88728713989258, "rewards/real": -7.393007755279541, "step": 7800 }, { "epoch": 2.5, "learning_rate": 9.274623681403342e-08, "logits/generated": 4.063427448272705, "logits/real": 2.1127984523773193, "logps/generated": -1055.404541015625, "logps/real": -361.0789794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -65.79178619384766, "rewards/margins": 58.767051696777344, "rewards/real": -7.024729251861572, "step": 7810 }, { "epoch": 2.5, "learning_rate": 9.215360910276164e-08, "logits/generated": 3.2966866493225098, "logits/real": 2.052476406097412, "logps/generated": -918.8580322265625, "logps/real": -417.35369873046875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -54.87522506713867, "rewards/margins": 47.907630920410156, "rewards/real": -6.967595100402832, "step": 7820 }, { "epoch": 2.51, "learning_rate": 9.156098139148986e-08, "logits/generated": 4.767776966094971, "logits/real": 1.9159488677978516, "logps/generated": -961.6583251953125, "logps/real": -372.76019287109375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -57.94257736206055, "rewards/margins": 51.2998161315918, "rewards/real": -6.642757415771484, "step": 7830 }, { "epoch": 2.51, "learning_rate": 9.096835368021809e-08, "logits/generated": 3.3908581733703613, "logits/real": 2.069702386856079, "logps/generated": -914.8912963867188, "logps/real": -385.53857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -53.78641891479492, "rewards/margins": 46.73820877075195, "rewards/real": -7.048209190368652, "step": 7840 }, { "epoch": 2.51, "learning_rate": 9.03757259689463e-08, "logits/generated": 3.3565869331359863, "logits/real": 1.883155107498169, "logps/generated": -957.8240356445312, "logps/real": -392.32806396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -57.133270263671875, "rewards/margins": 49.50084686279297, "rewards/real": -7.632418155670166, "step": 7850 }, { "epoch": 2.52, "learning_rate": 8.978309825767452e-08, "logits/generated": 4.155790328979492, "logits/real": 2.111621141433716, "logps/generated": -995.3546142578125, "logps/real": -368.88470458984375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -61.09455108642578, "rewards/margins": 53.873817443847656, "rewards/real": -7.220743656158447, "step": 7860 }, { "epoch": 2.52, "learning_rate": 8.919047054640275e-08, "logits/generated": 3.785146713256836, "logits/real": 2.210160255432129, "logps/generated": -969.7698974609375, "logps/real": -407.4634094238281, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -57.033287048339844, "rewards/margins": 49.69139862060547, "rewards/real": -7.34189510345459, "step": 7870 }, { "epoch": 2.52, "learning_rate": 8.859784283513098e-08, "logits/generated": 3.6821417808532715, "logits/real": 2.3126654624938965, "logps/generated": -1092.364990234375, "logps/real": -379.85687255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.31324005126953, "rewards/margins": 62.71815872192383, "rewards/real": -6.595080375671387, "step": 7880 }, { "epoch": 2.52, "learning_rate": 8.800521512385919e-08, "logits/generated": 4.2309370040893555, "logits/real": 2.0690970420837402, "logps/generated": -1044.2857666015625, "logps/real": -356.1011047363281, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -63.82268142700195, "rewards/margins": 57.819053649902344, "rewards/real": -6.003624439239502, "step": 7890 }, { "epoch": 2.53, "learning_rate": 8.741258741258741e-08, "logits/generated": 3.7529544830322266, "logits/real": 2.2485594749450684, "logps/generated": -848.33740234375, "logps/real": -392.252685546875, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -48.257568359375, "rewards/margins": 41.74831771850586, "rewards/real": -6.509249210357666, "step": 7900 }, { "epoch": 2.53, "learning_rate": 8.681995970131564e-08, "logits/generated": 3.6429622173309326, "logits/real": 1.6829650402069092, "logps/generated": -960.66064453125, "logps/real": -374.3921813964844, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -57.94014358520508, "rewards/margins": 52.40065383911133, "rewards/real": -5.539486885070801, "step": 7910 }, { "epoch": 2.53, "learning_rate": 8.622733199004385e-08, "logits/generated": 3.404963970184326, "logits/real": 1.7258421182632446, "logps/generated": -930.3843994140625, "logps/real": -406.9457702636719, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -55.154701232910156, "rewards/margins": 49.09075164794922, "rewards/real": -6.063943386077881, "step": 7920 }, { "epoch": 2.54, "learning_rate": 8.563470427877208e-08, "logits/generated": 3.465583086013794, "logits/real": 1.9416353702545166, "logps/generated": -966.3623046875, "logps/real": -384.91851806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -58.843910217285156, "rewards/margins": 51.18348693847656, "rewards/real": -7.6604204177856445, "step": 7930 }, { "epoch": 2.54, "learning_rate": 8.504207656750028e-08, "logits/generated": 3.683955430984497, "logits/real": 1.7104041576385498, "logps/generated": -976.37109375, "logps/real": -405.4421081542969, "loss": 0.0044, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -59.630859375, "rewards/margins": 52.724388122558594, "rewards/real": -6.9064764976501465, "step": 7940 }, { "epoch": 2.54, "learning_rate": 8.444944885622851e-08, "logits/generated": 3.2371838092803955, "logits/real": 2.3724024295806885, "logps/generated": -996.0675659179688, "logps/real": -357.4671325683594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -59.767494201660156, "rewards/margins": 53.17461013793945, "rewards/real": -6.592886447906494, "step": 7950 }, { "epoch": 2.55, "learning_rate": 8.385682114495674e-08, "logits/generated": 3.7273597717285156, "logits/real": 2.1956067085266113, "logps/generated": -1034.621337890625, "logps/real": -365.28460693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -63.633636474609375, "rewards/margins": 56.25410079956055, "rewards/real": -7.37954044342041, "step": 7960 }, { "epoch": 2.55, "learning_rate": 8.326419343368495e-08, "logits/generated": 3.5570411682128906, "logits/real": 1.882310152053833, "logps/generated": -911.0950927734375, "logps/real": -381.05291748046875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -52.910743713378906, "rewards/margins": 46.59660720825195, "rewards/real": -6.314126491546631, "step": 7970 }, { "epoch": 2.55, "learning_rate": 8.267156572241317e-08, "logits/generated": 3.335761308670044, "logits/real": 1.4432555437088013, "logps/generated": -1008.70361328125, "logps/real": -412.27496337890625, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/generated": -61.73523712158203, "rewards/margins": 55.1703987121582, "rewards/real": -6.564839839935303, "step": 7980 }, { "epoch": 2.56, "learning_rate": 8.20789380111414e-08, "logits/generated": 4.182002067565918, "logits/real": 2.5581986904144287, "logps/generated": -1070.162841796875, "logps/real": -364.2648010253906, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -66.75981140136719, "rewards/margins": 58.261871337890625, "rewards/real": -8.497949600219727, "step": 7990 }, { "epoch": 2.56, "learning_rate": 8.148631029986961e-08, "logits/generated": 3.970362901687622, "logits/real": 1.7085485458374023, "logps/generated": -909.9697265625, "logps/real": -420.698486328125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -53.256507873535156, "rewards/margins": 46.76911163330078, "rewards/real": -6.48738956451416, "step": 8000 }, { "epoch": 2.56, "learning_rate": 8.089368258859784e-08, "logits/generated": 4.060704231262207, "logits/real": 1.9212415218353271, "logps/generated": -972.4220581054688, "logps/real": -417.5872497558594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -59.375953674316406, "rewards/margins": 52.89890670776367, "rewards/real": -6.477044105529785, "step": 8010 }, { "epoch": 2.57, "learning_rate": 8.030105487732607e-08, "logits/generated": 3.519432544708252, "logits/real": 2.1166985034942627, "logps/generated": -876.7869873046875, "logps/real": -398.08538818359375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -52.72206497192383, "rewards/margins": 44.93832778930664, "rewards/real": -7.783738136291504, "step": 8020 }, { "epoch": 2.57, "learning_rate": 7.970842716605427e-08, "logits/generated": 4.376236915588379, "logits/real": 2.1150360107421875, "logps/generated": -1027.4515380859375, "logps/real": -388.5436096191406, "loss": 0.0182, "rewards/accuracies": 0.987500011920929, "rewards/generated": -63.449058532714844, "rewards/margins": 55.94439697265625, "rewards/real": -7.504659175872803, "step": 8030 }, { "epoch": 2.57, "learning_rate": 7.91157994547825e-08, "logits/generated": 3.525820255279541, "logits/real": 1.893680214881897, "logps/generated": -1016.8726806640625, "logps/real": -371.03125, "loss": 0.0048, "rewards/accuracies": 0.987500011920929, "rewards/generated": -60.87693405151367, "rewards/margins": 53.64154815673828, "rewards/real": -7.235389709472656, "step": 8040 }, { "epoch": 2.58, "learning_rate": 7.852317174351073e-08, "logits/generated": 3.0457284450531006, "logits/real": 1.5740561485290527, "logps/generated": -916.3117065429688, "logps/real": -406.9604797363281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -52.98957443237305, "rewards/margins": 45.95235824584961, "rewards/real": -7.037219047546387, "step": 8050 }, { "epoch": 2.58, "learning_rate": 7.793054403223895e-08, "logits/generated": 3.4558169841766357, "logits/real": 1.8014957904815674, "logps/generated": -915.0403442382812, "logps/real": -447.24462890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -53.56114959716797, "rewards/margins": 46.14393997192383, "rewards/real": -7.417208671569824, "step": 8060 }, { "epoch": 2.58, "learning_rate": 7.733791632096716e-08, "logits/generated": 2.940850257873535, "logits/real": 1.4414273500442505, "logps/generated": -872.7760009765625, "logps/real": -391.720703125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -49.339874267578125, "rewards/margins": 43.12675094604492, "rewards/real": -6.2131218910217285, "step": 8070 }, { "epoch": 2.59, "learning_rate": 7.674528860969538e-08, "logits/generated": 2.944129705429077, "logits/real": 2.0338828563690186, "logps/generated": -917.4520263671875, "logps/real": -352.81646728515625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -53.3176155090332, "rewards/margins": 47.27953338623047, "rewards/real": -6.038083076477051, "step": 8080 }, { "epoch": 2.59, "learning_rate": 7.61526608984236e-08, "logits/generated": 3.909428119659424, "logits/real": 1.7142349481582642, "logps/generated": -950.1404418945312, "logps/real": -337.119140625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -56.356048583984375, "rewards/margins": 50.075618743896484, "rewards/real": -6.280430793762207, "step": 8090 }, { "epoch": 2.59, "learning_rate": 7.556003318715183e-08, "logits/generated": 3.8688418865203857, "logits/real": 1.6353342533111572, "logps/generated": -1029.6884765625, "logps/real": -378.70831298828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -61.768348693847656, "rewards/margins": 54.54430389404297, "rewards/real": -7.224038600921631, "step": 8100 }, { "epoch": 2.6, "learning_rate": 7.496740547588005e-08, "logits/generated": 3.625739336013794, "logits/real": 1.8834596872329712, "logps/generated": -936.3740234375, "logps/real": -369.0130920410156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -56.205955505371094, "rewards/margins": 49.76278305053711, "rewards/real": -6.44317102432251, "step": 8110 }, { "epoch": 2.6, "learning_rate": 7.437477776460826e-08, "logits/generated": 4.4700608253479, "logits/real": 1.8153865337371826, "logps/generated": -1135.7452392578125, "logps/real": -387.67364501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.09259796142578, "rewards/margins": 62.685211181640625, "rewards/real": -7.407387733459473, "step": 8120 }, { "epoch": 2.6, "learning_rate": 7.37821500533365e-08, "logits/generated": 3.371682643890381, "logits/real": 1.5304771661758423, "logps/generated": -1005.3448486328125, "logps/real": -396.2160339355469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -61.73991775512695, "rewards/margins": 54.14348220825195, "rewards/real": -7.596432685852051, "step": 8130 }, { "epoch": 2.6, "learning_rate": 7.318952234206471e-08, "logits/generated": 3.6937670707702637, "logits/real": 1.5645238161087036, "logps/generated": -1058.8162841796875, "logps/real": -405.37615966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -66.2863540649414, "rewards/margins": 59.2757453918457, "rewards/real": -7.010606288909912, "step": 8140 }, { "epoch": 2.61, "learning_rate": 7.259689463079294e-08, "logits/generated": 3.9854750633239746, "logits/real": 1.6261682510375977, "logps/generated": -990.65966796875, "logps/real": -408.9169921875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -61.356956481933594, "rewards/margins": 53.9902229309082, "rewards/real": -7.366732120513916, "step": 8150 }, { "epoch": 2.61, "learning_rate": 7.200426691952115e-08, "logits/generated": 4.002991676330566, "logits/real": 1.825396180152893, "logps/generated": -1012.3742065429688, "logps/real": -411.4058532714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -60.14048385620117, "rewards/margins": 53.03398513793945, "rewards/real": -7.1064958572387695, "step": 8160 }, { "epoch": 2.61, "learning_rate": 7.141163920824937e-08, "logits/generated": 3.600146532058716, "logits/real": 1.9582529067993164, "logps/generated": -968.4308471679688, "logps/real": -416.37945556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -57.93609619140625, "rewards/margins": 49.888954162597656, "rewards/real": -8.047143936157227, "step": 8170 }, { "epoch": 2.62, "learning_rate": 7.08190114969776e-08, "logits/generated": 3.9580321311950684, "logits/real": 1.7216682434082031, "logps/generated": -935.6194458007812, "logps/real": -403.15704345703125, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -56.472373962402344, "rewards/margins": 48.977821350097656, "rewards/real": -7.494554042816162, "step": 8180 }, { "epoch": 2.62, "learning_rate": 7.022638378570583e-08, "logits/generated": 3.9833621978759766, "logits/real": 1.8528293371200562, "logps/generated": -929.1632690429688, "logps/real": -435.58306884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -54.266265869140625, "rewards/margins": 47.02381134033203, "rewards/real": -7.242454528808594, "step": 8190 }, { "epoch": 2.62, "learning_rate": 6.963375607443404e-08, "logits/generated": 3.979447603225708, "logits/real": 1.8997459411621094, "logps/generated": -1097.211669921875, "logps/real": -392.0874938964844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.32579040527344, "rewards/margins": 62.917991638183594, "rewards/real": -7.407802581787109, "step": 8200 }, { "epoch": 2.63, "learning_rate": 6.904112836316226e-08, "logits/generated": 3.4925377368927, "logits/real": 1.8967807292938232, "logps/generated": -867.1658935546875, "logps/real": -375.30450439453125, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/generated": -51.6424674987793, "rewards/margins": 43.996559143066406, "rewards/real": -7.645912170410156, "step": 8210 }, { "epoch": 2.63, "learning_rate": 6.844850065189047e-08, "logits/generated": 3.8538784980773926, "logits/real": 1.8840980529785156, "logps/generated": -895.7223510742188, "logps/real": -406.9442443847656, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -52.7965087890625, "rewards/margins": 45.01344680786133, "rewards/real": -7.7830610275268555, "step": 8220 }, { "epoch": 2.63, "learning_rate": 6.78558729406187e-08, "logits/generated": 3.669043779373169, "logits/real": 1.752126693725586, "logps/generated": -949.6334228515625, "logps/real": -342.74896240234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -57.340171813964844, "rewards/margins": 51.566688537597656, "rewards/real": -5.773486137390137, "step": 8230 }, { "epoch": 2.64, "learning_rate": 6.726324522934693e-08, "logits/generated": 3.10225510597229, "logits/real": 1.8635063171386719, "logps/generated": -1024.183349609375, "logps/real": -372.98455810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -63.195220947265625, "rewards/margins": 56.8504753112793, "rewards/real": -6.3447465896606445, "step": 8240 }, { "epoch": 2.64, "learning_rate": 6.667061751807513e-08, "logits/generated": 3.48057222366333, "logits/real": 1.9885294437408447, "logps/generated": -961.88330078125, "logps/real": -385.88385009765625, "loss": 0.0049, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -57.873268127441406, "rewards/margins": 50.62914276123047, "rewards/real": -7.244119167327881, "step": 8250 }, { "epoch": 2.64, "learning_rate": 6.607798980680336e-08, "logits/generated": 3.5142083168029785, "logits/real": 2.191819667816162, "logps/generated": -910.6423950195312, "logps/real": -414.0364685058594, "loss": 0.0318, "rewards/accuracies": 0.987500011920929, "rewards/generated": -54.2632942199707, "rewards/margins": 46.317115783691406, "rewards/real": -7.9461798667907715, "step": 8260 }, { "epoch": 2.65, "learning_rate": 6.548536209553159e-08, "logits/generated": 4.207846641540527, "logits/real": 1.693935751914978, "logps/generated": -955.6438598632812, "logps/real": -400.48223876953125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -56.78264617919922, "rewards/margins": 50.251136779785156, "rewards/real": -6.531507968902588, "step": 8270 }, { "epoch": 2.65, "learning_rate": 6.48927343842598e-08, "logits/generated": 3.317568302154541, "logits/real": 1.7609246969223022, "logps/generated": -1018.4681396484375, "logps/real": -376.03021240234375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -61.39073944091797, "rewards/margins": 55.24901580810547, "rewards/real": -6.141722679138184, "step": 8280 }, { "epoch": 2.65, "learning_rate": 6.430010667298802e-08, "logits/generated": 3.58172607421875, "logits/real": 2.184634208679199, "logps/generated": -884.3873901367188, "logps/real": -399.60504150390625, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -52.401527404785156, "rewards/margins": 46.67741394042969, "rewards/real": -5.72410774230957, "step": 8290 }, { "epoch": 2.66, "learning_rate": 6.370747896171625e-08, "logits/generated": 4.151366233825684, "logits/real": 2.0955371856689453, "logps/generated": -941.9944458007812, "logps/real": -357.70477294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -57.915794372558594, "rewards/margins": 51.085426330566406, "rewards/real": -6.8303632736206055, "step": 8300 }, { "epoch": 2.66, "learning_rate": 6.311485125044446e-08, "logits/generated": 4.350826740264893, "logits/real": 2.037896156311035, "logps/generated": -894.1232299804688, "logps/real": -352.6371765136719, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -52.48077392578125, "rewards/margins": 46.843807220458984, "rewards/real": -5.636960506439209, "step": 8310 }, { "epoch": 2.66, "learning_rate": 6.252222353917269e-08, "logits/generated": 3.375370740890503, "logits/real": 1.5641025304794312, "logps/generated": -920.6629028320312, "logps/real": -429.950439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -53.04949188232422, "rewards/margins": 45.768009185791016, "rewards/real": -7.281485080718994, "step": 8320 }, { "epoch": 2.67, "learning_rate": 6.192959582790091e-08, "logits/generated": 3.415733814239502, "logits/real": 2.0732007026672363, "logps/generated": -944.77294921875, "logps/real": -397.92083740234375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -54.639923095703125, "rewards/margins": 47.84805679321289, "rewards/real": -6.791872978210449, "step": 8330 }, { "epoch": 2.67, "learning_rate": 6.133696811662914e-08, "logits/generated": 3.229666233062744, "logits/real": 1.9830280542373657, "logps/generated": -946.1580200195312, "logps/real": -447.03179931640625, "loss": 0.0374, "rewards/accuracies": 0.987500011920929, "rewards/generated": -54.59568405151367, "rewards/margins": 46.74303436279297, "rewards/real": -7.852652549743652, "step": 8340 }, { "epoch": 2.67, "learning_rate": 6.074434040535735e-08, "logits/generated": 3.708914279937744, "logits/real": 2.1419925689697266, "logps/generated": -946.6541748046875, "logps/real": -415.590576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -56.01914596557617, "rewards/margins": 48.601951599121094, "rewards/real": -7.417193412780762, "step": 8350 }, { "epoch": 2.68, "learning_rate": 6.015171269408558e-08, "logits/generated": 3.5551514625549316, "logits/real": 2.1436784267425537, "logps/generated": -957.1892700195312, "logps/real": -359.36767578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -57.14799880981445, "rewards/margins": 51.1831169128418, "rewards/real": -5.964877128601074, "step": 8360 }, { "epoch": 2.68, "learning_rate": 5.955908498281379e-08, "logits/generated": 4.05293607711792, "logits/real": 2.651482343673706, "logps/generated": -912.5654296875, "logps/real": -379.1618347167969, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -54.689910888671875, "rewards/margins": 46.95143508911133, "rewards/real": -7.738478660583496, "step": 8370 }, { "epoch": 2.68, "learning_rate": 5.896645727154202e-08, "logits/generated": 4.161120414733887, "logits/real": 2.1185717582702637, "logps/generated": -923.5693359375, "logps/real": -418.39532470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -54.5535774230957, "rewards/margins": 47.09696960449219, "rewards/real": -7.456611633300781, "step": 8380 }, { "epoch": 2.68, "learning_rate": 5.837382956027023e-08, "logits/generated": 3.6960418224334717, "logits/real": 2.034367561340332, "logps/generated": -1019.1530151367188, "logps/real": -405.61175537109375, "loss": 0.0045, "rewards/accuracies": 0.987500011920929, "rewards/generated": -60.967247009277344, "rewards/margins": 53.078208923339844, "rewards/real": -7.889039039611816, "step": 8390 }, { "epoch": 2.69, "learning_rate": 5.7781201848998455e-08, "logits/generated": 3.299665927886963, "logits/real": 1.9864240884780884, "logps/generated": -1078.4036865234375, "logps/real": -402.57269287109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -66.20504760742188, "rewards/margins": 58.318260192871094, "rewards/real": -7.886781215667725, "step": 8400 }, { "epoch": 2.69, "learning_rate": 5.7188574137726684e-08, "logits/generated": 3.879106044769287, "logits/real": 2.1246488094329834, "logps/generated": -965.0455932617188, "logps/real": -399.2043151855469, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -56.458168029785156, "rewards/margins": 49.10603713989258, "rewards/real": -7.352133274078369, "step": 8410 }, { "epoch": 2.69, "learning_rate": 5.65959464264549e-08, "logits/generated": 3.6350479125976562, "logits/real": 1.9505516290664673, "logps/generated": -977.1804809570312, "logps/real": -402.97857666015625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -56.70595169067383, "rewards/margins": 49.58742904663086, "rewards/real": -7.118523597717285, "step": 8420 }, { "epoch": 2.7, "learning_rate": 5.600331871518312e-08, "logits/generated": 3.839154005050659, "logits/real": 1.7790920734405518, "logps/generated": -1017.8132934570312, "logps/real": -397.7266540527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -61.7443962097168, "rewards/margins": 54.29301834106445, "rewards/real": -7.451376438140869, "step": 8430 }, { "epoch": 2.7, "learning_rate": 5.5410691003911337e-08, "logits/generated": 3.7217185497283936, "logits/real": 2.226267099380493, "logps/generated": -907.8714599609375, "logps/real": -364.1994323730469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -54.056365966796875, "rewards/margins": 47.113712310791016, "rewards/real": -6.94265079498291, "step": 8440 }, { "epoch": 2.7, "learning_rate": 5.4818063292639565e-08, "logits/generated": 3.527050018310547, "logits/real": 2.036137580871582, "logps/generated": -947.5823364257812, "logps/real": -409.0567626953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -56.482383728027344, "rewards/margins": 48.5067253112793, "rewards/real": -7.9756646156311035, "step": 8450 }, { "epoch": 2.71, "learning_rate": 5.422543558136778e-08, "logits/generated": 3.7611911296844482, "logits/real": 1.8519208431243896, "logps/generated": -901.5185546875, "logps/real": -417.3238220214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -54.371253967285156, "rewards/margins": 47.136558532714844, "rewards/real": -7.234696388244629, "step": 8460 }, { "epoch": 2.71, "learning_rate": 5.3632807870096e-08, "logits/generated": 3.4174492359161377, "logits/real": 1.9680230617523193, "logps/generated": -1063.694580078125, "logps/real": -395.2354431152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -65.83134460449219, "rewards/margins": 58.03875732421875, "rewards/real": -7.792585849761963, "step": 8470 }, { "epoch": 2.71, "learning_rate": 5.3040180158824225e-08, "logits/generated": 3.9970059394836426, "logits/real": 2.3934884071350098, "logps/generated": -1125.6826171875, "logps/real": -379.00323486328125, "loss": 0.004, "rewards/accuracies": 0.987500011920929, "rewards/generated": -71.78718566894531, "rewards/margins": 63.51630783081055, "rewards/real": -8.270886421203613, "step": 8480 }, { "epoch": 2.72, "learning_rate": 5.2447552447552447e-08, "logits/generated": 3.7725205421447754, "logits/real": 1.9590591192245483, "logps/generated": -1091.79150390625, "logps/real": -390.5736389160156, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -65.3434829711914, "rewards/margins": 56.868316650390625, "rewards/real": -8.475166320800781, "step": 8490 }, { "epoch": 2.72, "learning_rate": 5.185492473628066e-08, "logits/generated": 3.661846160888672, "logits/real": 2.312032699584961, "logps/generated": -933.3740234375, "logps/real": -411.44451904296875, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/generated": -55.91740798950195, "rewards/margins": 47.27542495727539, "rewards/real": -8.641983032226562, "step": 8500 }, { "epoch": 2.72, "learning_rate": 5.1262297025008884e-08, "logits/generated": 3.465627670288086, "logits/real": 2.2164180278778076, "logps/generated": -1076.426025390625, "logps/real": -389.19964599609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -67.2499771118164, "rewards/margins": 59.05321502685547, "rewards/real": -8.196765899658203, "step": 8510 }, { "epoch": 2.73, "learning_rate": 5.066966931373711e-08, "logits/generated": 4.173044681549072, "logits/real": 1.8490111827850342, "logps/generated": -1132.7874755859375, "logps/real": -432.0376892089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.95558166503906, "rewards/margins": 62.96862030029297, "rewards/real": -7.986954689025879, "step": 8520 }, { "epoch": 2.73, "learning_rate": 5.007704160246533e-08, "logits/generated": 4.139945030212402, "logits/real": 2.526951313018799, "logps/generated": -1054.085693359375, "logps/real": -377.7095031738281, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -65.83952331542969, "rewards/margins": 57.67792510986328, "rewards/real": -8.161606788635254, "step": 8530 }, { "epoch": 2.73, "learning_rate": 4.948441389119355e-08, "logits/generated": 3.516796827316284, "logits/real": 2.3811421394348145, "logps/generated": -1006.6256103515625, "logps/real": -362.2398986816406, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -59.41242218017578, "rewards/margins": 51.91820526123047, "rewards/real": -7.494215965270996, "step": 8540 }, { "epoch": 2.74, "learning_rate": 4.889178617992177e-08, "logits/generated": 3.9033493995666504, "logits/real": 1.774997353553772, "logps/generated": -972.8084716796875, "logps/real": -427.7626037597656, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/generated": -56.33424758911133, "rewards/margins": 48.570919036865234, "rewards/real": -7.763323783874512, "step": 8550 }, { "epoch": 2.74, "learning_rate": 4.8299158468649994e-08, "logits/generated": 3.971569061279297, "logits/real": 2.0908877849578857, "logps/generated": -1032.648193359375, "logps/real": -371.1000671386719, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -64.35382080078125, "rewards/margins": 56.81888961791992, "rewards/real": -7.534930229187012, "step": 8560 }, { "epoch": 2.74, "learning_rate": 4.770653075737821e-08, "logits/generated": 4.424673557281494, "logits/real": 2.3915224075317383, "logps/generated": -959.6032104492188, "logps/real": -414.59918212890625, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -55.32141876220703, "rewards/margins": 47.93350601196289, "rewards/real": -7.387907981872559, "step": 8570 }, { "epoch": 2.75, "learning_rate": 4.711390304610643e-08, "logits/generated": 4.043940544128418, "logits/real": 2.225125789642334, "logps/generated": -865.1267700195312, "logps/real": -404.07330322265625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -49.90807342529297, "rewards/margins": 42.133445739746094, "rewards/real": -7.774628639221191, "step": 8580 }, { "epoch": 2.75, "learning_rate": 4.6521275334834654e-08, "logits/generated": 4.111706733703613, "logits/real": 2.0812008380889893, "logps/generated": -1015.0589599609375, "logps/real": -458.95703125, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -61.14873123168945, "rewards/margins": 52.93597412109375, "rewards/real": -8.21275520324707, "step": 8590 }, { "epoch": 2.75, "learning_rate": 4.5928647623562876e-08, "logits/generated": 3.711606502532959, "logits/real": 2.339512586593628, "logps/generated": -953.4273681640625, "logps/real": -405.60662841796875, "loss": 0.0025, "rewards/accuracies": 0.987500011920929, "rewards/generated": -57.3015022277832, "rewards/margins": 49.46393966674805, "rewards/real": -7.837560176849365, "step": 8600 }, { "epoch": 2.76, "learning_rate": 4.53360199122911e-08, "logits/generated": 3.7957377433776855, "logits/real": 2.4589855670928955, "logps/generated": -1112.1412353515625, "logps/real": -392.3586120605469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -68.88035583496094, "rewards/margins": 59.36815643310547, "rewards/real": -9.512189865112305, "step": 8610 }, { "epoch": 2.76, "learning_rate": 4.474339220101932e-08, "logits/generated": 3.2196178436279297, "logits/real": 1.9958690404891968, "logps/generated": -870.9078979492188, "logps/real": -462.06121826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -50.91344451904297, "rewards/margins": 42.71410369873047, "rewards/real": -8.199337005615234, "step": 8620 }, { "epoch": 2.76, "learning_rate": 4.415076448974754e-08, "logits/generated": 3.8534913063049316, "logits/real": 2.2192482948303223, "logps/generated": -990.10986328125, "logps/real": -397.8377380371094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -60.09471893310547, "rewards/margins": 53.13765335083008, "rewards/real": -6.957066535949707, "step": 8630 }, { "epoch": 2.76, "learning_rate": 4.355813677847576e-08, "logits/generated": 3.7722702026367188, "logits/real": 2.3620645999908447, "logps/generated": -1003.5438232421875, "logps/real": -389.4685363769531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -60.21651077270508, "rewards/margins": 52.0927848815918, "rewards/real": -8.12373161315918, "step": 8640 }, { "epoch": 2.77, "learning_rate": 4.296550906720398e-08, "logits/generated": 3.63665509223938, "logits/real": 2.1879465579986572, "logps/generated": -1004.0223388671875, "logps/real": -360.257080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -61.52576446533203, "rewards/margins": 54.45721435546875, "rewards/real": -7.068545341491699, "step": 8650 }, { "epoch": 2.77, "learning_rate": 4.23728813559322e-08, "logits/generated": 3.8347854614257812, "logits/real": 2.080061435699463, "logps/generated": -911.7888793945312, "logps/real": -376.4729919433594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -54.39191818237305, "rewards/margins": 47.83964157104492, "rewards/real": -6.552272796630859, "step": 8660 }, { "epoch": 2.77, "learning_rate": 4.178025364466042e-08, "logits/generated": 3.5550475120544434, "logits/real": 2.0093512535095215, "logps/generated": -983.3677978515625, "logps/real": -421.22088623046875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -59.86065673828125, "rewards/margins": 52.51118087768555, "rewards/real": -7.349481105804443, "step": 8670 }, { "epoch": 2.78, "learning_rate": 4.118762593338864e-08, "logits/generated": 3.9317879676818848, "logits/real": 2.3645236492156982, "logps/generated": -1096.2442626953125, "logps/real": -377.3437194824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.23106384277344, "rewards/margins": 60.86164093017578, "rewards/real": -8.369420051574707, "step": 8680 }, { "epoch": 2.78, "learning_rate": 4.059499822211687e-08, "logits/generated": 4.139974117279053, "logits/real": 2.1882903575897217, "logps/generated": -930.52197265625, "logps/real": -407.3905944824219, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -56.66914749145508, "rewards/margins": 48.477088928222656, "rewards/real": -8.192062377929688, "step": 8690 }, { "epoch": 2.78, "learning_rate": 4.000237051084508e-08, "logits/generated": 4.2538299560546875, "logits/real": 2.099956750869751, "logps/generated": -1089.7821044921875, "logps/real": -393.08734130859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -69.39396667480469, "rewards/margins": 61.89726638793945, "rewards/real": -7.496708869934082, "step": 8700 }, { "epoch": 2.79, "learning_rate": 3.9409742799573305e-08, "logits/generated": 4.161600589752197, "logits/real": 1.944954514503479, "logps/generated": -1037.1029052734375, "logps/real": -436.84149169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -63.92815399169922, "rewards/margins": 56.2886848449707, "rewards/real": -7.639472961425781, "step": 8710 }, { "epoch": 2.79, "learning_rate": 3.8817115088301533e-08, "logits/generated": 3.7674357891082764, "logits/real": 2.2307162284851074, "logps/generated": -1004.86865234375, "logps/real": -408.85260009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -60.471038818359375, "rewards/margins": 52.617469787597656, "rewards/real": -7.853572845458984, "step": 8720 }, { "epoch": 2.79, "learning_rate": 3.822448737702975e-08, "logits/generated": 3.9538588523864746, "logits/real": 2.351992607116699, "logps/generated": -1198.9942626953125, "logps/real": -407.2468566894531, "loss": 0.0029, "rewards/accuracies": 0.987500011920929, "rewards/generated": -77.22299194335938, "rewards/margins": 69.22731018066406, "rewards/real": -7.995683193206787, "step": 8730 }, { "epoch": 2.8, "learning_rate": 3.763185966575797e-08, "logits/generated": 3.734550952911377, "logits/real": 2.352360963821411, "logps/generated": -969.0906372070312, "logps/real": -395.86383056640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -58.753684997558594, "rewards/margins": 51.309471130371094, "rewards/real": -7.444212436676025, "step": 8740 }, { "epoch": 2.8, "learning_rate": 3.7039231954486186e-08, "logits/generated": 3.949242353439331, "logits/real": 2.179558277130127, "logps/generated": -986.6300048828125, "logps/real": -360.14801025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -59.20587158203125, "rewards/margins": 51.371360778808594, "rewards/real": -7.834508419036865, "step": 8750 }, { "epoch": 2.8, "learning_rate": 3.6446604243214415e-08, "logits/generated": 4.206214904785156, "logits/real": 2.566666603088379, "logps/generated": -959.1954956054688, "logps/real": -437.5762634277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -58.717437744140625, "rewards/margins": 49.8985481262207, "rewards/real": -8.81889533996582, "step": 8760 }, { "epoch": 2.81, "learning_rate": 3.585397653194263e-08, "logits/generated": 4.577523231506348, "logits/real": 2.33097505569458, "logps/generated": -1163.2091064453125, "logps/real": -381.373779296875, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -74.9511489868164, "rewards/margins": 66.1609878540039, "rewards/real": -8.790148735046387, "step": 8770 }, { "epoch": 2.81, "learning_rate": 3.526134882067085e-08, "logits/generated": 4.372532844543457, "logits/real": 2.314779281616211, "logps/generated": -991.7755126953125, "logps/real": -387.3517761230469, "loss": 0.0074, "rewards/accuracies": 0.987500011920929, "rewards/generated": -62.28171920776367, "rewards/margins": 54.22426223754883, "rewards/real": -8.05746078491211, "step": 8780 }, { "epoch": 2.81, "learning_rate": 3.4668721109399074e-08, "logits/generated": 3.949869155883789, "logits/real": 2.473696231842041, "logps/generated": -995.8297119140625, "logps/real": -377.43212890625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -61.33977127075195, "rewards/margins": 54.054656982421875, "rewards/real": -7.2851152420043945, "step": 8790 }, { "epoch": 2.82, "learning_rate": 3.4076093398127296e-08, "logits/generated": 3.7317681312561035, "logits/real": 2.186537265777588, "logps/generated": -1112.25927734375, "logps/real": -374.804931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -72.85594940185547, "rewards/margins": 64.02801513671875, "rewards/real": -8.827939987182617, "step": 8800 }, { "epoch": 2.82, "learning_rate": 3.348346568685552e-08, "logits/generated": 4.337752819061279, "logits/real": 2.0777242183685303, "logps/generated": -968.1364135742188, "logps/real": -401.09771728515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -60.210594177246094, "rewards/margins": 51.63945388793945, "rewards/real": -8.571136474609375, "step": 8810 }, { "epoch": 2.82, "learning_rate": 3.2890837975583734e-08, "logits/generated": 3.9325528144836426, "logits/real": 2.016432762145996, "logps/generated": -1130.004638671875, "logps/real": -402.5047302246094, "loss": 0.0089, "rewards/accuracies": 0.987500011920929, "rewards/generated": -72.4874038696289, "rewards/margins": 64.43824768066406, "rewards/real": -8.04914379119873, "step": 8820 }, { "epoch": 2.83, "learning_rate": 3.229821026431196e-08, "logits/generated": 4.252560615539551, "logits/real": 2.5569992065429688, "logps/generated": -1086.0369873046875, "logps/real": -417.00726318359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -69.71559143066406, "rewards/margins": 60.46100616455078, "rewards/real": -9.254584312438965, "step": 8830 }, { "epoch": 2.83, "learning_rate": 3.170558255304018e-08, "logits/generated": 4.101076126098633, "logits/real": 2.5329222679138184, "logps/generated": -1007.7769775390625, "logps/real": -371.10723876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -62.38042449951172, "rewards/margins": 54.435874938964844, "rewards/real": -7.944557189941406, "step": 8840 }, { "epoch": 2.83, "learning_rate": 3.11129548417684e-08, "logits/generated": 4.133565902709961, "logits/real": 2.0584716796875, "logps/generated": -1027.533203125, "logps/real": -421.91339111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -63.9887580871582, "rewards/margins": 55.15087890625, "rewards/real": -8.837879180908203, "step": 8850 }, { "epoch": 2.84, "learning_rate": 3.052032713049662e-08, "logits/generated": 3.8249733448028564, "logits/real": 2.2053627967834473, "logps/generated": -1117.9556884765625, "logps/real": -410.8575134277344, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -68.75425720214844, "rewards/margins": 60.45509719848633, "rewards/real": -8.299158096313477, "step": 8860 }, { "epoch": 2.84, "learning_rate": 2.9927699419224844e-08, "logits/generated": 4.576693058013916, "logits/real": 1.9404109716415405, "logps/generated": -937.0902099609375, "logps/real": -389.9156494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -55.26157760620117, "rewards/margins": 46.93947982788086, "rewards/real": -8.322099685668945, "step": 8870 }, { "epoch": 2.84, "learning_rate": 2.9335071707953063e-08, "logits/generated": 4.113223552703857, "logits/real": 2.3892219066619873, "logps/generated": -1015.8532104492188, "logps/real": -362.73370361328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -61.505126953125, "rewards/margins": 53.80535888671875, "rewards/real": -7.699770450592041, "step": 8880 }, { "epoch": 2.84, "learning_rate": 2.8742443996681285e-08, "logits/generated": 4.111103534698486, "logits/real": 2.273388624191284, "logps/generated": -1063.7801513671875, "logps/real": -408.4124450683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -66.34366607666016, "rewards/margins": 57.898590087890625, "rewards/real": -8.44508171081543, "step": 8890 }, { "epoch": 2.85, "learning_rate": 2.8149816285409503e-08, "logits/generated": 4.130972862243652, "logits/real": 2.298684597015381, "logps/generated": -1081.184326171875, "logps/real": -401.2452392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -67.52656555175781, "rewards/margins": 58.92407989501953, "rewards/real": -8.602482795715332, "step": 8900 }, { "epoch": 2.85, "learning_rate": 2.7557188574137725e-08, "logits/generated": 3.882814407348633, "logits/real": 2.370919704437256, "logps/generated": -1150.154052734375, "logps/real": -383.59954833984375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -74.73722839355469, "rewards/margins": 67.35450744628906, "rewards/real": -7.3827223777771, "step": 8910 }, { "epoch": 2.85, "learning_rate": 2.6964560862865947e-08, "logits/generated": 4.083722114562988, "logits/real": 2.3151917457580566, "logps/generated": -955.68701171875, "logps/real": -425.1591796875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -59.24004364013672, "rewards/margins": 50.92094802856445, "rewards/real": -8.319098472595215, "step": 8920 }, { "epoch": 2.86, "learning_rate": 2.6371933151594166e-08, "logits/generated": 4.148406028747559, "logits/real": 2.6285533905029297, "logps/generated": -990.8770751953125, "logps/real": -418.384521484375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -60.00004959106445, "rewards/margins": 51.72795486450195, "rewards/real": -8.272089004516602, "step": 8930 }, { "epoch": 2.86, "learning_rate": 2.5779305440322388e-08, "logits/generated": 3.855437755584717, "logits/real": 2.579738140106201, "logps/generated": -1010.6025390625, "logps/real": -381.7750549316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -62.51027297973633, "rewards/margins": 54.409332275390625, "rewards/real": -8.100942611694336, "step": 8940 }, { "epoch": 2.86, "learning_rate": 2.5186677729050607e-08, "logits/generated": 3.730900526046753, "logits/real": 2.5897955894470215, "logps/generated": -1115.298583984375, "logps/real": -409.9163513183594, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -70.0462646484375, "rewards/margins": 61.09590530395508, "rewards/real": -8.950363159179688, "step": 8950 }, { "epoch": 2.87, "learning_rate": 2.459405001777883e-08, "logits/generated": 4.09194278717041, "logits/real": 1.8238937854766846, "logps/generated": -1002.7340087890625, "logps/real": -387.93414306640625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -60.72015380859375, "rewards/margins": 54.630889892578125, "rewards/real": -6.089266777038574, "step": 8960 }, { "epoch": 2.87, "learning_rate": 2.4001422306507054e-08, "logits/generated": 4.2426862716674805, "logits/real": 2.1907787322998047, "logps/generated": -947.5045776367188, "logps/real": -446.52203369140625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -58.48749542236328, "rewards/margins": 49.036041259765625, "rewards/real": -9.451457977294922, "step": 8970 }, { "epoch": 2.87, "learning_rate": 2.3408794595235273e-08, "logits/generated": 3.7106690406799316, "logits/real": 2.4908013343811035, "logps/generated": -1102.080810546875, "logps/real": -377.4757080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.90482330322266, "rewards/margins": 61.837921142578125, "rewards/real": -8.066899299621582, "step": 8980 }, { "epoch": 2.88, "learning_rate": 2.2816166883963495e-08, "logits/generated": 3.4732189178466797, "logits/real": 2.1402063369750977, "logps/generated": -1026.4468994140625, "logps/real": -419.9058532714844, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -62.887428283691406, "rewards/margins": 54.51227951049805, "rewards/real": -8.375152587890625, "step": 8990 }, { "epoch": 2.88, "learning_rate": 2.2223539172691714e-08, "logits/generated": 3.951288938522339, "logits/real": 1.9303228855133057, "logps/generated": -1031.7862548828125, "logps/real": -366.66424560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -63.44452667236328, "rewards/margins": 56.662071228027344, "rewards/real": -6.7824554443359375, "step": 9000 }, { "epoch": 2.88, "learning_rate": 2.1630911461419936e-08, "logits/generated": 3.3687922954559326, "logits/real": 2.513150691986084, "logps/generated": -1048.0078125, "logps/real": -411.56842041015625, "loss": 0.0045, "rewards/accuracies": 0.987500011920929, "rewards/generated": -64.83985900878906, "rewards/margins": 55.295440673828125, "rewards/real": -9.544424057006836, "step": 9010 }, { "epoch": 2.89, "learning_rate": 2.1038283750148154e-08, "logits/generated": 4.0956807136535645, "logits/real": 2.561434268951416, "logps/generated": -1174.1719970703125, "logps/real": -407.2411193847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -75.4465560913086, "rewards/margins": 67.99554443359375, "rewards/real": -7.451010227203369, "step": 9020 }, { "epoch": 2.89, "learning_rate": 2.0445656038876377e-08, "logits/generated": 3.2312393188476562, "logits/real": 2.3635706901550293, "logps/generated": -1132.919677734375, "logps/real": -415.96783447265625, "loss": 0.0025, "rewards/accuracies": 0.987500011920929, "rewards/generated": -72.41719055175781, "rewards/margins": 63.97467803955078, "rewards/real": -8.442511558532715, "step": 9030 }, { "epoch": 2.89, "learning_rate": 1.98530283276046e-08, "logits/generated": 3.547464370727539, "logits/real": 2.1589770317077637, "logps/generated": -1229.614013671875, "logps/real": -364.1869812011719, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -79.16869354248047, "rewards/margins": 71.33837127685547, "rewards/real": -7.830325126647949, "step": 9040 }, { "epoch": 2.9, "learning_rate": 1.9260400616332817e-08, "logits/generated": 4.280892848968506, "logits/real": 2.2274768352508545, "logps/generated": -977.6754150390625, "logps/real": -428.25872802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -60.16440963745117, "rewards/margins": 51.583404541015625, "rewards/real": -8.581003189086914, "step": 9050 }, { "epoch": 2.9, "learning_rate": 1.866777290506104e-08, "logits/generated": 3.8407740592956543, "logits/real": 2.5023605823516846, "logps/generated": -1122.100830078125, "logps/real": -435.5116271972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.32371520996094, "rewards/margins": 63.18709182739258, "rewards/real": -8.136619567871094, "step": 9060 }, { "epoch": 2.9, "learning_rate": 1.807514519378926e-08, "logits/generated": 4.040691375732422, "logits/real": 2.2008137702941895, "logps/generated": -1000.4134521484375, "logps/real": -381.9537658691406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -62.254302978515625, "rewards/margins": 54.66272735595703, "rewards/real": -7.591574192047119, "step": 9070 }, { "epoch": 2.91, "learning_rate": 1.7482517482517483e-08, "logits/generated": 3.8093605041503906, "logits/real": 2.327657699584961, "logps/generated": -1024.83935546875, "logps/real": -419.9295959472656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -63.84809494018555, "rewards/margins": 54.47718048095703, "rewards/real": -9.370905876159668, "step": 9080 }, { "epoch": 2.91, "learning_rate": 1.6889889771245702e-08, "logits/generated": 4.164027690887451, "logits/real": 2.1145646572113037, "logps/generated": -954.1112060546875, "logps/real": -414.4833068847656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -57.67053985595703, "rewards/margins": 49.82292938232422, "rewards/real": -7.847611904144287, "step": 9090 }, { "epoch": 2.91, "learning_rate": 1.6297262059973924e-08, "logits/generated": 4.627682685852051, "logits/real": 2.4523158073425293, "logps/generated": -1147.0657958984375, "logps/real": -402.86785888671875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -73.46512603759766, "rewards/margins": 63.60245895385742, "rewards/real": -9.862655639648438, "step": 9100 }, { "epoch": 2.92, "learning_rate": 1.5704634348702146e-08, "logits/generated": 4.3307390213012695, "logits/real": 2.7241694927215576, "logps/generated": -1024.493896484375, "logps/real": -414.1810607910156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -64.47026062011719, "rewards/margins": 53.803749084472656, "rewards/real": -10.666508674621582, "step": 9110 }, { "epoch": 2.92, "learning_rate": 1.5112006637430365e-08, "logits/generated": 4.022457122802734, "logits/real": 2.0878734588623047, "logps/generated": -937.5765380859375, "logps/real": -448.01654052734375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -54.977317810058594, "rewards/margins": 47.14643478393555, "rewards/real": -7.830883979797363, "step": 9120 }, { "epoch": 2.92, "learning_rate": 1.4519378926158587e-08, "logits/generated": 4.017087459564209, "logits/real": 2.5776567459106445, "logps/generated": -965.0074462890625, "logps/real": -391.77374267578125, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -57.873931884765625, "rewards/margins": 49.04096984863281, "rewards/real": -8.83295726776123, "step": 9130 }, { "epoch": 2.92, "learning_rate": 1.3926751214886807e-08, "logits/generated": 4.196539402008057, "logits/real": 2.2769038677215576, "logps/generated": -883.5095825195312, "logps/real": -425.8203125, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -51.294769287109375, "rewards/margins": 43.60906982421875, "rewards/real": -7.685697078704834, "step": 9140 }, { "epoch": 2.93, "learning_rate": 1.333412350361503e-08, "logits/generated": 4.753024101257324, "logits/real": 2.3693277835845947, "logps/generated": -1102.330810546875, "logps/real": -407.6561279296875, "loss": 0.0793, "rewards/accuracies": 1.0, "rewards/generated": -68.81044006347656, "rewards/margins": 60.021881103515625, "rewards/real": -8.788552284240723, "step": 9150 }, { "epoch": 2.93, "learning_rate": 1.274149579234325e-08, "logits/generated": 4.085358619689941, "logits/real": 2.0097265243530273, "logps/generated": -1012.0568237304688, "logps/real": -457.06085205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -62.34282684326172, "rewards/margins": 53.456886291503906, "rewards/real": -8.885940551757812, "step": 9160 }, { "epoch": 2.93, "learning_rate": 1.214886808107147e-08, "logits/generated": 4.291256904602051, "logits/real": 2.43717360496521, "logps/generated": -1060.977294921875, "logps/real": -424.43280029296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -66.03826141357422, "rewards/margins": 57.78627395629883, "rewards/real": -8.25198745727539, "step": 9170 }, { "epoch": 2.94, "learning_rate": 1.155624036979969e-08, "logits/generated": 4.13791036605835, "logits/real": 2.2680563926696777, "logps/generated": -1029.0772705078125, "logps/real": -391.6253356933594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -63.715553283691406, "rewards/margins": 55.91587448120117, "rewards/real": -7.799679756164551, "step": 9180 }, { "epoch": 2.94, "learning_rate": 1.0963612658527912e-08, "logits/generated": 3.743330717086792, "logits/real": 2.188157796859741, "logps/generated": -1004.7896728515625, "logps/real": -380.8592834472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -61.408721923828125, "rewards/margins": 53.28612518310547, "rewards/real": -8.122594833374023, "step": 9190 }, { "epoch": 2.94, "learning_rate": 1.0370984947256134e-08, "logits/generated": 3.9687373638153076, "logits/real": 2.5319037437438965, "logps/generated": -975.3508911132812, "logps/real": -369.2455139160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -60.98682403564453, "rewards/margins": 52.99494171142578, "rewards/real": -7.991887092590332, "step": 9200 }, { "epoch": 2.95, "learning_rate": 9.778357235984355e-09, "logits/generated": 3.6886813640594482, "logits/real": 2.2309367656707764, "logps/generated": -988.72265625, "logps/real": -423.3807678222656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -58.65971755981445, "rewards/margins": 51.6239013671875, "rewards/real": -7.035823822021484, "step": 9210 }, { "epoch": 2.95, "learning_rate": 9.185729524712575e-09, "logits/generated": 3.9481704235076904, "logits/real": 1.865582823753357, "logps/generated": -1115.6072998046875, "logps/real": -436.29742431640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -71.58280944824219, "rewards/margins": 63.30155563354492, "rewards/real": -8.28125, "step": 9220 }, { "epoch": 2.95, "learning_rate": 8.593101813440796e-09, "logits/generated": 4.100003242492676, "logits/real": 2.4489364624023438, "logps/generated": -1035.8955078125, "logps/real": -374.9039001464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -62.61082077026367, "rewards/margins": 54.9398078918457, "rewards/real": -7.671013832092285, "step": 9230 }, { "epoch": 2.96, "learning_rate": 8.000474102169016e-09, "logits/generated": 4.383337497711182, "logits/real": 2.721869945526123, "logps/generated": -1061.5406494140625, "logps/real": -418.4205627441406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -67.38023376464844, "rewards/margins": 57.91606521606445, "rewards/real": -9.464168548583984, "step": 9240 }, { "epoch": 2.96, "learning_rate": 7.407846390897238e-09, "logits/generated": 4.2132391929626465, "logits/real": 2.0285253524780273, "logps/generated": -1053.735107421875, "logps/real": -380.25665283203125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -67.59648132324219, "rewards/margins": 59.252655029296875, "rewards/real": -8.343820571899414, "step": 9250 }, { "epoch": 2.96, "learning_rate": 6.815218679625459e-09, "logits/generated": 4.097989082336426, "logits/real": 2.0763533115386963, "logps/generated": -948.5368041992188, "logps/real": -408.927001953125, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/generated": -57.566383361816406, "rewards/margins": 49.99573516845703, "rewards/real": -7.570644378662109, "step": 9260 }, { "epoch": 2.97, "learning_rate": 6.22259096835368e-09, "logits/generated": 3.7166342735290527, "logits/real": 2.241440773010254, "logps/generated": -1085.999267578125, "logps/real": -462.370361328125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -66.93648529052734, "rewards/margins": 58.837615966796875, "rewards/real": -8.098878860473633, "step": 9270 }, { "epoch": 2.97, "learning_rate": 5.629963257081901e-09, "logits/generated": 4.10945463180542, "logits/real": 2.291794776916504, "logps/generated": -950.1226806640625, "logps/real": -354.39849853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -57.405418395996094, "rewards/margins": 49.583351135253906, "rewards/real": -7.822066307067871, "step": 9280 }, { "epoch": 2.97, "learning_rate": 5.037335545810122e-09, "logits/generated": 4.05794095993042, "logits/real": 2.0275235176086426, "logps/generated": -1017.9361572265625, "logps/real": -413.9666442871094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -61.35076141357422, "rewards/margins": 53.6244010925293, "rewards/real": -7.726365566253662, "step": 9290 }, { "epoch": 2.98, "learning_rate": 4.444707834538343e-09, "logits/generated": 3.8201382160186768, "logits/real": 2.22807240486145, "logps/generated": -1124.951416015625, "logps/real": -389.0148010253906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -72.00746154785156, "rewards/margins": 64.85371398925781, "rewards/real": -7.153740882873535, "step": 9300 }, { "epoch": 2.98, "learning_rate": 3.8520801232665634e-09, "logits/generated": 3.948748826980591, "logits/real": 2.387542247772217, "logps/generated": -1052.9998779296875, "logps/real": -437.97003173828125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -65.4850845336914, "rewards/margins": 56.93280029296875, "rewards/real": -8.552282333374023, "step": 9310 }, { "epoch": 2.98, "learning_rate": 3.2594524119947846e-09, "logits/generated": 3.9022059440612793, "logits/real": 2.5131003856658936, "logps/generated": -1026.8658447265625, "logps/real": -355.9730224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -64.99104309082031, "rewards/margins": 57.335411071777344, "rewards/real": -7.655627250671387, "step": 9320 }, { "epoch": 2.99, "learning_rate": 2.6668247007230054e-09, "logits/generated": 3.8674044609069824, "logits/real": 2.395481586456299, "logps/generated": -957.7073974609375, "logps/real": -383.9956970214844, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -58.217140197753906, "rewards/margins": 49.42625045776367, "rewards/real": -8.790895462036133, "step": 9330 }, { "epoch": 2.99, "learning_rate": 2.0741969894512266e-09, "logits/generated": 4.056595802307129, "logits/real": 2.295872449874878, "logps/generated": -1188.929931640625, "logps/real": -401.1290588378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -75.59089660644531, "rewards/margins": 67.40728759765625, "rewards/real": -8.18360424041748, "step": 9340 }, { "epoch": 2.99, "learning_rate": 1.4815692781794476e-09, "logits/generated": 3.5111021995544434, "logits/real": 2.16282320022583, "logps/generated": -1066.041259765625, "logps/real": -386.13299560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -68.75672912597656, "rewards/margins": 59.711891174316406, "rewards/real": -9.044843673706055, "step": 9350 }, { "epoch": 3.0, "learning_rate": 8.889415669076685e-10, "logits/generated": 3.9372799396514893, "logits/real": 2.3717198371887207, "logps/generated": -1054.9495849609375, "logps/real": -430.40264892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -65.59407043457031, "rewards/margins": 57.335960388183594, "rewards/real": -8.258099555969238, "step": 9360 }, { "epoch": 3.0, "learning_rate": 2.963138556358895e-10, "logits/generated": 3.690178394317627, "logits/real": 2.565126657485962, "logps/generated": -1029.033935546875, "logps/real": -439.5101013183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -62.89173126220703, "rewards/margins": 53.2917594909668, "rewards/real": -9.599966049194336, "step": 9370 }, { "epoch": 3.0, "step": 9375, "total_flos": 0.0, "train_loss": 0.028198444762210128, "train_runtime": 73703.5855, "train_samples_per_second": 4.07, "train_steps_per_second": 0.127 } ], "logging_steps": 10, "max_steps": 9375, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }