diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,13162 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 100, + "global_step": 9375, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.33049040511727e-10, + "logits/generated": -1.8826184272766113, + "logits/real": -1.7995665073394775, + "logps/generated": -403.8936767578125, + "logps/real": -344.43768310546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 5.3304904051172705e-09, + "logits/generated": -1.7489397525787354, + "logits/real": -1.9043943881988525, + "logps/generated": -387.8950500488281, + "logps/real": -287.7349853515625, + "loss": 0.6909, + "rewards/accuracies": 0.4027777910232544, + "rewards/generated": 0.0008104961016215384, + "rewards/margins": 0.008820068091154099, + "rewards/real": 0.009630562737584114, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 1.0660980810234541e-08, + "logits/generated": -1.6604442596435547, + "logits/real": -1.7629890441894531, + "logps/generated": -389.0264892578125, + "logps/real": -304.28338623046875, + "loss": 0.6709, + "rewards/accuracies": 0.6499999761581421, + "rewards/generated": -0.0583014115691185, + "rewards/margins": 0.061269670724868774, + "rewards/real": 0.002968253567814827, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 1.5991471215351812e-08, + "logits/generated": -1.754969596862793, + "logits/real": -1.8286685943603516, + "logps/generated": -407.74810791015625, + "logps/real": -312.97747802734375, + "loss": 0.6054, + "rewards/accuracies": 0.800000011920929, + "rewards/generated": -0.26939621567726135, + "rewards/margins": 0.2280093878507614, + "rewards/real": -0.04138679429888725, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 2.1321961620469082e-08, + "logits/generated": -1.7067832946777344, + "logits/real": -1.7931840419769287, + "logps/generated": -386.2824401855469, + "logps/real": -292.9158020019531, + "loss": 0.5189, + "rewards/accuracies": 0.800000011920929, + "rewards/generated": -0.5650928616523743, + "rewards/margins": 0.44205719232559204, + "rewards/real": -0.12303560972213745, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 2.6652452025586352e-08, + "logits/generated": -1.7791906595230103, + "logits/real": -1.9039980173110962, + "logps/generated": -402.95343017578125, + "logps/real": -345.21856689453125, + "loss": 0.4618, + "rewards/accuracies": 0.925000011920929, + "rewards/generated": -0.9019126892089844, + "rewards/margins": 0.7108197212219238, + "rewards/real": -0.19109299778938293, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 3.1982942430703625e-08, + "logits/generated": -1.7135913372039795, + "logits/real": -1.8214619159698486, + "logps/generated": -382.85003662109375, + "logps/real": -295.09246826171875, + "loss": 0.3989, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -1.145892858505249, + "rewards/margins": 0.9312199354171753, + "rewards/real": -0.21467280387878418, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 3.731343283582089e-08, + "logits/generated": -1.6685903072357178, + "logits/real": -1.7730849981307983, + "logps/generated": -405.3398132324219, + "logps/real": -307.63970947265625, + "loss": 0.3404, + "rewards/accuracies": 0.887499988079071, + "rewards/generated": -1.7393802404403687, + "rewards/margins": 1.3477346897125244, + "rewards/real": -0.39164555072784424, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 4.2643923240938164e-08, + "logits/generated": -1.6948124170303345, + "logits/real": -1.846187949180603, + "logps/generated": -405.7284240722656, + "logps/real": -360.30682373046875, + "loss": 0.3091, + "rewards/accuracies": 0.925000011920929, + "rewards/generated": -2.1506481170654297, + "rewards/margins": 1.6161304712295532, + "rewards/real": -0.5345176458358765, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 4.7974413646055434e-08, + "logits/generated": -1.7541179656982422, + "logits/real": -1.8123397827148438, + "logps/generated": -416.25689697265625, + "logps/real": -325.4775695800781, + "loss": 0.2753, + "rewards/accuracies": 0.9125000238418579, + "rewards/generated": -2.514233112335205, + "rewards/margins": 2.092501640319824, + "rewards/real": -0.42173153162002563, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5.3304904051172704e-08, + "logits/generated": -1.8153146505355835, + "logits/real": -1.8279272317886353, + "logps/generated": -395.2728271484375, + "logps/real": -335.0040588378906, + "loss": 0.2524, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -2.448599338531494, + "rewards/margins": 2.130467414855957, + "rewards/real": -0.31813228130340576, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5.8635394456289973e-08, + "logits/generated": -1.6887117624282837, + "logits/real": -1.7604955434799194, + "logps/generated": -447.2493591308594, + "logps/real": -334.47344970703125, + "loss": 0.2073, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -3.40751314163208, + "rewards/margins": 2.9876201152801514, + "rewards/real": -0.41989272832870483, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 6.396588486140725e-08, + "logits/generated": -1.6772953271865845, + "logits/real": -1.7890123128890991, + "logps/generated": -393.61126708984375, + "logps/real": -350.9790954589844, + "loss": 0.2025, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -2.881321668624878, + "rewards/margins": 2.3837037086486816, + "rewards/real": -0.4976181387901306, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 6.929637526652451e-08, + "logits/generated": -1.6851392984390259, + "logits/real": -1.7750492095947266, + "logps/generated": -376.60565185546875, + "logps/real": -317.86749267578125, + "loss": 0.1834, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -3.27082896232605, + "rewards/margins": 2.8993308544158936, + "rewards/real": -0.3714984655380249, + "step": 130 + }, + { + "epoch": 0.04, + "learning_rate": 7.462686567164178e-08, + "logits/generated": -1.5641597509384155, + "logits/real": -1.7553110122680664, + "logps/generated": -423.54559326171875, + "logps/real": -311.1752624511719, + "loss": 0.1446, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -4.2097249031066895, + "rewards/margins": 3.8489441871643066, + "rewards/real": -0.36078035831451416, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 7.995735607675907e-08, + "logits/generated": -1.6019731760025024, + "logits/real": -1.7481319904327393, + "logps/generated": -453.563720703125, + "logps/real": -314.98651123046875, + "loss": 0.1327, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -5.146961212158203, + "rewards/margins": 4.662848472595215, + "rewards/real": -0.48411256074905396, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 8.528784648187633e-08, + "logits/generated": -1.5494823455810547, + "logits/real": -1.7241367101669312, + "logps/generated": -444.03521728515625, + "logps/real": -352.8829040527344, + "loss": 0.1291, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -5.463521957397461, + "rewards/margins": 4.476443290710449, + "rewards/real": -0.9870781898498535, + "step": 160 + }, + { + "epoch": 0.05, + "learning_rate": 9.061833688699359e-08, + "logits/generated": -1.591257095336914, + "logits/real": -1.7483450174331665, + "logps/generated": -450.844970703125, + "logps/real": -319.1195068359375, + "loss": 0.1246, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -5.638657569885254, + "rewards/margins": 4.8695173263549805, + "rewards/real": -0.7691398859024048, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 9.594882729211087e-08, + "logits/generated": -1.5604255199432373, + "logits/real": -1.6746511459350586, + "logps/generated": -455.9513244628906, + "logps/real": -324.9789123535156, + "loss": 0.1172, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -6.108395576477051, + "rewards/margins": 4.853656768798828, + "rewards/real": -1.254738450050354, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 1.0127931769722814e-07, + "logits/generated": -1.536947250366211, + "logits/real": -1.5838528871536255, + "logps/generated": -464.4476623535156, + "logps/real": -289.5835266113281, + "loss": 0.1013, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -6.77472448348999, + "rewards/margins": 5.815009593963623, + "rewards/real": -0.9597145318984985, + "step": 190 + }, + { + "epoch": 0.06, + "learning_rate": 1.0660980810234541e-07, + "logits/generated": -1.559515357017517, + "logits/real": -1.6441863775253296, + "logps/generated": -485.25335693359375, + "logps/real": -331.6617736816406, + "loss": 0.0884, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -7.514554500579834, + "rewards/margins": 6.487561225891113, + "rewards/real": -1.0269935131072998, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 1.1194029850746268e-07, + "logits/generated": -1.542763352394104, + "logits/real": -1.6364444494247437, + "logps/generated": -456.9220275878906, + "logps/real": -353.18841552734375, + "loss": 0.0903, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -7.124857425689697, + "rewards/margins": 5.564694404602051, + "rewards/real": -1.5601634979248047, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 1.1727078891257995e-07, + "logits/generated": -1.5230910778045654, + "logits/real": -1.661611795425415, + "logps/generated": -465.93853759765625, + "logps/real": -355.095703125, + "loss": 0.0933, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -8.115549087524414, + "rewards/margins": 6.8037614822387695, + "rewards/real": -1.31178879737854, + "step": 220 + }, + { + "epoch": 0.07, + "learning_rate": 1.226012793176972e-07, + "logits/generated": -1.4759384393692017, + "logits/real": -1.7497766017913818, + "logps/generated": -486.30755615234375, + "logps/real": -409.23126220703125, + "loss": 0.0696, + "rewards/accuracies": 1.0, + "rewards/generated": -8.072816848754883, + "rewards/margins": 6.9501166343688965, + "rewards/real": -1.1227010488510132, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 1.279317697228145e-07, + "logits/generated": -1.4614694118499756, + "logits/real": -1.6280324459075928, + "logps/generated": -480.77569580078125, + "logps/real": -312.9775085449219, + "loss": 0.0667, + "rewards/accuracies": 1.0, + "rewards/generated": -8.565336227416992, + "rewards/margins": 7.487596035003662, + "rewards/real": -1.0777397155761719, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 1.3326226012793176e-07, + "logits/generated": -1.5403387546539307, + "logits/real": -1.71304190158844, + "logps/generated": -497.54376220703125, + "logps/real": -393.9128723144531, + "loss": 0.0848, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -8.98359489440918, + "rewards/margins": 7.220086574554443, + "rewards/real": -1.7635078430175781, + "step": 250 + }, + { + "epoch": 0.08, + "learning_rate": 1.3859275053304903e-07, + "logits/generated": -1.395140290260315, + "logits/real": -1.6418180465698242, + "logps/generated": -477.68243408203125, + "logps/real": -353.9394836425781, + "loss": 0.0671, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -8.58221435546875, + "rewards/margins": 7.423464775085449, + "rewards/real": -1.1587491035461426, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 1.439232409381663e-07, + "logits/generated": -1.4475288391113281, + "logits/real": -1.5991582870483398, + "logps/generated": -463.43817138671875, + "logps/real": -314.2174987792969, + "loss": 0.065, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -7.937777519226074, + "rewards/margins": 7.0476579666137695, + "rewards/real": -0.890119194984436, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 1.4925373134328355e-07, + "logits/generated": -1.4230304956436157, + "logits/real": -1.5950751304626465, + "logps/generated": -451.63006591796875, + "logps/real": -333.942138671875, + "loss": 0.0585, + "rewards/accuracies": 1.0, + "rewards/generated": -8.133646965026855, + "rewards/margins": 7.216717720031738, + "rewards/real": -0.9169293642044067, + "step": 280 + }, + { + "epoch": 0.09, + "learning_rate": 1.5458422174840087e-07, + "logits/generated": -1.4459788799285889, + "logits/real": -1.5633540153503418, + "logps/generated": -492.1522521972656, + "logps/real": -360.3606262207031, + "loss": 0.0527, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -9.392778396606445, + "rewards/margins": 8.276235580444336, + "rewards/real": -1.1165430545806885, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 1.5991471215351813e-07, + "logits/generated": -1.4453847408294678, + "logits/real": -1.5341203212738037, + "logps/generated": -458.6255798339844, + "logps/real": -305.7059326171875, + "loss": 0.0729, + "rewards/accuracies": 1.0, + "rewards/generated": -9.357995986938477, + "rewards/margins": 8.50068187713623, + "rewards/real": -0.8573150634765625, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 1.652452025586354e-07, + "logits/generated": -1.423008680343628, + "logits/real": -1.6011879444122314, + "logps/generated": -469.68695068359375, + "logps/real": -369.8955078125, + "loss": 0.0546, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -9.097238540649414, + "rewards/margins": 8.363798141479492, + "rewards/real": -0.7334394454956055, + "step": 310 + }, + { + "epoch": 0.1, + "learning_rate": 1.7057569296375266e-07, + "logits/generated": -1.3465330600738525, + "logits/real": -1.4571083784103394, + "logps/generated": -523.9188232421875, + "logps/real": -312.72979736328125, + "loss": 0.0486, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -10.398682594299316, + "rewards/margins": 10.032429695129395, + "rewards/real": -0.36625421047210693, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 1.7590618336886992e-07, + "logits/generated": -1.4364181756973267, + "logits/real": -1.6128456592559814, + "logps/generated": -487.78912353515625, + "logps/real": -324.5415344238281, + "loss": 0.06, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -9.630109786987305, + "rewards/margins": 8.89991569519043, + "rewards/real": -0.7301940321922302, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 1.8123667377398718e-07, + "logits/generated": -1.4497346878051758, + "logits/real": -1.5822639465332031, + "logps/generated": -505.73028564453125, + "logps/real": -292.947265625, + "loss": 0.0385, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -10.920869827270508, + "rewards/margins": 9.890485763549805, + "rewards/real": -1.030383825302124, + "step": 340 + }, + { + "epoch": 0.11, + "learning_rate": 1.8656716417910447e-07, + "logits/generated": -1.3929362297058105, + "logits/real": -1.5212783813476562, + "logps/generated": -541.8226318359375, + "logps/real": -317.99932861328125, + "loss": 0.0415, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -12.88868522644043, + "rewards/margins": 11.808540344238281, + "rewards/real": -1.0801454782485962, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 1.9189765458422174e-07, + "logits/generated": -1.3346354961395264, + "logits/real": -1.5566127300262451, + "logps/generated": -527.8130493164062, + "logps/real": -329.54571533203125, + "loss": 0.0442, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -11.514655113220215, + "rewards/margins": 10.130718231201172, + "rewards/real": -1.3839375972747803, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 1.9722814498933903e-07, + "logits/generated": -1.2722876071929932, + "logits/real": -1.5687021017074585, + "logps/generated": -506.6629943847656, + "logps/real": -324.5752258300781, + "loss": 0.034, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -11.572894096374512, + "rewards/margins": 10.359813690185547, + "rewards/real": -1.213080644607544, + "step": 370 + }, + { + "epoch": 0.12, + "learning_rate": 2.025586353944563e-07, + "logits/generated": -1.3695201873779297, + "logits/real": -1.6315975189208984, + "logps/generated": -464.67181396484375, + "logps/real": -353.6014709472656, + "loss": 0.047, + "rewards/accuracies": 1.0, + "rewards/generated": -10.642364501953125, + "rewards/margins": 9.726277351379395, + "rewards/real": -0.9160875082015991, + "step": 380 + }, + { + "epoch": 0.12, + "learning_rate": 2.0788912579957355e-07, + "logits/generated": -1.3165477514266968, + "logits/real": -1.5160284042358398, + "logps/generated": -515.6984252929688, + "logps/real": -316.2281799316406, + "loss": 0.0507, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -12.083415031433105, + "rewards/margins": 11.66950798034668, + "rewards/real": -0.41390690207481384, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 2.1321961620469082e-07, + "logits/generated": -1.1762816905975342, + "logits/real": -1.505501389503479, + "logps/generated": -522.8754272460938, + "logps/real": -369.6803283691406, + "loss": 0.0336, + "rewards/accuracies": 1.0, + "rewards/generated": -13.3060302734375, + "rewards/margins": 12.078089714050293, + "rewards/real": -1.2279411554336548, + "step": 400 + }, + { + "epoch": 0.13, + "learning_rate": 2.185501066098081e-07, + "logits/generated": -1.1128976345062256, + "logits/real": -1.4507527351379395, + "logps/generated": -563.6268310546875, + "logps/real": -351.15802001953125, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/generated": -15.432531356811523, + "rewards/margins": 14.190030097961426, + "rewards/real": -1.2425031661987305, + "step": 410 + }, + { + "epoch": 0.13, + "learning_rate": 2.2388059701492537e-07, + "logits/generated": -1.210235357284546, + "logits/real": -1.5492087602615356, + "logps/generated": -517.4049072265625, + "logps/real": -346.5636291503906, + "loss": 0.0368, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -13.797286987304688, + "rewards/margins": 12.366052627563477, + "rewards/real": -1.431235671043396, + "step": 420 + }, + { + "epoch": 0.14, + "learning_rate": 2.2921108742004263e-07, + "logits/generated": -1.2352956533432007, + "logits/real": -1.5816096067428589, + "logps/generated": -529.5254516601562, + "logps/real": -333.84271240234375, + "loss": 0.0403, + "rewards/accuracies": 1.0, + "rewards/generated": -13.768930435180664, + "rewards/margins": 13.00990104675293, + "rewards/real": -0.7590312361717224, + "step": 430 + }, + { + "epoch": 0.14, + "learning_rate": 2.345415778251599e-07, + "logits/generated": -1.2411397695541382, + "logits/real": -1.5679179430007935, + "logps/generated": -503.085205078125, + "logps/real": -343.2332458496094, + "loss": 0.0391, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -12.77052116394043, + "rewards/margins": 12.486634254455566, + "rewards/real": -0.28388747572898865, + "step": 440 + }, + { + "epoch": 0.14, + "learning_rate": 2.3987206823027716e-07, + "logits/generated": -1.135864019393921, + "logits/real": -1.5101805925369263, + "logps/generated": -536.2482299804688, + "logps/real": -373.39569091796875, + "loss": 0.0328, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -14.448722839355469, + "rewards/margins": 13.376909255981445, + "rewards/real": -1.0718127489089966, + "step": 450 + }, + { + "epoch": 0.15, + "learning_rate": 2.452025586353944e-07, + "logits/generated": -1.1627209186553955, + "logits/real": -1.4507901668548584, + "logps/generated": -542.908447265625, + "logps/real": -350.0838317871094, + "loss": 0.0669, + "rewards/accuracies": 1.0, + "rewards/generated": -15.044692993164062, + "rewards/margins": 14.695103645324707, + "rewards/real": -0.3495886027812958, + "step": 460 + }, + { + "epoch": 0.15, + "learning_rate": 2.505330490405117e-07, + "logits/generated": -1.1846784353256226, + "logits/real": -1.3428871631622314, + "logps/generated": -531.530517578125, + "logps/real": -300.04010009765625, + "loss": 0.0504, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -15.481117248535156, + "rewards/margins": 14.126989364624023, + "rewards/real": -1.3541295528411865, + "step": 470 + }, + { + "epoch": 0.15, + "learning_rate": 2.55863539445629e-07, + "logits/generated": -1.2153561115264893, + "logits/real": -1.4080696105957031, + "logps/generated": -598.9599609375, + "logps/real": -333.67205810546875, + "loss": 0.0524, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -18.319135665893555, + "rewards/margins": 16.647058486938477, + "rewards/real": -1.672079086303711, + "step": 480 + }, + { + "epoch": 0.16, + "learning_rate": 2.611940298507462e-07, + "logits/generated": -1.1052753925323486, + "logits/real": -1.4038926362991333, + "logps/generated": -625.4494018554688, + "logps/real": -298.06854248046875, + "loss": 0.0277, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.859481811523438, + "rewards/margins": 18.040180206298828, + "rewards/real": -1.8193010091781616, + "step": 490 + }, + { + "epoch": 0.16, + "learning_rate": 2.665245202558635e-07, + "logits/generated": -1.1053588390350342, + "logits/real": -1.3924005031585693, + "logps/generated": -576.0348510742188, + "logps/real": -353.4833068847656, + "loss": 0.0337, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -16.51312255859375, + "rewards/margins": 15.454565048217773, + "rewards/real": -1.058556318283081, + "step": 500 + }, + { + "epoch": 0.16, + "learning_rate": 2.7185501066098084e-07, + "logits/generated": -1.1117361783981323, + "logits/real": -1.4525091648101807, + "logps/generated": -566.1034545898438, + "logps/real": -364.39886474609375, + "loss": 0.0433, + "rewards/accuracies": 1.0, + "rewards/generated": -17.288684844970703, + "rewards/margins": 15.412053108215332, + "rewards/real": -1.876631498336792, + "step": 510 + }, + { + "epoch": 0.17, + "learning_rate": 2.7718550106609805e-07, + "logits/generated": -0.9736140966415405, + "logits/real": -1.4299156665802002, + "logps/generated": -591.044921875, + "logps/real": -321.2178649902344, + "loss": 0.0314, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.490947723388672, + "rewards/margins": 16.805511474609375, + "rewards/real": -0.685435950756073, + "step": 520 + }, + { + "epoch": 0.17, + "learning_rate": 2.8251599147121537e-07, + "logits/generated": -1.0898406505584717, + "logits/real": -1.4943970441818237, + "logps/generated": -500.5962829589844, + "logps/real": -335.22882080078125, + "loss": 0.0277, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -12.250688552856445, + "rewards/margins": 12.409687995910645, + "rewards/real": 0.1589992791414261, + "step": 530 + }, + { + "epoch": 0.17, + "learning_rate": 2.878464818763326e-07, + "logits/generated": -0.9211187362670898, + "logits/real": -1.2392743825912476, + "logps/generated": -626.4434814453125, + "logps/real": -259.01593017578125, + "loss": 0.0719, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -20.40229606628418, + "rewards/margins": 20.59931182861328, + "rewards/real": 0.19701404869556427, + "step": 540 + }, + { + "epoch": 0.18, + "learning_rate": 2.931769722814499e-07, + "logits/generated": -1.020733118057251, + "logits/real": -1.3917875289916992, + "logps/generated": -598.0836791992188, + "logps/real": -350.3652038574219, + "loss": 0.0383, + "rewards/accuracies": 1.0, + "rewards/generated": -18.26373863220215, + "rewards/margins": 18.554927825927734, + "rewards/real": 0.2911873757839203, + "step": 550 + }, + { + "epoch": 0.18, + "learning_rate": 2.985074626865671e-07, + "logits/generated": -1.160035490989685, + "logits/real": -1.3027687072753906, + "logps/generated": -570.22705078125, + "logps/real": -318.4032287597656, + "loss": 0.0222, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -16.718929290771484, + "rewards/margins": 16.853124618530273, + "rewards/real": 0.1341935694217682, + "step": 560 + }, + { + "epoch": 0.18, + "learning_rate": 3.038379530916844e-07, + "logits/generated": -1.0827645063400269, + "logits/real": -1.3972750902175903, + "logps/generated": -562.3015747070312, + "logps/real": -343.81829833984375, + "loss": 0.0299, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -16.274452209472656, + "rewards/margins": 15.849164009094238, + "rewards/real": -0.4252890944480896, + "step": 570 + }, + { + "epoch": 0.19, + "learning_rate": 3.0916844349680174e-07, + "logits/generated": -0.9818236231803894, + "logits/real": -1.3140885829925537, + "logps/generated": -516.5469360351562, + "logps/real": -355.1119689941406, + "loss": 0.0385, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -15.40649127960205, + "rewards/margins": 14.21912670135498, + "rewards/real": -1.187363624572754, + "step": 580 + }, + { + "epoch": 0.19, + "learning_rate": 3.1449893390191895e-07, + "logits/generated": -1.0166418552398682, + "logits/real": -1.3364452123641968, + "logps/generated": -538.85400390625, + "logps/real": -324.9503479003906, + "loss": 0.0917, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -15.649869918823242, + "rewards/margins": 15.868939399719238, + "rewards/real": 0.21906885504722595, + "step": 590 + }, + { + "epoch": 0.19, + "learning_rate": 3.1982942430703626e-07, + "logits/generated": -1.0271713733673096, + "logits/real": -1.263146162033081, + "logps/generated": -567.3302612304688, + "logps/real": -291.7242431640625, + "loss": 0.0682, + "rewards/accuracies": 1.0, + "rewards/generated": -18.165842056274414, + "rewards/margins": 17.980998992919922, + "rewards/real": -0.1848386824131012, + "step": 600 + }, + { + "epoch": 0.2, + "learning_rate": 3.2515991471215347e-07, + "logits/generated": -0.9488552212715149, + "logits/real": -1.2689321041107178, + "logps/generated": -598.2969970703125, + "logps/real": -355.51934814453125, + "loss": 0.0623, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -21.530506134033203, + "rewards/margins": 19.943836212158203, + "rewards/real": -1.5866692066192627, + "step": 610 + }, + { + "epoch": 0.2, + "learning_rate": 3.304904051172708e-07, + "logits/generated": -1.002000331878662, + "logits/real": -1.1596615314483643, + "logps/generated": -631.7313232421875, + "logps/real": -325.359130859375, + "loss": 0.022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -23.148792266845703, + "rewards/margins": 21.61235237121582, + "rewards/real": -1.5364404916763306, + "step": 620 + }, + { + "epoch": 0.2, + "learning_rate": 3.3582089552238805e-07, + "logits/generated": -0.8440952301025391, + "logits/real": -1.2660382986068726, + "logps/generated": -635.0595092773438, + "logps/real": -346.52239990234375, + "loss": 0.0217, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -24.026508331298828, + "rewards/margins": 21.37133026123047, + "rewards/real": -2.6551785469055176, + "step": 630 + }, + { + "epoch": 0.2, + "learning_rate": 3.411513859275053e-07, + "logits/generated": -0.9189499020576477, + "logits/real": -1.3209021091461182, + "logps/generated": -581.1162109375, + "logps/real": -347.0169372558594, + "loss": 0.0424, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -20.726146697998047, + "rewards/margins": 18.92522621154785, + "rewards/real": -1.8009216785430908, + "step": 640 + }, + { + "epoch": 0.21, + "learning_rate": 3.464818763326226e-07, + "logits/generated": -0.9096330404281616, + "logits/real": -1.1646416187286377, + "logps/generated": -638.0400390625, + "logps/real": -326.70819091796875, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/generated": -23.011674880981445, + "rewards/margins": 20.560462951660156, + "rewards/real": -2.4512124061584473, + "step": 650 + }, + { + "epoch": 0.21, + "learning_rate": 3.5181236673773984e-07, + "logits/generated": -0.8807134628295898, + "logits/real": -1.0881080627441406, + "logps/generated": -645.7661743164062, + "logps/real": -340.04681396484375, + "loss": 0.0481, + "rewards/accuracies": 1.0, + "rewards/generated": -24.14626693725586, + "rewards/margins": 21.388887405395508, + "rewards/real": -2.757380962371826, + "step": 660 + }, + { + "epoch": 0.21, + "learning_rate": 3.5714285714285716e-07, + "logits/generated": -1.2381738424301147, + "logits/real": -1.346702218055725, + "logps/generated": -547.367919921875, + "logps/real": -348.85137939453125, + "loss": 0.0613, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -13.925430297851562, + "rewards/margins": 13.218635559082031, + "rewards/real": -0.7067966461181641, + "step": 670 + }, + { + "epoch": 0.22, + "learning_rate": 3.6247334754797437e-07, + "logits/generated": -1.0652974843978882, + "logits/real": -1.292110800743103, + "logps/generated": -543.0350952148438, + "logps/real": -339.61285400390625, + "loss": 0.0284, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -16.47148895263672, + "rewards/margins": 14.554428100585938, + "rewards/real": -1.9170618057250977, + "step": 680 + }, + { + "epoch": 0.22, + "learning_rate": 3.678038379530917e-07, + "logits/generated": -0.9800616502761841, + "logits/real": -1.1700372695922852, + "logps/generated": -584.2894287109375, + "logps/real": -363.9668273925781, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/generated": -18.656932830810547, + "rewards/margins": 17.38612937927246, + "rewards/real": -1.2708007097244263, + "step": 690 + }, + { + "epoch": 0.22, + "learning_rate": 3.7313432835820895e-07, + "logits/generated": -1.0084197521209717, + "logits/real": -0.9993604421615601, + "logps/generated": -538.5408325195312, + "logps/real": -324.64520263671875, + "loss": 0.0687, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -15.12083625793457, + "rewards/margins": 14.68799877166748, + "rewards/real": -0.4328370988368988, + "step": 700 + }, + { + "epoch": 0.23, + "learning_rate": 3.784648187633262e-07, + "logits/generated": -0.9193227887153625, + "logits/real": -1.1186764240264893, + "logps/generated": -556.6070556640625, + "logps/real": -377.97259521484375, + "loss": 0.0925, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -16.488893508911133, + "rewards/margins": 15.544939994812012, + "rewards/real": -0.943952202796936, + "step": 710 + }, + { + "epoch": 0.23, + "learning_rate": 3.8379530916844347e-07, + "logits/generated": -0.9582284092903137, + "logits/real": -1.1462305784225464, + "logps/generated": -557.4412841796875, + "logps/real": -316.1856994628906, + "loss": 0.0587, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.733484268188477, + "rewards/margins": 17.631059646606445, + "rewards/real": -0.10242464393377304, + "step": 720 + }, + { + "epoch": 0.23, + "learning_rate": 3.8912579957356074e-07, + "logits/generated": -0.9639987945556641, + "logits/real": -1.1116389036178589, + "logps/generated": -579.411376953125, + "logps/real": -360.36968994140625, + "loss": 0.0502, + "rewards/accuracies": 1.0, + "rewards/generated": -19.355396270751953, + "rewards/margins": 18.144670486450195, + "rewards/real": -1.2107274532318115, + "step": 730 + }, + { + "epoch": 0.24, + "learning_rate": 3.9445628997867805e-07, + "logits/generated": -0.7743756175041199, + "logits/real": -1.1329243183135986, + "logps/generated": -600.0618896484375, + "logps/real": -326.8385925292969, + "loss": 0.0344, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -20.37815284729004, + "rewards/margins": 19.694599151611328, + "rewards/real": -0.683555543422699, + "step": 740 + }, + { + "epoch": 0.24, + "learning_rate": 3.9978678038379526e-07, + "logits/generated": -0.7812870144844055, + "logits/real": -0.9252561330795288, + "logps/generated": -565.31787109375, + "logps/real": -309.7308654785156, + "loss": 0.011, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.80691909790039, + "rewards/margins": 17.52267074584961, + "rewards/real": -0.2842453420162201, + "step": 750 + }, + { + "epoch": 0.24, + "learning_rate": 4.051172707889126e-07, + "logits/generated": -0.721796452999115, + "logits/real": -0.9380915760993958, + "logps/generated": -636.4271850585938, + "logps/real": -328.95880126953125, + "loss": 0.0225, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -21.856843948364258, + "rewards/margins": 21.279296875, + "rewards/real": -0.577548623085022, + "step": 760 + }, + { + "epoch": 0.25, + "learning_rate": 4.1044776119402984e-07, + "logits/generated": -0.8223736882209778, + "logits/real": -0.9600740671157837, + "logps/generated": -642.3658447265625, + "logps/real": -351.7137756347656, + "loss": 0.015, + "rewards/accuracies": 1.0, + "rewards/generated": -23.127605438232422, + "rewards/margins": 21.979970932006836, + "rewards/real": -1.1476361751556396, + "step": 770 + }, + { + "epoch": 0.25, + "learning_rate": 4.157782515991471e-07, + "logits/generated": -0.6737440228462219, + "logits/real": -0.9427323341369629, + "logps/generated": -606.7221069335938, + "logps/real": -317.69134521484375, + "loss": 0.1004, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -20.423994064331055, + "rewards/margins": 20.49788475036621, + "rewards/real": 0.0738908052444458, + "step": 780 + }, + { + "epoch": 0.25, + "learning_rate": 4.2110874200426437e-07, + "logits/generated": -0.5200916528701782, + "logits/real": -0.9301559329032898, + "logps/generated": -597.6541748046875, + "logps/real": -360.3106384277344, + "loss": 0.0697, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.777925491333008, + "rewards/margins": 17.938873291015625, + "rewards/real": -1.839052438735962, + "step": 790 + }, + { + "epoch": 0.26, + "learning_rate": 4.2643923240938163e-07, + "logits/generated": -0.48490971326828003, + "logits/real": -0.8440488576889038, + "logps/generated": -638.52392578125, + "logps/real": -314.540771484375, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/generated": -23.257488250732422, + "rewards/margins": 22.10464096069336, + "rewards/real": -1.1528491973876953, + "step": 800 + }, + { + "epoch": 0.26, + "learning_rate": 4.317697228144989e-07, + "logits/generated": -0.6229613423347473, + "logits/real": -0.9615219831466675, + "logps/generated": -565.7908935546875, + "logps/real": -279.216064453125, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/generated": -20.155855178833008, + "rewards/margins": 19.441150665283203, + "rewards/real": -0.7147022485733032, + "step": 810 + }, + { + "epoch": 0.26, + "learning_rate": 4.371002132196162e-07, + "logits/generated": -0.5994982123374939, + "logits/real": -1.146607756614685, + "logps/generated": -589.715087890625, + "logps/real": -355.19775390625, + "loss": 0.0646, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -20.48986053466797, + "rewards/margins": 19.900867462158203, + "rewards/real": -0.5889959335327148, + "step": 820 + }, + { + "epoch": 0.27, + "learning_rate": 4.4243070362473347e-07, + "logits/generated": -0.7427669167518616, + "logits/real": -1.1212642192840576, + "logps/generated": -686.93994140625, + "logps/real": -332.35089111328125, + "loss": 0.0821, + "rewards/accuracies": 1.0, + "rewards/generated": -27.279861450195312, + "rewards/margins": 24.79286003112793, + "rewards/real": -2.4869980812072754, + "step": 830 + }, + { + "epoch": 0.27, + "learning_rate": 4.4776119402985074e-07, + "logits/generated": -0.6697017550468445, + "logits/real": -1.123647928237915, + "logps/generated": -608.34619140625, + "logps/real": -361.9219055175781, + "loss": 0.2129, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -22.527164459228516, + "rewards/margins": 20.38083839416504, + "rewards/real": -2.1463239192962646, + "step": 840 + }, + { + "epoch": 0.27, + "learning_rate": 4.53091684434968e-07, + "logits/generated": -0.6786950826644897, + "logits/real": -1.1746528148651123, + "logps/generated": -612.7151489257812, + "logps/real": -316.81231689453125, + "loss": 0.0921, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -21.420848846435547, + "rewards/margins": 21.539230346679688, + "rewards/real": 0.11838479340076447, + "step": 850 + }, + { + "epoch": 0.28, + "learning_rate": 4.5842217484008526e-07, + "logits/generated": -0.647723913192749, + "logits/real": -1.2561864852905273, + "logps/generated": -609.9498901367188, + "logps/real": -344.63385009765625, + "loss": 0.0967, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -21.064952850341797, + "rewards/margins": 20.77135467529297, + "rewards/real": -0.29359906911849976, + "step": 860 + }, + { + "epoch": 0.28, + "learning_rate": 4.637526652452025e-07, + "logits/generated": -0.9066628217697144, + "logits/real": -1.4026243686676025, + "logps/generated": -566.8709106445312, + "logps/real": -324.4098815917969, + "loss": 0.0464, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -18.857772827148438, + "rewards/margins": 18.00718116760254, + "rewards/real": -0.8505916595458984, + "step": 870 + }, + { + "epoch": 0.28, + "learning_rate": 4.690831556503198e-07, + "logits/generated": -1.0487242937088013, + "logits/real": -1.3853559494018555, + "logps/generated": -573.9552001953125, + "logps/real": -296.62115478515625, + "loss": 0.028, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.28946304321289, + "rewards/margins": 19.398359298706055, + "rewards/real": 2.108898639678955, + "step": 880 + }, + { + "epoch": 0.28, + "learning_rate": 4.744136460554371e-07, + "logits/generated": -0.7771695256233215, + "logits/real": -1.3479080200195312, + "logps/generated": -620.6242065429688, + "logps/real": -335.8597717285156, + "loss": 0.0447, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -20.570037841796875, + "rewards/margins": 21.122669219970703, + "rewards/real": 0.552629828453064, + "step": 890 + }, + { + "epoch": 0.29, + "learning_rate": 4.797441364605543e-07, + "logits/generated": -1.0141932964324951, + "logits/real": -1.4638208150863647, + "logps/generated": -529.6288452148438, + "logps/real": -303.21026611328125, + "loss": 0.059, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -15.210214614868164, + "rewards/margins": 16.335933685302734, + "rewards/real": 1.1257202625274658, + "step": 900 + }, + { + "epoch": 0.29, + "learning_rate": 4.850746268656717e-07, + "logits/generated": -0.8784846067428589, + "logits/real": -1.3946049213409424, + "logps/generated": -570.2777099609375, + "logps/real": -336.4716796875, + "loss": 0.0831, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.878326416015625, + "rewards/margins": 18.196462631225586, + "rewards/real": 0.3181368112564087, + "step": 910 + }, + { + "epoch": 0.29, + "learning_rate": 4.904051172707888e-07, + "logits/generated": -0.5604439377784729, + "logits/real": -1.0903195142745972, + "logps/generated": -625.4072265625, + "logps/real": -302.01470947265625, + "loss": 0.0594, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -21.253019332885742, + "rewards/margins": 21.284252166748047, + "rewards/real": 0.0312324408441782, + "step": 920 + }, + { + "epoch": 0.3, + "learning_rate": 4.957356076759062e-07, + "logits/generated": -0.4214113652706146, + "logits/real": -1.1398684978485107, + "logps/generated": -675.2928466796875, + "logps/real": -317.0589599609375, + "loss": 0.0624, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -25.430103302001953, + "rewards/margins": 25.21658706665039, + "rewards/real": -0.21351394057273865, + "step": 930 + }, + { + "epoch": 0.3, + "learning_rate": 4.998814744577456e-07, + "logits/generated": -0.7198468446731567, + "logits/real": -1.2382924556732178, + "logps/generated": -663.3746337890625, + "logps/real": -340.4453125, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/generated": -27.136676788330078, + "rewards/margins": 24.233760833740234, + "rewards/real": -2.902914524078369, + "step": 940 + }, + { + "epoch": 0.3, + "learning_rate": 4.992888467464738e-07, + "logits/generated": -0.6365340352058411, + "logits/real": -1.2204135656356812, + "logps/generated": -607.4127197265625, + "logps/real": -347.8791809082031, + "loss": 0.0395, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -22.3405818939209, + "rewards/margins": 20.96994972229004, + "rewards/real": -1.3706319332122803, + "step": 950 + }, + { + "epoch": 0.31, + "learning_rate": 4.986962190352021e-07, + "logits/generated": -0.3528062701225281, + "logits/real": -1.0751874446868896, + "logps/generated": -617.19384765625, + "logps/real": -343.6873474121094, + "loss": 0.0759, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -24.447063446044922, + "rewards/margins": 22.974315643310547, + "rewards/real": -1.4727448225021362, + "step": 960 + }, + { + "epoch": 0.31, + "learning_rate": 4.981035913239302e-07, + "logits/generated": -0.5571753978729248, + "logits/real": -1.3170697689056396, + "logps/generated": -509.511962890625, + "logps/real": -326.5060729980469, + "loss": 0.117, + "rewards/accuracies": 0.9125000238418579, + "rewards/generated": -13.880078315734863, + "rewards/margins": 14.30474853515625, + "rewards/real": 0.42466872930526733, + "step": 970 + }, + { + "epoch": 0.31, + "learning_rate": 4.975109636126585e-07, + "logits/generated": 0.031067097559571266, + "logits/real": -0.8951910734176636, + "logps/generated": -644.8367309570312, + "logps/real": -297.2452087402344, + "loss": 0.0844, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -25.296676635742188, + "rewards/margins": 24.991586685180664, + "rewards/real": -0.30508843064308167, + "step": 980 + }, + { + "epoch": 0.32, + "learning_rate": 4.969183359013867e-07, + "logits/generated": -0.1350618600845337, + "logits/real": -1.1355469226837158, + "logps/generated": -546.244873046875, + "logps/real": -359.3175048828125, + "loss": 0.0814, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -19.15607452392578, + "rewards/margins": 19.000545501708984, + "rewards/real": -0.15552793443202972, + "step": 990 + }, + { + "epoch": 0.32, + "learning_rate": 4.96325708190115e-07, + "logits/generated": -0.11877751350402832, + "logits/real": -1.002561092376709, + "logps/generated": -697.0966796875, + "logps/real": -361.0674743652344, + "loss": 0.0431, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -30.546356201171875, + "rewards/margins": 27.693737030029297, + "rewards/real": -2.8526175022125244, + "step": 1000 + }, + { + "epoch": 0.32, + "learning_rate": 4.957330804788432e-07, + "logits/generated": -0.3785037100315094, + "logits/real": -1.0688451528549194, + "logps/generated": -747.1729736328125, + "logps/real": -315.0045166015625, + "loss": 0.0902, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -32.87823486328125, + "rewards/margins": 30.961559295654297, + "rewards/real": -1.9166730642318726, + "step": 1010 + }, + { + "epoch": 0.33, + "learning_rate": 4.951404527675714e-07, + "logits/generated": -0.4004201889038086, + "logits/real": -1.2025635242462158, + "logps/generated": -725.2938232421875, + "logps/real": -362.38348388671875, + "loss": 0.0305, + "rewards/accuracies": 1.0, + "rewards/generated": -32.35773849487305, + "rewards/margins": 29.627965927124023, + "rewards/real": -2.7297706604003906, + "step": 1020 + }, + { + "epoch": 0.33, + "learning_rate": 4.945478250562996e-07, + "logits/generated": -0.4088473916053772, + "logits/real": -1.1282621622085571, + "logps/generated": -723.2651977539062, + "logps/real": -359.61248779296875, + "loss": 0.0153, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -33.53559494018555, + "rewards/margins": 28.994110107421875, + "rewards/real": -4.541485786437988, + "step": 1030 + }, + { + "epoch": 0.33, + "learning_rate": 4.939551973450278e-07, + "logits/generated": -0.29500117897987366, + "logits/real": -1.215680718421936, + "logps/generated": -781.2555541992188, + "logps/real": -359.85198974609375, + "loss": 0.1038, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -36.472259521484375, + "rewards/margins": 31.787761688232422, + "rewards/real": -4.684496879577637, + "step": 1040 + }, + { + "epoch": 0.34, + "learning_rate": 4.933625696337561e-07, + "logits/generated": -0.2509937286376953, + "logits/real": -1.2540266513824463, + "logps/generated": -760.2052612304688, + "logps/real": -320.0561828613281, + "loss": 0.1264, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -34.826995849609375, + "rewards/margins": 32.62900924682617, + "rewards/real": -2.197985887527466, + "step": 1050 + }, + { + "epoch": 0.34, + "learning_rate": 4.927699419224843e-07, + "logits/generated": -0.141755610704422, + "logits/real": -1.0157017707824707, + "logps/generated": -612.1473388671875, + "logps/real": -355.87091064453125, + "loss": 0.0481, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -26.255905151367188, + "rewards/margins": 23.14793586730957, + "rewards/real": -3.107970714569092, + "step": 1060 + }, + { + "epoch": 0.34, + "learning_rate": 4.921773142112125e-07, + "logits/generated": 0.0805276483297348, + "logits/real": -0.4895111918449402, + "logps/generated": -673.2080078125, + "logps/real": -388.30377197265625, + "loss": 0.0606, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -28.200572967529297, + "rewards/margins": 23.83773422241211, + "rewards/real": -4.362841606140137, + "step": 1070 + }, + { + "epoch": 0.35, + "learning_rate": 4.915846864999407e-07, + "logits/generated": 0.006424567196518183, + "logits/real": -0.6142998337745667, + "logps/generated": -738.597900390625, + "logps/real": -373.8583984375, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/generated": -33.54041290283203, + "rewards/margins": 28.793270111083984, + "rewards/real": -4.7471418380737305, + "step": 1080 + }, + { + "epoch": 0.35, + "learning_rate": 4.909920587886689e-07, + "logits/generated": -0.3021875023841858, + "logits/real": -1.0494592189788818, + "logps/generated": -647.1887817382812, + "logps/real": -317.3564453125, + "loss": 0.052, + "rewards/accuracies": 1.0, + "rewards/generated": -26.67519187927246, + "rewards/margins": 26.018726348876953, + "rewards/real": -0.6564682722091675, + "step": 1090 + }, + { + "epoch": 0.35, + "learning_rate": 4.903994310773972e-07, + "logits/generated": -0.07100073248147964, + "logits/real": -0.7578374147415161, + "logps/generated": -597.0128173828125, + "logps/real": -317.7532958984375, + "loss": 0.1071, + "rewards/accuracies": 0.9375, + "rewards/generated": -20.897869110107422, + "rewards/margins": 21.192420959472656, + "rewards/real": 0.29454854130744934, + "step": 1100 + }, + { + "epoch": 0.36, + "learning_rate": 4.898068033661254e-07, + "logits/generated": 0.026780009269714355, + "logits/real": -0.961300253868103, + "logps/generated": -630.4881591796875, + "logps/real": -365.9146728515625, + "loss": 0.0805, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -26.19057846069336, + "rewards/margins": 25.535663604736328, + "rewards/real": -0.6549181938171387, + "step": 1110 + }, + { + "epoch": 0.36, + "learning_rate": 4.892141756548536e-07, + "logits/generated": 0.17940345406532288, + "logits/real": -0.9497494697570801, + "logps/generated": -639.5001220703125, + "logps/real": -297.89691162109375, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/generated": -25.884960174560547, + "rewards/margins": 26.278423309326172, + "rewards/real": 0.39346298575401306, + "step": 1120 + }, + { + "epoch": 0.36, + "learning_rate": 4.886215479435819e-07, + "logits/generated": -0.3005313575267792, + "logits/real": -1.163912296295166, + "logps/generated": -578.6134033203125, + "logps/real": -283.5059814453125, + "loss": 0.0517, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -20.6179256439209, + "rewards/margins": 21.1088809967041, + "rewards/real": 0.49095502495765686, + "step": 1130 + }, + { + "epoch": 0.36, + "learning_rate": 4.8802892023231e-07, + "logits/generated": -0.02864791825413704, + "logits/real": -1.1268925666809082, + "logps/generated": -725.9310913085938, + "logps/real": -330.99298095703125, + "loss": 0.0583, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -34.120079040527344, + "rewards/margins": 33.20871353149414, + "rewards/real": -0.9113671183586121, + "step": 1140 + }, + { + "epoch": 0.37, + "learning_rate": 4.874362925210383e-07, + "logits/generated": 0.1999046355485916, + "logits/real": -1.0573365688323975, + "logps/generated": -840.8147583007812, + "logps/real": -307.5840148925781, + "loss": 0.0808, + "rewards/accuracies": 1.0, + "rewards/generated": -44.21206283569336, + "rewards/margins": 41.520259857177734, + "rewards/real": -2.691797971725464, + "step": 1150 + }, + { + "epoch": 0.37, + "learning_rate": 4.868436648097665e-07, + "logits/generated": -0.03737213462591171, + "logits/real": -1.0409324169158936, + "logps/generated": -826.9354248046875, + "logps/real": -359.90130615234375, + "loss": 0.0481, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -40.422664642333984, + "rewards/margins": 38.234439849853516, + "rewards/real": -2.188223361968994, + "step": 1160 + }, + { + "epoch": 0.37, + "learning_rate": 4.862510370984946e-07, + "logits/generated": 0.018914643675088882, + "logits/real": -1.1064562797546387, + "logps/generated": -721.578857421875, + "logps/real": -350.8349304199219, + "loss": 0.0247, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -33.86790466308594, + "rewards/margins": 31.102153778076172, + "rewards/real": -2.7657506465911865, + "step": 1170 + }, + { + "epoch": 0.38, + "learning_rate": 4.856584093872229e-07, + "logits/generated": 0.07794220000505447, + "logits/real": -1.065049409866333, + "logps/generated": -789.3442993164062, + "logps/real": -315.46148681640625, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/generated": -39.061622619628906, + "rewards/margins": 39.12267303466797, + "rewards/real": 0.06105160713195801, + "step": 1180 + }, + { + "epoch": 0.38, + "learning_rate": 4.850657816759511e-07, + "logits/generated": -0.010703866370022297, + "logits/real": -1.0528227090835571, + "logps/generated": -712.7218017578125, + "logps/real": -336.1849060058594, + "loss": 0.0817, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -32.728797912597656, + "rewards/margins": 30.812725067138672, + "rewards/real": -1.9160690307617188, + "step": 1190 + }, + { + "epoch": 0.38, + "learning_rate": 4.844731539646794e-07, + "logits/generated": -0.15801379084587097, + "logits/real": -1.0353472232818604, + "logps/generated": -763.3748168945312, + "logps/real": -316.615478515625, + "loss": 0.0816, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -36.90118408203125, + "rewards/margins": 33.61400604248047, + "rewards/real": -3.287179470062256, + "step": 1200 + }, + { + "epoch": 0.39, + "learning_rate": 4.838805262534076e-07, + "logits/generated": -0.4400274157524109, + "logits/real": -1.2942895889282227, + "logps/generated": -637.5701293945312, + "logps/real": -310.1935729980469, + "loss": 0.1157, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -26.2459659576416, + "rewards/margins": 26.2324275970459, + "rewards/real": -0.013539028353989124, + "step": 1210 + }, + { + "epoch": 0.39, + "learning_rate": 4.832878985421358e-07, + "logits/generated": -0.20033612847328186, + "logits/real": -1.2102962732315063, + "logps/generated": -688.7596435546875, + "logps/real": -299.0711364746094, + "loss": 0.0293, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -29.408504486083984, + "rewards/margins": 29.371475219726562, + "rewards/real": -0.03703027963638306, + "step": 1220 + }, + { + "epoch": 0.39, + "learning_rate": 4.82695270830864e-07, + "logits/generated": -0.22372718155384064, + "logits/real": -1.134982943534851, + "logps/generated": -646.0555419921875, + "logps/real": -304.77130126953125, + "loss": 0.0377, + "rewards/accuracies": 1.0, + "rewards/generated": -24.955896377563477, + "rewards/margins": 24.676605224609375, + "rewards/real": -0.2792915105819702, + "step": 1230 + }, + { + "epoch": 0.4, + "learning_rate": 4.821026431195922e-07, + "logits/generated": -0.05184303969144821, + "logits/real": -1.1724616289138794, + "logps/generated": -745.6905517578125, + "logps/real": -334.4775390625, + "loss": 0.0277, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -33.423221588134766, + "rewards/margins": 33.6823616027832, + "rewards/real": 0.2591377794742584, + "step": 1240 + }, + { + "epoch": 0.4, + "learning_rate": 4.815100154083205e-07, + "logits/generated": -0.3657141625881195, + "logits/real": -1.26314115524292, + "logps/generated": -639.5381469726562, + "logps/real": -302.38604736328125, + "loss": 0.0333, + "rewards/accuracies": 1.0, + "rewards/generated": -25.064176559448242, + "rewards/margins": 26.08724021911621, + "rewards/real": 1.0230640172958374, + "step": 1250 + }, + { + "epoch": 0.4, + "learning_rate": 4.809173876970487e-07, + "logits/generated": -0.28293323516845703, + "logits/real": -1.287521243095398, + "logps/generated": -651.6383056640625, + "logps/real": -277.32098388671875, + "loss": 0.1504, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -25.853628158569336, + "rewards/margins": 27.138935089111328, + "rewards/real": 1.2853089570999146, + "step": 1260 + }, + { + "epoch": 0.41, + "learning_rate": 4.80324759985777e-07, + "logits/generated": -0.1853010356426239, + "logits/real": -1.2585564851760864, + "logps/generated": -624.6441040039062, + "logps/real": -323.127685546875, + "loss": 0.0547, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -23.59101676940918, + "rewards/margins": 23.93788719177246, + "rewards/real": 0.3468722701072693, + "step": 1270 + }, + { + "epoch": 0.41, + "learning_rate": 4.797321322745052e-07, + "logits/generated": -0.3382716476917267, + "logits/real": -1.2762130498886108, + "logps/generated": -731.6915283203125, + "logps/real": -327.10223388671875, + "loss": 0.0485, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -32.39984893798828, + "rewards/margins": 30.74454116821289, + "rewards/real": -1.655306100845337, + "step": 1280 + }, + { + "epoch": 0.41, + "learning_rate": 4.791395045632333e-07, + "logits/generated": -0.35983893275260925, + "logits/real": -1.251419186592102, + "logps/generated": -721.2036743164062, + "logps/real": -380.6951599121094, + "loss": 0.1202, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -32.7227668762207, + "rewards/margins": 29.393871307373047, + "rewards/real": -3.3288989067077637, + "step": 1290 + }, + { + "epoch": 0.42, + "learning_rate": 4.785468768519616e-07, + "logits/generated": -0.5904273986816406, + "logits/real": -1.3770813941955566, + "logps/generated": -645.2593994140625, + "logps/real": -373.2558898925781, + "loss": 0.0258, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -26.56414794921875, + "rewards/margins": 24.9666748046875, + "rewards/real": -1.5974750518798828, + "step": 1300 + }, + { + "epoch": 0.42, + "learning_rate": 4.779542491406898e-07, + "logits/generated": -0.17467817664146423, + "logits/real": -1.0748966932296753, + "logps/generated": -705.0093383789062, + "logps/real": -353.30194091796875, + "loss": 0.0512, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -31.598918914794922, + "rewards/margins": 29.074283599853516, + "rewards/real": -2.52463436126709, + "step": 1310 + }, + { + "epoch": 0.42, + "learning_rate": 4.77361621429418e-07, + "logits/generated": -0.24216961860656738, + "logits/real": -1.2368093729019165, + "logps/generated": -689.7742919921875, + "logps/real": -341.96343994140625, + "loss": 0.0487, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -27.19826889038086, + "rewards/margins": 27.094905853271484, + "rewards/real": -0.10336218029260635, + "step": 1320 + }, + { + "epoch": 0.43, + "learning_rate": 4.7676899371814624e-07, + "logits/generated": -0.3762677311897278, + "logits/real": -1.3373234272003174, + "logps/generated": -652.1893310546875, + "logps/real": -318.98681640625, + "loss": 0.0895, + "rewards/accuracies": 1.0, + "rewards/generated": -25.372386932373047, + "rewards/margins": 26.68527603149414, + "rewards/real": 1.3128888607025146, + "step": 1330 + }, + { + "epoch": 0.43, + "learning_rate": 4.7617636600687443e-07, + "logits/generated": -0.11979229748249054, + "logits/real": -1.1395213603973389, + "logps/generated": -795.4387817382812, + "logps/real": -324.91082763671875, + "loss": 0.0125, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -39.38142013549805, + "rewards/margins": 35.4011344909668, + "rewards/real": -3.9802863597869873, + "step": 1340 + }, + { + "epoch": 0.43, + "learning_rate": 4.755837382956027e-07, + "logits/generated": -0.6125169992446899, + "logits/real": -1.3682019710540771, + "logps/generated": -678.634521484375, + "logps/real": -297.8531188964844, + "loss": 0.0546, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -31.750164031982422, + "rewards/margins": 30.06414222717285, + "rewards/real": -1.686022400856018, + "step": 1350 + }, + { + "epoch": 0.44, + "learning_rate": 4.7499111058433086e-07, + "logits/generated": -0.29297947883605957, + "logits/real": -1.2235249280929565, + "logps/generated": -619.5758056640625, + "logps/real": -346.8426513671875, + "loss": 0.0664, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -24.86092758178711, + "rewards/margins": 24.205778121948242, + "rewards/real": -0.6551482081413269, + "step": 1360 + }, + { + "epoch": 0.44, + "learning_rate": 4.743984828730591e-07, + "logits/generated": -0.22886662185192108, + "logits/real": -1.1904137134552002, + "logps/generated": -693.4284057617188, + "logps/real": -338.90582275390625, + "loss": 0.0435, + "rewards/accuracies": 1.0, + "rewards/generated": -29.037893295288086, + "rewards/margins": 28.427536010742188, + "rewards/real": -0.6103585958480835, + "step": 1370 + }, + { + "epoch": 0.44, + "learning_rate": 4.7380585516178735e-07, + "logits/generated": -0.3873533010482788, + "logits/real": -1.3212558031082153, + "logps/generated": -695.8396606445312, + "logps/real": -314.5908203125, + "loss": 0.0406, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -29.094036102294922, + "rewards/margins": 28.612716674804688, + "rewards/real": -0.4813196063041687, + "step": 1380 + }, + { + "epoch": 0.44, + "learning_rate": 4.7321322745051554e-07, + "logits/generated": -0.2663424611091614, + "logits/real": -1.234851360321045, + "logps/generated": -626.5375366210938, + "logps/real": -329.78070068359375, + "loss": 0.0486, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -25.41695213317871, + "rewards/margins": 24.246206283569336, + "rewards/real": -1.1707462072372437, + "step": 1390 + }, + { + "epoch": 0.45, + "learning_rate": 4.726205997392438e-07, + "logits/generated": -0.27236634492874146, + "logits/real": -1.3489251136779785, + "logps/generated": -594.3746948242188, + "logps/real": -310.15008544921875, + "loss": 0.0353, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -21.29964256286621, + "rewards/margins": 21.168689727783203, + "rewards/real": -0.1309548020362854, + "step": 1400 + }, + { + "epoch": 0.45, + "learning_rate": 4.72027972027972e-07, + "logits/generated": -0.4870881140232086, + "logits/real": -1.3286640644073486, + "logps/generated": -613.6137084960938, + "logps/real": -301.20849609375, + "loss": 0.0798, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -23.393417358398438, + "rewards/margins": 24.340110778808594, + "rewards/real": 0.9466953277587891, + "step": 1410 + }, + { + "epoch": 0.45, + "learning_rate": 4.714353443167002e-07, + "logits/generated": -0.3089195489883423, + "logits/real": -1.2844128608703613, + "logps/generated": -629.444580078125, + "logps/real": -326.8503112792969, + "loss": 0.0602, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -24.135459899902344, + "rewards/margins": 25.159605026245117, + "rewards/real": 1.024143099784851, + "step": 1420 + }, + { + "epoch": 0.46, + "learning_rate": 4.7084271660542845e-07, + "logits/generated": 0.06970086693763733, + "logits/real": -0.873447060585022, + "logps/generated": -689.5084228515625, + "logps/real": -334.2491149902344, + "loss": 0.0457, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -30.447406768798828, + "rewards/margins": 28.454111099243164, + "rewards/real": -1.9932889938354492, + "step": 1430 + }, + { + "epoch": 0.46, + "learning_rate": 4.702500888941567e-07, + "logits/generated": -0.25644490122795105, + "logits/real": -1.1300890445709229, + "logps/generated": -681.3156127929688, + "logps/real": -349.94122314453125, + "loss": 0.0405, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -28.21697998046875, + "rewards/margins": 28.60993003845215, + "rewards/real": 0.3929504156112671, + "step": 1440 + }, + { + "epoch": 0.46, + "learning_rate": 4.696574611828849e-07, + "logits/generated": -0.0076753199100494385, + "logits/real": -0.9142985343933105, + "logps/generated": -709.6637573242188, + "logps/real": -343.50299072265625, + "loss": 0.1348, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -28.745708465576172, + "rewards/margins": 28.527847290039062, + "rewards/real": -0.2178615778684616, + "step": 1450 + }, + { + "epoch": 0.47, + "learning_rate": 4.690648334716131e-07, + "logits/generated": -0.10671776533126831, + "logits/real": -0.9834194183349609, + "logps/generated": -670.0909423828125, + "logps/real": -373.78857421875, + "loss": 0.0116, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -27.11911964416504, + "rewards/margins": 26.801151275634766, + "rewards/real": -0.3179682195186615, + "step": 1460 + }, + { + "epoch": 0.47, + "learning_rate": 4.6847220576034137e-07, + "logits/generated": -0.093577541410923, + "logits/real": -0.9135047197341919, + "logps/generated": -715.4546508789062, + "logps/real": -293.73681640625, + "loss": 0.1093, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -29.328060150146484, + "rewards/margins": 29.672901153564453, + "rewards/real": 0.34484216570854187, + "step": 1470 + }, + { + "epoch": 0.47, + "learning_rate": 4.6787957804906955e-07, + "logits/generated": 0.07950839400291443, + "logits/real": -0.7794169783592224, + "logps/generated": -624.20263671875, + "logps/real": -323.77630615234375, + "loss": 0.0277, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -23.57122039794922, + "rewards/margins": 22.5595703125, + "rewards/real": -1.0116502046585083, + "step": 1480 + }, + { + "epoch": 0.48, + "learning_rate": 4.6728695033779774e-07, + "logits/generated": 0.12502098083496094, + "logits/real": -0.893004298210144, + "logps/generated": -721.6754760742188, + "logps/real": -351.4068298339844, + "loss": 0.0341, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -31.803447723388672, + "rewards/margins": 30.353382110595703, + "rewards/real": -1.4500672817230225, + "step": 1490 + }, + { + "epoch": 0.48, + "learning_rate": 4.66694322626526e-07, + "logits/generated": -0.1611909121274948, + "logits/real": -1.0857502222061157, + "logps/generated": -606.7127685546875, + "logps/real": -346.6650085449219, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/generated": -23.76133918762207, + "rewards/margins": 23.08817481994629, + "rewards/real": -0.6731644868850708, + "step": 1500 + }, + { + "epoch": 0.48, + "learning_rate": 4.661016949152542e-07, + "logits/generated": -0.4079923629760742, + "logits/real": -1.1771427392959595, + "logps/generated": -712.0586547851562, + "logps/real": -318.7583312988281, + "loss": 0.0569, + "rewards/accuracies": 1.0, + "rewards/generated": -29.198932647705078, + "rewards/margins": 29.653606414794922, + "rewards/real": 0.4546758234500885, + "step": 1510 + }, + { + "epoch": 0.49, + "learning_rate": 4.655090672039824e-07, + "logits/generated": -0.006799777038395405, + "logits/real": -0.9504464268684387, + "logps/generated": -763.8966674804688, + "logps/real": -341.6388244628906, + "loss": 0.0535, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -35.669090270996094, + "rewards/margins": 33.23848342895508, + "rewards/real": -2.43060564994812, + "step": 1520 + }, + { + "epoch": 0.49, + "learning_rate": 4.6491643949271066e-07, + "logits/generated": -0.04143080860376358, + "logits/real": -0.8772333860397339, + "logps/generated": -642.1051025390625, + "logps/real": -323.8268127441406, + "loss": 0.0595, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -26.772869110107422, + "rewards/margins": 25.335664749145508, + "rewards/real": -1.4372053146362305, + "step": 1530 + }, + { + "epoch": 0.49, + "learning_rate": 4.6432381178143885e-07, + "logits/generated": -0.3368912935256958, + "logits/real": -0.9504976272583008, + "logps/generated": -715.6137084960938, + "logps/real": -327.49212646484375, + "loss": 0.0526, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -31.8399715423584, + "rewards/margins": 29.859363555908203, + "rewards/real": -1.980611801147461, + "step": 1540 + }, + { + "epoch": 0.5, + "learning_rate": 4.637311840701671e-07, + "logits/generated": -0.04745306074619293, + "logits/real": -0.7478165626525879, + "logps/generated": -680.3505859375, + "logps/real": -313.1524963378906, + "loss": 0.0477, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -27.10567283630371, + "rewards/margins": 25.496183395385742, + "rewards/real": -1.609485387802124, + "step": 1550 + }, + { + "epoch": 0.5, + "learning_rate": 4.6313855635889533e-07, + "logits/generated": -0.09471658617258072, + "logits/real": -0.9122379422187805, + "logps/generated": -711.54248046875, + "logps/real": -362.5740966796875, + "loss": 0.0266, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -31.436248779296875, + "rewards/margins": 28.634613037109375, + "rewards/real": -2.8016300201416016, + "step": 1560 + }, + { + "epoch": 0.5, + "learning_rate": 4.625459286476235e-07, + "logits/generated": -0.5175934433937073, + "logits/real": -1.0976009368896484, + "logps/generated": -610.1480102539062, + "logps/real": -325.7567138671875, + "loss": 0.0686, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -23.987730026245117, + "rewards/margins": 23.50770378112793, + "rewards/real": -0.4800271987915039, + "step": 1570 + }, + { + "epoch": 0.51, + "learning_rate": 4.6195330093635176e-07, + "logits/generated": -0.3180214762687683, + "logits/real": -1.0340139865875244, + "logps/generated": -640.499755859375, + "logps/real": -286.6065368652344, + "loss": 0.0269, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -23.94277572631836, + "rewards/margins": 24.719507217407227, + "rewards/real": 0.776727557182312, + "step": 1580 + }, + { + "epoch": 0.51, + "learning_rate": 4.6136067322508e-07, + "logits/generated": -0.16230185329914093, + "logits/real": -1.1436270475387573, + "logps/generated": -604.614501953125, + "logps/real": -316.1036071777344, + "loss": 0.0248, + "rewards/accuracies": 1.0, + "rewards/generated": -22.601945877075195, + "rewards/margins": 22.853349685668945, + "rewards/real": 0.2514052093029022, + "step": 1590 + }, + { + "epoch": 0.51, + "learning_rate": 4.607680455138082e-07, + "logits/generated": 0.43026676774024963, + "logits/real": -0.7559512853622437, + "logps/generated": -647.7166748046875, + "logps/real": -370.7508850097656, + "loss": 0.0879, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -24.863733291625977, + "rewards/margins": 23.403057098388672, + "rewards/real": -1.4606760740280151, + "step": 1600 + }, + { + "epoch": 0.52, + "learning_rate": 4.6017541780253643e-07, + "logits/generated": 0.9180746078491211, + "logits/real": 0.024421293288469315, + "logps/generated": -685.8744506835938, + "logps/real": -314.4345397949219, + "loss": 0.0898, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -29.287607192993164, + "rewards/margins": 27.995941162109375, + "rewards/real": -1.2916669845581055, + "step": 1610 + }, + { + "epoch": 0.52, + "learning_rate": 4.595827900912647e-07, + "logits/generated": 0.43732696771621704, + "logits/real": -0.18413802981376648, + "logps/generated": -630.1939086914062, + "logps/real": -307.03253173828125, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/generated": -24.569049835205078, + "rewards/margins": 23.723299026489258, + "rewards/real": -0.8457552194595337, + "step": 1620 + }, + { + "epoch": 0.52, + "learning_rate": 4.5899016237999286e-07, + "logits/generated": 0.6507248282432556, + "logits/real": -0.3299483358860016, + "logps/generated": -644.2907104492188, + "logps/real": -293.15740966796875, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/generated": -27.317459106445312, + "rewards/margins": 27.067108154296875, + "rewards/real": -0.2503497004508972, + "step": 1630 + }, + { + "epoch": 0.52, + "learning_rate": 4.583975346687211e-07, + "logits/generated": 0.7223843932151794, + "logits/real": -0.22026348114013672, + "logps/generated": -714.4222412109375, + "logps/real": -367.756103515625, + "loss": 0.0471, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -31.6726016998291, + "rewards/margins": 28.926502227783203, + "rewards/real": -2.746098756790161, + "step": 1640 + }, + { + "epoch": 0.53, + "learning_rate": 4.5780490695744935e-07, + "logits/generated": 0.7971788644790649, + "logits/real": -0.11250700801610947, + "logps/generated": -671.2685546875, + "logps/real": -360.0363464355469, + "loss": 0.0592, + "rewards/accuracies": 1.0, + "rewards/generated": -27.458694458007812, + "rewards/margins": 24.904430389404297, + "rewards/real": -2.5542635917663574, + "step": 1650 + }, + { + "epoch": 0.53, + "learning_rate": 4.5721227924617754e-07, + "logits/generated": 0.7440794706344604, + "logits/real": 0.10373647511005402, + "logps/generated": -722.1324462890625, + "logps/real": -315.26153564453125, + "loss": 0.091, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -31.29463768005371, + "rewards/margins": 29.36053466796875, + "rewards/real": -1.9341026544570923, + "step": 1660 + }, + { + "epoch": 0.53, + "learning_rate": 4.566196515349057e-07, + "logits/generated": 0.45260196924209595, + "logits/real": 0.03100525215268135, + "logps/generated": -785.9488525390625, + "logps/real": -309.55194091796875, + "loss": 0.0448, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -34.245689392089844, + "rewards/margins": 33.135684967041016, + "rewards/real": -1.1099998950958252, + "step": 1670 + }, + { + "epoch": 0.54, + "learning_rate": 4.5602702382363397e-07, + "logits/generated": 0.7789133787155151, + "logits/real": -0.20548442006111145, + "logps/generated": -666.3745727539062, + "logps/real": -360.34490966796875, + "loss": 0.0752, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -27.628658294677734, + "rewards/margins": 27.15573501586914, + "rewards/real": -0.4729282259941101, + "step": 1680 + }, + { + "epoch": 0.54, + "learning_rate": 4.5543439611236216e-07, + "logits/generated": 0.797033429145813, + "logits/real": 0.16859188675880432, + "logps/generated": -727.2579345703125, + "logps/real": -318.44439697265625, + "loss": 0.0715, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -31.163982391357422, + "rewards/margins": 28.728031158447266, + "rewards/real": -2.4359545707702637, + "step": 1690 + }, + { + "epoch": 0.54, + "learning_rate": 4.548417684010904e-07, + "logits/generated": 0.5582669973373413, + "logits/real": -0.4732363224029541, + "logps/generated": -640.9185791015625, + "logps/real": -336.33404541015625, + "loss": 0.1114, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -25.40323829650879, + "rewards/margins": 25.0753173828125, + "rewards/real": -0.3279207646846771, + "step": 1700 + }, + { + "epoch": 0.55, + "learning_rate": 4.5424914068981864e-07, + "logits/generated": 0.6099811792373657, + "logits/real": -0.357994019985199, + "logps/generated": -672.3972778320312, + "logps/real": -352.52252197265625, + "loss": 0.0654, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -28.22308921813965, + "rewards/margins": 26.43735122680664, + "rewards/real": -1.7857351303100586, + "step": 1710 + }, + { + "epoch": 0.55, + "learning_rate": 4.5365651297854683e-07, + "logits/generated": 1.0697330236434937, + "logits/real": -0.14076311886310577, + "logps/generated": -790.8233642578125, + "logps/real": -360.5348205566406, + "loss": 0.0459, + "rewards/accuracies": 1.0, + "rewards/generated": -36.21926498413086, + "rewards/margins": 33.47108459472656, + "rewards/real": -2.7481868267059326, + "step": 1720 + }, + { + "epoch": 0.55, + "learning_rate": 4.5306388526727507e-07, + "logits/generated": 0.7185707688331604, + "logits/real": -0.31060856580734253, + "logps/generated": -660.9615478515625, + "logps/real": -315.27874755859375, + "loss": 0.1065, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -26.4785213470459, + "rewards/margins": 25.37911033630371, + "rewards/real": -1.0994105339050293, + "step": 1730 + }, + { + "epoch": 0.56, + "learning_rate": 4.524712575560033e-07, + "logits/generated": 0.5896469950675964, + "logits/real": -0.13658718764781952, + "logps/generated": -768.370361328125, + "logps/real": -327.138427734375, + "loss": 0.0505, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -35.88934326171875, + "rewards/margins": 32.99297332763672, + "rewards/real": -2.8963723182678223, + "step": 1740 + }, + { + "epoch": 0.56, + "learning_rate": 4.518786298447315e-07, + "logits/generated": 0.5808233618736267, + "logits/real": -0.32730236649513245, + "logps/generated": -707.2631225585938, + "logps/real": -377.5989685058594, + "loss": 0.0269, + "rewards/accuracies": 1.0, + "rewards/generated": -33.49466323852539, + "rewards/margins": 29.88504981994629, + "rewards/real": -3.6096129417419434, + "step": 1750 + }, + { + "epoch": 0.56, + "learning_rate": 4.5128600213345974e-07, + "logits/generated": 0.6635645627975464, + "logits/real": 0.02990163303911686, + "logps/generated": -744.2517700195312, + "logps/real": -344.38763427734375, + "loss": 0.1064, + "rewards/accuracies": 1.0, + "rewards/generated": -35.34321975708008, + "rewards/margins": 31.038623809814453, + "rewards/real": -4.304598331451416, + "step": 1760 + }, + { + "epoch": 0.57, + "learning_rate": 4.50693374422188e-07, + "logits/generated": 1.0504231452941895, + "logits/real": 0.14314612746238708, + "logps/generated": -722.0823364257812, + "logps/real": -415.1273498535156, + "loss": 0.0774, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -35.303855895996094, + "rewards/margins": 27.821096420288086, + "rewards/real": -7.482762813568115, + "step": 1770 + }, + { + "epoch": 0.57, + "learning_rate": 4.501007467109162e-07, + "logits/generated": 1.0505800247192383, + "logits/real": 0.025553371757268906, + "logps/generated": -792.7987060546875, + "logps/real": -369.75714111328125, + "loss": 0.0941, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -37.77853775024414, + "rewards/margins": 33.75434112548828, + "rewards/real": -4.024199485778809, + "step": 1780 + }, + { + "epoch": 0.57, + "learning_rate": 4.495081189996444e-07, + "logits/generated": 0.880537211894989, + "logits/real": -0.4084358811378479, + "logps/generated": -702.1065673828125, + "logps/real": -356.48077392578125, + "loss": 0.0322, + "rewards/accuracies": 1.0, + "rewards/generated": -33.2712287902832, + "rewards/margins": 30.7994327545166, + "rewards/real": -2.47179913520813, + "step": 1790 + }, + { + "epoch": 0.58, + "learning_rate": 4.4891549128837266e-07, + "logits/generated": 0.8617936968803406, + "logits/real": -0.29051464796066284, + "logps/generated": -713.6237182617188, + "logps/real": -321.52752685546875, + "loss": 0.0241, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -33.624881744384766, + "rewards/margins": 32.551475524902344, + "rewards/real": -1.0734100341796875, + "step": 1800 + }, + { + "epoch": 0.58, + "learning_rate": 4.4832286357710085e-07, + "logits/generated": 0.7705877423286438, + "logits/real": 0.30394884943962097, + "logps/generated": -721.6909790039062, + "logps/real": -366.5400390625, + "loss": 0.0631, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -33.41383743286133, + "rewards/margins": 28.932180404663086, + "rewards/real": -4.481656074523926, + "step": 1810 + }, + { + "epoch": 0.58, + "learning_rate": 4.477302358658291e-07, + "logits/generated": 0.5584123134613037, + "logits/real": -0.6512025594711304, + "logps/generated": -729.8486938476562, + "logps/real": -348.49432373046875, + "loss": 0.036, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -34.38263702392578, + "rewards/margins": 31.932994842529297, + "rewards/real": -2.449641466140747, + "step": 1820 + }, + { + "epoch": 0.59, + "learning_rate": 4.4713760815455733e-07, + "logits/generated": 0.2919533848762512, + "logits/real": -0.666230320930481, + "logps/generated": -740.5325927734375, + "logps/real": -324.1904602050781, + "loss": 0.0746, + "rewards/accuracies": 1.0, + "rewards/generated": -35.278297424316406, + "rewards/margins": 33.55880355834961, + "rewards/real": -1.7194910049438477, + "step": 1830 + }, + { + "epoch": 0.59, + "learning_rate": 4.465449804432855e-07, + "logits/generated": 0.4498261511325836, + "logits/real": -0.43754512071609497, + "logps/generated": -744.0653076171875, + "logps/real": -335.212158203125, + "loss": 0.0572, + "rewards/accuracies": 1.0, + "rewards/generated": -35.13162612915039, + "rewards/margins": 33.10210418701172, + "rewards/real": -2.0295262336730957, + "step": 1840 + }, + { + "epoch": 0.59, + "learning_rate": 4.459523527320137e-07, + "logits/generated": 0.427295982837677, + "logits/real": -0.6499985456466675, + "logps/generated": -658.7879638671875, + "logps/real": -397.44158935546875, + "loss": 0.0463, + "rewards/accuracies": 1.0, + "rewards/generated": -29.81673812866211, + "rewards/margins": 26.594614028930664, + "rewards/real": -3.2221245765686035, + "step": 1850 + }, + { + "epoch": 0.6, + "learning_rate": 4.4535972502074195e-07, + "logits/generated": 0.3401271402835846, + "logits/real": -0.9475802183151245, + "logps/generated": -682.956298828125, + "logps/real": -343.54644775390625, + "loss": 0.051, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -30.154062271118164, + "rewards/margins": 29.059768676757812, + "rewards/real": -1.0942920446395874, + "step": 1860 + }, + { + "epoch": 0.6, + "learning_rate": 4.4476709730947014e-07, + "logits/generated": 0.48368996381759644, + "logits/real": -0.6674381494522095, + "logps/generated": -752.8349609375, + "logps/real": -310.81005859375, + "loss": 0.0206, + "rewards/accuracies": 1.0, + "rewards/generated": -36.16936492919922, + "rewards/margins": 33.7068977355957, + "rewards/real": -2.4624717235565186, + "step": 1870 + }, + { + "epoch": 0.6, + "learning_rate": 4.441744695981984e-07, + "logits/generated": 0.27173930406570435, + "logits/real": -0.8330855369567871, + "logps/generated": -731.5062255859375, + "logps/real": -392.802001953125, + "loss": 0.0939, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -35.108642578125, + "rewards/margins": 31.652996063232422, + "rewards/real": -3.455641508102417, + "step": 1880 + }, + { + "epoch": 0.6, + "learning_rate": 4.435818418869266e-07, + "logits/generated": 0.5597046613693237, + "logits/real": -0.6597913503646851, + "logps/generated": -837.8775634765625, + "logps/real": -348.21966552734375, + "loss": 0.0481, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -44.762596130371094, + "rewards/margins": 38.68193817138672, + "rewards/real": -6.080657005310059, + "step": 1890 + }, + { + "epoch": 0.61, + "learning_rate": 4.429892141756548e-07, + "logits/generated": 0.7637578845024109, + "logits/real": -0.36679187417030334, + "logps/generated": -837.9700927734375, + "logps/real": -364.4372253417969, + "loss": 0.0329, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -43.84961700439453, + "rewards/margins": 36.390464782714844, + "rewards/real": -7.459149360656738, + "step": 1900 + }, + { + "epoch": 0.61, + "learning_rate": 4.4239658646438306e-07, + "logits/generated": 0.6158145666122437, + "logits/real": -0.3679594099521637, + "logps/generated": -796.8351440429688, + "logps/real": -405.11151123046875, + "loss": 0.0591, + "rewards/accuracies": 1.0, + "rewards/generated": -40.54536056518555, + "rewards/margins": 30.88002586364746, + "rewards/real": -9.665339469909668, + "step": 1910 + }, + { + "epoch": 0.61, + "learning_rate": 4.418039587531113e-07, + "logits/generated": 0.5541807413101196, + "logits/real": -0.3465508818626404, + "logps/generated": -842.4884643554688, + "logps/real": -400.547607421875, + "loss": 0.05, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -43.56201934814453, + "rewards/margins": 34.930511474609375, + "rewards/real": -8.631510734558105, + "step": 1920 + }, + { + "epoch": 0.62, + "learning_rate": 4.412113310418395e-07, + "logits/generated": 0.4438748359680176, + "logits/real": -0.649644136428833, + "logps/generated": -714.6018676757812, + "logps/real": -334.74505615234375, + "loss": 0.0969, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -31.423009872436523, + "rewards/margins": 29.62143898010254, + "rewards/real": -1.8015722036361694, + "step": 1930 + }, + { + "epoch": 0.62, + "learning_rate": 4.4061870333056773e-07, + "logits/generated": 0.918286144733429, + "logits/real": -0.5583127737045288, + "logps/generated": -660.2481689453125, + "logps/real": -349.63916015625, + "loss": 0.0385, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -27.5297794342041, + "rewards/margins": 24.28526496887207, + "rewards/real": -3.2445099353790283, + "step": 1940 + }, + { + "epoch": 0.62, + "learning_rate": 4.4002607561929597e-07, + "logits/generated": 0.7378710508346558, + "logits/real": -0.4875301718711853, + "logps/generated": -716.6314697265625, + "logps/real": -383.4659729003906, + "loss": 0.0606, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -32.32218933105469, + "rewards/margins": 29.484619140625, + "rewards/real": -2.837568998336792, + "step": 1950 + }, + { + "epoch": 0.63, + "learning_rate": 4.3943344790802416e-07, + "logits/generated": 0.704971432685852, + "logits/real": -0.3970826268196106, + "logps/generated": -685.1222534179688, + "logps/real": -337.703125, + "loss": 0.0326, + "rewards/accuracies": 1.0, + "rewards/generated": -29.617624282836914, + "rewards/margins": 27.498523712158203, + "rewards/real": -2.1190993785858154, + "step": 1960 + }, + { + "epoch": 0.63, + "learning_rate": 4.388408201967524e-07, + "logits/generated": 0.48584890365600586, + "logits/real": -0.5464336276054382, + "logps/generated": -721.333740234375, + "logps/real": -357.57159423828125, + "loss": 0.0161, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -32.480567932128906, + "rewards/margins": 28.700443267822266, + "rewards/real": -3.7801260948181152, + "step": 1970 + }, + { + "epoch": 0.63, + "learning_rate": 4.3824819248548064e-07, + "logits/generated": 1.1303284168243408, + "logits/real": -0.15861235558986664, + "logps/generated": -724.344482421875, + "logps/real": -378.0018005371094, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/generated": -32.12859344482422, + "rewards/margins": 28.6373233795166, + "rewards/real": -3.491267681121826, + "step": 1980 + }, + { + "epoch": 0.64, + "learning_rate": 4.3765556477420883e-07, + "logits/generated": 1.001157283782959, + "logits/real": -0.1735081970691681, + "logps/generated": -642.5946655273438, + "logps/real": -329.3343505859375, + "loss": 0.0826, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -27.2222900390625, + "rewards/margins": 25.380970001220703, + "rewards/real": -1.8413175344467163, + "step": 1990 + }, + { + "epoch": 0.64, + "learning_rate": 4.3706293706293707e-07, + "logits/generated": 1.2101815938949585, + "logits/real": 0.2167239934206009, + "logps/generated": -784.0897216796875, + "logps/real": -338.1402893066406, + "loss": 0.0856, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -35.91041946411133, + "rewards/margins": 33.334041595458984, + "rewards/real": -2.576378345489502, + "step": 2000 + }, + { + "epoch": 0.64, + "learning_rate": 4.364703093516653e-07, + "logits/generated": 0.8496305346488953, + "logits/real": -0.08679083734750748, + "logps/generated": -722.7340087890625, + "logps/real": -333.61529541015625, + "loss": 0.041, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -31.327056884765625, + "rewards/margins": 29.975378036499023, + "rewards/real": -1.3516753911972046, + "step": 2010 + }, + { + "epoch": 0.65, + "learning_rate": 4.3587768164039345e-07, + "logits/generated": 1.025160312652588, + "logits/real": -0.004342697560787201, + "logps/generated": -671.9498901367188, + "logps/real": -371.9809875488281, + "loss": 0.0411, + "rewards/accuracies": 1.0, + "rewards/generated": -28.117258071899414, + "rewards/margins": 26.58926773071289, + "rewards/real": -1.5279954671859741, + "step": 2020 + }, + { + "epoch": 0.65, + "learning_rate": 4.352850539291217e-07, + "logits/generated": 1.013951063156128, + "logits/real": 0.09798892587423325, + "logps/generated": -701.6458740234375, + "logps/real": -334.0567932128906, + "loss": 0.0679, + "rewards/accuracies": 1.0, + "rewards/generated": -31.478445053100586, + "rewards/margins": 28.081411361694336, + "rewards/real": -3.3970324993133545, + "step": 2030 + }, + { + "epoch": 0.65, + "learning_rate": 4.346924262178499e-07, + "logits/generated": 0.6137873530387878, + "logits/real": -0.20404133200645447, + "logps/generated": -681.4293212890625, + "logps/real": -321.9293518066406, + "loss": 0.0687, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -28.87796401977539, + "rewards/margins": 27.810291290283203, + "rewards/real": -1.0676777362823486, + "step": 2040 + }, + { + "epoch": 0.66, + "learning_rate": 4.340997985065781e-07, + "logits/generated": 0.9127419590950012, + "logits/real": -0.434025377035141, + "logps/generated": -730.1388549804688, + "logps/real": -357.342529296875, + "loss": 0.0274, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -32.81401062011719, + "rewards/margins": 31.743839263916016, + "rewards/real": -1.0701699256896973, + "step": 2050 + }, + { + "epoch": 0.66, + "learning_rate": 4.3350717079530637e-07, + "logits/generated": 0.720097541809082, + "logits/real": -0.33784544467926025, + "logps/generated": -650.9515380859375, + "logps/real": -381.292724609375, + "loss": 0.0321, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -27.152469635009766, + "rewards/margins": 26.854297637939453, + "rewards/real": -0.29817166924476624, + "step": 2060 + }, + { + "epoch": 0.66, + "learning_rate": 4.3291454308403455e-07, + "logits/generated": 0.8382455110549927, + "logits/real": -0.13121643662452698, + "logps/generated": -728.041748046875, + "logps/real": -311.09844970703125, + "loss": 0.0364, + "rewards/accuracies": 1.0, + "rewards/generated": -31.98345947265625, + "rewards/margins": 30.502544403076172, + "rewards/real": -1.4809117317199707, + "step": 2070 + }, + { + "epoch": 0.67, + "learning_rate": 4.323219153727628e-07, + "logits/generated": 0.4975649416446686, + "logits/real": -0.34417563676834106, + "logps/generated": -714.7693481445312, + "logps/real": -346.2616271972656, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/generated": -31.47088623046875, + "rewards/margins": 30.593393325805664, + "rewards/real": -0.8774968385696411, + "step": 2080 + }, + { + "epoch": 0.67, + "learning_rate": 4.3172928766149104e-07, + "logits/generated": 1.0943410396575928, + "logits/real": -0.14147847890853882, + "logps/generated": -729.1746826171875, + "logps/real": -393.4339599609375, + "loss": 0.0412, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -33.71548080444336, + "rewards/margins": 30.33559226989746, + "rewards/real": -3.3798928260803223, + "step": 2090 + }, + { + "epoch": 0.67, + "learning_rate": 4.3113665995021923e-07, + "logits/generated": 1.0804413557052612, + "logits/real": -0.07776399701833725, + "logps/generated": -672.5110473632812, + "logps/real": -358.855224609375, + "loss": 0.0602, + "rewards/accuracies": 1.0, + "rewards/generated": -29.979761123657227, + "rewards/margins": 27.629995346069336, + "rewards/real": -2.349764347076416, + "step": 2100 + }, + { + "epoch": 0.68, + "learning_rate": 4.3054403223894747e-07, + "logits/generated": 0.8239561915397644, + "logits/real": -0.2564181983470917, + "logps/generated": -684.0074462890625, + "logps/real": -354.76300048828125, + "loss": 0.0756, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -29.912456512451172, + "rewards/margins": 27.336200714111328, + "rewards/real": -2.576260805130005, + "step": 2110 + }, + { + "epoch": 0.68, + "learning_rate": 4.299514045276757e-07, + "logits/generated": 0.6753214597702026, + "logits/real": -0.4390442371368408, + "logps/generated": -799.2703857421875, + "logps/real": -318.34698486328125, + "loss": 0.1472, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -39.853477478027344, + "rewards/margins": 36.59901428222656, + "rewards/real": -3.2544643878936768, + "step": 2120 + }, + { + "epoch": 0.68, + "learning_rate": 4.293587768164039e-07, + "logits/generated": 0.9953800439834595, + "logits/real": -0.3538312315940857, + "logps/generated": -812.5777587890625, + "logps/real": -328.5263671875, + "loss": 0.0639, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -40.352821350097656, + "rewards/margins": 38.133304595947266, + "rewards/real": -2.2195210456848145, + "step": 2130 + }, + { + "epoch": 0.68, + "learning_rate": 4.2876614910513214e-07, + "logits/generated": 1.1399072408676147, + "logits/real": 0.03536539152264595, + "logps/generated": -702.9224853515625, + "logps/real": -322.35809326171875, + "loss": 0.0495, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -35.2816047668457, + "rewards/margins": 30.668594360351562, + "rewards/real": -4.613009452819824, + "step": 2140 + }, + { + "epoch": 0.69, + "learning_rate": 4.281735213938604e-07, + "logits/generated": 0.9407382011413574, + "logits/real": 0.09566624462604523, + "logps/generated": -911.5589599609375, + "logps/real": -360.679931640625, + "loss": 0.0248, + "rewards/accuracies": 1.0, + "rewards/generated": -49.54302215576172, + "rewards/margins": 44.427650451660156, + "rewards/real": -5.115373134613037, + "step": 2150 + }, + { + "epoch": 0.69, + "learning_rate": 4.2758089368258857e-07, + "logits/generated": 0.7384020090103149, + "logits/real": -0.2857319414615631, + "logps/generated": -767.8818969726562, + "logps/real": -369.85693359375, + "loss": 0.1224, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -37.77294921875, + "rewards/margins": 35.26585006713867, + "rewards/real": -2.5070996284484863, + "step": 2160 + }, + { + "epoch": 0.69, + "learning_rate": 4.269882659713168e-07, + "logits/generated": 0.7593884468078613, + "logits/real": -0.10088062286376953, + "logps/generated": -803.2346801757812, + "logps/real": -424.66497802734375, + "loss": 0.0911, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -39.97159957885742, + "rewards/margins": 36.61625289916992, + "rewards/real": -3.3553497791290283, + "step": 2170 + }, + { + "epoch": 0.7, + "learning_rate": 4.2639563826004506e-07, + "logits/generated": 0.6498333215713501, + "logits/real": -0.28292304277420044, + "logps/generated": -615.6293334960938, + "logps/real": -327.6316223144531, + "loss": 0.0275, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -25.123022079467773, + "rewards/margins": 23.52663803100586, + "rewards/real": -1.5963869094848633, + "step": 2180 + }, + { + "epoch": 0.7, + "learning_rate": 4.2580301054877325e-07, + "logits/generated": 0.5390284657478333, + "logits/real": -0.5178315043449402, + "logps/generated": -705.2702026367188, + "logps/real": -345.2986755371094, + "loss": 0.1221, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -31.884349822998047, + "rewards/margins": 29.818450927734375, + "rewards/real": -2.0658998489379883, + "step": 2190 + }, + { + "epoch": 0.7, + "learning_rate": 4.2521038283750143e-07, + "logits/generated": 0.13954707980155945, + "logits/real": -0.8350412249565125, + "logps/generated": -677.1754150390625, + "logps/real": -328.5754089355469, + "loss": 0.0282, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -27.805150985717773, + "rewards/margins": 27.79385757446289, + "rewards/real": -0.011295723728835583, + "step": 2200 + }, + { + "epoch": 0.71, + "learning_rate": 4.246177551262297e-07, + "logits/generated": 0.10666545480489731, + "logits/real": -0.9430726170539856, + "logps/generated": -637.8295288085938, + "logps/real": -362.80181884765625, + "loss": 0.0505, + "rewards/accuracies": 1.0, + "rewards/generated": -25.40670394897461, + "rewards/margins": 25.37242317199707, + "rewards/real": -0.03428385406732559, + "step": 2210 + }, + { + "epoch": 0.71, + "learning_rate": 4.2402512741495787e-07, + "logits/generated": 0.26229003071784973, + "logits/real": -0.7658149003982544, + "logps/generated": -682.8697509765625, + "logps/real": -308.9587097167969, + "loss": 0.0293, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -27.598562240600586, + "rewards/margins": 27.416921615600586, + "rewards/real": -0.18163709342479706, + "step": 2220 + }, + { + "epoch": 0.71, + "learning_rate": 4.234324997036861e-07, + "logits/generated": 0.2817060947418213, + "logits/real": -0.8132452964782715, + "logps/generated": -684.8417358398438, + "logps/real": -323.7658386230469, + "loss": 0.0275, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -29.28280258178711, + "rewards/margins": 29.19304847717285, + "rewards/real": -0.08975468575954437, + "step": 2230 + }, + { + "epoch": 0.72, + "learning_rate": 4.2283987199241435e-07, + "logits/generated": 0.7587065696716309, + "logits/real": -0.7801111936569214, + "logps/generated": -752.9880981445312, + "logps/real": -334.2576599121094, + "loss": 0.0486, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -34.82455825805664, + "rewards/margins": 33.066871643066406, + "rewards/real": -1.7576910257339478, + "step": 2240 + }, + { + "epoch": 0.72, + "learning_rate": 4.2224724428114254e-07, + "logits/generated": 0.5060659050941467, + "logits/real": -0.7594629526138306, + "logps/generated": -717.951171875, + "logps/real": -326.2234802246094, + "loss": 0.0329, + "rewards/accuracies": 1.0, + "rewards/generated": -34.36233139038086, + "rewards/margins": 31.187103271484375, + "rewards/real": -3.17522931098938, + "step": 2250 + }, + { + "epoch": 0.72, + "learning_rate": 4.216546165698708e-07, + "logits/generated": 0.27015620470046997, + "logits/real": -0.9821538925170898, + "logps/generated": -744.8015747070312, + "logps/real": -362.250244140625, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/generated": -37.133689880371094, + "rewards/margins": 34.903221130371094, + "rewards/real": -2.2304701805114746, + "step": 2260 + }, + { + "epoch": 0.73, + "learning_rate": 4.21061988858599e-07, + "logits/generated": 0.8057888150215149, + "logits/real": -0.6995252370834351, + "logps/generated": -900.9762573242188, + "logps/real": -348.0375061035156, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/generated": -50.746665954589844, + "rewards/margins": 46.1430549621582, + "rewards/real": -4.603612899780273, + "step": 2270 + }, + { + "epoch": 0.73, + "learning_rate": 4.204693611473272e-07, + "logits/generated": 0.5594112873077393, + "logits/real": -0.858725905418396, + "logps/generated": -786.5313720703125, + "logps/real": -342.39398193359375, + "loss": 0.0575, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -39.971519470214844, + "rewards/margins": 36.61110305786133, + "rewards/real": -3.3604228496551514, + "step": 2280 + }, + { + "epoch": 0.73, + "learning_rate": 4.1987673343605545e-07, + "logits/generated": 0.47553759813308716, + "logits/real": -0.9600407481193542, + "logps/generated": -716.9024658203125, + "logps/real": -329.8050842285156, + "loss": 0.0415, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -32.34065628051758, + "rewards/margins": 30.928781509399414, + "rewards/real": -1.411874771118164, + "step": 2290 + }, + { + "epoch": 0.74, + "learning_rate": 4.192841057247837e-07, + "logits/generated": 0.6827605962753296, + "logits/real": -0.6279144883155823, + "logps/generated": -748.5231323242188, + "logps/real": -326.0926208496094, + "loss": 0.0301, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -36.044044494628906, + "rewards/margins": 32.811458587646484, + "rewards/real": -3.232586622238159, + "step": 2300 + }, + { + "epoch": 0.74, + "learning_rate": 4.186914780135119e-07, + "logits/generated": 1.258171796798706, + "logits/real": -0.22543036937713623, + "logps/generated": -789.295654296875, + "logps/real": -404.57769775390625, + "loss": 0.0099, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -39.3532600402832, + "rewards/margins": 34.534461975097656, + "rewards/real": -4.8187994956970215, + "step": 2310 + }, + { + "epoch": 0.74, + "learning_rate": 4.180988503022401e-07, + "logits/generated": 0.8405712842941284, + "logits/real": -0.3352198600769043, + "logps/generated": -863.91064453125, + "logps/real": -342.0094909667969, + "loss": 0.1225, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -45.92886734008789, + "rewards/margins": 42.29278564453125, + "rewards/real": -3.636077880859375, + "step": 2320 + }, + { + "epoch": 0.75, + "learning_rate": 4.1750622259096837e-07, + "logits/generated": 0.5765670537948608, + "logits/real": -0.5540295839309692, + "logps/generated": -705.8466796875, + "logps/real": -369.53131103515625, + "loss": 0.0957, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -32.74860763549805, + "rewards/margins": 28.87831687927246, + "rewards/real": -3.8702900409698486, + "step": 2330 + }, + { + "epoch": 0.75, + "learning_rate": 4.1691359487969656e-07, + "logits/generated": 0.7854418158531189, + "logits/real": -0.22548596560955048, + "logps/generated": -680.0382080078125, + "logps/real": -356.9978942871094, + "loss": 0.0195, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -31.84836196899414, + "rewards/margins": 27.478572845458984, + "rewards/real": -4.369787693023682, + "step": 2340 + }, + { + "epoch": 0.75, + "learning_rate": 4.163209671684248e-07, + "logits/generated": 1.065263271331787, + "logits/real": -0.21278850734233856, + "logps/generated": -841.5086669921875, + "logps/real": -358.0589599609375, + "loss": 0.0514, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -43.41203689575195, + "rewards/margins": 38.31954574584961, + "rewards/real": -5.092487335205078, + "step": 2350 + }, + { + "epoch": 0.76, + "learning_rate": 4.1572833945715304e-07, + "logits/generated": 1.261389970779419, + "logits/real": -0.06634785234928131, + "logps/generated": -812.8255004882812, + "logps/real": -410.88409423828125, + "loss": 0.088, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -43.180484771728516, + "rewards/margins": 37.8367919921875, + "rewards/real": -5.34368896484375, + "step": 2360 + }, + { + "epoch": 0.76, + "learning_rate": 4.1513571174588123e-07, + "logits/generated": 1.4714341163635254, + "logits/real": -0.021521415561437607, + "logps/generated": -850.0011596679688, + "logps/real": -387.5086364746094, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/generated": -46.83484649658203, + "rewards/margins": 40.592525482177734, + "rewards/real": -6.242323875427246, + "step": 2370 + }, + { + "epoch": 0.76, + "learning_rate": 4.145430840346094e-07, + "logits/generated": 1.3156936168670654, + "logits/real": -0.018222743645310402, + "logps/generated": -890.7706298828125, + "logps/real": -364.0563049316406, + "loss": 0.0503, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -51.08381652832031, + "rewards/margins": 44.87675094604492, + "rewards/real": -6.2070631980896, + "step": 2380 + }, + { + "epoch": 0.76, + "learning_rate": 4.1395045632333766e-07, + "logits/generated": 1.3237955570220947, + "logits/real": -0.33364278078079224, + "logps/generated": -869.8988037109375, + "logps/real": -334.1073913574219, + "loss": 0.0599, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -46.939788818359375, + "rewards/margins": 45.201969146728516, + "rewards/real": -1.7378181219100952, + "step": 2390 + }, + { + "epoch": 0.77, + "learning_rate": 4.1335782861206585e-07, + "logits/generated": 1.3701781034469604, + "logits/real": -0.269438773393631, + "logps/generated": -753.6387329101562, + "logps/real": -360.86480712890625, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/generated": -37.43694305419922, + "rewards/margins": 33.881229400634766, + "rewards/real": -3.5557167530059814, + "step": 2400 + }, + { + "epoch": 0.77, + "learning_rate": 4.127652009007941e-07, + "logits/generated": 1.4290558099746704, + "logits/real": -0.466459184885025, + "logps/generated": -799.3381958007812, + "logps/real": -360.6759338378906, + "loss": 0.0534, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -40.645240783691406, + "rewards/margins": 38.60773468017578, + "rewards/real": -2.0375008583068848, + "step": 2410 + }, + { + "epoch": 0.77, + "learning_rate": 4.1217257318952233e-07, + "logits/generated": 1.358341932296753, + "logits/real": -0.27567583322525024, + "logps/generated": -753.713623046875, + "logps/real": -386.53875732421875, + "loss": 0.0153, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -37.4615364074707, + "rewards/margins": 35.6826171875, + "rewards/real": -1.7789156436920166, + "step": 2420 + }, + { + "epoch": 0.78, + "learning_rate": 4.115799454782505e-07, + "logits/generated": 1.1428701877593994, + "logits/real": -0.5059648752212524, + "logps/generated": -835.0396728515625, + "logps/real": -323.19036865234375, + "loss": 0.0883, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -45.086402893066406, + "rewards/margins": 44.14323806762695, + "rewards/real": -0.9431692361831665, + "step": 2430 + }, + { + "epoch": 0.78, + "learning_rate": 4.1098731776697876e-07, + "logits/generated": 1.0485864877700806, + "logits/real": -0.4481213688850403, + "logps/generated": -812.9012451171875, + "logps/real": -368.0522155761719, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/generated": -40.25575637817383, + "rewards/margins": 37.742332458496094, + "rewards/real": -2.5134170055389404, + "step": 2440 + }, + { + "epoch": 0.78, + "learning_rate": 4.10394690055707e-07, + "logits/generated": 0.8519207239151001, + "logits/real": -0.6197376251220703, + "logps/generated": -725.4412231445312, + "logps/real": -374.92523193359375, + "loss": 0.0919, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -35.39899826049805, + "rewards/margins": 32.154273986816406, + "rewards/real": -3.2447314262390137, + "step": 2450 + }, + { + "epoch": 0.79, + "learning_rate": 4.098020623444352e-07, + "logits/generated": 0.6339820623397827, + "logits/real": -0.6408648490905762, + "logps/generated": -766.2493286132812, + "logps/real": -304.57000732421875, + "loss": 0.046, + "rewards/accuracies": 1.0, + "rewards/generated": -38.85112762451172, + "rewards/margins": 36.97227096557617, + "rewards/real": -1.878852128982544, + "step": 2460 + }, + { + "epoch": 0.79, + "learning_rate": 4.0920943463316344e-07, + "logits/generated": 0.5412781834602356, + "logits/real": -0.9522072076797485, + "logps/generated": -724.3004150390625, + "logps/real": -409.66693115234375, + "loss": 0.0412, + "rewards/accuracies": 1.0, + "rewards/generated": -33.71949005126953, + "rewards/margins": 31.248050689697266, + "rewards/real": -2.47143816947937, + "step": 2470 + }, + { + "epoch": 0.79, + "learning_rate": 4.086168069218917e-07, + "logits/generated": 0.5702400207519531, + "logits/real": -0.8005739450454712, + "logps/generated": -813.8245849609375, + "logps/real": -353.59234619140625, + "loss": 0.0578, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -42.12620162963867, + "rewards/margins": 39.095558166503906, + "rewards/real": -3.0306408405303955, + "step": 2480 + }, + { + "epoch": 0.8, + "learning_rate": 4.0802417921061987e-07, + "logits/generated": 0.12249743938446045, + "logits/real": -0.9880908131599426, + "logps/generated": -701.9227905273438, + "logps/real": -305.2627868652344, + "loss": 0.0354, + "rewards/accuracies": 1.0, + "rewards/generated": -32.676918029785156, + "rewards/margins": 30.052724838256836, + "rewards/real": -2.624189853668213, + "step": 2490 + }, + { + "epoch": 0.8, + "learning_rate": 4.074315514993481e-07, + "logits/generated": 0.5771058797836304, + "logits/real": -0.8167764544487, + "logps/generated": -770.1046142578125, + "logps/real": -381.06103515625, + "loss": 0.0452, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -39.183433532714844, + "rewards/margins": 34.773338317871094, + "rewards/real": -4.410101413726807, + "step": 2500 + }, + { + "epoch": 0.8, + "learning_rate": 4.0683892378807635e-07, + "logits/generated": 0.7594148516654968, + "logits/real": -0.6675012707710266, + "logps/generated": -858.6218872070312, + "logps/real": -345.3905944824219, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/generated": -43.47161102294922, + "rewards/margins": 40.07362365722656, + "rewards/real": -3.3979930877685547, + "step": 2510 + }, + { + "epoch": 0.81, + "learning_rate": 4.0624629607680454e-07, + "logits/generated": 0.6793375015258789, + "logits/real": -0.8097376823425293, + "logps/generated": -740.8778076171875, + "logps/real": -375.3699645996094, + "loss": 0.0348, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -36.646881103515625, + "rewards/margins": 33.874732971191406, + "rewards/real": -2.772146701812744, + "step": 2520 + }, + { + "epoch": 0.81, + "learning_rate": 4.056536683655328e-07, + "logits/generated": 0.7047010660171509, + "logits/real": -0.57855224609375, + "logps/generated": -712.8539428710938, + "logps/real": -426.24114990234375, + "loss": 0.0759, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -33.26369857788086, + "rewards/margins": 29.167776107788086, + "rewards/real": -4.095925331115723, + "step": 2530 + }, + { + "epoch": 0.81, + "learning_rate": 4.05061040654261e-07, + "logits/generated": 1.5344918966293335, + "logits/real": -0.032828450202941895, + "logps/generated": -729.2464599609375, + "logps/real": -380.0123291015625, + "loss": 0.0481, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -34.09107208251953, + "rewards/margins": 30.152652740478516, + "rewards/real": -3.9384231567382812, + "step": 2540 + }, + { + "epoch": 0.82, + "learning_rate": 4.044684129429892e-07, + "logits/generated": 1.892467737197876, + "logits/real": 0.30082520842552185, + "logps/generated": -806.906005859375, + "logps/real": -386.27667236328125, + "loss": 0.0098, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -40.894954681396484, + "rewards/margins": 36.069820404052734, + "rewards/real": -4.825134754180908, + "step": 2550 + }, + { + "epoch": 0.82, + "learning_rate": 4.038757852317174e-07, + "logits/generated": 1.9323184490203857, + "logits/real": 0.17705607414245605, + "logps/generated": -850.033203125, + "logps/real": -378.65594482421875, + "loss": 0.0234, + "rewards/accuracies": 1.0, + "rewards/generated": -44.78071975708008, + "rewards/margins": 39.56897735595703, + "rewards/real": -5.211737632751465, + "step": 2560 + }, + { + "epoch": 0.82, + "learning_rate": 4.032831575204456e-07, + "logits/generated": 1.6935707330703735, + "logits/real": 0.023059988394379616, + "logps/generated": -759.7291259765625, + "logps/real": -374.7304992675781, + "loss": 0.1038, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -36.43316650390625, + "rewards/margins": 32.626365661621094, + "rewards/real": -3.8068041801452637, + "step": 2570 + }, + { + "epoch": 0.83, + "learning_rate": 4.0269052980917383e-07, + "logits/generated": 1.4341130256652832, + "logits/real": -0.06971609592437744, + "logps/generated": -665.3819580078125, + "logps/real": -333.2209777832031, + "loss": 0.0796, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -30.176000595092773, + "rewards/margins": 27.55510902404785, + "rewards/real": -2.6208901405334473, + "step": 2580 + }, + { + "epoch": 0.83, + "learning_rate": 4.0209790209790207e-07, + "logits/generated": 1.9503005743026733, + "logits/real": 0.09237826615571976, + "logps/generated": -765.220458984375, + "logps/real": -364.03558349609375, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/generated": -35.612098693847656, + "rewards/margins": 32.24553298950195, + "rewards/real": -3.3665618896484375, + "step": 2590 + }, + { + "epoch": 0.83, + "learning_rate": 4.0150527438663026e-07, + "logits/generated": 0.9755905866622925, + "logits/real": -0.44502443075180054, + "logps/generated": -657.8660888671875, + "logps/real": -296.46661376953125, + "loss": 0.1094, + "rewards/accuracies": 1.0, + "rewards/generated": -27.2720947265625, + "rewards/margins": 27.332311630249023, + "rewards/real": 0.06021898239850998, + "step": 2600 + }, + { + "epoch": 0.84, + "learning_rate": 4.009126466753585e-07, + "logits/generated": 1.4939401149749756, + "logits/real": -0.5174384117126465, + "logps/generated": -657.0867309570312, + "logps/real": -356.69244384765625, + "loss": 0.0858, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -25.77162742614746, + "rewards/margins": 25.447311401367188, + "rewards/real": -0.3243153393268585, + "step": 2610 + }, + { + "epoch": 0.84, + "learning_rate": 4.0032001896408675e-07, + "logits/generated": 1.1159507036209106, + "logits/real": -0.2947324216365814, + "logps/generated": -627.3345947265625, + "logps/real": -336.0748291015625, + "loss": 0.0606, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -23.557340621948242, + "rewards/margins": 22.41960906982422, + "rewards/real": -1.1377298831939697, + "step": 2620 + }, + { + "epoch": 0.84, + "learning_rate": 3.9972739125281494e-07, + "logits/generated": 1.513810396194458, + "logits/real": -0.5344418287277222, + "logps/generated": -590.8358154296875, + "logps/real": -370.3486633300781, + "loss": 0.0269, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -22.85166358947754, + "rewards/margins": 20.52071762084961, + "rewards/real": -2.330944299697876, + "step": 2630 + }, + { + "epoch": 0.84, + "learning_rate": 3.991347635415432e-07, + "logits/generated": 1.49335777759552, + "logits/real": -0.3261922001838684, + "logps/generated": -704.072021484375, + "logps/real": -277.5023498535156, + "loss": 0.0315, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -30.9993896484375, + "rewards/margins": 30.11093521118164, + "rewards/real": -0.8884493112564087, + "step": 2640 + }, + { + "epoch": 0.85, + "learning_rate": 3.985421358302714e-07, + "logits/generated": 0.7591944932937622, + "logits/real": -0.5074256062507629, + "logps/generated": -648.0003662109375, + "logps/real": -304.24810791015625, + "loss": 0.0761, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -26.947917938232422, + "rewards/margins": 25.95284080505371, + "rewards/real": -0.995078444480896, + "step": 2650 + }, + { + "epoch": 0.85, + "learning_rate": 3.979495081189996e-07, + "logits/generated": 1.0833790302276611, + "logits/real": -0.6860691905021667, + "logps/generated": -735.9494018554688, + "logps/real": -347.7568359375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/generated": -32.68233871459961, + "rewards/margins": 30.6114444732666, + "rewards/real": -2.0708956718444824, + "step": 2660 + }, + { + "epoch": 0.85, + "learning_rate": 3.9735688040772785e-07, + "logits/generated": 0.9727069139480591, + "logits/real": -0.7404571771621704, + "logps/generated": -681.7575073242188, + "logps/real": -368.0675048828125, + "loss": 0.0155, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -28.452762603759766, + "rewards/margins": 25.981693267822266, + "rewards/real": -2.47106671333313, + "step": 2670 + }, + { + "epoch": 0.86, + "learning_rate": 3.967642526964561e-07, + "logits/generated": 1.2547314167022705, + "logits/real": -0.7258102893829346, + "logps/generated": -676.3906860351562, + "logps/real": -340.873779296875, + "loss": 0.0195, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -27.694448471069336, + "rewards/margins": 25.771648406982422, + "rewards/real": -1.9227993488311768, + "step": 2680 + }, + { + "epoch": 0.86, + "learning_rate": 3.961716249851843e-07, + "logits/generated": 0.6079329252243042, + "logits/real": -0.9624984860420227, + "logps/generated": -601.7574462890625, + "logps/real": -360.03955078125, + "loss": 0.057, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -22.048744201660156, + "rewards/margins": 21.088842391967773, + "rewards/real": -0.9599024057388306, + "step": 2690 + }, + { + "epoch": 0.86, + "learning_rate": 3.955789972739125e-07, + "logits/generated": 1.1258697509765625, + "logits/real": -0.8072816133499146, + "logps/generated": -684.0726928710938, + "logps/real": -338.45941162109375, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/generated": -28.91874122619629, + "rewards/margins": 27.85943603515625, + "rewards/real": -1.059303879737854, + "step": 2700 + }, + { + "epoch": 0.87, + "learning_rate": 3.9498636956264076e-07, + "logits/generated": 1.4965988397598267, + "logits/real": -0.3411404490470886, + "logps/generated": -697.1265869140625, + "logps/real": -352.145751953125, + "loss": 0.042, + "rewards/accuracies": 1.0, + "rewards/generated": -32.76347732543945, + "rewards/margins": 29.158267974853516, + "rewards/real": -3.6052098274230957, + "step": 2710 + }, + { + "epoch": 0.87, + "learning_rate": 3.9439374185136895e-07, + "logits/generated": 1.4621516466140747, + "logits/real": -0.5707725286483765, + "logps/generated": -785.8665161132812, + "logps/real": -319.3700256347656, + "loss": 0.0298, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -35.61449432373047, + "rewards/margins": 33.8472900390625, + "rewards/real": -1.767205834388733, + "step": 2720 + }, + { + "epoch": 0.87, + "learning_rate": 3.9380111414009714e-07, + "logits/generated": 1.1874886751174927, + "logits/real": -0.09907079488039017, + "logps/generated": -687.0340576171875, + "logps/real": -303.25836181640625, + "loss": 0.0492, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -31.032939910888672, + "rewards/margins": 27.3311710357666, + "rewards/real": -3.7017662525177, + "step": 2730 + }, + { + "epoch": 0.88, + "learning_rate": 3.932084864288254e-07, + "logits/generated": 1.5363190174102783, + "logits/real": -0.20464110374450684, + "logps/generated": -632.426513671875, + "logps/real": -320.273681640625, + "loss": 0.1163, + "rewards/accuracies": 1.0, + "rewards/generated": -26.085735321044922, + "rewards/margins": 24.259374618530273, + "rewards/real": -1.8263591527938843, + "step": 2740 + }, + { + "epoch": 0.88, + "learning_rate": 3.9261585871755357e-07, + "logits/generated": 1.5121079683303833, + "logits/real": -0.11793769896030426, + "logps/generated": -633.5842895507812, + "logps/real": -350.47589111328125, + "loss": 0.0154, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -25.712158203125, + "rewards/margins": 24.083269119262695, + "rewards/real": -1.6288902759552002, + "step": 2750 + }, + { + "epoch": 0.88, + "learning_rate": 3.920232310062818e-07, + "logits/generated": 1.3446996212005615, + "logits/real": -0.29524847865104675, + "logps/generated": -689.9698486328125, + "logps/real": -320.5229187011719, + "loss": 0.0344, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -30.11871337890625, + "rewards/margins": 29.273574829101562, + "rewards/real": -0.8451415300369263, + "step": 2760 + }, + { + "epoch": 0.89, + "learning_rate": 3.9143060329501006e-07, + "logits/generated": 1.274344563484192, + "logits/real": -0.36024925112724304, + "logps/generated": -632.7415161132812, + "logps/real": -304.83941650390625, + "loss": 0.1162, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -23.04990005493164, + "rewards/margins": 23.171749114990234, + "rewards/real": 0.12185032665729523, + "step": 2770 + }, + { + "epoch": 0.89, + "learning_rate": 3.9083797558373825e-07, + "logits/generated": 1.2794438600540161, + "logits/real": -0.06939432770013809, + "logps/generated": -634.8363037109375, + "logps/real": -288.30963134765625, + "loss": 0.0578, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -23.818239212036133, + "rewards/margins": 23.314865112304688, + "rewards/real": -0.5033752918243408, + "step": 2780 + }, + { + "epoch": 0.89, + "learning_rate": 3.902453478724665e-07, + "logits/generated": 1.4398317337036133, + "logits/real": 0.08840557187795639, + "logps/generated": -682.88525390625, + "logps/real": -367.47454833984375, + "loss": 0.0726, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -27.898357391357422, + "rewards/margins": 26.814105987548828, + "rewards/real": -1.0842539072036743, + "step": 2790 + }, + { + "epoch": 0.9, + "learning_rate": 3.8965272016119473e-07, + "logits/generated": 1.6847522258758545, + "logits/real": 0.17343257367610931, + "logps/generated": -731.3448486328125, + "logps/real": -316.51141357421875, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/generated": -30.514354705810547, + "rewards/margins": 29.706554412841797, + "rewards/real": -0.8077989816665649, + "step": 2800 + }, + { + "epoch": 0.9, + "learning_rate": 3.890600924499229e-07, + "logits/generated": 1.7153816223144531, + "logits/real": -0.006909878458827734, + "logps/generated": -734.4896240234375, + "logps/real": -339.341064453125, + "loss": 0.0236, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -32.376312255859375, + "rewards/margins": 31.589298248291016, + "rewards/real": -0.7870132923126221, + "step": 2810 + }, + { + "epoch": 0.9, + "learning_rate": 3.8846746473865116e-07, + "logits/generated": 1.4768227338790894, + "logits/real": 0.0006786882877349854, + "logps/generated": -782.7421875, + "logps/real": -317.4217834472656, + "loss": 0.0289, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -36.7784423828125, + "rewards/margins": 35.54150390625, + "rewards/real": -1.2369422912597656, + "step": 2820 + }, + { + "epoch": 0.91, + "learning_rate": 3.878748370273794e-07, + "logits/generated": 1.4829686880111694, + "logits/real": -0.4692135453224182, + "logps/generated": -734.4862060546875, + "logps/real": -355.86480712890625, + "loss": 0.0521, + "rewards/accuracies": 1.0, + "rewards/generated": -32.499908447265625, + "rewards/margins": 31.743759155273438, + "rewards/real": -0.7561527490615845, + "step": 2830 + }, + { + "epoch": 0.91, + "learning_rate": 3.872822093161076e-07, + "logits/generated": 1.4394299983978271, + "logits/real": -0.4337243139743805, + "logps/generated": -692.165771484375, + "logps/real": -344.65228271484375, + "loss": 0.06, + "rewards/accuracies": 1.0, + "rewards/generated": -28.684391021728516, + "rewards/margins": 28.18329429626465, + "rewards/real": -0.5010913014411926, + "step": 2840 + }, + { + "epoch": 0.91, + "learning_rate": 3.8668958160483583e-07, + "logits/generated": 0.8190478086471558, + "logits/real": -0.667715847492218, + "logps/generated": -671.9428100585938, + "logps/real": -304.90570068359375, + "loss": 0.0786, + "rewards/accuracies": 1.0, + "rewards/generated": -28.43863296508789, + "rewards/margins": 29.4898624420166, + "rewards/real": 1.051224946975708, + "step": 2850 + }, + { + "epoch": 0.92, + "learning_rate": 3.860969538935641e-07, + "logits/generated": 1.1253232955932617, + "logits/real": -0.7709970474243164, + "logps/generated": -688.5896606445312, + "logps/real": -313.30645751953125, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/generated": -29.60751724243164, + "rewards/margins": 30.093318939208984, + "rewards/real": 0.48580265045166016, + "step": 2860 + }, + { + "epoch": 0.92, + "learning_rate": 3.8550432618229226e-07, + "logits/generated": 0.8397982716560364, + "logits/real": -0.8318503499031067, + "logps/generated": -710.3811645507812, + "logps/real": -330.5302734375, + "loss": 0.0298, + "rewards/accuracies": 1.0, + "rewards/generated": -31.2648983001709, + "rewards/margins": 31.341577529907227, + "rewards/real": 0.07668063789606094, + "step": 2870 + }, + { + "epoch": 0.92, + "learning_rate": 3.849116984710205e-07, + "logits/generated": 0.9829031825065613, + "logits/real": -0.5523896217346191, + "logps/generated": -759.4322509765625, + "logps/real": -311.7576904296875, + "loss": 0.0791, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -34.34369659423828, + "rewards/margins": 34.16350555419922, + "rewards/real": -0.18019168078899384, + "step": 2880 + }, + { + "epoch": 0.92, + "learning_rate": 3.8431907075974875e-07, + "logits/generated": 1.3364379405975342, + "logits/real": -0.6124747395515442, + "logps/generated": -725.3342895507812, + "logps/real": -345.75811767578125, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/generated": -33.45618438720703, + "rewards/margins": 31.886425018310547, + "rewards/real": -1.5697633028030396, + "step": 2890 + }, + { + "epoch": 0.93, + "learning_rate": 3.8372644304847694e-07, + "logits/generated": 1.2526007890701294, + "logits/real": -0.4890497326850891, + "logps/generated": -765.4427490234375, + "logps/real": -321.2254333496094, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/generated": -35.273494720458984, + "rewards/margins": 34.765052795410156, + "rewards/real": -0.5084399580955505, + "step": 2900 + }, + { + "epoch": 0.93, + "learning_rate": 3.831338153372051e-07, + "logits/generated": 1.3250693082809448, + "logits/real": -0.5833691358566284, + "logps/generated": -737.9568481445312, + "logps/real": -363.9207763671875, + "loss": 0.035, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -33.84966278076172, + "rewards/margins": 31.976970672607422, + "rewards/real": -1.8726847171783447, + "step": 2910 + }, + { + "epoch": 0.93, + "learning_rate": 3.8254118762593337e-07, + "logits/generated": 1.3177597522735596, + "logits/real": -0.6353263258934021, + "logps/generated": -697.212890625, + "logps/real": -352.11224365234375, + "loss": 0.0237, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -32.049537658691406, + "rewards/margins": 31.983068466186523, + "rewards/real": -0.06646408140659332, + "step": 2920 + }, + { + "epoch": 0.94, + "learning_rate": 3.8194855991466156e-07, + "logits/generated": 1.6161171197891235, + "logits/real": -0.48282140493392944, + "logps/generated": -801.6009521484375, + "logps/real": -340.4782409667969, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/generated": -39.814510345458984, + "rewards/margins": 39.41621780395508, + "rewards/real": -0.3982974588871002, + "step": 2930 + }, + { + "epoch": 0.94, + "learning_rate": 3.813559322033898e-07, + "logits/generated": 1.6583149433135986, + "logits/real": -0.20981892943382263, + "logps/generated": -731.3494873046875, + "logps/real": -325.0882873535156, + "loss": 0.0484, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -35.697593688964844, + "rewards/margins": 33.39555358886719, + "rewards/real": -2.3020355701446533, + "step": 2940 + }, + { + "epoch": 0.94, + "learning_rate": 3.8076330449211804e-07, + "logits/generated": 1.1138355731964111, + "logits/real": -0.4513324797153473, + "logps/generated": -596.7747802734375, + "logps/real": -337.89434814453125, + "loss": 0.0567, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -25.63961410522461, + "rewards/margins": 24.464258193969727, + "rewards/real": -1.175354242324829, + "step": 2950 + }, + { + "epoch": 0.95, + "learning_rate": 3.8017067678084623e-07, + "logits/generated": 1.5525901317596436, + "logits/real": -0.5297213196754456, + "logps/generated": -758.9441528320312, + "logps/real": -332.8437805175781, + "loss": 0.0674, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -34.707679748535156, + "rewards/margins": 34.92395782470703, + "rewards/real": 0.21627798676490784, + "step": 2960 + }, + { + "epoch": 0.95, + "learning_rate": 3.7957804906957447e-07, + "logits/generated": 1.6052268743515015, + "logits/real": -0.3243894875049591, + "logps/generated": -763.9934692382812, + "logps/real": -324.00164794921875, + "loss": 0.051, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -35.371944427490234, + "rewards/margins": 34.579681396484375, + "rewards/real": -0.7922636270523071, + "step": 2970 + }, + { + "epoch": 0.95, + "learning_rate": 3.789854213583027e-07, + "logits/generated": 1.6722183227539062, + "logits/real": -0.36099866032600403, + "logps/generated": -811.2255859375, + "logps/real": -328.02349853515625, + "loss": 0.0464, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -40.00334930419922, + "rewards/margins": 38.65221405029297, + "rewards/real": -1.3511369228363037, + "step": 2980 + }, + { + "epoch": 0.96, + "learning_rate": 3.783927936470309e-07, + "logits/generated": 1.4396727085113525, + "logits/real": -0.5413111448287964, + "logps/generated": -665.5133666992188, + "logps/real": -355.2272033691406, + "loss": 0.1088, + "rewards/accuracies": 0.925000011920929, + "rewards/generated": -29.053966522216797, + "rewards/margins": 28.401805877685547, + "rewards/real": -0.6521603465080261, + "step": 2990 + }, + { + "epoch": 0.96, + "learning_rate": 3.7780016593575914e-07, + "logits/generated": 1.3753300905227661, + "logits/real": -0.7443369030952454, + "logps/generated": -716.5177001953125, + "logps/real": -298.4733581542969, + "loss": 0.0703, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -32.979698181152344, + "rewards/margins": 32.879981994628906, + "rewards/real": -0.09971854090690613, + "step": 3000 + }, + { + "epoch": 0.96, + "learning_rate": 3.772075382244874e-07, + "logits/generated": 1.416666030883789, + "logits/real": -0.7260184288024902, + "logps/generated": -735.27880859375, + "logps/real": -341.00372314453125, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/generated": -35.005699157714844, + "rewards/margins": 34.513038635253906, + "rewards/real": -0.49265843629837036, + "step": 3010 + }, + { + "epoch": 0.97, + "learning_rate": 3.766149105132156e-07, + "logits/generated": 0.7679153680801392, + "logits/real": -1.0455870628356934, + "logps/generated": -735.6075439453125, + "logps/real": -299.59149169921875, + "loss": 0.0286, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -33.97742462158203, + "rewards/margins": 34.56840133666992, + "rewards/real": 0.5909751653671265, + "step": 3020 + }, + { + "epoch": 0.97, + "learning_rate": 3.760222828019438e-07, + "logits/generated": 1.5767030715942383, + "logits/real": -0.9797650575637817, + "logps/generated": -789.6905517578125, + "logps/real": -362.8406677246094, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/generated": -38.47359085083008, + "rewards/margins": 37.2587890625, + "rewards/real": -1.2148017883300781, + "step": 3030 + }, + { + "epoch": 0.97, + "learning_rate": 3.7542965509067206e-07, + "logits/generated": 1.5456877946853638, + "logits/real": -0.6443753242492676, + "logps/generated": -731.9356689453125, + "logps/real": -371.6806945800781, + "loss": 0.044, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -34.98523712158203, + "rewards/margins": 33.70503616333008, + "rewards/real": -1.2801988124847412, + "step": 3040 + }, + { + "epoch": 0.98, + "learning_rate": 3.7483702737940025e-07, + "logits/generated": 2.2266459465026855, + "logits/real": -0.1782468557357788, + "logps/generated": -922.0955200195312, + "logps/real": -302.81658935546875, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/generated": -50.11591339111328, + "rewards/margins": 47.3745231628418, + "rewards/real": -2.741389751434326, + "step": 3050 + }, + { + "epoch": 0.98, + "learning_rate": 3.742443996681285e-07, + "logits/generated": 2.1318583488464355, + "logits/real": -0.209198996424675, + "logps/generated": -823.5095825195312, + "logps/real": -371.42889404296875, + "loss": 0.0581, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -43.204071044921875, + "rewards/margins": 38.97028732299805, + "rewards/real": -4.233780384063721, + "step": 3060 + }, + { + "epoch": 0.98, + "learning_rate": 3.7365177195685673e-07, + "logits/generated": 1.9918349981307983, + "logits/real": -0.06721341609954834, + "logps/generated": -746.8140869140625, + "logps/real": -359.9891662597656, + "loss": 0.0571, + "rewards/accuracies": 1.0, + "rewards/generated": -38.51268005371094, + "rewards/margins": 33.767127990722656, + "rewards/real": -4.74554443359375, + "step": 3070 + }, + { + "epoch": 0.99, + "learning_rate": 3.730591442455849e-07, + "logits/generated": 2.0346789360046387, + "logits/real": 0.10375523567199707, + "logps/generated": -743.0701904296875, + "logps/real": -356.4525146484375, + "loss": 0.0464, + "rewards/accuracies": 1.0, + "rewards/generated": -37.91783905029297, + "rewards/margins": 32.76781463623047, + "rewards/real": -5.150022506713867, + "step": 3080 + }, + { + "epoch": 0.99, + "learning_rate": 3.724665165343131e-07, + "logits/generated": 1.9749126434326172, + "logits/real": 0.11264216899871826, + "logps/generated": -817.4457397460938, + "logps/real": -388.92205810546875, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/generated": -44.377166748046875, + "rewards/margins": 40.227516174316406, + "rewards/real": -4.149655818939209, + "step": 3090 + }, + { + "epoch": 0.99, + "learning_rate": 3.7187388882304135e-07, + "logits/generated": 2.158161163330078, + "logits/real": 0.28727996349334717, + "logps/generated": -889.4269409179688, + "logps/real": -356.7685241699219, + "loss": 0.0187, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -50.5318603515625, + "rewards/margins": 44.726715087890625, + "rewards/real": -5.80515193939209, + "step": 3100 + }, + { + "epoch": 1.0, + "learning_rate": 3.7128126111176954e-07, + "logits/generated": 2.0267348289489746, + "logits/real": -0.09791239351034164, + "logps/generated": -876.57080078125, + "logps/real": -396.95074462890625, + "loss": 0.0382, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -48.95111846923828, + "rewards/margins": 43.48713302612305, + "rewards/real": -5.463984966278076, + "step": 3110 + }, + { + "epoch": 1.0, + "learning_rate": 3.706886334004978e-07, + "logits/generated": 1.8139718770980835, + "logits/real": -0.5260879993438721, + "logps/generated": -770.6980590820312, + "logps/real": -346.76629638671875, + "loss": 0.0891, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -40.29029846191406, + "rewards/margins": 38.30472183227539, + "rewards/real": -1.9855766296386719, + "step": 3120 + }, + { + "epoch": 1.0, + "learning_rate": 3.70096005689226e-07, + "logits/generated": 1.0830538272857666, + "logits/real": -1.0247769355773926, + "logps/generated": -785.00537109375, + "logps/real": -329.46380615234375, + "loss": 0.0264, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -40.738319396972656, + "rewards/margins": 38.62738800048828, + "rewards/real": -2.1109251976013184, + "step": 3130 + }, + { + "epoch": 1.0, + "learning_rate": 3.695033779779542e-07, + "logits/generated": 1.399590253829956, + "logits/real": -0.8435190320014954, + "logps/generated": -834.1033325195312, + "logps/real": -364.3261413574219, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/generated": -43.888214111328125, + "rewards/margins": 41.6268310546875, + "rewards/real": -2.261387348175049, + "step": 3140 + }, + { + "epoch": 1.01, + "learning_rate": 3.6891075026668245e-07, + "logits/generated": 1.4242956638336182, + "logits/real": -0.9070215225219727, + "logps/generated": -738.67138671875, + "logps/real": -371.7659912109375, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/generated": -35.76353073120117, + "rewards/margins": 34.30432891845703, + "rewards/real": -1.4592043161392212, + "step": 3150 + }, + { + "epoch": 1.01, + "learning_rate": 3.683181225554107e-07, + "logits/generated": 1.7003734111785889, + "logits/real": -0.6616460084915161, + "logps/generated": -864.3294067382812, + "logps/real": -326.4926452636719, + "loss": 0.0441, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -46.684593200683594, + "rewards/margins": 43.968421936035156, + "rewards/real": -2.7161707878112793, + "step": 3160 + }, + { + "epoch": 1.01, + "learning_rate": 3.677254948441389e-07, + "logits/generated": 1.5160343647003174, + "logits/real": -0.3163232207298279, + "logps/generated": -698.2178955078125, + "logps/real": -357.4117736816406, + "loss": 0.0113, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -35.3349723815918, + "rewards/margins": 32.157203674316406, + "rewards/real": -3.177769660949707, + "step": 3170 + }, + { + "epoch": 1.02, + "learning_rate": 3.6713286713286713e-07, + "logits/generated": 1.8610050678253174, + "logits/real": -0.1374693214893341, + "logps/generated": -784.779052734375, + "logps/real": -410.440185546875, + "loss": 0.0312, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -39.53495407104492, + "rewards/margins": 34.9903678894043, + "rewards/real": -4.54458475112915, + "step": 3180 + }, + { + "epoch": 1.02, + "learning_rate": 3.6654023942159537e-07, + "logits/generated": 1.525282382965088, + "logits/real": -0.2642039656639099, + "logps/generated": -842.4094848632812, + "logps/real": -385.2251892089844, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/generated": -43.943077087402344, + "rewards/margins": 39.83400344848633, + "rewards/real": -4.109073638916016, + "step": 3190 + }, + { + "epoch": 1.02, + "learning_rate": 3.6594761171032356e-07, + "logits/generated": 1.472769021987915, + "logits/real": -0.2506261169910431, + "logps/generated": -892.1646728515625, + "logps/real": -318.24041748046875, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/generated": -48.223716735839844, + "rewards/margins": 45.1031379699707, + "rewards/real": -3.1205811500549316, + "step": 3200 + }, + { + "epoch": 1.03, + "learning_rate": 3.653549839990518e-07, + "logits/generated": 1.7654889822006226, + "logits/real": -0.418379008769989, + "logps/generated": -924.9537353515625, + "logps/real": -380.7850341796875, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/generated": -51.7067756652832, + "rewards/margins": 46.04342269897461, + "rewards/real": -5.663352966308594, + "step": 3210 + }, + { + "epoch": 1.03, + "learning_rate": 3.6476235628778004e-07, + "logits/generated": 1.3192102909088135, + "logits/real": -0.404385507106781, + "logps/generated": -909.3551025390625, + "logps/real": -357.1786804199219, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/generated": -50.95429229736328, + "rewards/margins": 46.896427154541016, + "rewards/real": -4.057864665985107, + "step": 3220 + }, + { + "epoch": 1.03, + "learning_rate": 3.6416972857650823e-07, + "logits/generated": 1.3335298299789429, + "logits/real": -0.519232451915741, + "logps/generated": -801.8142700195312, + "logps/real": -407.16845703125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -41.18467330932617, + "rewards/margins": 36.16065216064453, + "rewards/real": -5.024024963378906, + "step": 3230 + }, + { + "epoch": 1.04, + "learning_rate": 3.6357710086523647e-07, + "logits/generated": 1.255618691444397, + "logits/real": -0.41836825013160706, + "logps/generated": -905.0087890625, + "logps/real": -326.54229736328125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/generated": -50.212615966796875, + "rewards/margins": 46.54174041748047, + "rewards/real": -3.6708786487579346, + "step": 3240 + }, + { + "epoch": 1.04, + "learning_rate": 3.629844731539647e-07, + "logits/generated": 1.5446131229400635, + "logits/real": -0.5027315020561218, + "logps/generated": -786.07666015625, + "logps/real": -390.6357421875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -41.219154357910156, + "rewards/margins": 36.56498718261719, + "rewards/real": -4.654166221618652, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 3.6239184544269285e-07, + "logits/generated": 1.2802150249481201, + "logits/real": -0.33009278774261475, + "logps/generated": -786.0957641601562, + "logps/real": -380.52276611328125, + "loss": 0.0229, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -40.477134704589844, + "rewards/margins": 36.94097137451172, + "rewards/real": -3.536158323287964, + "step": 3260 + }, + { + "epoch": 1.05, + "learning_rate": 3.617992177314211e-07, + "logits/generated": 1.5045629739761353, + "logits/real": -0.4740613102912903, + "logps/generated": -863.31640625, + "logps/real": -358.13055419921875, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/generated": -46.59673309326172, + "rewards/margins": 42.50022506713867, + "rewards/real": -4.096514701843262, + "step": 3270 + }, + { + "epoch": 1.05, + "learning_rate": 3.612065900201493e-07, + "logits/generated": 1.1638386249542236, + "logits/real": -0.5954693555831909, + "logps/generated": -802.9481201171875, + "logps/real": -337.4355163574219, + "loss": 0.0219, + "rewards/accuracies": 1.0, + "rewards/generated": -39.91973876953125, + "rewards/margins": 38.482093811035156, + "rewards/real": -1.4376416206359863, + "step": 3280 + }, + { + "epoch": 1.05, + "learning_rate": 3.606139623088775e-07, + "logits/generated": 1.0161828994750977, + "logits/real": -0.6421123743057251, + "logps/generated": -781.8851928710938, + "logps/real": -335.606689453125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/generated": -40.73774337768555, + "rewards/margins": 39.00830841064453, + "rewards/real": -1.7294337749481201, + "step": 3290 + }, + { + "epoch": 1.06, + "learning_rate": 3.6002133459760576e-07, + "logits/generated": 1.5001237392425537, + "logits/real": -0.3688901364803314, + "logps/generated": -846.2230224609375, + "logps/real": -343.529052734375, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/generated": -45.00919723510742, + "rewards/margins": 42.41929244995117, + "rewards/real": -2.5899059772491455, + "step": 3300 + }, + { + "epoch": 1.06, + "learning_rate": 3.5942870688633395e-07, + "logits/generated": 2.151215076446533, + "logits/real": 0.26790112257003784, + "logps/generated": -887.7449340820312, + "logps/real": -350.8306884765625, + "loss": 0.0405, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -49.190330505371094, + "rewards/margins": 43.633689880371094, + "rewards/real": -5.556635856628418, + "step": 3310 + }, + { + "epoch": 1.06, + "learning_rate": 3.588360791750622e-07, + "logits/generated": 1.8760957717895508, + "logits/real": 0.3259205222129822, + "logps/generated": -811.8831176757812, + "logps/real": -371.9270324707031, + "loss": 0.0171, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -43.311912536621094, + "rewards/margins": 38.103431701660156, + "rewards/real": -5.208489418029785, + "step": 3320 + }, + { + "epoch": 1.07, + "learning_rate": 3.5824345146379044e-07, + "logits/generated": 2.2999815940856934, + "logits/real": 0.4950522780418396, + "logps/generated": -886.4505004882812, + "logps/real": -429.54644775390625, + "loss": 0.0435, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -49.098411560058594, + "rewards/margins": 40.163063049316406, + "rewards/real": -8.935354232788086, + "step": 3330 + }, + { + "epoch": 1.07, + "learning_rate": 3.576508237525186e-07, + "logits/generated": 2.213822841644287, + "logits/real": 0.05794317275285721, + "logps/generated": -868.7337036132812, + "logps/real": -422.2870178222656, + "loss": 0.0205, + "rewards/accuracies": 1.0, + "rewards/generated": -47.833824157714844, + "rewards/margins": 39.84661865234375, + "rewards/real": -7.987205505371094, + "step": 3340 + }, + { + "epoch": 1.07, + "learning_rate": 3.5705819604124687e-07, + "logits/generated": 1.9691988229751587, + "logits/real": -0.0420486219227314, + "logps/generated": -902.9676513671875, + "logps/real": -397.20849609375, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/generated": -50.641273498535156, + "rewards/margins": 44.71895217895508, + "rewards/real": -5.922321319580078, + "step": 3350 + }, + { + "epoch": 1.08, + "learning_rate": 3.564655683299751e-07, + "logits/generated": 1.859004259109497, + "logits/real": 0.009420597925782204, + "logps/generated": -985.9837036132812, + "logps/real": -397.4696350097656, + "loss": 0.0063, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -57.76782989501953, + "rewards/margins": 50.63762664794922, + "rewards/real": -7.130200386047363, + "step": 3360 + }, + { + "epoch": 1.08, + "learning_rate": 3.558729406187033e-07, + "logits/generated": 2.1972978115081787, + "logits/real": -0.18132592737674713, + "logps/generated": -1021.9801025390625, + "logps/real": -328.14886474609375, + "loss": 0.0137, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -59.56706619262695, + "rewards/margins": 55.32207107543945, + "rewards/real": -4.244994163513184, + "step": 3370 + }, + { + "epoch": 1.08, + "learning_rate": 3.5528031290743154e-07, + "logits/generated": 2.036914348602295, + "logits/real": -0.1863768994808197, + "logps/generated": -881.8966674804688, + "logps/real": -380.7073669433594, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/generated": -48.05780792236328, + "rewards/margins": 43.82422637939453, + "rewards/real": -4.233584403991699, + "step": 3380 + }, + { + "epoch": 1.08, + "learning_rate": 3.546876851961598e-07, + "logits/generated": 1.5535552501678467, + "logits/real": -0.3347831964492798, + "logps/generated": -863.25, + "logps/real": -407.659912109375, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/generated": -49.036827087402344, + "rewards/margins": 44.28205490112305, + "rewards/real": -4.754773139953613, + "step": 3390 + }, + { + "epoch": 1.09, + "learning_rate": 3.5409505748488797e-07, + "logits/generated": 1.630731225013733, + "logits/real": -0.2988981604576111, + "logps/generated": -840.0236206054688, + "logps/real": -379.4244689941406, + "loss": 0.0041, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -45.604461669921875, + "rewards/margins": 40.27696228027344, + "rewards/real": -5.327497959136963, + "step": 3400 + }, + { + "epoch": 1.09, + "learning_rate": 3.535024297736162e-07, + "logits/generated": 1.6541545391082764, + "logits/real": -0.15251055359840393, + "logps/generated": -982.9104614257812, + "logps/real": -316.5390930175781, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/generated": -56.670555114746094, + "rewards/margins": 53.13850021362305, + "rewards/real": -3.5320611000061035, + "step": 3410 + }, + { + "epoch": 1.09, + "learning_rate": 3.5290980206234446e-07, + "logits/generated": 2.097510814666748, + "logits/real": 0.2182501256465912, + "logps/generated": -903.2131958007812, + "logps/real": -363.33587646484375, + "loss": 0.029, + "rewards/accuracies": 1.0, + "rewards/generated": -50.18659591674805, + "rewards/margins": 45.237586975097656, + "rewards/real": -4.949007034301758, + "step": 3420 + }, + { + "epoch": 1.1, + "learning_rate": 3.5231717435107264e-07, + "logits/generated": 1.8379976749420166, + "logits/real": 0.5325809121131897, + "logps/generated": -956.9562377929688, + "logps/real": -335.36041259765625, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/generated": -55.28343963623047, + "rewards/margins": 47.789520263671875, + "rewards/real": -7.493921756744385, + "step": 3430 + }, + { + "epoch": 1.1, + "learning_rate": 3.5172454663980083e-07, + "logits/generated": 2.3564677238464355, + "logits/real": 0.6742401123046875, + "logps/generated": -982.9732666015625, + "logps/real": -360.76025390625, + "loss": 0.0089, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -57.386474609375, + "rewards/margins": 51.120643615722656, + "rewards/real": -6.2658305168151855, + "step": 3440 + }, + { + "epoch": 1.1, + "learning_rate": 3.511319189285291e-07, + "logits/generated": 2.4261927604675293, + "logits/real": 0.7835529446601868, + "logps/generated": -946.44775390625, + "logps/real": -371.6916809082031, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -56.288360595703125, + "rewards/margins": 47.98066711425781, + "rewards/real": -8.307696342468262, + "step": 3450 + }, + { + "epoch": 1.11, + "learning_rate": 3.5053929121725726e-07, + "logits/generated": 2.521564483642578, + "logits/real": 1.0440590381622314, + "logps/generated": -937.5362548828125, + "logps/real": -361.4960021972656, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/generated": -54.28572463989258, + "rewards/margins": 45.64744186401367, + "rewards/real": -8.638291358947754, + "step": 3460 + }, + { + "epoch": 1.11, + "learning_rate": 3.499466635059855e-07, + "logits/generated": 2.1938862800598145, + "logits/real": 0.6073617935180664, + "logps/generated": -873.291015625, + "logps/real": -421.0392150878906, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -49.010807037353516, + "rewards/margins": 43.64905548095703, + "rewards/real": -5.361753940582275, + "step": 3470 + }, + { + "epoch": 1.11, + "learning_rate": 3.4935403579471375e-07, + "logits/generated": 2.1016104221343994, + "logits/real": 0.6839932203292847, + "logps/generated": -827.5435791015625, + "logps/real": -366.30169677734375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/generated": -44.64368438720703, + "rewards/margins": 39.09661865234375, + "rewards/real": -5.547061920166016, + "step": 3480 + }, + { + "epoch": 1.12, + "learning_rate": 3.4876140808344194e-07, + "logits/generated": 2.2634100914001465, + "logits/real": 0.384492963552475, + "logps/generated": -858.8563232421875, + "logps/real": -380.0473937988281, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/generated": -45.956275939941406, + "rewards/margins": 40.474647521972656, + "rewards/real": -5.481632709503174, + "step": 3490 + }, + { + "epoch": 1.12, + "learning_rate": 3.481687803721702e-07, + "logits/generated": 2.3847341537475586, + "logits/real": 0.3874856233596802, + "logps/generated": -764.6117553710938, + "logps/real": -375.92266845703125, + "loss": 0.0542, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -39.28316116333008, + "rewards/margins": 34.778892517089844, + "rewards/real": -4.504271507263184, + "step": 3500 + }, + { + "epoch": 1.12, + "learning_rate": 3.475761526608984e-07, + "logits/generated": 2.0984909534454346, + "logits/real": 0.2309379279613495, + "logps/generated": -733.2493896484375, + "logps/real": -395.85394287109375, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/generated": -34.38832473754883, + "rewards/margins": 31.740093231201172, + "rewards/real": -2.648231029510498, + "step": 3510 + }, + { + "epoch": 1.13, + "learning_rate": 3.469835249496266e-07, + "logits/generated": 1.9958127737045288, + "logits/real": 0.1922609508037567, + "logps/generated": -799.5538330078125, + "logps/real": -367.2142028808594, + "loss": 0.0503, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -40.75733184814453, + "rewards/margins": 36.59238815307617, + "rewards/real": -4.164941310882568, + "step": 3520 + }, + { + "epoch": 1.13, + "learning_rate": 3.4639089723835485e-07, + "logits/generated": 1.7854121923446655, + "logits/real": 0.46532678604125977, + "logps/generated": -833.0718994140625, + "logps/real": -386.58343505859375, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/generated": -42.356361389160156, + "rewards/margins": 37.993431091308594, + "rewards/real": -4.362931251525879, + "step": 3530 + }, + { + "epoch": 1.13, + "learning_rate": 3.457982695270831e-07, + "logits/generated": 2.039842128753662, + "logits/real": 0.41429099440574646, + "logps/generated": -839.6603393554688, + "logps/real": -404.8365783691406, + "loss": 0.006, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -44.19861602783203, + "rewards/margins": 39.94362258911133, + "rewards/real": -4.254998207092285, + "step": 3540 + }, + { + "epoch": 1.14, + "learning_rate": 3.452056418158113e-07, + "logits/generated": 1.9158881902694702, + "logits/real": 0.38622647523880005, + "logps/generated": -815.9961547851562, + "logps/real": -345.8466491699219, + "loss": 0.0055, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -43.367496490478516, + "rewards/margins": 38.660194396972656, + "rewards/real": -4.707301139831543, + "step": 3550 + }, + { + "epoch": 1.14, + "learning_rate": 3.446130141045395e-07, + "logits/generated": 2.2518858909606934, + "logits/real": 0.7111259698867798, + "logps/generated": -833.4215087890625, + "logps/real": -402.19537353515625, + "loss": 0.0054, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -44.07600784301758, + "rewards/margins": 38.73112869262695, + "rewards/real": -5.344878196716309, + "step": 3560 + }, + { + "epoch": 1.14, + "learning_rate": 3.4402038639326777e-07, + "logits/generated": 2.102323293685913, + "logits/real": 0.916674792766571, + "logps/generated": -911.861328125, + "logps/real": -367.8331604003906, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/generated": -50.8501091003418, + "rewards/margins": 44.45732879638672, + "rewards/real": -6.392782211303711, + "step": 3570 + }, + { + "epoch": 1.15, + "learning_rate": 3.4342775868199595e-07, + "logits/generated": 2.297792911529541, + "logits/real": 0.6382473707199097, + "logps/generated": -1008.75146484375, + "logps/real": -392.03277587890625, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/generated": -58.75178146362305, + "rewards/margins": 51.95452880859375, + "rewards/real": -6.797255516052246, + "step": 3580 + }, + { + "epoch": 1.15, + "learning_rate": 3.428351309707242e-07, + "logits/generated": 2.361895799636841, + "logits/real": 0.598645031452179, + "logps/generated": -983.6506958007812, + "logps/real": -378.78961181640625, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/generated": -58.34507369995117, + "rewards/margins": 51.65392303466797, + "rewards/real": -6.6911516189575195, + "step": 3590 + }, + { + "epoch": 1.15, + "learning_rate": 3.4224250325945244e-07, + "logits/generated": 2.56547212600708, + "logits/real": 0.38831624388694763, + "logps/generated": -878.9915161132812, + "logps/real": -406.8921813964844, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/generated": -48.78410720825195, + "rewards/margins": 42.4536247253418, + "rewards/real": -6.330479145050049, + "step": 3600 + }, + { + "epoch": 1.16, + "learning_rate": 3.4164987554818063e-07, + "logits/generated": 2.4987618923187256, + "logits/real": 0.4154025912284851, + "logps/generated": -838.1653442382812, + "logps/real": -414.14971923828125, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/generated": -44.137351989746094, + "rewards/margins": 37.56629943847656, + "rewards/real": -6.571053504943848, + "step": 3610 + }, + { + "epoch": 1.16, + "learning_rate": 3.410572478369088e-07, + "logits/generated": 1.8420965671539307, + "logits/real": 0.007907414808869362, + "logps/generated": -804.8074340820312, + "logps/real": -371.8179016113281, + "loss": 0.0338, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -41.64263153076172, + "rewards/margins": 37.161338806152344, + "rewards/real": -4.481302261352539, + "step": 3620 + }, + { + "epoch": 1.16, + "learning_rate": 3.4046462012563706e-07, + "logits/generated": 1.755743384361267, + "logits/real": 0.14006485044956207, + "logps/generated": -842.6290893554688, + "logps/real": -353.6880187988281, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/generated": -44.62464141845703, + "rewards/margins": 39.30078125, + "rewards/real": -5.3238630294799805, + "step": 3630 + }, + { + "epoch": 1.16, + "learning_rate": 3.3987199241436525e-07, + "logits/generated": 2.199801445007324, + "logits/real": 0.4196700155735016, + "logps/generated": -938.73583984375, + "logps/real": -372.1141052246094, + "loss": 0.0123, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -54.06526565551758, + "rewards/margins": 48.55379867553711, + "rewards/real": -5.511466026306152, + "step": 3640 + }, + { + "epoch": 1.17, + "learning_rate": 3.392793647030935e-07, + "logits/generated": 2.6720054149627686, + "logits/real": 0.729618489742279, + "logps/generated": -789.3287963867188, + "logps/real": -437.05084228515625, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/generated": -40.290653228759766, + "rewards/margins": 33.130977630615234, + "rewards/real": -7.159679412841797, + "step": 3650 + }, + { + "epoch": 1.17, + "learning_rate": 3.3868673699182173e-07, + "logits/generated": 2.8542654514312744, + "logits/real": 0.7255024313926697, + "logps/generated": -928.93408203125, + "logps/real": -410.2960510253906, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/generated": -52.122467041015625, + "rewards/margins": 44.70793533325195, + "rewards/real": -7.414539337158203, + "step": 3660 + }, + { + "epoch": 1.17, + "learning_rate": 3.380941092805499e-07, + "logits/generated": 2.3532071113586426, + "logits/real": 0.6757982969284058, + "logps/generated": -847.05517578125, + "logps/real": -421.7748107910156, + "loss": 0.0237, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -45.34987258911133, + "rewards/margins": 38.570709228515625, + "rewards/real": -6.779162406921387, + "step": 3670 + }, + { + "epoch": 1.18, + "learning_rate": 3.3750148156927816e-07, + "logits/generated": 2.545330286026001, + "logits/real": 0.812427818775177, + "logps/generated": -869.2293701171875, + "logps/real": -459.91326904296875, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/generated": -48.34119415283203, + "rewards/margins": 38.871009826660156, + "rewards/real": -9.47019100189209, + "step": 3680 + }, + { + "epoch": 1.18, + "learning_rate": 3.369088538580064e-07, + "logits/generated": 2.70619535446167, + "logits/real": 0.9279934763908386, + "logps/generated": -1001.333984375, + "logps/real": -449.50537109375, + "loss": 0.005, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -58.6970329284668, + "rewards/margins": 48.146446228027344, + "rewards/real": -10.550590515136719, + "step": 3690 + }, + { + "epoch": 1.18, + "learning_rate": 3.363162261467346e-07, + "logits/generated": 2.323228359222412, + "logits/real": 0.5112559199333191, + "logps/generated": -924.51708984375, + "logps/real": -421.55029296875, + "loss": 0.0041, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -54.02006149291992, + "rewards/margins": 44.135807037353516, + "rewards/real": -9.884255409240723, + "step": 3700 + }, + { + "epoch": 1.19, + "learning_rate": 3.3572359843546283e-07, + "logits/generated": 2.655287265777588, + "logits/real": 1.0551230907440186, + "logps/generated": -939.03125, + "logps/real": -447.31439208984375, + "loss": 0.0177, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -54.758880615234375, + "rewards/margins": 43.87160873413086, + "rewards/real": -10.887271881103516, + "step": 3710 + }, + { + "epoch": 1.19, + "learning_rate": 3.351309707241911e-07, + "logits/generated": 2.5216193199157715, + "logits/real": 0.8036215901374817, + "logps/generated": -979.0418090820312, + "logps/real": -437.03717041015625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -57.14995193481445, + "rewards/margins": 45.73650360107422, + "rewards/real": -11.413439750671387, + "step": 3720 + }, + { + "epoch": 1.19, + "learning_rate": 3.3453834301291927e-07, + "logits/generated": 2.5467095375061035, + "logits/real": 1.121552586555481, + "logps/generated": -893.8170776367188, + "logps/real": -416.1778869628906, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/generated": -51.46274948120117, + "rewards/margins": 40.75083923339844, + "rewards/real": -10.711912155151367, + "step": 3730 + }, + { + "epoch": 1.2, + "learning_rate": 3.339457153016475e-07, + "logits/generated": 2.41868257522583, + "logits/real": 0.8248863220214844, + "logps/generated": -893.6220703125, + "logps/real": -382.8924255371094, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/generated": -51.593109130859375, + "rewards/margins": 43.090850830078125, + "rewards/real": -8.502254486083984, + "step": 3740 + }, + { + "epoch": 1.2, + "learning_rate": 3.3335308759037575e-07, + "logits/generated": 1.5662766695022583, + "logits/real": 0.10898448526859283, + "logps/generated": -749.6936645507812, + "logps/real": -396.2360534667969, + "loss": 0.0268, + "rewards/accuracies": 1.0, + "rewards/generated": -37.53596496582031, + "rewards/margins": 30.580371856689453, + "rewards/real": -6.955594539642334, + "step": 3750 + }, + { + "epoch": 1.2, + "learning_rate": 3.3276045987910394e-07, + "logits/generated": 1.6707637310028076, + "logits/real": 0.6430930495262146, + "logps/generated": -810.5835571289062, + "logps/real": -361.64154052734375, + "loss": 0.0062, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -40.466434478759766, + "rewards/margins": 33.841209411621094, + "rewards/real": -6.625218868255615, + "step": 3760 + }, + { + "epoch": 1.21, + "learning_rate": 3.321678321678322e-07, + "logits/generated": 2.048856019973755, + "logits/real": 0.6219021677970886, + "logps/generated": -761.279296875, + "logps/real": -398.64923095703125, + "loss": 0.0076, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -37.76602554321289, + "rewards/margins": 29.750417709350586, + "rewards/real": -8.015604972839355, + "step": 3770 + }, + { + "epoch": 1.21, + "learning_rate": 3.315752044565604e-07, + "logits/generated": 2.1167967319488525, + "logits/real": 0.3618480861186981, + "logps/generated": -771.1248779296875, + "logps/real": -381.4525146484375, + "loss": 0.0265, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -38.56813430786133, + "rewards/margins": 31.915334701538086, + "rewards/real": -6.652792453765869, + "step": 3780 + }, + { + "epoch": 1.21, + "learning_rate": 3.309825767452886e-07, + "logits/generated": 1.9735320806503296, + "logits/real": 0.5173706412315369, + "logps/generated": -823.0919799804688, + "logps/real": -388.74090576171875, + "loss": 0.04, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -41.97467803955078, + "rewards/margins": 34.158851623535156, + "rewards/real": -7.815822601318359, + "step": 3790 + }, + { + "epoch": 1.22, + "learning_rate": 3.303899490340168e-07, + "logits/generated": 1.871193289756775, + "logits/real": 0.2393765151500702, + "logps/generated": -826.5197143554688, + "logps/real": -416.1676330566406, + "loss": 0.0269, + "rewards/accuracies": 1.0, + "rewards/generated": -41.28820037841797, + "rewards/margins": 33.3169059753418, + "rewards/real": -7.971290588378906, + "step": 3800 + }, + { + "epoch": 1.22, + "learning_rate": 3.29797321322745e-07, + "logits/generated": 1.6222301721572876, + "logits/real": 0.12104681879281998, + "logps/generated": -798.4268798828125, + "logps/real": -419.88128662109375, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/generated": -40.26927947998047, + "rewards/margins": 34.2071418762207, + "rewards/real": -6.062142372131348, + "step": 3810 + }, + { + "epoch": 1.22, + "learning_rate": 3.2920469361147323e-07, + "logits/generated": 1.6478302478790283, + "logits/real": 0.234401136636734, + "logps/generated": -756.6922607421875, + "logps/real": -347.2097473144531, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -37.91858673095703, + "rewards/margins": 32.255897521972656, + "rewards/real": -5.662688732147217, + "step": 3820 + }, + { + "epoch": 1.23, + "learning_rate": 3.2861206590020147e-07, + "logits/generated": 1.8979486227035522, + "logits/real": 0.01841040328145027, + "logps/generated": -775.7962646484375, + "logps/real": -377.652099609375, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/generated": -39.21514892578125, + "rewards/margins": 33.12729263305664, + "rewards/real": -6.0878586769104, + "step": 3830 + }, + { + "epoch": 1.23, + "learning_rate": 3.2801943818892966e-07, + "logits/generated": 1.9201444387435913, + "logits/real": -0.07562948018312454, + "logps/generated": -859.4422607421875, + "logps/real": -426.18402099609375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -45.821563720703125, + "rewards/margins": 39.22440719604492, + "rewards/real": -6.597151279449463, + "step": 3840 + }, + { + "epoch": 1.23, + "learning_rate": 3.274268104776579e-07, + "logits/generated": 1.7645994424819946, + "logits/real": 0.46368569135665894, + "logps/generated": -893.466796875, + "logps/real": -345.55316162109375, + "loss": 0.0365, + "rewards/accuracies": 1.0, + "rewards/generated": -50.000816345214844, + "rewards/margins": 43.68457794189453, + "rewards/real": -6.316235542297363, + "step": 3850 + }, + { + "epoch": 1.24, + "learning_rate": 3.2683418276638614e-07, + "logits/generated": 1.8924249410629272, + "logits/real": 0.426472008228302, + "logps/generated": -893.7169189453125, + "logps/real": -357.8848571777344, + "loss": 0.0318, + "rewards/accuracies": 1.0, + "rewards/generated": -48.02570343017578, + "rewards/margins": 41.03560256958008, + "rewards/real": -6.990099906921387, + "step": 3860 + }, + { + "epoch": 1.24, + "learning_rate": 3.2624155505511433e-07, + "logits/generated": 1.7478210926055908, + "logits/real": 0.3808758854866028, + "logps/generated": -865.9788818359375, + "logps/real": -384.21490478515625, + "loss": 0.0231, + "rewards/accuracies": 1.0, + "rewards/generated": -45.07698440551758, + "rewards/margins": 39.27788543701172, + "rewards/real": -5.799102783203125, + "step": 3870 + }, + { + "epoch": 1.24, + "learning_rate": 3.256489273438426e-07, + "logits/generated": 1.8142160177230835, + "logits/real": -0.05650439113378525, + "logps/generated": -852.76416015625, + "logps/real": -394.3373718261719, + "loss": 0.0087, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -46.22393035888672, + "rewards/margins": 40.576942443847656, + "rewards/real": -5.646985054016113, + "step": 3880 + }, + { + "epoch": 1.24, + "learning_rate": 3.250562996325708e-07, + "logits/generated": 1.7576414346694946, + "logits/real": 0.16125845909118652, + "logps/generated": -737.7971801757812, + "logps/real": -355.10101318359375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -37.76227569580078, + "rewards/margins": 32.49078369140625, + "rewards/real": -5.271491050720215, + "step": 3890 + }, + { + "epoch": 1.25, + "learning_rate": 3.24463671921299e-07, + "logits/generated": 2.224649429321289, + "logits/real": 0.034860990941524506, + "logps/generated": -835.9309692382812, + "logps/real": -386.48394775390625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/generated": -44.693824768066406, + "rewards/margins": 40.14398956298828, + "rewards/real": -4.549836158752441, + "step": 3900 + }, + { + "epoch": 1.25, + "learning_rate": 3.2387104421002725e-07, + "logits/generated": 2.1280009746551514, + "logits/real": 0.18578016757965088, + "logps/generated": -811.9386596679688, + "logps/real": -364.31622314453125, + "loss": 0.0056, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -42.4742317199707, + "rewards/margins": 36.29412078857422, + "rewards/real": -6.18010950088501, + "step": 3910 + }, + { + "epoch": 1.25, + "learning_rate": 3.232784164987555e-07, + "logits/generated": 2.216989040374756, + "logits/real": -0.006085106637328863, + "logps/generated": -918.9568481445312, + "logps/real": -391.79437255859375, + "loss": 0.032, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -52.31040573120117, + "rewards/margins": 45.7618293762207, + "rewards/real": -6.548575401306152, + "step": 3920 + }, + { + "epoch": 1.26, + "learning_rate": 3.226857887874837e-07, + "logits/generated": 2.0761938095092773, + "logits/real": -0.2655293345451355, + "logps/generated": -860.9503784179688, + "logps/real": -412.0650939941406, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/generated": -47.962162017822266, + "rewards/margins": 41.897239685058594, + "rewards/real": -6.064918518066406, + "step": 3930 + }, + { + "epoch": 1.26, + "learning_rate": 3.220931610762119e-07, + "logits/generated": 2.1194260120391846, + "logits/real": 0.04633180424571037, + "logps/generated": -938.02099609375, + "logps/real": -395.6116638183594, + "loss": 0.0114, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -53.80571365356445, + "rewards/margins": 46.80584716796875, + "rewards/real": -6.999871253967285, + "step": 3940 + }, + { + "epoch": 1.26, + "learning_rate": 3.2150053336494016e-07, + "logits/generated": 1.8909871578216553, + "logits/real": 0.1479884833097458, + "logps/generated": -856.759765625, + "logps/real": -369.7672424316406, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -46.83892059326172, + "rewards/margins": 40.16466522216797, + "rewards/real": -6.674252986907959, + "step": 3950 + }, + { + "epoch": 1.27, + "learning_rate": 3.2090790565366835e-07, + "logits/generated": 1.816232442855835, + "logits/real": -0.22243690490722656, + "logps/generated": -862.2125244140625, + "logps/real": -355.93292236328125, + "loss": 0.084, + "rewards/accuracies": 0.9375, + "rewards/generated": -47.1995735168457, + "rewards/margins": 40.92375183105469, + "rewards/real": -6.275822162628174, + "step": 3960 + }, + { + "epoch": 1.27, + "learning_rate": 3.2031527794239654e-07, + "logits/generated": 1.9847770929336548, + "logits/real": 0.19675599038600922, + "logps/generated": -775.3919677734375, + "logps/real": -366.6876220703125, + "loss": 0.0157, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -39.33246994018555, + "rewards/margins": 33.03014373779297, + "rewards/real": -6.302330493927002, + "step": 3970 + }, + { + "epoch": 1.27, + "learning_rate": 3.197226502311248e-07, + "logits/generated": 2.450059652328491, + "logits/real": -0.19365069270133972, + "logps/generated": -884.3902587890625, + "logps/real": -445.5484313964844, + "loss": 0.0252, + "rewards/accuracies": 1.0, + "rewards/generated": -49.08539581298828, + "rewards/margins": 43.401023864746094, + "rewards/real": -5.684370994567871, + "step": 3980 + }, + { + "epoch": 1.28, + "learning_rate": 3.1913002251985297e-07, + "logits/generated": 1.7655264139175415, + "logits/real": -0.32271242141723633, + "logps/generated": -781.2020263671875, + "logps/real": -373.3470153808594, + "loss": 0.0407, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -40.973079681396484, + "rewards/margins": 35.65218734741211, + "rewards/real": -5.320893287658691, + "step": 3990 + }, + { + "epoch": 1.28, + "learning_rate": 3.185373948085812e-07, + "logits/generated": 1.7735016345977783, + "logits/real": -0.3135187327861786, + "logps/generated": -823.1165161132812, + "logps/real": -341.21514892578125, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/generated": -42.40126419067383, + "rewards/margins": 37.98102569580078, + "rewards/real": -4.420238971710205, + "step": 4000 + }, + { + "epoch": 1.28, + "learning_rate": 3.1794476709730946e-07, + "logits/generated": 1.9088976383209229, + "logits/real": 0.021071402356028557, + "logps/generated": -997.48291015625, + "logps/real": -341.7501220703125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/generated": -56.8674430847168, + "rewards/margins": 51.42482376098633, + "rewards/real": -5.44262170791626, + "step": 4010 + }, + { + "epoch": 1.29, + "learning_rate": 3.1735213938603764e-07, + "logits/generated": 1.8926769495010376, + "logits/real": -0.1542275995016098, + "logps/generated": -903.7127685546875, + "logps/real": -340.779296875, + "loss": 0.0239, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -49.57229232788086, + "rewards/margins": 45.800968170166016, + "rewards/real": -3.771319627761841, + "step": 4020 + }, + { + "epoch": 1.29, + "learning_rate": 3.167595116747659e-07, + "logits/generated": 1.7338800430297852, + "logits/real": -0.2945229113101959, + "logps/generated": -908.7874755859375, + "logps/real": -348.8018493652344, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/generated": -51.78312301635742, + "rewards/margins": 46.67133331298828, + "rewards/real": -5.111795425415039, + "step": 4030 + }, + { + "epoch": 1.29, + "learning_rate": 3.1616688396349413e-07, + "logits/generated": 2.5217385292053223, + "logits/real": 0.2091987580060959, + "logps/generated": -880.8609619140625, + "logps/real": -398.03302001953125, + "loss": 0.0198, + "rewards/accuracies": 1.0, + "rewards/generated": -49.21094512939453, + "rewards/margins": 42.1538200378418, + "rewards/real": -7.057119846343994, + "step": 4040 + }, + { + "epoch": 1.3, + "learning_rate": 3.155742562522223e-07, + "logits/generated": 2.8754658699035645, + "logits/real": 0.4876670241355896, + "logps/generated": -896.7193603515625, + "logps/real": -405.14263916015625, + "loss": 0.018, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -49.94514465332031, + "rewards/margins": 40.763938903808594, + "rewards/real": -9.181201934814453, + "step": 4050 + }, + { + "epoch": 1.3, + "learning_rate": 3.1498162854095056e-07, + "logits/generated": 2.544055461883545, + "logits/real": 0.5535674095153809, + "logps/generated": -868.2188720703125, + "logps/real": -442.862548828125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -47.983524322509766, + "rewards/margins": 35.19925308227539, + "rewards/real": -12.784273147583008, + "step": 4060 + }, + { + "epoch": 1.3, + "learning_rate": 3.143890008296788e-07, + "logits/generated": 2.4933745861053467, + "logits/real": 0.6904267072677612, + "logps/generated": -949.9890747070312, + "logps/real": -412.2945251464844, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -55.27671432495117, + "rewards/margins": 45.037681579589844, + "rewards/real": -10.239030838012695, + "step": 4070 + }, + { + "epoch": 1.31, + "learning_rate": 3.13796373118407e-07, + "logits/generated": 2.221097469329834, + "logits/real": 0.22244521975517273, + "logps/generated": -948.2183837890625, + "logps/real": -422.76214599609375, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/generated": -54.98572540283203, + "rewards/margins": 45.12064743041992, + "rewards/real": -9.865074157714844, + "step": 4080 + }, + { + "epoch": 1.31, + "learning_rate": 3.1320374540713523e-07, + "logits/generated": 2.7171072959899902, + "logits/real": 0.5666571855545044, + "logps/generated": -970.6345825195312, + "logps/real": -423.0457458496094, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/generated": -57.169898986816406, + "rewards/margins": 44.74327850341797, + "rewards/real": -12.426626205444336, + "step": 4090 + }, + { + "epoch": 1.31, + "learning_rate": 3.126111176958635e-07, + "logits/generated": 2.652374744415283, + "logits/real": 0.2705211639404297, + "logps/generated": -896.6983642578125, + "logps/real": -374.56097412109375, + "loss": 0.0447, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -50.743526458740234, + "rewards/margins": 43.60712432861328, + "rewards/real": -7.1364030838012695, + "step": 4100 + }, + { + "epoch": 1.32, + "learning_rate": 3.1201848998459166e-07, + "logits/generated": 2.483314037322998, + "logits/real": 0.3021407723426819, + "logps/generated": -808.2822875976562, + "logps/real": -410.1495666503906, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/generated": -43.35593795776367, + "rewards/margins": 38.681724548339844, + "rewards/real": -4.674206256866455, + "step": 4110 + }, + { + "epoch": 1.32, + "learning_rate": 3.114258622733199e-07, + "logits/generated": 1.7732019424438477, + "logits/real": -0.21743369102478027, + "logps/generated": -891.5406494140625, + "logps/real": -392.1903991699219, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/generated": -47.2230224609375, + "rewards/margins": 42.33424758911133, + "rewards/real": -4.888775825500488, + "step": 4120 + }, + { + "epoch": 1.32, + "learning_rate": 3.1083323456204815e-07, + "logits/generated": 1.927710771560669, + "logits/real": -0.045561954379081726, + "logps/generated": -904.3961791992188, + "logps/real": -359.3899841308594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -49.64174270629883, + "rewards/margins": 43.88774871826172, + "rewards/real": -5.753993511199951, + "step": 4130 + }, + { + "epoch": 1.32, + "learning_rate": 3.1024060685077634e-07, + "logits/generated": 2.218238353729248, + "logits/real": 0.02407793700695038, + "logps/generated": -887.27880859375, + "logps/real": -433.8273010253906, + "loss": 0.0059, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -50.3277587890625, + "rewards/margins": 43.18590545654297, + "rewards/real": -7.141855716705322, + "step": 4140 + }, + { + "epoch": 1.33, + "learning_rate": 3.096479791395045e-07, + "logits/generated": 2.322061061859131, + "logits/real": 0.03750114515423775, + "logps/generated": -961.4918212890625, + "logps/real": -388.9190979003906, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/generated": -56.68904495239258, + "rewards/margins": 49.598663330078125, + "rewards/real": -7.090386867523193, + "step": 4150 + }, + { + "epoch": 1.33, + "learning_rate": 3.0905535142823277e-07, + "logits/generated": 1.7675060033798218, + "logits/real": 0.5792126059532166, + "logps/generated": -931.26806640625, + "logps/real": -415.6739196777344, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/generated": -53.85729217529297, + "rewards/margins": 45.11085510253906, + "rewards/real": -8.74643611907959, + "step": 4160 + }, + { + "epoch": 1.33, + "learning_rate": 3.0846272371696095e-07, + "logits/generated": 2.3035531044006348, + "logits/real": 0.19912122189998627, + "logps/generated": -935.2286376953125, + "logps/real": -372.4601135253906, + "loss": 0.01, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -54.5814094543457, + "rewards/margins": 46.517608642578125, + "rewards/real": -8.063804626464844, + "step": 4170 + }, + { + "epoch": 1.34, + "learning_rate": 3.078700960056892e-07, + "logits/generated": 1.9163198471069336, + "logits/real": 0.3440563678741455, + "logps/generated": -896.0811767578125, + "logps/real": -387.7689208984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -50.69479751586914, + "rewards/margins": 42.404197692871094, + "rewards/real": -8.29059886932373, + "step": 4180 + }, + { + "epoch": 1.34, + "learning_rate": 3.0727746829441744e-07, + "logits/generated": 2.886960029602051, + "logits/real": 0.5897636413574219, + "logps/generated": -871.70068359375, + "logps/real": -396.7232971191406, + "loss": 0.0301, + "rewards/accuracies": 1.0, + "rewards/generated": -49.031253814697266, + "rewards/margins": 40.121009826660156, + "rewards/real": -8.910244941711426, + "step": 4190 + }, + { + "epoch": 1.34, + "learning_rate": 3.0668484058314563e-07, + "logits/generated": 1.759080171585083, + "logits/real": 0.4325089454650879, + "logps/generated": -970.3538818359375, + "logps/real": -353.41009521484375, + "loss": 0.0364, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -54.57085037231445, + "rewards/margins": 47.87283706665039, + "rewards/real": -6.698004722595215, + "step": 4200 + }, + { + "epoch": 1.35, + "learning_rate": 3.0609221287187387e-07, + "logits/generated": 1.72823965549469, + "logits/real": 0.41172105073928833, + "logps/generated": -840.40625, + "logps/real": -365.85968017578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -46.86371612548828, + "rewards/margins": 40.378700256347656, + "rewards/real": -6.485014915466309, + "step": 4210 + }, + { + "epoch": 1.35, + "learning_rate": 3.054995851606021e-07, + "logits/generated": 2.494736909866333, + "logits/real": 0.6513184309005737, + "logps/generated": -943.0419921875, + "logps/real": -365.2388916015625, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/generated": -54.51377487182617, + "rewards/margins": 47.15017318725586, + "rewards/real": -7.363605499267578, + "step": 4220 + }, + { + "epoch": 1.35, + "learning_rate": 3.049069574493303e-07, + "logits/generated": 2.206766128540039, + "logits/real": 0.5627135038375854, + "logps/generated": -907.9157104492188, + "logps/real": -370.6760559082031, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -51.44373321533203, + "rewards/margins": 44.14038848876953, + "rewards/real": -7.303345680236816, + "step": 4230 + }, + { + "epoch": 1.36, + "learning_rate": 3.0431432973805854e-07, + "logits/generated": 2.555173397064209, + "logits/real": 0.41458195447921753, + "logps/generated": -791.3504638671875, + "logps/real": -386.2882385253906, + "loss": 0.0387, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -43.68230438232422, + "rewards/margins": 38.028541564941406, + "rewards/real": -5.653756141662598, + "step": 4240 + }, + { + "epoch": 1.36, + "learning_rate": 3.037217020267868e-07, + "logits/generated": 2.225285291671753, + "logits/real": 0.515532374382019, + "logps/generated": -836.8493041992188, + "logps/real": -346.5246276855469, + "loss": 0.0139, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -45.82666778564453, + "rewards/margins": 38.41389846801758, + "rewards/real": -7.4127702713012695, + "step": 4250 + }, + { + "epoch": 1.36, + "learning_rate": 3.0312907431551497e-07, + "logits/generated": 2.0543510913848877, + "logits/real": 0.785476565361023, + "logps/generated": -788.3221435546875, + "logps/real": -386.2995300292969, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/generated": -42.3862190246582, + "rewards/margins": 36.74425506591797, + "rewards/real": -5.641963958740234, + "step": 4260 + }, + { + "epoch": 1.37, + "learning_rate": 3.025364466042432e-07, + "logits/generated": 2.4212594032287598, + "logits/real": 0.8606536984443665, + "logps/generated": -940.974609375, + "logps/real": -378.3146667480469, + "loss": 0.0173, + "rewards/accuracies": 1.0, + "rewards/generated": -54.0003776550293, + "rewards/margins": 47.51909637451172, + "rewards/real": -6.48128604888916, + "step": 4270 + }, + { + "epoch": 1.37, + "learning_rate": 3.0194381889297146e-07, + "logits/generated": 2.661616802215576, + "logits/real": 0.6014574766159058, + "logps/generated": -890.6624755859375, + "logps/real": -369.1997985839844, + "loss": 0.0148, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -50.58171844482422, + "rewards/margins": 45.810325622558594, + "rewards/real": -4.771393775939941, + "step": 4280 + }, + { + "epoch": 1.37, + "learning_rate": 3.0135119118169965e-07, + "logits/generated": 2.5344526767730713, + "logits/real": 0.4575222134590149, + "logps/generated": -839.5372924804688, + "logps/real": -391.0158996582031, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/generated": -43.62260437011719, + "rewards/margins": 40.09632110595703, + "rewards/real": -3.5262866020202637, + "step": 4290 + }, + { + "epoch": 1.38, + "learning_rate": 3.007585634704279e-07, + "logits/generated": 2.60728120803833, + "logits/real": 0.9728862643241882, + "logps/generated": -790.5014038085938, + "logps/real": -346.76055908203125, + "loss": 0.0126, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -41.20235061645508, + "rewards/margins": 36.835411071777344, + "rewards/real": -4.366939544677734, + "step": 4300 + }, + { + "epoch": 1.38, + "learning_rate": 3.0016593575915613e-07, + "logits/generated": 1.9322798252105713, + "logits/real": 0.4720967411994934, + "logps/generated": -877.2966918945312, + "logps/real": -434.5298767089844, + "loss": 0.0041, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -47.95698165893555, + "rewards/margins": 43.21381378173828, + "rewards/real": -4.743161201477051, + "step": 4310 + }, + { + "epoch": 1.38, + "learning_rate": 2.995733080478843e-07, + "logits/generated": 2.8624980449676514, + "logits/real": 0.5967206954956055, + "logps/generated": -826.7452392578125, + "logps/real": -389.9955139160156, + "loss": 0.0358, + "rewards/accuracies": 1.0, + "rewards/generated": -44.31755828857422, + "rewards/margins": 40.05508041381836, + "rewards/real": -4.262475967407227, + "step": 4320 + }, + { + "epoch": 1.39, + "learning_rate": 2.989806803366125e-07, + "logits/generated": 2.60481595993042, + "logits/real": 1.306947112083435, + "logps/generated": -876.9793701171875, + "logps/real": -359.3258361816406, + "loss": 0.0101, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -48.423545837402344, + "rewards/margins": 44.53766632080078, + "rewards/real": -3.885880947113037, + "step": 4330 + }, + { + "epoch": 1.39, + "learning_rate": 2.9838805262534075e-07, + "logits/generated": 2.8550238609313965, + "logits/real": 1.2833164930343628, + "logps/generated": -824.1707153320312, + "logps/real": -357.21307373046875, + "loss": 0.0177, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -46.847599029541016, + "rewards/margins": 41.7922248840332, + "rewards/real": -5.055374622344971, + "step": 4340 + }, + { + "epoch": 1.39, + "learning_rate": 2.9779542491406894e-07, + "logits/generated": 2.838146924972534, + "logits/real": 1.0210936069488525, + "logps/generated": -922.9793701171875, + "logps/real": -339.36236572265625, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/generated": -53.317161560058594, + "rewards/margins": 47.95136260986328, + "rewards/real": -5.365798473358154, + "step": 4350 + }, + { + "epoch": 1.4, + "learning_rate": 2.972027972027972e-07, + "logits/generated": 2.001512050628662, + "logits/real": 1.1333067417144775, + "logps/generated": -851.7052612304688, + "logps/real": -407.76678466796875, + "loss": 0.0226, + "rewards/accuracies": 1.0, + "rewards/generated": -47.03935241699219, + "rewards/margins": 39.51409149169922, + "rewards/real": -7.5252580642700195, + "step": 4360 + }, + { + "epoch": 1.4, + "learning_rate": 2.966101694915254e-07, + "logits/generated": 2.5771706104278564, + "logits/real": 1.2043229341506958, + "logps/generated": -932.60302734375, + "logps/real": -404.4564514160156, + "loss": 0.0726, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -53.90533447265625, + "rewards/margins": 46.47903823852539, + "rewards/real": -7.42629861831665, + "step": 4370 + }, + { + "epoch": 1.4, + "learning_rate": 2.960175417802536e-07, + "logits/generated": 2.218547821044922, + "logits/real": 0.9215396046638489, + "logps/generated": -898.2188720703125, + "logps/real": -358.0943603515625, + "loss": 0.0122, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -48.935184478759766, + "rewards/margins": 44.388031005859375, + "rewards/real": -4.547152996063232, + "step": 4380 + }, + { + "epoch": 1.4, + "learning_rate": 2.9542491406898185e-07, + "logits/generated": 2.2845664024353027, + "logits/real": 0.8632771372795105, + "logps/generated": -885.0338134765625, + "logps/real": -379.1771545410156, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/generated": -49.426361083984375, + "rewards/margins": 44.742576599121094, + "rewards/real": -4.683784008026123, + "step": 4390 + }, + { + "epoch": 1.41, + "learning_rate": 2.948322863577101e-07, + "logits/generated": 2.0430283546447754, + "logits/real": 0.31897956132888794, + "logps/generated": -784.330078125, + "logps/real": -374.98016357421875, + "loss": 0.0268, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -41.49010467529297, + "rewards/margins": 37.16717529296875, + "rewards/real": -4.322933673858643, + "step": 4400 + }, + { + "epoch": 1.41, + "learning_rate": 2.942396586464383e-07, + "logits/generated": 2.0797367095947266, + "logits/real": 0.5651417970657349, + "logps/generated": -831.4734497070312, + "logps/real": -364.90423583984375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/generated": -44.593421936035156, + "rewards/margins": 40.9267692565918, + "rewards/real": -3.666652202606201, + "step": 4410 + }, + { + "epoch": 1.41, + "learning_rate": 2.936470309351665e-07, + "logits/generated": 2.346694231033325, + "logits/real": 0.7109388709068298, + "logps/generated": -794.2559204101562, + "logps/real": -349.0956726074219, + "loss": 0.0198, + "rewards/accuracies": 1.0, + "rewards/generated": -42.08417892456055, + "rewards/margins": 37.98200225830078, + "rewards/real": -4.102175712585449, + "step": 4420 + }, + { + "epoch": 1.42, + "learning_rate": 2.9305440322389477e-07, + "logits/generated": 2.7935266494750977, + "logits/real": 0.626765787601471, + "logps/generated": -846.2515869140625, + "logps/real": -372.41424560546875, + "loss": 0.0234, + "rewards/accuracies": 1.0, + "rewards/generated": -47.647586822509766, + "rewards/margins": 43.59477996826172, + "rewards/real": -4.052794456481934, + "step": 4430 + }, + { + "epoch": 1.42, + "learning_rate": 2.9246177551262296e-07, + "logits/generated": 2.696134090423584, + "logits/real": 0.9759427905082703, + "logps/generated": -878.1096801757812, + "logps/real": -366.1974182128906, + "loss": 0.0086, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -49.2159538269043, + "rewards/margins": 42.51264953613281, + "rewards/real": -6.70331335067749, + "step": 4440 + }, + { + "epoch": 1.42, + "learning_rate": 2.918691478013512e-07, + "logits/generated": 2.3423283100128174, + "logits/real": 0.5175257921218872, + "logps/generated": -782.8167724609375, + "logps/real": -340.42901611328125, + "loss": 0.0269, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -41.3431396484375, + "rewards/margins": 37.00103759765625, + "rewards/real": -4.342096328735352, + "step": 4450 + }, + { + "epoch": 1.43, + "learning_rate": 2.9127652009007944e-07, + "logits/generated": 2.778712749481201, + "logits/real": 0.47254347801208496, + "logps/generated": -926.4280395507812, + "logps/real": -344.14739990234375, + "loss": 0.0356, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -50.66118240356445, + "rewards/margins": 47.607582092285156, + "rewards/real": -3.053595542907715, + "step": 4460 + }, + { + "epoch": 1.43, + "learning_rate": 2.9068389237880763e-07, + "logits/generated": 2.97314453125, + "logits/real": 0.9956085085868835, + "logps/generated": -832.6083984375, + "logps/real": -341.3013000488281, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -46.001556396484375, + "rewards/margins": 42.96271514892578, + "rewards/real": -3.038844347000122, + "step": 4470 + }, + { + "epoch": 1.43, + "learning_rate": 2.9009126466753587e-07, + "logits/generated": 2.8147237300872803, + "logits/real": 0.8581963777542114, + "logps/generated": -854.1736450195312, + "logps/real": -357.0933532714844, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -45.780052185058594, + "rewards/margins": 42.75517654418945, + "rewards/real": -3.0248818397521973, + "step": 4480 + }, + { + "epoch": 1.44, + "learning_rate": 2.8949863695626406e-07, + "logits/generated": 2.456997871398926, + "logits/real": 0.7250876426696777, + "logps/generated": -851.84814453125, + "logps/real": -371.81243896484375, + "loss": 0.0054, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -46.140907287597656, + "rewards/margins": 41.824790954589844, + "rewards/real": -4.316125392913818, + "step": 4490 + }, + { + "epoch": 1.44, + "learning_rate": 2.889060092449923e-07, + "logits/generated": 2.994431734085083, + "logits/real": 1.062474012374878, + "logps/generated": -885.6672973632812, + "logps/real": -337.63092041015625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/generated": -50.32817840576172, + "rewards/margins": 46.21593475341797, + "rewards/real": -4.112240314483643, + "step": 4500 + }, + { + "epoch": 1.44, + "learning_rate": 2.883133815337205e-07, + "logits/generated": 2.706831455230713, + "logits/real": 1.0632785558700562, + "logps/generated": -996.8551635742188, + "logps/real": -380.18408203125, + "loss": 0.0069, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -56.95098114013672, + "rewards/margins": 50.92827224731445, + "rewards/real": -6.022718906402588, + "step": 4510 + }, + { + "epoch": 1.45, + "learning_rate": 2.877207538224487e-07, + "logits/generated": 3.2903552055358887, + "logits/real": 1.3321194648742676, + "logps/generated": -977.85205078125, + "logps/real": -375.55126953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -56.94231033325195, + "rewards/margins": 50.63762664794922, + "rewards/real": -6.304681301116943, + "step": 4520 + }, + { + "epoch": 1.45, + "learning_rate": 2.871281261111769e-07, + "logits/generated": 3.114874839782715, + "logits/real": 0.9941811561584473, + "logps/generated": -954.8370971679688, + "logps/real": -366.42022705078125, + "loss": 0.0116, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -56.53069305419922, + "rewards/margins": 50.442726135253906, + "rewards/real": -6.0879669189453125, + "step": 4530 + }, + { + "epoch": 1.45, + "learning_rate": 2.8653549839990516e-07, + "logits/generated": 1.7220100164413452, + "logits/real": 0.6936120986938477, + "logps/generated": -793.5621948242188, + "logps/real": -320.93280029296875, + "loss": 0.0838, + "rewards/accuracies": 1.0, + "rewards/generated": -39.636451721191406, + "rewards/margins": 36.73711395263672, + "rewards/real": -2.8993372917175293, + "step": 4540 + }, + { + "epoch": 1.46, + "learning_rate": 2.8594287068863335e-07, + "logits/generated": 1.7378209829330444, + "logits/real": 0.3296111226081848, + "logps/generated": -746.2716674804688, + "logps/real": -364.6012878417969, + "loss": 0.0162, + "rewards/accuracies": 1.0, + "rewards/generated": -35.58428192138672, + "rewards/margins": 34.59697723388672, + "rewards/real": -0.9873050451278687, + "step": 4550 + }, + { + "epoch": 1.46, + "learning_rate": 2.853502429773616e-07, + "logits/generated": 2.4758598804473877, + "logits/real": 0.46579688787460327, + "logps/generated": -721.5530395507812, + "logps/real": -349.1888427734375, + "loss": 0.0198, + "rewards/accuracies": 1.0, + "rewards/generated": -35.16068649291992, + "rewards/margins": 33.51179885864258, + "rewards/real": -1.648890733718872, + "step": 4560 + }, + { + "epoch": 1.46, + "learning_rate": 2.8475761526608984e-07, + "logits/generated": 2.2155046463012695, + "logits/real": 0.7471126317977905, + "logps/generated": -775.1942138671875, + "logps/real": -339.3021545410156, + "loss": 0.0437, + "rewards/accuracies": 1.0, + "rewards/generated": -37.29993438720703, + "rewards/margins": 34.87761688232422, + "rewards/real": -2.422313690185547, + "step": 4570 + }, + { + "epoch": 1.47, + "learning_rate": 2.84164987554818e-07, + "logits/generated": 1.954671859741211, + "logits/real": 0.5207287073135376, + "logps/generated": -704.0051879882812, + "logps/real": -368.79150390625, + "loss": 0.0707, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -32.33496856689453, + "rewards/margins": 28.184839248657227, + "rewards/real": -4.1501288414001465, + "step": 4580 + }, + { + "epoch": 1.47, + "learning_rate": 2.8357235984354627e-07, + "logits/generated": 2.2055437564849854, + "logits/real": 1.2804126739501953, + "logps/generated": -764.0189819335938, + "logps/real": -384.3921813964844, + "loss": 0.0198, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -36.48469924926758, + "rewards/margins": 32.02397918701172, + "rewards/real": -4.460718631744385, + "step": 4590 + }, + { + "epoch": 1.47, + "learning_rate": 2.829797321322745e-07, + "logits/generated": 2.4263312816619873, + "logits/real": 1.5482428073883057, + "logps/generated": -848.1448974609375, + "logps/real": -422.74664306640625, + "loss": 0.0116, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -43.13447570800781, + "rewards/margins": 35.92151641845703, + "rewards/real": -7.212961673736572, + "step": 4600 + }, + { + "epoch": 1.48, + "learning_rate": 2.823871044210027e-07, + "logits/generated": 1.9039783477783203, + "logits/real": 1.3764702081680298, + "logps/generated": -810.2239990234375, + "logps/real": -426.0375061035156, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/generated": -42.02518081665039, + "rewards/margins": 35.242366790771484, + "rewards/real": -6.782809257507324, + "step": 4610 + }, + { + "epoch": 1.48, + "learning_rate": 2.8179447670973094e-07, + "logits/generated": 2.3811886310577393, + "logits/real": 1.1642677783966064, + "logps/generated": -771.0386352539062, + "logps/real": -392.75433349609375, + "loss": 0.0066, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -39.267356872558594, + "rewards/margins": 32.4055290222168, + "rewards/real": -6.8618292808532715, + "step": 4620 + }, + { + "epoch": 1.48, + "learning_rate": 2.812018489984592e-07, + "logits/generated": 2.4021406173706055, + "logits/real": 1.3993284702301025, + "logps/generated": -901.5540771484375, + "logps/real": -397.1238098144531, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/generated": -48.778297424316406, + "rewards/margins": 42.13416290283203, + "rewards/real": -6.644139289855957, + "step": 4630 + }, + { + "epoch": 1.48, + "learning_rate": 2.8060922128718737e-07, + "logits/generated": 2.892127513885498, + "logits/real": 1.1835445165634155, + "logps/generated": -836.6994018554688, + "logps/real": -339.24517822265625, + "loss": 0.0494, + "rewards/accuracies": 1.0, + "rewards/generated": -44.078670501708984, + "rewards/margins": 38.7639045715332, + "rewards/real": -5.314770221710205, + "step": 4640 + }, + { + "epoch": 1.49, + "learning_rate": 2.800165935759156e-07, + "logits/generated": 2.559720516204834, + "logits/real": 0.9920206069946289, + "logps/generated": -721.725830078125, + "logps/real": -372.3648376464844, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -35.085899353027344, + "rewards/margins": 31.946239471435547, + "rewards/real": -3.1396594047546387, + "step": 4650 + }, + { + "epoch": 1.49, + "learning_rate": 2.7942396586464385e-07, + "logits/generated": 2.060070037841797, + "logits/real": 0.9687484502792358, + "logps/generated": -760.3370361328125, + "logps/real": -395.9288635253906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -37.018699645996094, + "rewards/margins": 32.35004425048828, + "rewards/real": -4.6686577796936035, + "step": 4660 + }, + { + "epoch": 1.49, + "learning_rate": 2.7883133815337204e-07, + "logits/generated": 2.6550605297088623, + "logits/real": 1.1643413305282593, + "logps/generated": -787.5021362304688, + "logps/real": -328.51751708984375, + "loss": 0.0229, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -41.66749954223633, + "rewards/margins": 36.329803466796875, + "rewards/real": -5.337691783905029, + "step": 4670 + }, + { + "epoch": 1.5, + "learning_rate": 2.7823871044210023e-07, + "logits/generated": 3.0104825496673584, + "logits/real": 1.215192437171936, + "logps/generated": -844.298828125, + "logps/real": -346.3050842285156, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/generated": -43.288108825683594, + "rewards/margins": 39.522438049316406, + "rewards/real": -3.7656726837158203, + "step": 4680 + }, + { + "epoch": 1.5, + "learning_rate": 2.776460827308285e-07, + "logits/generated": 2.614835262298584, + "logits/real": 1.3394081592559814, + "logps/generated": -826.7473754882812, + "logps/real": -364.2169189453125, + "loss": 0.0124, + "rewards/accuracies": 1.0, + "rewards/generated": -42.0161018371582, + "rewards/margins": 37.3761100769043, + "rewards/real": -4.63999080657959, + "step": 4690 + }, + { + "epoch": 1.5, + "learning_rate": 2.7705345501955666e-07, + "logits/generated": 2.882535696029663, + "logits/real": 1.3634650707244873, + "logps/generated": -787.8270263671875, + "logps/real": -324.0661926269531, + "loss": 0.015, + "rewards/accuracies": 1.0, + "rewards/generated": -39.588706970214844, + "rewards/margins": 35.58531188964844, + "rewards/real": -4.003393173217773, + "step": 4700 + }, + { + "epoch": 1.51, + "learning_rate": 2.764608273082849e-07, + "logits/generated": 2.41412615776062, + "logits/real": 0.9196175336837769, + "logps/generated": -770.9818725585938, + "logps/real": -370.63372802734375, + "loss": 0.0058, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -39.07651901245117, + "rewards/margins": 35.234256744384766, + "rewards/real": -3.842259645462036, + "step": 4710 + }, + { + "epoch": 1.51, + "learning_rate": 2.7586819959701315e-07, + "logits/generated": 2.0489985942840576, + "logits/real": 0.5775309801101685, + "logps/generated": -863.5361328125, + "logps/real": -341.0457458496094, + "loss": 0.0253, + "rewards/accuracies": 1.0, + "rewards/generated": -46.23897933959961, + "rewards/margins": 41.55213165283203, + "rewards/real": -4.686847686767578, + "step": 4720 + }, + { + "epoch": 1.51, + "learning_rate": 2.7527557188574134e-07, + "logits/generated": 2.411741018295288, + "logits/real": 0.46629491448402405, + "logps/generated": -797.5379028320312, + "logps/real": -374.79754638671875, + "loss": 0.0244, + "rewards/accuracies": 1.0, + "rewards/generated": -42.71403884887695, + "rewards/margins": 36.982208251953125, + "rewards/real": -5.731827735900879, + "step": 4730 + }, + { + "epoch": 1.52, + "learning_rate": 2.746829441744696e-07, + "logits/generated": 2.5271687507629395, + "logits/real": 0.4328778386116028, + "logps/generated": -859.7745971679688, + "logps/real": -399.00592041015625, + "loss": 0.0371, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -46.51274490356445, + "rewards/margins": 40.866783142089844, + "rewards/real": -5.645957946777344, + "step": 4740 + }, + { + "epoch": 1.52, + "learning_rate": 2.740903164631978e-07, + "logits/generated": 1.8134750127792358, + "logits/real": 0.6105095148086548, + "logps/generated": -856.2125244140625, + "logps/real": -349.9185791015625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/generated": -46.258323669433594, + "rewards/margins": 41.7020378112793, + "rewards/real": -4.556281566619873, + "step": 4750 + }, + { + "epoch": 1.52, + "learning_rate": 2.73497688751926e-07, + "logits/generated": 2.384493827819824, + "logits/real": 0.6340434551239014, + "logps/generated": -861.9421997070312, + "logps/real": -365.2822265625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/generated": -46.07266616821289, + "rewards/margins": 39.855445861816406, + "rewards/real": -6.217224597930908, + "step": 4760 + }, + { + "epoch": 1.53, + "learning_rate": 2.7290506104065425e-07, + "logits/generated": 2.6208534240722656, + "logits/real": 0.19273574650287628, + "logps/generated": -884.6500244140625, + "logps/real": -394.6060485839844, + "loss": 0.0057, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -49.60136032104492, + "rewards/margins": 43.45311737060547, + "rewards/real": -6.1482391357421875, + "step": 4770 + }, + { + "epoch": 1.53, + "learning_rate": 2.723124333293825e-07, + "logits/generated": 2.2919468879699707, + "logits/real": 0.43106716871261597, + "logps/generated": -886.1224365234375, + "logps/real": -372.5547790527344, + "loss": 0.0165, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -47.8830680847168, + "rewards/margins": 42.46996307373047, + "rewards/real": -5.413102149963379, + "step": 4780 + }, + { + "epoch": 1.53, + "learning_rate": 2.717198056181107e-07, + "logits/generated": 1.85759699344635, + "logits/real": 0.4433859884738922, + "logps/generated": -809.621826171875, + "logps/real": -347.8978271484375, + "loss": 0.0598, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -42.997596740722656, + "rewards/margins": 37.66872024536133, + "rewards/real": -5.328876972198486, + "step": 4790 + }, + { + "epoch": 1.54, + "learning_rate": 2.711271779068389e-07, + "logits/generated": 1.4434945583343506, + "logits/real": 0.22783203423023224, + "logps/generated": -832.7960815429688, + "logps/real": -364.6251525878906, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/generated": -42.317222595214844, + "rewards/margins": 36.227169036865234, + "rewards/real": -6.0900492668151855, + "step": 4800 + }, + { + "epoch": 1.54, + "learning_rate": 2.7053455019556716e-07, + "logits/generated": 1.6734075546264648, + "logits/real": 0.511894702911377, + "logps/generated": -785.6248779296875, + "logps/real": -377.2621154785156, + "loss": 0.0348, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -40.16128158569336, + "rewards/margins": 33.68966293334961, + "rewards/real": -6.47162389755249, + "step": 4810 + }, + { + "epoch": 1.54, + "learning_rate": 2.6994192248429535e-07, + "logits/generated": 2.0791707038879395, + "logits/real": 0.5065538287162781, + "logps/generated": -778.4967651367188, + "logps/real": -372.39385986328125, + "loss": 0.0263, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -39.8204460144043, + "rewards/margins": 34.27782440185547, + "rewards/real": -5.5426201820373535, + "step": 4820 + }, + { + "epoch": 1.55, + "learning_rate": 2.693492947730236e-07, + "logits/generated": 1.8600364923477173, + "logits/real": 0.7488612532615662, + "logps/generated": -782.809814453125, + "logps/real": -328.5298767089844, + "loss": 0.0187, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -39.31589126586914, + "rewards/margins": 34.81025314331055, + "rewards/real": -4.505640983581543, + "step": 4830 + }, + { + "epoch": 1.55, + "learning_rate": 2.6875666706175184e-07, + "logits/generated": 1.9832321405410767, + "logits/real": 0.5802012085914612, + "logps/generated": -748.4049072265625, + "logps/real": -365.29498291015625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -38.709922790527344, + "rewards/margins": 34.65495681762695, + "rewards/real": -4.0549702644348145, + "step": 4840 + }, + { + "epoch": 1.55, + "learning_rate": 2.6816403935048e-07, + "logits/generated": 2.265695810317993, + "logits/real": 0.7832272052764893, + "logps/generated": -808.9696044921875, + "logps/real": -328.1351318359375, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/generated": -40.70402908325195, + "rewards/margins": 36.078895568847656, + "rewards/real": -4.625133037567139, + "step": 4850 + }, + { + "epoch": 1.56, + "learning_rate": 2.675714116392082e-07, + "logits/generated": 2.306931972503662, + "logits/real": 0.560020923614502, + "logps/generated": -856.3054809570312, + "logps/real": -397.19207763671875, + "loss": 0.0086, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -44.37586212158203, + "rewards/margins": 38.50695037841797, + "rewards/real": -5.868910789489746, + "step": 4860 + }, + { + "epoch": 1.56, + "learning_rate": 2.6697878392793646e-07, + "logits/generated": 2.0997326374053955, + "logits/real": 0.7822288274765015, + "logps/generated": -816.8917236328125, + "logps/real": -396.8566589355469, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/generated": -42.01362991333008, + "rewards/margins": 37.04615020751953, + "rewards/real": -4.96747350692749, + "step": 4870 + }, + { + "epoch": 1.56, + "learning_rate": 2.6638615621666465e-07, + "logits/generated": 2.201195478439331, + "logits/real": 0.5996983647346497, + "logps/generated": -790.5069580078125, + "logps/real": -410.57958984375, + "loss": 0.0203, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -40.32919692993164, + "rewards/margins": 34.35948181152344, + "rewards/real": -5.969719886779785, + "step": 4880 + }, + { + "epoch": 1.56, + "learning_rate": 2.657935285053929e-07, + "logits/generated": 2.2264912128448486, + "logits/real": 0.8718013763427734, + "logps/generated": -841.9896240234375, + "logps/real": -379.1394348144531, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/generated": -43.1845703125, + "rewards/margins": 37.3220329284668, + "rewards/real": -5.862529754638672, + "step": 4890 + }, + { + "epoch": 1.57, + "learning_rate": 2.6520090079412113e-07, + "logits/generated": 2.2339439392089844, + "logits/real": 1.0355908870697021, + "logps/generated": -817.2131958007812, + "logps/real": -383.4435119628906, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -42.284645080566406, + "rewards/margins": 36.35279083251953, + "rewards/real": -5.931859970092773, + "step": 4900 + }, + { + "epoch": 1.57, + "learning_rate": 2.646082730828493e-07, + "logits/generated": 2.570598602294922, + "logits/real": 1.2461090087890625, + "logps/generated": -821.4034423828125, + "logps/real": -356.2522277832031, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/generated": -43.50129318237305, + "rewards/margins": 36.500614166259766, + "rewards/real": -7.000680446624756, + "step": 4910 + }, + { + "epoch": 1.57, + "learning_rate": 2.6401564537157756e-07, + "logits/generated": 1.7966744899749756, + "logits/real": 1.0802150964736938, + "logps/generated": -777.5578002929688, + "logps/real": -347.0182189941406, + "loss": 0.0423, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -39.69795227050781, + "rewards/margins": 35.00285720825195, + "rewards/real": -4.695092678070068, + "step": 4920 + }, + { + "epoch": 1.58, + "learning_rate": 2.634230176603058e-07, + "logits/generated": 2.008174180984497, + "logits/real": 0.7534879446029663, + "logps/generated": -756.8148193359375, + "logps/real": -359.62908935546875, + "loss": 0.0446, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -37.1409912109375, + "rewards/margins": 32.44538497924805, + "rewards/real": -4.69560432434082, + "step": 4930 + }, + { + "epoch": 1.58, + "learning_rate": 2.62830389949034e-07, + "logits/generated": 2.101663112640381, + "logits/real": 0.6530848741531372, + "logps/generated": -819.5115356445312, + "logps/real": -343.81329345703125, + "loss": 0.0513, + "rewards/accuracies": 1.0, + "rewards/generated": -41.89363479614258, + "rewards/margins": 36.44382095336914, + "rewards/real": -5.449813365936279, + "step": 4940 + }, + { + "epoch": 1.58, + "learning_rate": 2.6223776223776223e-07, + "logits/generated": 2.007061004638672, + "logits/real": 0.525862991809845, + "logps/generated": -821.1007080078125, + "logps/real": -413.4480895996094, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/generated": -41.377464294433594, + "rewards/margins": 35.79021453857422, + "rewards/real": -5.587252616882324, + "step": 4950 + }, + { + "epoch": 1.59, + "learning_rate": 2.616451345264905e-07, + "logits/generated": 2.005887746810913, + "logits/real": 0.49662095308303833, + "logps/generated": -714.2962036132812, + "logps/real": -372.1548767089844, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/generated": -34.395286560058594, + "rewards/margins": 30.319265365600586, + "rewards/real": -4.076025485992432, + "step": 4960 + }, + { + "epoch": 1.59, + "learning_rate": 2.6105250681521866e-07, + "logits/generated": 2.218632221221924, + "logits/real": 0.10305402427911758, + "logps/generated": -749.445556640625, + "logps/real": -393.5296325683594, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/generated": -36.19743347167969, + "rewards/margins": 31.335338592529297, + "rewards/real": -4.862092018127441, + "step": 4970 + }, + { + "epoch": 1.59, + "learning_rate": 2.604598791039469e-07, + "logits/generated": 1.9723600149154663, + "logits/real": 0.46274954080581665, + "logps/generated": -747.9332275390625, + "logps/real": -446.12237548828125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/generated": -35.806602478027344, + "rewards/margins": 30.62856674194336, + "rewards/real": -5.178038597106934, + "step": 4980 + }, + { + "epoch": 1.6, + "learning_rate": 2.5986725139267515e-07, + "logits/generated": 2.192283868789673, + "logits/real": 0.6728182435035706, + "logps/generated": -806.6732788085938, + "logps/real": -330.36962890625, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/generated": -40.09038543701172, + "rewards/margins": 36.229026794433594, + "rewards/real": -3.861351490020752, + "step": 4990 + }, + { + "epoch": 1.6, + "learning_rate": 2.5927462368140334e-07, + "logits/generated": 2.228334665298462, + "logits/real": 0.18269702792167664, + "logps/generated": -713.8773803710938, + "logps/real": -395.35455322265625, + "loss": 0.0119, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -32.97146224975586, + "rewards/margins": 28.83901023864746, + "rewards/real": -4.132456302642822, + "step": 5000 + }, + { + "epoch": 1.6, + "learning_rate": 2.586819959701316e-07, + "logits/generated": 1.5860804319381714, + "logits/real": 0.3480927646160126, + "logps/generated": -733.3760986328125, + "logps/real": -387.63385009765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -35.62555694580078, + "rewards/margins": 32.33049392700195, + "rewards/real": -3.2950634956359863, + "step": 5010 + }, + { + "epoch": 1.61, + "learning_rate": 2.580893682588598e-07, + "logits/generated": 1.6055123805999756, + "logits/real": 0.15970836579799652, + "logps/generated": -839.0968017578125, + "logps/real": -360.4985046386719, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/generated": -42.22630310058594, + "rewards/margins": 37.99366760253906, + "rewards/real": -4.232638359069824, + "step": 5020 + }, + { + "epoch": 1.61, + "learning_rate": 2.57496740547588e-07, + "logits/generated": 1.7855665683746338, + "logits/real": 0.1675606667995453, + "logps/generated": -744.6808471679688, + "logps/real": -343.895751953125, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/generated": -35.100830078125, + "rewards/margins": 32.6957893371582, + "rewards/real": -2.405043840408325, + "step": 5030 + }, + { + "epoch": 1.61, + "learning_rate": 2.569041128363162e-07, + "logits/generated": 1.64817214012146, + "logits/real": 0.07610142976045609, + "logps/generated": -732.545654296875, + "logps/real": -350.29962158203125, + "loss": 0.003, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -34.60157012939453, + "rewards/margins": 32.50680160522461, + "rewards/real": -2.094759941101074, + "step": 5040 + }, + { + "epoch": 1.62, + "learning_rate": 2.563114851250444e-07, + "logits/generated": 1.7408607006072998, + "logits/real": 0.3170376420021057, + "logps/generated": -762.3482055664062, + "logps/real": -335.9871826171875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/generated": -37.032527923583984, + "rewards/margins": 34.986961364746094, + "rewards/real": -2.045567512512207, + "step": 5050 + }, + { + "epoch": 1.62, + "learning_rate": 2.5571885741377263e-07, + "logits/generated": 2.073035717010498, + "logits/real": 0.3458942770957947, + "logps/generated": -802.7797241210938, + "logps/real": -336.88470458984375, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/generated": -40.494102478027344, + "rewards/margins": 37.79814147949219, + "rewards/real": -2.6959633827209473, + "step": 5060 + }, + { + "epoch": 1.62, + "learning_rate": 2.5512622970250087e-07, + "logits/generated": 2.13322114944458, + "logits/real": 0.41790610551834106, + "logps/generated": -866.7073974609375, + "logps/real": -332.11383056640625, + "loss": 0.005, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -43.577178955078125, + "rewards/margins": 40.421714782714844, + "rewards/real": -3.155463457107544, + "step": 5070 + }, + { + "epoch": 1.63, + "learning_rate": 2.5453360199122906e-07, + "logits/generated": 2.54278826713562, + "logits/real": 0.6949285268783569, + "logps/generated": -735.1064453125, + "logps/real": -364.2638244628906, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/generated": -35.53596115112305, + "rewards/margins": 31.807302474975586, + "rewards/real": -3.7286624908447266, + "step": 5080 + }, + { + "epoch": 1.63, + "learning_rate": 2.539409742799573e-07, + "logits/generated": 2.21581768989563, + "logits/real": 0.4537831246852875, + "logps/generated": -821.2398681640625, + "logps/real": -365.47125244140625, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/generated": -40.993080139160156, + "rewards/margins": 36.99878692626953, + "rewards/real": -3.994288921356201, + "step": 5090 + }, + { + "epoch": 1.63, + "learning_rate": 2.5334834656868554e-07, + "logits/generated": 2.1372594833374023, + "logits/real": 0.611926257610321, + "logps/generated": -754.3529052734375, + "logps/real": -342.5974426269531, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/generated": -37.834938049316406, + "rewards/margins": 32.91936492919922, + "rewards/real": -4.915571689605713, + "step": 5100 + }, + { + "epoch": 1.64, + "learning_rate": 2.5275571885741373e-07, + "logits/generated": 1.6273130178451538, + "logits/real": 0.826530933380127, + "logps/generated": -734.1217651367188, + "logps/real": -382.6690368652344, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/generated": -36.935123443603516, + "rewards/margins": 33.39425277709961, + "rewards/real": -3.540864944458008, + "step": 5110 + }, + { + "epoch": 1.64, + "learning_rate": 2.52163091146142e-07, + "logits/generated": 2.3816351890563965, + "logits/real": 0.943571925163269, + "logps/generated": -843.9786376953125, + "logps/real": -411.3369140625, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/generated": -44.03273010253906, + "rewards/margins": 38.141441345214844, + "rewards/real": -5.891286849975586, + "step": 5120 + }, + { + "epoch": 1.64, + "learning_rate": 2.515704634348702e-07, + "logits/generated": 2.5418403148651123, + "logits/real": 1.0677857398986816, + "logps/generated": -848.6131591796875, + "logps/real": -366.3487548828125, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/generated": -45.4301643371582, + "rewards/margins": 39.80161666870117, + "rewards/real": -5.628545761108398, + "step": 5130 + }, + { + "epoch": 1.64, + "learning_rate": 2.509778357235984e-07, + "logits/generated": 1.6727020740509033, + "logits/real": 0.3576027750968933, + "logps/generated": -851.21728515625, + "logps/real": -326.9202880859375, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/generated": -43.89760208129883, + "rewards/margins": 40.6236457824707, + "rewards/real": -3.273960828781128, + "step": 5140 + }, + { + "epoch": 1.65, + "learning_rate": 2.5038520801232665e-07, + "logits/generated": 2.0805184841156006, + "logits/real": 0.1397530734539032, + "logps/generated": -761.8963012695312, + "logps/real": -356.8018493652344, + "loss": 0.0084, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -38.69506072998047, + "rewards/margins": 36.885677337646484, + "rewards/real": -1.8093852996826172, + "step": 5150 + }, + { + "epoch": 1.65, + "learning_rate": 2.497925803010549e-07, + "logits/generated": 1.626690149307251, + "logits/real": -0.054851166903972626, + "logps/generated": -776.6763305664062, + "logps/real": -346.9750061035156, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/generated": -38.96327209472656, + "rewards/margins": 36.2351188659668, + "rewards/real": -2.7281482219696045, + "step": 5160 + }, + { + "epoch": 1.65, + "learning_rate": 2.491999525897831e-07, + "logits/generated": 1.5822933912277222, + "logits/real": -0.24726232886314392, + "logps/generated": -781.2418212890625, + "logps/real": -323.7926330566406, + "loss": 0.0177, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -37.93315887451172, + "rewards/margins": 36.57786560058594, + "rewards/real": -1.3552961349487305, + "step": 5170 + }, + { + "epoch": 1.66, + "learning_rate": 2.486073248785113e-07, + "logits/generated": 1.8860301971435547, + "logits/real": 0.6691206693649292, + "logps/generated": -794.41845703125, + "logps/real": -324.8614807128906, + "loss": 0.0588, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -40.171295166015625, + "rewards/margins": 36.86614227294922, + "rewards/real": -3.3051559925079346, + "step": 5180 + }, + { + "epoch": 1.66, + "learning_rate": 2.480146971672395e-07, + "logits/generated": 2.0925936698913574, + "logits/real": 0.9095395803451538, + "logps/generated": -869.6298828125, + "logps/real": -373.828369140625, + "loss": 0.0081, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -46.91425323486328, + "rewards/margins": 40.90207290649414, + "rewards/real": -6.012181758880615, + "step": 5190 + }, + { + "epoch": 1.66, + "learning_rate": 2.4742206945596775e-07, + "logits/generated": 2.7456932067871094, + "logits/real": 0.4985496401786804, + "logps/generated": -859.5794067382812, + "logps/real": -390.0246276855469, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/generated": -46.29896926879883, + "rewards/margins": 40.370887756347656, + "rewards/real": -5.92807674407959, + "step": 5200 + }, + { + "epoch": 1.67, + "learning_rate": 2.46829441744696e-07, + "logits/generated": 2.1732583045959473, + "logits/real": 0.4838520586490631, + "logps/generated": -844.9871215820312, + "logps/real": -369.20745849609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -45.23528289794922, + "rewards/margins": 39.98804473876953, + "rewards/real": -5.247241020202637, + "step": 5210 + }, + { + "epoch": 1.67, + "learning_rate": 2.462368140334242e-07, + "logits/generated": 2.544534206390381, + "logits/real": 0.5111418962478638, + "logps/generated": -835.80712890625, + "logps/real": -378.8222961425781, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/generated": -44.27240753173828, + "rewards/margins": 39.94426727294922, + "rewards/real": -4.3281450271606445, + "step": 5220 + }, + { + "epoch": 1.67, + "learning_rate": 2.456441863221524e-07, + "logits/generated": 2.4491024017333984, + "logits/real": 0.6502278447151184, + "logps/generated": -811.8794555664062, + "logps/real": -355.411376953125, + "loss": 0.0431, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -42.56116485595703, + "rewards/margins": 37.1070671081543, + "rewards/real": -5.454099655151367, + "step": 5230 + }, + { + "epoch": 1.68, + "learning_rate": 2.4505155861088067e-07, + "logits/generated": 2.266544818878174, + "logits/real": 0.6723452806472778, + "logps/generated": -750.0426025390625, + "logps/real": -373.60003662109375, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/generated": -37.43736267089844, + "rewards/margins": 31.519084930419922, + "rewards/real": -5.918280601501465, + "step": 5240 + }, + { + "epoch": 1.68, + "learning_rate": 2.4445893089960885e-07, + "logits/generated": 2.5079264640808105, + "logits/real": 0.6131478548049927, + "logps/generated": -853.6188354492188, + "logps/real": -425.047607421875, + "loss": 0.0194, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -44.624473571777344, + "rewards/margins": 39.093013763427734, + "rewards/real": -5.53145170211792, + "step": 5250 + }, + { + "epoch": 1.68, + "learning_rate": 2.4386630318833704e-07, + "logits/generated": 2.1609644889831543, + "logits/real": 0.6694498062133789, + "logps/generated": -731.3389892578125, + "logps/real": -364.8908386230469, + "loss": 0.0027, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -36.30664825439453, + "rewards/margins": 31.676645278930664, + "rewards/real": -4.630007266998291, + "step": 5260 + }, + { + "epoch": 1.69, + "learning_rate": 2.432736754770653e-07, + "logits/generated": 2.7540597915649414, + "logits/real": 0.8866742253303528, + "logps/generated": -916.9837646484375, + "logps/real": -382.40728759765625, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/generated": -48.979942321777344, + "rewards/margins": 43.1822509765625, + "rewards/real": -5.797692775726318, + "step": 5270 + }, + { + "epoch": 1.69, + "learning_rate": 2.4268104776579353e-07, + "logits/generated": 1.9669885635375977, + "logits/real": 0.5683959722518921, + "logps/generated": -699.2192993164062, + "logps/real": -381.6709289550781, + "loss": 0.0253, + "rewards/accuracies": 1.0, + "rewards/generated": -33.899147033691406, + "rewards/margins": 28.7582950592041, + "rewards/real": -5.140851020812988, + "step": 5280 + }, + { + "epoch": 1.69, + "learning_rate": 2.420884200545217e-07, + "logits/generated": 1.9316256046295166, + "logits/real": 0.5754297375679016, + "logps/generated": -809.5108642578125, + "logps/real": -327.8341369628906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -40.91669464111328, + "rewards/margins": 37.33782196044922, + "rewards/real": -3.5788674354553223, + "step": 5290 + }, + { + "epoch": 1.7, + "learning_rate": 2.4149579234324996e-07, + "logits/generated": 2.126364231109619, + "logits/real": 0.44575291872024536, + "logps/generated": -816.7424926757812, + "logps/real": -332.91131591796875, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/generated": -42.222469329833984, + "rewards/margins": 38.331886291503906, + "rewards/real": -3.890582323074341, + "step": 5300 + }, + { + "epoch": 1.7, + "learning_rate": 2.409031646319782e-07, + "logits/generated": 2.043090343475342, + "logits/real": 0.42176419496536255, + "logps/generated": -790.253662109375, + "logps/real": -346.35931396484375, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/generated": -38.56426239013672, + "rewards/margins": 35.83377456665039, + "rewards/real": -2.7304892539978027, + "step": 5310 + }, + { + "epoch": 1.7, + "learning_rate": 2.403105369207064e-07, + "logits/generated": 2.138488531112671, + "logits/real": 0.9766354560852051, + "logps/generated": -814.6067504882812, + "logps/real": -329.17364501953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -41.24921798706055, + "rewards/margins": 37.05216598510742, + "rewards/real": -4.197047710418701, + "step": 5320 + }, + { + "epoch": 1.71, + "learning_rate": 2.3971790920943463e-07, + "logits/generated": 2.2863852977752686, + "logits/real": 0.5989899635314941, + "logps/generated": -814.4603271484375, + "logps/real": -361.1416015625, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/generated": -40.803672790527344, + "rewards/margins": 36.39472961425781, + "rewards/real": -4.408946514129639, + "step": 5330 + }, + { + "epoch": 1.71, + "learning_rate": 2.3912528149816287e-07, + "logits/generated": 2.3674325942993164, + "logits/real": 0.9967582821846008, + "logps/generated": -805.5167846679688, + "logps/real": -324.6876525878906, + "loss": 0.0161, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -42.41180419921875, + "rewards/margins": 38.46437072753906, + "rewards/real": -3.9474329948425293, + "step": 5340 + }, + { + "epoch": 1.71, + "learning_rate": 2.3853265378689106e-07, + "logits/generated": 2.268728017807007, + "logits/real": 0.6285626292228699, + "logps/generated": -808.87109375, + "logps/real": -392.3074035644531, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/generated": -41.41926193237305, + "rewards/margins": 37.2325553894043, + "rewards/real": -4.186707973480225, + "step": 5350 + }, + { + "epoch": 1.72, + "learning_rate": 2.3794002607561928e-07, + "logits/generated": 2.5093436241149902, + "logits/real": 0.8249126672744751, + "logps/generated": -838.4953002929688, + "logps/real": -426.6451721191406, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/generated": -43.6255989074707, + "rewards/margins": 39.069976806640625, + "rewards/real": -4.555621147155762, + "step": 5360 + }, + { + "epoch": 1.72, + "learning_rate": 2.373473983643475e-07, + "logits/generated": 2.685079336166382, + "logits/real": 1.4434565305709839, + "logps/generated": -856.2312622070312, + "logps/real": -302.8211669921875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/generated": -47.4712028503418, + "rewards/margins": 42.512794494628906, + "rewards/real": -4.958414554595947, + "step": 5370 + }, + { + "epoch": 1.72, + "learning_rate": 2.3675477065307573e-07, + "logits/generated": 2.7511277198791504, + "logits/real": 0.9180728793144226, + "logps/generated": -831.4625854492188, + "logps/real": -413.7120056152344, + "loss": 0.0082, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -44.00263977050781, + "rewards/margins": 38.33123016357422, + "rewards/real": -5.67141056060791, + "step": 5380 + }, + { + "epoch": 1.72, + "learning_rate": 2.3616214294180395e-07, + "logits/generated": 3.376774549484253, + "logits/real": 1.1215420961380005, + "logps/generated": -803.61572265625, + "logps/real": -338.5130615234375, + "loss": 0.0167, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -41.036102294921875, + "rewards/margins": 38.5570068359375, + "rewards/real": -2.479102611541748, + "step": 5390 + }, + { + "epoch": 1.73, + "learning_rate": 2.3556951523053216e-07, + "logits/generated": 3.0865302085876465, + "logits/real": 1.2384517192840576, + "logps/generated": -826.81982421875, + "logps/real": -316.6307067871094, + "loss": 0.0232, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -42.875328063964844, + "rewards/margins": 39.101463317871094, + "rewards/real": -3.7738614082336426, + "step": 5400 + }, + { + "epoch": 1.73, + "learning_rate": 2.349768875192604e-07, + "logits/generated": 3.274893283843994, + "logits/real": 1.1233675479888916, + "logps/generated": -907.5140380859375, + "logps/real": -352.7439270019531, + "loss": 0.0082, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -50.2931022644043, + "rewards/margins": 46.628196716308594, + "rewards/real": -3.6648964881896973, + "step": 5410 + }, + { + "epoch": 1.73, + "learning_rate": 2.3438425980798862e-07, + "logits/generated": 3.189450740814209, + "logits/real": 1.2092788219451904, + "logps/generated": -851.10205078125, + "logps/real": -388.83795166015625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -45.47392654418945, + "rewards/margins": 41.89067840576172, + "rewards/real": -3.583237886428833, + "step": 5420 + }, + { + "epoch": 1.74, + "learning_rate": 2.3379163209671684e-07, + "logits/generated": 2.926393985748291, + "logits/real": 1.3891630172729492, + "logps/generated": -914.0875244140625, + "logps/real": -353.0323791503906, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/generated": -48.88922882080078, + "rewards/margins": 45.91227340698242, + "rewards/real": -2.976952314376831, + "step": 5430 + }, + { + "epoch": 1.74, + "learning_rate": 2.3319900438544505e-07, + "logits/generated": 2.6641769409179688, + "logits/real": 1.5170855522155762, + "logps/generated": -818.1910400390625, + "logps/real": -356.567138671875, + "loss": 0.0164, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -42.017494201660156, + "rewards/margins": 37.39936828613281, + "rewards/real": -4.618124961853027, + "step": 5440 + }, + { + "epoch": 1.74, + "learning_rate": 2.3260637667417327e-07, + "logits/generated": 3.466313600540161, + "logits/real": 1.1704745292663574, + "logps/generated": -768.80810546875, + "logps/real": -351.5523681640625, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/generated": -41.95710372924805, + "rewards/margins": 36.66460418701172, + "rewards/real": -5.2924981117248535, + "step": 5450 + }, + { + "epoch": 1.75, + "learning_rate": 2.3201374896290148e-07, + "logits/generated": 2.5862226486206055, + "logits/real": 0.6317356824874878, + "logps/generated": -752.8883666992188, + "logps/real": -329.9690856933594, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/generated": -36.86394500732422, + "rewards/margins": 34.974632263183594, + "rewards/real": -1.889310598373413, + "step": 5460 + }, + { + "epoch": 1.75, + "learning_rate": 2.3142112125162973e-07, + "logits/generated": 2.7336578369140625, + "logits/real": 0.7334250211715698, + "logps/generated": -734.8052978515625, + "logps/real": -357.79412841796875, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/generated": -35.966670989990234, + "rewards/margins": 33.897769927978516, + "rewards/real": -2.068906545639038, + "step": 5470 + }, + { + "epoch": 1.75, + "learning_rate": 2.3082849354035794e-07, + "logits/generated": 3.1192550659179688, + "logits/real": 0.8802781105041504, + "logps/generated": -740.2861328125, + "logps/real": -380.763427734375, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/generated": -37.469337463378906, + "rewards/margins": 35.78041076660156, + "rewards/real": -1.6889280080795288, + "step": 5480 + }, + { + "epoch": 1.76, + "learning_rate": 2.3023586582908616e-07, + "logits/generated": 2.9946959018707275, + "logits/real": 1.2346898317337036, + "logps/generated": -813.5284423828125, + "logps/real": -338.32196044921875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/generated": -41.38557815551758, + "rewards/margins": 39.37213897705078, + "rewards/real": -2.0134406089782715, + "step": 5490 + }, + { + "epoch": 1.76, + "learning_rate": 2.296432381178144e-07, + "logits/generated": 2.4427175521850586, + "logits/real": 1.0994888544082642, + "logps/generated": -835.2789306640625, + "logps/real": -315.66058349609375, + "loss": 0.0383, + "rewards/accuracies": 1.0, + "rewards/generated": -43.05907440185547, + "rewards/margins": 40.51990509033203, + "rewards/real": -2.5391736030578613, + "step": 5500 + }, + { + "epoch": 1.76, + "learning_rate": 2.2905061040654261e-07, + "logits/generated": 2.4791624546051025, + "logits/real": 0.6451729536056519, + "logps/generated": -777.4930419921875, + "logps/real": -379.1453857421875, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/generated": -38.13090133666992, + "rewards/margins": 36.711158752441406, + "rewards/real": -1.4197418689727783, + "step": 5510 + }, + { + "epoch": 1.77, + "learning_rate": 2.284579826952708e-07, + "logits/generated": 2.8448033332824707, + "logits/real": 0.7822138071060181, + "logps/generated": -798.93115234375, + "logps/real": -339.82275390625, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/generated": -40.21331024169922, + "rewards/margins": 38.072349548339844, + "rewards/real": -2.1409621238708496, + "step": 5520 + }, + { + "epoch": 1.77, + "learning_rate": 2.2786535498399902e-07, + "logits/generated": 3.2314224243164062, + "logits/real": 1.2801387310028076, + "logps/generated": -813.0155029296875, + "logps/real": -379.53485107421875, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/generated": -42.63431167602539, + "rewards/margins": 39.404544830322266, + "rewards/real": -3.2297680377960205, + "step": 5530 + }, + { + "epoch": 1.77, + "learning_rate": 2.2727272727272726e-07, + "logits/generated": 3.7130725383758545, + "logits/real": 1.5691579580307007, + "logps/generated": -829.2418823242188, + "logps/real": -378.4855651855469, + "loss": 0.0273, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -43.16777801513672, + "rewards/margins": 38.785911560058594, + "rewards/real": -4.38187313079834, + "step": 5540 + }, + { + "epoch": 1.78, + "learning_rate": 2.2668009956145548e-07, + "logits/generated": 3.3218631744384766, + "logits/real": 2.25030517578125, + "logps/generated": -856.4890747070312, + "logps/real": -371.36651611328125, + "loss": 0.0578, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -46.51011657714844, + "rewards/margins": 40.797462463378906, + "rewards/real": -5.712653160095215, + "step": 5550 + }, + { + "epoch": 1.78, + "learning_rate": 2.260874718501837e-07, + "logits/generated": 3.470602035522461, + "logits/real": 1.9761238098144531, + "logps/generated": -806.2717895507812, + "logps/real": -401.69171142578125, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/generated": -42.60056686401367, + "rewards/margins": 37.1119270324707, + "rewards/real": -5.488642692565918, + "step": 5560 + }, + { + "epoch": 1.78, + "learning_rate": 2.2549484413891193e-07, + "logits/generated": 3.0738184452056885, + "logits/real": 2.1658029556274414, + "logps/generated": -887.8929443359375, + "logps/real": -368.98480224609375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -48.385704040527344, + "rewards/margins": 43.07398223876953, + "rewards/real": -5.311723709106445, + "step": 5570 + }, + { + "epoch": 1.79, + "learning_rate": 2.2490221642764015e-07, + "logits/generated": 3.19155216217041, + "logits/real": 2.1180126667022705, + "logps/generated": -903.9332885742188, + "logps/real": -400.9152526855469, + "loss": 0.0191, + "rewards/accuracies": 1.0, + "rewards/generated": -49.44337844848633, + "rewards/margins": 44.32373809814453, + "rewards/real": -5.119642734527588, + "step": 5580 + }, + { + "epoch": 1.79, + "learning_rate": 2.2430958871636836e-07, + "logits/generated": 3.5626659393310547, + "logits/real": 2.015824794769287, + "logps/generated": -892.6324462890625, + "logps/real": -367.1024475097656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -50.268959045410156, + "rewards/margins": 44.50008010864258, + "rewards/real": -5.7688775062561035, + "step": 5590 + }, + { + "epoch": 1.79, + "learning_rate": 2.237169610050966e-07, + "logits/generated": 3.5565497875213623, + "logits/real": 2.07562255859375, + "logps/generated": -913.05224609375, + "logps/real": -386.5362243652344, + "loss": 0.0203, + "rewards/accuracies": 1.0, + "rewards/generated": -52.48082733154297, + "rewards/margins": 46.69334030151367, + "rewards/real": -5.787491321563721, + "step": 5600 + }, + { + "epoch": 1.8, + "learning_rate": 2.231243332938248e-07, + "logits/generated": 3.433581829071045, + "logits/real": 1.7816625833511353, + "logps/generated": -827.8282470703125, + "logps/real": -381.58648681640625, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -45.09467315673828, + "rewards/margins": 39.211341857910156, + "rewards/real": -5.883331298828125, + "step": 5610 + }, + { + "epoch": 1.8, + "learning_rate": 2.22531705582553e-07, + "logits/generated": 3.4961745738983154, + "logits/real": 2.1615192890167236, + "logps/generated": -926.9774169921875, + "logps/real": -351.229736328125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/generated": -54.567840576171875, + "rewards/margins": 48.61988830566406, + "rewards/real": -5.9479498863220215, + "step": 5620 + }, + { + "epoch": 1.8, + "learning_rate": 2.2193907787128125e-07, + "logits/generated": 3.8028595447540283, + "logits/real": 1.9373070001602173, + "logps/generated": -887.8173828125, + "logps/real": -427.7560119628906, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/generated": -49.20186233520508, + "rewards/margins": 42.82405471801758, + "rewards/real": -6.377808094024658, + "step": 5630 + }, + { + "epoch": 1.8, + "learning_rate": 2.2134645016000947e-07, + "logits/generated": 3.5432114601135254, + "logits/real": 2.064028739929199, + "logps/generated": -952.5003662109375, + "logps/real": -382.4255065917969, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/generated": -55.820465087890625, + "rewards/margins": 48.72205352783203, + "rewards/real": -7.09841251373291, + "step": 5640 + }, + { + "epoch": 1.81, + "learning_rate": 2.2075382244873768e-07, + "logits/generated": 3.588714122772217, + "logits/real": 2.4020583629608154, + "logps/generated": -1001.1809692382812, + "logps/real": -391.5857849121094, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/generated": -59.59962844848633, + "rewards/margins": 52.501380920410156, + "rewards/real": -7.0982489585876465, + "step": 5650 + }, + { + "epoch": 1.81, + "learning_rate": 2.2016119473746592e-07, + "logits/generated": 3.438514232635498, + "logits/real": 1.7384579181671143, + "logps/generated": -796.9383544921875, + "logps/real": -426.78216552734375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -43.00736999511719, + "rewards/margins": 36.70927810668945, + "rewards/real": -6.298093318939209, + "step": 5660 + }, + { + "epoch": 1.81, + "learning_rate": 2.1956856702619414e-07, + "logits/generated": 3.048379421234131, + "logits/real": 1.5780932903289795, + "logps/generated": -797.85595703125, + "logps/real": -373.5823669433594, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/generated": -42.487648010253906, + "rewards/margins": 37.081180572509766, + "rewards/real": -5.406468391418457, + "step": 5670 + }, + { + "epoch": 1.82, + "learning_rate": 2.1897593931492236e-07, + "logits/generated": 3.206472396850586, + "logits/real": 1.849116563796997, + "logps/generated": -904.5208129882812, + "logps/real": -415.0508728027344, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -50.7608528137207, + "rewards/margins": 44.02716827392578, + "rewards/real": -6.7336859703063965, + "step": 5680 + }, + { + "epoch": 1.82, + "learning_rate": 2.183833116036506e-07, + "logits/generated": 3.2869651317596436, + "logits/real": 2.0496938228607178, + "logps/generated": -878.7900390625, + "logps/real": -352.3899841308594, + "loss": 0.0184, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -48.67104721069336, + "rewards/margins": 43.60902786254883, + "rewards/real": -5.062024116516113, + "step": 5690 + }, + { + "epoch": 1.82, + "learning_rate": 2.1779068389237879e-07, + "logits/generated": 3.1695003509521484, + "logits/real": 1.8025085926055908, + "logps/generated": -850.7145385742188, + "logps/real": -353.2436828613281, + "loss": 0.0114, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -45.12095260620117, + "rewards/margins": 40.61774444580078, + "rewards/real": -4.503202438354492, + "step": 5700 + }, + { + "epoch": 1.83, + "learning_rate": 2.17198056181107e-07, + "logits/generated": 3.089661121368408, + "logits/real": 2.070514678955078, + "logps/generated": -927.8453369140625, + "logps/real": -398.13427734375, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/generated": -51.43744659423828, + "rewards/margins": 46.39044952392578, + "rewards/real": -5.046995162963867, + "step": 5710 + }, + { + "epoch": 1.83, + "learning_rate": 2.1660542846983524e-07, + "logits/generated": 3.114896535873413, + "logits/real": 1.9745397567749023, + "logps/generated": -781.8901977539062, + "logps/real": -356.8275451660156, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/generated": -42.2356071472168, + "rewards/margins": 37.365867614746094, + "rewards/real": -4.8697357177734375, + "step": 5720 + }, + { + "epoch": 1.83, + "learning_rate": 2.1601280075856346e-07, + "logits/generated": 3.693910598754883, + "logits/real": 2.225154161453247, + "logps/generated": -930.9620971679688, + "logps/real": -383.722412109375, + "loss": 0.0176, + "rewards/accuracies": 1.0, + "rewards/generated": -52.1117057800293, + "rewards/margins": 46.126792907714844, + "rewards/real": -5.984915256500244, + "step": 5730 + }, + { + "epoch": 1.84, + "learning_rate": 2.1542017304729167e-07, + "logits/generated": 3.4360313415527344, + "logits/real": 2.171807050704956, + "logps/generated": -856.5650634765625, + "logps/real": -377.8895263671875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/generated": -46.9614372253418, + "rewards/margins": 41.58063507080078, + "rewards/real": -5.380807399749756, + "step": 5740 + }, + { + "epoch": 1.84, + "learning_rate": 2.1482754533601992e-07, + "logits/generated": 3.5638012886047363, + "logits/real": 1.7955198287963867, + "logps/generated": -908.9310302734375, + "logps/real": -368.2813415527344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -50.34418487548828, + "rewards/margins": 46.025291442871094, + "rewards/real": -4.318894863128662, + "step": 5750 + }, + { + "epoch": 1.84, + "learning_rate": 2.1423491762474813e-07, + "logits/generated": 3.2547402381896973, + "logits/real": 2.044173240661621, + "logps/generated": -988.8541870117188, + "logps/real": -356.18853759765625, + "loss": 0.0124, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -57.65973663330078, + "rewards/margins": 52.295738220214844, + "rewards/real": -5.363998889923096, + "step": 5760 + }, + { + "epoch": 1.85, + "learning_rate": 2.1364228991347635e-07, + "logits/generated": 3.638179302215576, + "logits/real": 1.7333250045776367, + "logps/generated": -824.5289306640625, + "logps/real": -428.39642333984375, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/generated": -43.69512176513672, + "rewards/margins": 38.19672393798828, + "rewards/real": -5.498401165008545, + "step": 5770 + }, + { + "epoch": 1.85, + "learning_rate": 2.130496622022046e-07, + "logits/generated": 3.4846606254577637, + "logits/real": 1.9787170886993408, + "logps/generated": -798.28369140625, + "logps/real": -341.7613220214844, + "loss": 0.0505, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -42.80746841430664, + "rewards/margins": 38.155216217041016, + "rewards/real": -4.652247428894043, + "step": 5780 + }, + { + "epoch": 1.85, + "learning_rate": 2.1245703449093278e-07, + "logits/generated": 3.268078327178955, + "logits/real": 2.0589845180511475, + "logps/generated": -847.8894653320312, + "logps/real": -384.1971740722656, + "loss": 0.0637, + "rewards/accuracies": 1.0, + "rewards/generated": -45.46085739135742, + "rewards/margins": 39.66806411743164, + "rewards/real": -5.792795181274414, + "step": 5790 + }, + { + "epoch": 1.86, + "learning_rate": 2.11864406779661e-07, + "logits/generated": 3.5666251182556152, + "logits/real": 2.2275381088256836, + "logps/generated": -968.0863037109375, + "logps/real": -347.9147033691406, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/generated": -55.46030807495117, + "rewards/margins": 48.82093048095703, + "rewards/real": -6.639379978179932, + "step": 5800 + }, + { + "epoch": 1.86, + "learning_rate": 2.1127177906838923e-07, + "logits/generated": 3.158949375152588, + "logits/real": 2.068129062652588, + "logps/generated": -872.3759765625, + "logps/real": -394.13427734375, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/generated": -47.67719268798828, + "rewards/margins": 41.926361083984375, + "rewards/real": -5.750826835632324, + "step": 5810 + }, + { + "epoch": 1.86, + "learning_rate": 2.1067915135711745e-07, + "logits/generated": 3.339181900024414, + "logits/real": 2.1461918354034424, + "logps/generated": -872.42578125, + "logps/real": -356.68890380859375, + "loss": 0.0065, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -48.21758270263672, + "rewards/margins": 41.57461929321289, + "rewards/real": -6.6429643630981445, + "step": 5820 + }, + { + "epoch": 1.87, + "learning_rate": 2.1008652364584567e-07, + "logits/generated": 2.7967050075531006, + "logits/real": 2.063382625579834, + "logps/generated": -861.4765625, + "logps/real": -397.9095764160156, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/generated": -47.75603103637695, + "rewards/margins": 40.92485809326172, + "rewards/real": -6.831167697906494, + "step": 5830 + }, + { + "epoch": 1.87, + "learning_rate": 2.0949389593457388e-07, + "logits/generated": 3.5979182720184326, + "logits/real": 2.320913791656494, + "logps/generated": -890.7649536132812, + "logps/real": -396.95733642578125, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/generated": -50.28728485107422, + "rewards/margins": 42.32263946533203, + "rewards/real": -7.964641571044922, + "step": 5840 + }, + { + "epoch": 1.87, + "learning_rate": 2.0890126822330212e-07, + "logits/generated": 3.5564308166503906, + "logits/real": 2.129290819168091, + "logps/generated": -887.2586669921875, + "logps/real": -397.1784973144531, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/generated": -48.73828887939453, + "rewards/margins": 40.97037887573242, + "rewards/real": -7.76791524887085, + "step": 5850 + }, + { + "epoch": 1.88, + "learning_rate": 2.0830864051203034e-07, + "logits/generated": 3.1811208724975586, + "logits/real": 2.288074016571045, + "logps/generated": -960.1119995117188, + "logps/real": -390.6427917480469, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/generated": -54.5243034362793, + "rewards/margins": 47.607810974121094, + "rewards/real": -6.916497230529785, + "step": 5860 + }, + { + "epoch": 1.88, + "learning_rate": 2.0771601280075855e-07, + "logits/generated": 3.8049683570861816, + "logits/real": 2.2913711071014404, + "logps/generated": -888.8303833007812, + "logps/real": -407.41241455078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -50.86738204956055, + "rewards/margins": 42.75567626953125, + "rewards/real": -8.111705780029297, + "step": 5870 + }, + { + "epoch": 1.88, + "learning_rate": 2.0712338508948677e-07, + "logits/generated": 3.279576539993286, + "logits/real": 2.593757152557373, + "logps/generated": -971.8060302734375, + "logps/real": -401.72796630859375, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/generated": -56.71451950073242, + "rewards/margins": 48.520233154296875, + "rewards/real": -8.194283485412598, + "step": 5880 + }, + { + "epoch": 1.88, + "learning_rate": 2.0653075737821498e-07, + "logits/generated": 3.639120578765869, + "logits/real": 2.5512237548828125, + "logps/generated": -865.2919921875, + "logps/real": -404.64776611328125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/generated": -48.085731506347656, + "rewards/margins": 40.82741165161133, + "rewards/real": -7.25832462310791, + "step": 5890 + }, + { + "epoch": 1.89, + "learning_rate": 2.059381296669432e-07, + "logits/generated": 3.123408079147339, + "logits/real": 2.0407650470733643, + "logps/generated": -863.9957275390625, + "logps/real": -385.7313537597656, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/generated": -48.116233825683594, + "rewards/margins": 40.49714660644531, + "rewards/real": -7.6190900802612305, + "step": 5900 + }, + { + "epoch": 1.89, + "learning_rate": 2.0534550195567144e-07, + "logits/generated": 3.9081223011016846, + "logits/real": 2.101438045501709, + "logps/generated": -935.6234130859375, + "logps/real": -417.54400634765625, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/generated": -54.65632247924805, + "rewards/margins": 47.10184097290039, + "rewards/real": -7.554482936859131, + "step": 5910 + }, + { + "epoch": 1.89, + "learning_rate": 2.0475287424439966e-07, + "logits/generated": 4.165920257568359, + "logits/real": 2.4171690940856934, + "logps/generated": -1005.3514404296875, + "logps/real": -408.0506896972656, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/generated": -59.3927116394043, + "rewards/margins": 50.65937805175781, + "rewards/real": -8.73333740234375, + "step": 5920 + }, + { + "epoch": 1.9, + "learning_rate": 2.0416024653312787e-07, + "logits/generated": 3.847409725189209, + "logits/real": 2.7022252082824707, + "logps/generated": -952.6790161132812, + "logps/real": -365.72772216796875, + "loss": 0.0046, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -56.093994140625, + "rewards/margins": 48.033348083496094, + "rewards/real": -8.06064224243164, + "step": 5930 + }, + { + "epoch": 1.9, + "learning_rate": 2.0356761882185611e-07, + "logits/generated": 3.6713790893554688, + "logits/real": 1.9362661838531494, + "logps/generated": -941.9142456054688, + "logps/real": -410.051513671875, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/generated": -56.05419921875, + "rewards/margins": 48.42200469970703, + "rewards/real": -7.632190704345703, + "step": 5940 + }, + { + "epoch": 1.9, + "learning_rate": 2.0297499111058433e-07, + "logits/generated": 3.6606521606445312, + "logits/real": 1.7234245538711548, + "logps/generated": -911.0177001953125, + "logps/real": -489.6560974121094, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/generated": -52.9130973815918, + "rewards/margins": 44.15934371948242, + "rewards/real": -8.753759384155273, + "step": 5950 + }, + { + "epoch": 1.91, + "learning_rate": 2.0238236339931255e-07, + "logits/generated": 4.219864845275879, + "logits/real": 2.23287034034729, + "logps/generated": -1063.9403076171875, + "logps/real": -397.0310974121094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -65.46912384033203, + "rewards/margins": 56.39514923095703, + "rewards/real": -9.073973655700684, + "step": 5960 + }, + { + "epoch": 1.91, + "learning_rate": 2.0178973568804076e-07, + "logits/generated": 3.6329212188720703, + "logits/real": 2.1656136512756348, + "logps/generated": -952.6925659179688, + "logps/real": -447.70623779296875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/generated": -57.937950134277344, + "rewards/margins": 49.494651794433594, + "rewards/real": -8.443305015563965, + "step": 5970 + }, + { + "epoch": 1.91, + "learning_rate": 2.0119710797676898e-07, + "logits/generated": 3.5467257499694824, + "logits/real": 2.4322285652160645, + "logps/generated": -973.0812377929688, + "logps/real": -360.7705993652344, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/generated": -59.72382354736328, + "rewards/margins": 50.79979705810547, + "rewards/real": -8.924032211303711, + "step": 5980 + }, + { + "epoch": 1.92, + "learning_rate": 2.006044802654972e-07, + "logits/generated": 3.495687961578369, + "logits/real": 2.4661941528320312, + "logps/generated": -958.0787353515625, + "logps/real": -354.7956848144531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -58.43269729614258, + "rewards/margins": 50.86808776855469, + "rewards/real": -7.5646071434021, + "step": 5990 + }, + { + "epoch": 1.92, + "learning_rate": 2.0001185255422543e-07, + "logits/generated": 3.7011466026306152, + "logits/real": 2.2297897338867188, + "logps/generated": -946.3480224609375, + "logps/real": -385.870361328125, + "loss": 0.0044, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -57.50101852416992, + "rewards/margins": 50.18718338012695, + "rewards/real": -7.313836574554443, + "step": 6000 + }, + { + "epoch": 1.92, + "learning_rate": 1.9941922484295365e-07, + "logits/generated": 3.404715061187744, + "logits/real": 2.819580554962158, + "logps/generated": -985.5870361328125, + "logps/real": -360.0356140136719, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/generated": -59.78424072265625, + "rewards/margins": 52.910499572753906, + "rewards/real": -6.873734951019287, + "step": 6010 + }, + { + "epoch": 1.93, + "learning_rate": 1.9882659713168186e-07, + "logits/generated": 3.6169943809509277, + "logits/real": 2.3183822631835938, + "logps/generated": -869.0330810546875, + "logps/real": -422.5990295410156, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/generated": -51.07073211669922, + "rewards/margins": 43.21461486816406, + "rewards/real": -7.856122016906738, + "step": 6020 + }, + { + "epoch": 1.93, + "learning_rate": 1.982339694204101e-07, + "logits/generated": 3.7115769386291504, + "logits/real": 1.8723506927490234, + "logps/generated": -1036.148681640625, + "logps/real": -418.6692810058594, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/generated": -63.597801208496094, + "rewards/margins": 56.10791015625, + "rewards/real": -7.48989725112915, + "step": 6030 + }, + { + "epoch": 1.93, + "learning_rate": 1.9764134170913832e-07, + "logits/generated": 3.66517972946167, + "logits/real": 1.5840797424316406, + "logps/generated": -1040.49267578125, + "logps/real": -388.66314697265625, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/generated": -63.50476837158203, + "rewards/margins": 57.59077072143555, + "rewards/real": -5.913995265960693, + "step": 6040 + }, + { + "epoch": 1.94, + "learning_rate": 1.9704871399786654e-07, + "logits/generated": 4.248284816741943, + "logits/real": 2.633690357208252, + "logps/generated": -1084.5806884765625, + "logps/real": -372.9784240722656, + "loss": 0.0316, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -67.48811340332031, + "rewards/margins": 59.094947814941406, + "rewards/real": -8.393167495727539, + "step": 6050 + }, + { + "epoch": 1.94, + "learning_rate": 1.9645608628659475e-07, + "logits/generated": 3.789734363555908, + "logits/real": 2.168854236602783, + "logps/generated": -1045.887451171875, + "logps/real": -426.54022216796875, + "loss": 0.0474, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -63.383766174316406, + "rewards/margins": 56.191429138183594, + "rewards/real": -7.19232702255249, + "step": 6060 + }, + { + "epoch": 1.94, + "learning_rate": 1.9586345857532297e-07, + "logits/generated": 3.780759334564209, + "logits/real": 2.035515308380127, + "logps/generated": -850.5437622070312, + "logps/real": -390.14569091796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -47.13395690917969, + "rewards/margins": 41.21845245361328, + "rewards/real": -5.915501117706299, + "step": 6070 + }, + { + "epoch": 1.95, + "learning_rate": 1.9527083086405118e-07, + "logits/generated": 3.0783743858337402, + "logits/real": 1.924883246421814, + "logps/generated": -857.7428588867188, + "logps/real": -336.7803649902344, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/generated": -47.51791000366211, + "rewards/margins": 42.42029571533203, + "rewards/real": -5.097611427307129, + "step": 6080 + }, + { + "epoch": 1.95, + "learning_rate": 1.9467820315277943e-07, + "logits/generated": 3.5931499004364014, + "logits/real": 1.9940860271453857, + "logps/generated": -846.85205078125, + "logps/real": -376.54876708984375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -46.58185577392578, + "rewards/margins": 40.98250198364258, + "rewards/real": -5.599356651306152, + "step": 6090 + }, + { + "epoch": 1.95, + "learning_rate": 1.9408557544150764e-07, + "logits/generated": 3.1684060096740723, + "logits/real": 1.8693767786026, + "logps/generated": -842.78271484375, + "logps/real": -347.5132751464844, + "loss": 0.0186, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -47.030582427978516, + "rewards/margins": 42.137516021728516, + "rewards/real": -4.893065452575684, + "step": 6100 + }, + { + "epoch": 1.96, + "learning_rate": 1.9349294773023586e-07, + "logits/generated": 3.25065279006958, + "logits/real": 1.6499922275543213, + "logps/generated": -870.6170043945312, + "logps/real": -388.0818786621094, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/generated": -48.79248046875, + "rewards/margins": 43.6743049621582, + "rewards/real": -5.118174076080322, + "step": 6110 + }, + { + "epoch": 1.96, + "learning_rate": 1.929003200189641e-07, + "logits/generated": 3.3154430389404297, + "logits/real": 1.2816754579544067, + "logps/generated": -726.7086181640625, + "logps/real": -455.2189025878906, + "loss": 0.0198, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -38.447166442871094, + "rewards/margins": 31.914520263671875, + "rewards/real": -6.532646179199219, + "step": 6120 + }, + { + "epoch": 1.96, + "learning_rate": 1.9230769230769231e-07, + "logits/generated": 3.1148016452789307, + "logits/real": 1.5517610311508179, + "logps/generated": -815.5079956054688, + "logps/real": -337.00311279296875, + "loss": 0.0109, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -43.745155334472656, + "rewards/margins": 39.53904342651367, + "rewards/real": -4.206110000610352, + "step": 6130 + }, + { + "epoch": 1.96, + "learning_rate": 1.9171506459642053e-07, + "logits/generated": 3.7183945178985596, + "logits/real": 1.6947906017303467, + "logps/generated": -754.2486572265625, + "logps/real": -365.54620361328125, + "loss": 0.0406, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -40.77758026123047, + "rewards/margins": 35.4954948425293, + "rewards/real": -5.2820868492126465, + "step": 6140 + }, + { + "epoch": 1.97, + "learning_rate": 1.9112243688514872e-07, + "logits/generated": 3.4877898693084717, + "logits/real": 2.0037782192230225, + "logps/generated": -820.0745239257812, + "logps/real": -384.6826171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -45.665775299072266, + "rewards/margins": 40.5218391418457, + "rewards/real": -5.143938064575195, + "step": 6150 + }, + { + "epoch": 1.97, + "learning_rate": 1.9052980917387696e-07, + "logits/generated": 3.5603244304656982, + "logits/real": 2.0485646724700928, + "logps/generated": -848.9260864257812, + "logps/real": -382.2392272949219, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -46.82533645629883, + "rewards/margins": 41.54307556152344, + "rewards/real": -5.282263278961182, + "step": 6160 + }, + { + "epoch": 1.97, + "learning_rate": 1.8993718146260517e-07, + "logits/generated": 3.8129520416259766, + "logits/real": 1.7690954208374023, + "logps/generated": -838.8488159179688, + "logps/real": -382.4895324707031, + "loss": 0.0386, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -44.65959930419922, + "rewards/margins": 39.390281677246094, + "rewards/real": -5.269317626953125, + "step": 6170 + }, + { + "epoch": 1.98, + "learning_rate": 1.893445537513334e-07, + "logits/generated": 3.376861572265625, + "logits/real": 1.9085153341293335, + "logps/generated": -936.9320068359375, + "logps/real": -353.51983642578125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -53.39668655395508, + "rewards/margins": 48.76415252685547, + "rewards/real": -4.632534503936768, + "step": 6180 + }, + { + "epoch": 1.98, + "learning_rate": 1.8875192604006163e-07, + "logits/generated": 3.5113492012023926, + "logits/real": 1.8916263580322266, + "logps/generated": -856.7068481445312, + "logps/real": -403.841552734375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/generated": -45.738426208496094, + "rewards/margins": 40.86699676513672, + "rewards/real": -4.871423244476318, + "step": 6190 + }, + { + "epoch": 1.98, + "learning_rate": 1.8815929832878985e-07, + "logits/generated": 3.886976718902588, + "logits/real": 1.7546895742416382, + "logps/generated": -941.3308715820312, + "logps/real": -387.34759521484375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -52.989166259765625, + "rewards/margins": 47.38789367675781, + "rewards/real": -5.601273536682129, + "step": 6200 + }, + { + "epoch": 1.99, + "learning_rate": 1.8756667061751806e-07, + "logits/generated": 3.5155677795410156, + "logits/real": 1.8104327917099, + "logps/generated": -875.697265625, + "logps/real": -374.0039367675781, + "loss": 0.0575, + "rewards/accuracies": 1.0, + "rewards/generated": -47.55449295043945, + "rewards/margins": 43.92311477661133, + "rewards/real": -3.6313769817352295, + "step": 6210 + }, + { + "epoch": 1.99, + "learning_rate": 1.869740429062463e-07, + "logits/generated": 3.1275806427001953, + "logits/real": 1.9204256534576416, + "logps/generated": -919.3878173828125, + "logps/real": -360.70538330078125, + "loss": 0.046, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -49.14870834350586, + "rewards/margins": 45.66701126098633, + "rewards/real": -3.4816970825195312, + "step": 6220 + }, + { + "epoch": 1.99, + "learning_rate": 1.863814151949745e-07, + "logits/generated": 3.363603115081787, + "logits/real": 1.4255322217941284, + "logps/generated": -873.4091796875, + "logps/real": -364.060546875, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/generated": -46.276676177978516, + "rewards/margins": 42.56911087036133, + "rewards/real": -3.70756459236145, + "step": 6230 + }, + { + "epoch": 2.0, + "learning_rate": 1.857887874837027e-07, + "logits/generated": 3.5246219635009766, + "logits/real": 2.0341663360595703, + "logps/generated": -902.45849609375, + "logps/real": -346.0574951171875, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/generated": -49.6707649230957, + "rewards/margins": 45.00032424926758, + "rewards/real": -4.670444011688232, + "step": 6240 + }, + { + "epoch": 2.0, + "learning_rate": 1.8519615977243095e-07, + "logits/generated": 3.884793519973755, + "logits/real": 1.5322035551071167, + "logps/generated": -897.5712890625, + "logps/real": -401.52740478515625, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/generated": -48.86095428466797, + "rewards/margins": 43.582603454589844, + "rewards/real": -5.278354644775391, + "step": 6250 + }, + { + "epoch": 2.0, + "learning_rate": 1.8460353206115917e-07, + "logits/generated": 3.5952534675598145, + "logits/real": 1.9434171915054321, + "logps/generated": -908.01220703125, + "logps/real": -356.5626525878906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -51.807945251464844, + "rewards/margins": 47.32154083251953, + "rewards/real": -4.486405372619629, + "step": 6260 + }, + { + "epoch": 2.01, + "learning_rate": 1.8401090434988738e-07, + "logits/generated": 3.4772255420684814, + "logits/real": 1.803269624710083, + "logps/generated": -894.8136596679688, + "logps/real": -383.3649597167969, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/generated": -49.74882507324219, + "rewards/margins": 44.56361389160156, + "rewards/real": -5.185210227966309, + "step": 6270 + }, + { + "epoch": 2.01, + "learning_rate": 1.8341827663861562e-07, + "logits/generated": 4.13782262802124, + "logits/real": 1.8974603414535522, + "logps/generated": -891.482421875, + "logps/real": -354.6378479003906, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -49.27983856201172, + "rewards/margins": 44.55103302001953, + "rewards/real": -4.7288079261779785, + "step": 6280 + }, + { + "epoch": 2.01, + "learning_rate": 1.8282564892734384e-07, + "logits/generated": 3.494687557220459, + "logits/real": 1.7981832027435303, + "logps/generated": -840.1969604492188, + "logps/real": -391.729248046875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/generated": -44.981666564941406, + "rewards/margins": 39.8217658996582, + "rewards/real": -5.159902095794678, + "step": 6290 + }, + { + "epoch": 2.02, + "learning_rate": 1.8223302121607205e-07, + "logits/generated": 3.345310688018799, + "logits/real": 1.3194358348846436, + "logps/generated": -789.7905883789062, + "logps/real": -376.018798828125, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/generated": -41.84284210205078, + "rewards/margins": 37.92809295654297, + "rewards/real": -3.91474986076355, + "step": 6300 + }, + { + "epoch": 2.02, + "learning_rate": 1.816403935048003e-07, + "logits/generated": 3.058318614959717, + "logits/real": 1.6646451950073242, + "logps/generated": -938.5059814453125, + "logps/real": -379.447021484375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -52.523712158203125, + "rewards/margins": 47.76115036010742, + "rewards/real": -4.7625603675842285, + "step": 6310 + }, + { + "epoch": 2.02, + "learning_rate": 1.8104776579352849e-07, + "logits/generated": 3.089402198791504, + "logits/real": 1.5612413883209229, + "logps/generated": -831.9298095703125, + "logps/real": -374.03814697265625, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/generated": -44.23519515991211, + "rewards/margins": 39.55861282348633, + "rewards/real": -4.676581859588623, + "step": 6320 + }, + { + "epoch": 2.03, + "learning_rate": 1.804551380822567e-07, + "logits/generated": 3.766859769821167, + "logits/real": 1.84218430519104, + "logps/generated": -907.6774291992188, + "logps/real": -392.12603759765625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -50.28938293457031, + "rewards/margins": 46.311309814453125, + "rewards/real": -3.978079319000244, + "step": 6330 + }, + { + "epoch": 2.03, + "learning_rate": 1.7986251037098494e-07, + "logits/generated": 3.7131881713867188, + "logits/real": 1.8214771747589111, + "logps/generated": -884.7658081054688, + "logps/real": -373.6928405761719, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/generated": -48.31201934814453, + "rewards/margins": 43.099334716796875, + "rewards/real": -5.2126851081848145, + "step": 6340 + }, + { + "epoch": 2.03, + "learning_rate": 1.7926988265971316e-07, + "logits/generated": 3.696737289428711, + "logits/real": 1.6888158321380615, + "logps/generated": -837.7440185546875, + "logps/real": -367.4432067871094, + "loss": 0.0061, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -45.287147521972656, + "rewards/margins": 41.43773651123047, + "rewards/real": -3.8494114875793457, + "step": 6350 + }, + { + "epoch": 2.04, + "learning_rate": 1.7867725494844137e-07, + "logits/generated": 3.6794025897979736, + "logits/real": 1.402541995048523, + "logps/generated": -898.3410034179688, + "logps/real": -381.45672607421875, + "loss": 0.0068, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -49.66602325439453, + "rewards/margins": 44.5916633605957, + "rewards/real": -5.074358940124512, + "step": 6360 + }, + { + "epoch": 2.04, + "learning_rate": 1.7808462723716962e-07, + "logits/generated": 3.880311965942383, + "logits/real": 2.079180955886841, + "logps/generated": -954.888671875, + "logps/real": -398.05291748046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -54.07482147216797, + "rewards/margins": 48.75034713745117, + "rewards/real": -5.324476718902588, + "step": 6370 + }, + { + "epoch": 2.04, + "learning_rate": 1.7749199952589783e-07, + "logits/generated": 3.0106310844421387, + "logits/real": 1.792249083518982, + "logps/generated": -884.6341552734375, + "logps/real": -361.8485412597656, + "loss": 0.0178, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -49.628013610839844, + "rewards/margins": 44.123741149902344, + "rewards/real": -5.504273891448975, + "step": 6380 + }, + { + "epoch": 2.04, + "learning_rate": 1.7689937181462605e-07, + "logits/generated": 3.680366039276123, + "logits/real": 1.9910833835601807, + "logps/generated": -1001.8021240234375, + "logps/real": -351.34906005859375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -58.97861862182617, + "rewards/margins": 53.63355255126953, + "rewards/real": -5.345067501068115, + "step": 6390 + }, + { + "epoch": 2.05, + "learning_rate": 1.763067441033543e-07, + "logits/generated": 3.5425190925598145, + "logits/real": 1.4850716590881348, + "logps/generated": -905.5157470703125, + "logps/real": -405.67022705078125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -51.5119743347168, + "rewards/margins": 46.31713104248047, + "rewards/real": -5.194847106933594, + "step": 6400 + }, + { + "epoch": 2.05, + "learning_rate": 1.7571411639208248e-07, + "logits/generated": 3.1999075412750244, + "logits/real": 1.6482913494110107, + "logps/generated": -807.9815673828125, + "logps/real": -366.31097412109375, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -43.2049560546875, + "rewards/margins": 38.5529670715332, + "rewards/real": -4.6519880294799805, + "step": 6410 + }, + { + "epoch": 2.05, + "learning_rate": 1.751214886808107e-07, + "logits/generated": 3.399055004119873, + "logits/real": 1.9470268487930298, + "logps/generated": -835.4879150390625, + "logps/real": -331.04522705078125, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -45.92859649658203, + "rewards/margins": 41.39247512817383, + "rewards/real": -4.536115646362305, + "step": 6420 + }, + { + "epoch": 2.06, + "learning_rate": 1.7452886096953893e-07, + "logits/generated": 3.8540852069854736, + "logits/real": 1.836313009262085, + "logps/generated": -970.19287109375, + "logps/real": -364.0809631347656, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/generated": -56.99243927001953, + "rewards/margins": 50.86602020263672, + "rewards/real": -6.126420021057129, + "step": 6430 + }, + { + "epoch": 2.06, + "learning_rate": 1.7393623325826715e-07, + "logits/generated": 3.5358593463897705, + "logits/real": 1.6226106882095337, + "logps/generated": -841.9315185546875, + "logps/real": -388.89984130859375, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/generated": -46.219757080078125, + "rewards/margins": 40.3925666809082, + "rewards/real": -5.827186107635498, + "step": 6440 + }, + { + "epoch": 2.06, + "learning_rate": 1.7334360554699537e-07, + "logits/generated": 3.1885433197021484, + "logits/real": 2.0745949745178223, + "logps/generated": -913.2047119140625, + "logps/real": -387.1131896972656, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -53.641197204589844, + "rewards/margins": 46.503761291503906, + "rewards/real": -7.1374335289001465, + "step": 6450 + }, + { + "epoch": 2.07, + "learning_rate": 1.7275097783572358e-07, + "logits/generated": 3.553014039993286, + "logits/real": 1.905940294265747, + "logps/generated": -875.4393310546875, + "logps/real": -395.08892822265625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -47.743553161621094, + "rewards/margins": 41.1551399230957, + "rewards/real": -6.588418006896973, + "step": 6460 + }, + { + "epoch": 2.07, + "learning_rate": 1.7215835012445182e-07, + "logits/generated": 3.791322708129883, + "logits/real": 2.0190250873565674, + "logps/generated": -947.0153198242188, + "logps/real": -372.3125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -54.31102752685547, + "rewards/margins": 47.426300048828125, + "rewards/real": -6.884730339050293, + "step": 6470 + }, + { + "epoch": 2.07, + "learning_rate": 1.7156572241318004e-07, + "logits/generated": 3.7165019512176514, + "logits/real": 2.2466185092926025, + "logps/generated": -896.0616455078125, + "logps/real": -396.95770263671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -49.876625061035156, + "rewards/margins": 41.883201599121094, + "rewards/real": -7.9934258460998535, + "step": 6480 + }, + { + "epoch": 2.08, + "learning_rate": 1.7097309470190825e-07, + "logits/generated": 3.761566638946533, + "logits/real": 1.9355154037475586, + "logps/generated": -883.3474731445312, + "logps/real": -397.15765380859375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/generated": -51.003173828125, + "rewards/margins": 44.23944854736328, + "rewards/real": -6.763733863830566, + "step": 6490 + }, + { + "epoch": 2.08, + "learning_rate": 1.7038046699063647e-07, + "logits/generated": 3.918832302093506, + "logits/real": 2.098384380340576, + "logps/generated": -1068.8323974609375, + "logps/real": -377.49884033203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -63.992462158203125, + "rewards/margins": 57.11040115356445, + "rewards/real": -6.882063865661621, + "step": 6500 + }, + { + "epoch": 2.08, + "learning_rate": 1.6978783927936468e-07, + "logits/generated": 3.3875460624694824, + "logits/real": 2.0191025733947754, + "logps/generated": -844.9122314453125, + "logps/real": -389.9608459472656, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/generated": -47.293983459472656, + "rewards/margins": 40.667396545410156, + "rewards/real": -6.626590728759766, + "step": 6510 + }, + { + "epoch": 2.09, + "learning_rate": 1.691952115680929e-07, + "logits/generated": 3.16455340385437, + "logits/real": 2.0957303047180176, + "logps/generated": -943.8391723632812, + "logps/real": -383.1474914550781, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/generated": -54.61397171020508, + "rewards/margins": 48.13213348388672, + "rewards/real": -6.481833457946777, + "step": 6520 + }, + { + "epoch": 2.09, + "learning_rate": 1.6860258385682114e-07, + "logits/generated": 3.742846727371216, + "logits/real": 1.8941633701324463, + "logps/generated": -833.7000122070312, + "logps/real": -375.1758117675781, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/generated": -46.05198287963867, + "rewards/margins": 40.61438751220703, + "rewards/real": -5.437596797943115, + "step": 6530 + }, + { + "epoch": 2.09, + "learning_rate": 1.6800995614554936e-07, + "logits/generated": 3.6632461547851562, + "logits/real": 2.2125802040100098, + "logps/generated": -909.6021728515625, + "logps/real": -375.26568603515625, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -49.762611389160156, + "rewards/margins": 43.21505355834961, + "rewards/real": -6.547552585601807, + "step": 6540 + }, + { + "epoch": 2.1, + "learning_rate": 1.6741732843427757e-07, + "logits/generated": 3.6171021461486816, + "logits/real": 2.102571487426758, + "logps/generated": -892.7225341796875, + "logps/real": -381.14483642578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -49.49254608154297, + "rewards/margins": 42.738670349121094, + "rewards/real": -6.753874778747559, + "step": 6550 + }, + { + "epoch": 2.1, + "learning_rate": 1.6682470072300581e-07, + "logits/generated": 3.3604187965393066, + "logits/real": 2.235856294631958, + "logps/generated": -941.2318115234375, + "logps/real": -373.9045104980469, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -52.93433380126953, + "rewards/margins": 46.550376892089844, + "rewards/real": -6.383957386016846, + "step": 6560 + }, + { + "epoch": 2.1, + "learning_rate": 1.6623207301173403e-07, + "logits/generated": 3.521977663040161, + "logits/real": 2.065173625946045, + "logps/generated": -878.505859375, + "logps/real": -378.19122314453125, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -50.58102798461914, + "rewards/margins": 43.341697692871094, + "rewards/real": -7.2393341064453125, + "step": 6570 + }, + { + "epoch": 2.11, + "learning_rate": 1.6563944530046224e-07, + "logits/generated": 3.439244508743286, + "logits/real": 2.1543192863464355, + "logps/generated": -927.3391723632812, + "logps/real": -383.65509033203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -52.10762405395508, + "rewards/margins": 44.56721496582031, + "rewards/real": -7.540410041809082, + "step": 6580 + }, + { + "epoch": 2.11, + "learning_rate": 1.6504681758919046e-07, + "logits/generated": 3.5469043254852295, + "logits/real": 1.8698736429214478, + "logps/generated": -837.1285400390625, + "logps/real": -431.6897888183594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -44.68436813354492, + "rewards/margins": 38.102054595947266, + "rewards/real": -6.58231258392334, + "step": 6590 + }, + { + "epoch": 2.11, + "learning_rate": 1.6445418987791868e-07, + "logits/generated": 3.2622406482696533, + "logits/real": 1.9931650161743164, + "logps/generated": -912.216796875, + "logps/real": -397.71429443359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -51.05770492553711, + "rewards/margins": 44.12523651123047, + "rewards/real": -6.93247127532959, + "step": 6600 + }, + { + "epoch": 2.12, + "learning_rate": 1.638615621666469e-07, + "logits/generated": 3.3315768241882324, + "logits/real": 2.002892017364502, + "logps/generated": -1019.22021484375, + "logps/real": -371.0779724121094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -60.481849670410156, + "rewards/margins": 53.9934196472168, + "rewards/real": -6.488433837890625, + "step": 6610 + }, + { + "epoch": 2.12, + "learning_rate": 1.6326893445537513e-07, + "logits/generated": 3.30401611328125, + "logits/real": 2.0205698013305664, + "logps/generated": -857.5738525390625, + "logps/real": -406.16546630859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -46.526493072509766, + "rewards/margins": 40.06731414794922, + "rewards/real": -6.459181308746338, + "step": 6620 + }, + { + "epoch": 2.12, + "learning_rate": 1.6267630674410335e-07, + "logits/generated": 3.6951255798339844, + "logits/real": 1.9617116451263428, + "logps/generated": -867.1686401367188, + "logps/real": -386.513427734375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/generated": -48.22928237915039, + "rewards/margins": 41.44636535644531, + "rewards/real": -6.782916069030762, + "step": 6630 + }, + { + "epoch": 2.12, + "learning_rate": 1.6208367903283156e-07, + "logits/generated": 3.649329662322998, + "logits/real": 2.1086161136627197, + "logps/generated": -877.0367431640625, + "logps/real": -378.9393310546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -48.91767883300781, + "rewards/margins": 41.61117172241211, + "rewards/real": -7.306508541107178, + "step": 6640 + }, + { + "epoch": 2.13, + "learning_rate": 1.614910513215598e-07, + "logits/generated": 3.661804676055908, + "logits/real": 2.4163119792938232, + "logps/generated": -1004.2562255859375, + "logps/real": -388.51336669921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -58.64870071411133, + "rewards/margins": 51.82874298095703, + "rewards/real": -6.819955348968506, + "step": 6650 + }, + { + "epoch": 2.13, + "learning_rate": 1.6089842361028802e-07, + "logits/generated": 3.2976551055908203, + "logits/real": 1.8500537872314453, + "logps/generated": -934.6765747070312, + "logps/real": -383.8113708496094, + "loss": 0.0176, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -54.98523712158203, + "rewards/margins": 48.496063232421875, + "rewards/real": -6.489166259765625, + "step": 6660 + }, + { + "epoch": 2.13, + "learning_rate": 1.6030579589901624e-07, + "logits/generated": 3.948852062225342, + "logits/real": 2.137843370437622, + "logps/generated": -885.4298706054688, + "logps/real": -369.081787109375, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/generated": -50.035858154296875, + "rewards/margins": 43.878318786621094, + "rewards/real": -6.157540798187256, + "step": 6670 + }, + { + "epoch": 2.14, + "learning_rate": 1.5971316818774445e-07, + "logits/generated": 3.7219607830047607, + "logits/real": 1.7658170461654663, + "logps/generated": -920.0833129882812, + "logps/real": -419.18682861328125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -53.35789108276367, + "rewards/margins": 45.76387405395508, + "rewards/real": -7.594014644622803, + "step": 6680 + }, + { + "epoch": 2.14, + "learning_rate": 1.5912054047647267e-07, + "logits/generated": 3.4460177421569824, + "logits/real": 2.163835048675537, + "logps/generated": -941.6736450195312, + "logps/real": -407.65576171875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/generated": -54.060951232910156, + "rewards/margins": 46.508575439453125, + "rewards/real": -7.552375793457031, + "step": 6690 + }, + { + "epoch": 2.14, + "learning_rate": 1.5852791276520088e-07, + "logits/generated": 4.178693771362305, + "logits/real": 1.8809093236923218, + "logps/generated": -841.2927856445312, + "logps/real": -398.60443115234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -46.83857345581055, + "rewards/margins": 39.39279556274414, + "rewards/real": -7.445780277252197, + "step": 6700 + }, + { + "epoch": 2.15, + "learning_rate": 1.5793528505392912e-07, + "logits/generated": 3.8482487201690674, + "logits/real": 2.120398998260498, + "logps/generated": -881.3566284179688, + "logps/real": -398.9073791503906, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -49.39754867553711, + "rewards/margins": 43.08720779418945, + "rewards/real": -6.310345649719238, + "step": 6710 + }, + { + "epoch": 2.15, + "learning_rate": 1.5734265734265734e-07, + "logits/generated": 3.056668758392334, + "logits/real": 1.9427289962768555, + "logps/generated": -944.3453979492188, + "logps/real": -381.8882141113281, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -53.652366638183594, + "rewards/margins": 46.86200714111328, + "rewards/real": -6.7903642654418945, + "step": 6720 + }, + { + "epoch": 2.15, + "learning_rate": 1.5675002963138556e-07, + "logits/generated": 3.5041098594665527, + "logits/real": 2.2960071563720703, + "logps/generated": -884.7781982421875, + "logps/real": -422.67413330078125, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/generated": -49.832481384277344, + "rewards/margins": 42.40620803833008, + "rewards/real": -7.4262824058532715, + "step": 6730 + }, + { + "epoch": 2.16, + "learning_rate": 1.561574019201138e-07, + "logits/generated": 3.994569778442383, + "logits/real": 2.588862895965576, + "logps/generated": -1020.2581176757812, + "logps/real": -417.8421936035156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -60.7502326965332, + "rewards/margins": 50.90573501586914, + "rewards/real": -9.844499588012695, + "step": 6740 + }, + { + "epoch": 2.16, + "learning_rate": 1.55564774208842e-07, + "logits/generated": 3.1953651905059814, + "logits/real": 2.4260640144348145, + "logps/generated": -947.6217041015625, + "logps/real": -375.6593933105469, + "loss": 0.0046, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -55.07343673706055, + "rewards/margins": 46.666297912597656, + "rewards/real": -8.407140731811523, + "step": 6750 + }, + { + "epoch": 2.16, + "learning_rate": 1.5497214649757023e-07, + "logits/generated": 4.424673557281494, + "logits/real": 2.2033803462982178, + "logps/generated": -939.3943481445312, + "logps/real": -373.49371337890625, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -54.96144485473633, + "rewards/margins": 47.3978385925293, + "rewards/real": -7.56360387802124, + "step": 6760 + }, + { + "epoch": 2.17, + "learning_rate": 1.5437951878629842e-07, + "logits/generated": 3.748819351196289, + "logits/real": 2.647487163543701, + "logps/generated": -920.8893432617188, + "logps/real": -416.6439514160156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -51.697792053222656, + "rewards/margins": 43.080196380615234, + "rewards/real": -8.617592811584473, + "step": 6770 + }, + { + "epoch": 2.17, + "learning_rate": 1.5378689107502666e-07, + "logits/generated": 4.316756725311279, + "logits/real": 2.1666080951690674, + "logps/generated": -896.6207885742188, + "logps/real": -413.3868103027344, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/generated": -51.75537109375, + "rewards/margins": 43.20842742919922, + "rewards/real": -8.546947479248047, + "step": 6780 + }, + { + "epoch": 2.17, + "learning_rate": 1.5319426336375487e-07, + "logits/generated": 3.8626246452331543, + "logits/real": 1.7933346033096313, + "logps/generated": -931.4691162109375, + "logps/real": -358.85797119140625, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/generated": -53.052490234375, + "rewards/margins": 47.16486358642578, + "rewards/real": -5.887631416320801, + "step": 6790 + }, + { + "epoch": 2.18, + "learning_rate": 1.526016356524831e-07, + "logits/generated": 4.089632034301758, + "logits/real": 1.6373741626739502, + "logps/generated": -954.58544921875, + "logps/real": -388.57720947265625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/generated": -55.31062698364258, + "rewards/margins": 49.37091064453125, + "rewards/real": -5.939715385437012, + "step": 6800 + }, + { + "epoch": 2.18, + "learning_rate": 1.5200900794121133e-07, + "logits/generated": 3.2313010692596436, + "logits/real": 1.5612775087356567, + "logps/generated": -862.0437622070312, + "logps/real": -378.39471435546875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -47.85914611816406, + "rewards/margins": 41.0986213684082, + "rewards/real": -6.760532379150391, + "step": 6810 + }, + { + "epoch": 2.18, + "learning_rate": 1.5141638022993955e-07, + "logits/generated": 3.2439932823181152, + "logits/real": 2.2634949684143066, + "logps/generated": -932.8312377929688, + "logps/real": -362.20806884765625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -52.68410110473633, + "rewards/margins": 46.85740661621094, + "rewards/real": -5.826689720153809, + "step": 6820 + }, + { + "epoch": 2.19, + "learning_rate": 1.5082375251866776e-07, + "logits/generated": 3.551748275756836, + "logits/real": 1.9099966287612915, + "logps/generated": -870.3192138671875, + "logps/real": -412.82757568359375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -49.41729736328125, + "rewards/margins": 42.34073257446289, + "rewards/real": -7.076570987701416, + "step": 6830 + }, + { + "epoch": 2.19, + "learning_rate": 1.50231124807396e-07, + "logits/generated": 3.5488381385803223, + "logits/real": 2.0896248817443848, + "logps/generated": -907.7509765625, + "logps/real": -351.4305114746094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -51.6164665222168, + "rewards/margins": 46.79435729980469, + "rewards/real": -4.822107791900635, + "step": 6840 + }, + { + "epoch": 2.19, + "learning_rate": 1.496384970961242e-07, + "logits/generated": 3.959463119506836, + "logits/real": 1.8525702953338623, + "logps/generated": -875.3648681640625, + "logps/real": -353.7754821777344, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/generated": -49.686405181884766, + "rewards/margins": 44.30531692504883, + "rewards/real": -5.381083965301514, + "step": 6850 + }, + { + "epoch": 2.2, + "learning_rate": 1.490458693848524e-07, + "logits/generated": 3.450671434402466, + "logits/real": 1.645078420639038, + "logps/generated": -813.9600830078125, + "logps/real": -415.3131408691406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -42.97637176513672, + "rewards/margins": 36.70772933959961, + "rewards/real": -6.268640995025635, + "step": 6860 + }, + { + "epoch": 2.2, + "learning_rate": 1.4845324167358065e-07, + "logits/generated": 3.608870267868042, + "logits/real": 1.752415418624878, + "logps/generated": -844.51953125, + "logps/real": -392.44439697265625, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -47.45626449584961, + "rewards/margins": 41.60123825073242, + "rewards/real": -5.8550310134887695, + "step": 6870 + }, + { + "epoch": 2.2, + "learning_rate": 1.4786061396230887e-07, + "logits/generated": 3.241227626800537, + "logits/real": 1.8304872512817383, + "logps/generated": -965.0771484375, + "logps/real": -389.36749267578125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -56.4457893371582, + "rewards/margins": 50.60601043701172, + "rewards/real": -5.839773654937744, + "step": 6880 + }, + { + "epoch": 2.2, + "learning_rate": 1.4726798625103708e-07, + "logits/generated": 2.908162832260132, + "logits/real": 1.5250742435455322, + "logps/generated": -892.7222900390625, + "logps/real": -405.5044860839844, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/generated": -49.53814697265625, + "rewards/margins": 43.983612060546875, + "rewards/real": -5.554529666900635, + "step": 6890 + }, + { + "epoch": 2.21, + "learning_rate": 1.4667535853976532e-07, + "logits/generated": 3.5540802478790283, + "logits/real": 1.816138505935669, + "logps/generated": -1006.95654296875, + "logps/real": -356.22442626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -56.956817626953125, + "rewards/margins": 52.82642364501953, + "rewards/real": -4.13038969039917, + "step": 6900 + }, + { + "epoch": 2.21, + "learning_rate": 1.4608273082849354e-07, + "logits/generated": 2.8206403255462646, + "logits/real": 1.9676635265350342, + "logps/generated": -758.2788696289062, + "logps/real": -382.80731201171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -41.22502899169922, + "rewards/margins": 36.453529357910156, + "rewards/real": -4.771500587463379, + "step": 6910 + }, + { + "epoch": 2.21, + "learning_rate": 1.4549010311722175e-07, + "logits/generated": 3.226039409637451, + "logits/real": 1.8796745538711548, + "logps/generated": -891.7478637695312, + "logps/real": -342.97296142578125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/generated": -50.727088928222656, + "rewards/margins": 46.718544006347656, + "rewards/real": -4.008549213409424, + "step": 6920 + }, + { + "epoch": 2.22, + "learning_rate": 1.4489747540595e-07, + "logits/generated": 3.080256938934326, + "logits/real": 1.8325796127319336, + "logps/generated": -817.8787231445312, + "logps/real": -392.36981201171875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -43.375423431396484, + "rewards/margins": 38.49822235107422, + "rewards/real": -4.8772101402282715, + "step": 6930 + }, + { + "epoch": 2.22, + "learning_rate": 1.4430484769467818e-07, + "logits/generated": 2.9723448753356934, + "logits/real": 1.710695505142212, + "logps/generated": -892.23828125, + "logps/real": -414.94207763671875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/generated": -48.01591873168945, + "rewards/margins": 42.979949951171875, + "rewards/real": -5.035966396331787, + "step": 6940 + }, + { + "epoch": 2.22, + "learning_rate": 1.437122199834064e-07, + "logits/generated": 3.3378586769104004, + "logits/real": 1.6810868978500366, + "logps/generated": -873.9470825195312, + "logps/real": -399.42779541015625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -48.72085952758789, + "rewards/margins": 43.833003997802734, + "rewards/real": -4.887852668762207, + "step": 6950 + }, + { + "epoch": 2.23, + "learning_rate": 1.4311959227213464e-07, + "logits/generated": 3.7277050018310547, + "logits/real": 1.8600651025772095, + "logps/generated": -961.6746215820312, + "logps/real": -376.4801025390625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -54.674163818359375, + "rewards/margins": 48.67682647705078, + "rewards/real": -5.997340202331543, + "step": 6960 + }, + { + "epoch": 2.23, + "learning_rate": 1.4252696456086286e-07, + "logits/generated": 3.812844753265381, + "logits/real": 2.0348525047302246, + "logps/generated": -929.8878784179688, + "logps/real": -373.9241943359375, + "loss": 0.0043, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -53.372467041015625, + "rewards/margins": 46.978092193603516, + "rewards/real": -6.394378662109375, + "step": 6970 + }, + { + "epoch": 2.23, + "learning_rate": 1.4193433684959107e-07, + "logits/generated": 3.270030975341797, + "logits/real": 2.5010368824005127, + "logps/generated": -923.5457763671875, + "logps/real": -381.654541015625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -53.984703063964844, + "rewards/margins": 46.77741241455078, + "rewards/real": -7.2072906494140625, + "step": 6980 + }, + { + "epoch": 2.24, + "learning_rate": 1.4134170913831931e-07, + "logits/generated": 3.9600830078125, + "logits/real": 2.124016284942627, + "logps/generated": -926.6437377929688, + "logps/real": -432.20513916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -54.845184326171875, + "rewards/margins": 47.35279083251953, + "rewards/real": -7.492398262023926, + "step": 6990 + }, + { + "epoch": 2.24, + "learning_rate": 1.4074908142704753e-07, + "logits/generated": 3.5825181007385254, + "logits/real": 2.1191952228546143, + "logps/generated": -930.9474487304688, + "logps/real": -372.9541931152344, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/generated": -51.7147331237793, + "rewards/margins": 45.40174865722656, + "rewards/real": -6.312982559204102, + "step": 7000 + }, + { + "epoch": 2.24, + "learning_rate": 1.4015645371577575e-07, + "logits/generated": 3.6363234519958496, + "logits/real": 1.4622042179107666, + "logps/generated": -846.0232543945312, + "logps/real": -373.27886962890625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/generated": -46.39105987548828, + "rewards/margins": 40.55199432373047, + "rewards/real": -5.839066505432129, + "step": 7010 + }, + { + "epoch": 2.25, + "learning_rate": 1.39563826004504e-07, + "logits/generated": 3.2572197914123535, + "logits/real": 1.710909128189087, + "logps/generated": -842.8611450195312, + "logps/real": -403.93096923828125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -46.79165267944336, + "rewards/margins": 40.92638397216797, + "rewards/real": -5.865267276763916, + "step": 7020 + }, + { + "epoch": 2.25, + "learning_rate": 1.3897119829323218e-07, + "logits/generated": 3.1691536903381348, + "logits/real": 2.1280269622802734, + "logps/generated": -882.4940185546875, + "logps/real": -374.7820739746094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -49.72075653076172, + "rewards/margins": 44.30762481689453, + "rewards/real": -5.4131364822387695, + "step": 7030 + }, + { + "epoch": 2.25, + "learning_rate": 1.383785705819604e-07, + "logits/generated": 3.2503700256347656, + "logits/real": 2.0797364711761475, + "logps/generated": -1018.7307739257812, + "logps/real": -360.0143127441406, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/generated": -60.452354431152344, + "rewards/margins": 53.7127571105957, + "rewards/real": -6.739597320556641, + "step": 7040 + }, + { + "epoch": 2.26, + "learning_rate": 1.3778594287068863e-07, + "logits/generated": 3.8283183574676514, + "logits/real": 2.013394832611084, + "logps/generated": -960.9522705078125, + "logps/real": -442.35516357421875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/generated": -54.580238342285156, + "rewards/margins": 46.32996368408203, + "rewards/real": -8.25028133392334, + "step": 7050 + }, + { + "epoch": 2.26, + "learning_rate": 1.3719331515941685e-07, + "logits/generated": 3.9131228923797607, + "logits/real": 1.9845058917999268, + "logps/generated": -936.5426635742188, + "logps/real": -378.13275146484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -54.35515213012695, + "rewards/margins": 47.764652252197266, + "rewards/real": -6.5905046463012695, + "step": 7060 + }, + { + "epoch": 2.26, + "learning_rate": 1.3660068744814506e-07, + "logits/generated": 3.6134800910949707, + "logits/real": 2.446157932281494, + "logps/generated": -1077.667236328125, + "logps/real": -341.0533447265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -63.74230194091797, + "rewards/margins": 57.78644561767578, + "rewards/real": -5.955852508544922, + "step": 7070 + }, + { + "epoch": 2.27, + "learning_rate": 1.3600805973687328e-07, + "logits/generated": 3.8961777687072754, + "logits/real": 2.213057041168213, + "logps/generated": -944.3114013671875, + "logps/real": -347.3383483886719, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -54.40936279296875, + "rewards/margins": 47.75571823120117, + "rewards/real": -6.653644561767578, + "step": 7080 + }, + { + "epoch": 2.27, + "learning_rate": 1.3541543202560152e-07, + "logits/generated": 3.896191120147705, + "logits/real": 1.3951739072799683, + "logps/generated": -864.2570190429688, + "logps/real": -421.14453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -47.61088943481445, + "rewards/margins": 40.27512741088867, + "rewards/real": -7.335763454437256, + "step": 7090 + }, + { + "epoch": 2.27, + "learning_rate": 1.3482280431432974e-07, + "logits/generated": 3.3745689392089844, + "logits/real": 1.9235150814056396, + "logps/generated": -1001.2970581054688, + "logps/real": -392.7232971191406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -60.11412811279297, + "rewards/margins": 52.768280029296875, + "rewards/real": -7.345845699310303, + "step": 7100 + }, + { + "epoch": 2.28, + "learning_rate": 1.3423017660305795e-07, + "logits/generated": 3.599738359451294, + "logits/real": 2.035776376724243, + "logps/generated": -967.9103393554688, + "logps/real": -403.059326171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -56.32331085205078, + "rewards/margins": 48.5847053527832, + "rewards/real": -7.738605499267578, + "step": 7110 + }, + { + "epoch": 2.28, + "learning_rate": 1.3363754889178617e-07, + "logits/generated": 4.224157810211182, + "logits/real": 1.5589847564697266, + "logps/generated": -872.8209838867188, + "logps/real": -383.27264404296875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/generated": -48.575279235839844, + "rewards/margins": 42.46464157104492, + "rewards/real": -6.110633850097656, + "step": 7120 + }, + { + "epoch": 2.28, + "learning_rate": 1.3304492118051438e-07, + "logits/generated": 3.3103976249694824, + "logits/real": 2.0746779441833496, + "logps/generated": -998.7860107421875, + "logps/real": -399.69171142578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -58.93140411376953, + "rewards/margins": 52.75010299682617, + "rewards/real": -6.181303024291992, + "step": 7130 + }, + { + "epoch": 2.28, + "learning_rate": 1.324522934692426e-07, + "logits/generated": 3.538538694381714, + "logits/real": 1.8912137746810913, + "logps/generated": -910.70263671875, + "logps/real": -382.2970275878906, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/generated": -52.28912353515625, + "rewards/margins": 45.06175231933594, + "rewards/real": -7.2273664474487305, + "step": 7140 + }, + { + "epoch": 2.29, + "learning_rate": 1.3185966575797084e-07, + "logits/generated": 4.153119087219238, + "logits/real": 2.034271240234375, + "logps/generated": -919.7184448242188, + "logps/real": -432.1029357910156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -53.59479522705078, + "rewards/margins": 45.460472106933594, + "rewards/real": -8.134321212768555, + "step": 7150 + }, + { + "epoch": 2.29, + "learning_rate": 1.3126703804669906e-07, + "logits/generated": 3.6166234016418457, + "logits/real": 2.463695526123047, + "logps/generated": -902.58642578125, + "logps/real": -394.44354248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -54.12298583984375, + "rewards/margins": 45.50529861450195, + "rewards/real": -8.617691993713379, + "step": 7160 + }, + { + "epoch": 2.29, + "learning_rate": 1.3067441033542727e-07, + "logits/generated": 4.045973300933838, + "logits/real": 1.654720664024353, + "logps/generated": -943.9410400390625, + "logps/real": -402.5132751464844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -54.6247444152832, + "rewards/margins": 46.62574005126953, + "rewards/real": -7.999002933502197, + "step": 7170 + }, + { + "epoch": 2.3, + "learning_rate": 1.3008178262415551e-07, + "logits/generated": 3.2982230186462402, + "logits/real": 2.241882801055908, + "logps/generated": -888.5969848632812, + "logps/real": -421.61328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -50.42265319824219, + "rewards/margins": 43.38030242919922, + "rewards/real": -7.042349338531494, + "step": 7180 + }, + { + "epoch": 2.3, + "learning_rate": 1.2948915491288373e-07, + "logits/generated": 3.672785997390747, + "logits/real": 1.6770737171173096, + "logps/generated": -922.3863525390625, + "logps/real": -394.39410400390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -53.93218231201172, + "rewards/margins": 46.440086364746094, + "rewards/real": -7.492100715637207, + "step": 7190 + }, + { + "epoch": 2.3, + "learning_rate": 1.2889652720161194e-07, + "logits/generated": 3.482133388519287, + "logits/real": 2.195340633392334, + "logps/generated": -903.5734252929688, + "logps/real": -406.4979248046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -52.43483352661133, + "rewards/margins": 45.251285552978516, + "rewards/real": -7.183550834655762, + "step": 7200 + }, + { + "epoch": 2.31, + "learning_rate": 1.2830389949034016e-07, + "logits/generated": 3.6708450317382812, + "logits/real": 2.3602938652038574, + "logps/generated": -915.2332763671875, + "logps/real": -389.45269775390625, + "loss": 0.0069, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -54.72968292236328, + "rewards/margins": 46.505043029785156, + "rewards/real": -8.224641799926758, + "step": 7210 + }, + { + "epoch": 2.31, + "learning_rate": 1.2771127177906838e-07, + "logits/generated": 4.2751030921936035, + "logits/real": 2.1040592193603516, + "logps/generated": -1038.5245361328125, + "logps/real": -437.68536376953125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -64.43311309814453, + "rewards/margins": 55.898765563964844, + "rewards/real": -8.534345626831055, + "step": 7220 + }, + { + "epoch": 2.31, + "learning_rate": 1.271186440677966e-07, + "logits/generated": 3.725090503692627, + "logits/real": 2.492666244506836, + "logps/generated": -1149.4708251953125, + "logps/real": -439.4458923339844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.71576690673828, + "rewards/margins": 62.1614990234375, + "rewards/real": -9.554258346557617, + "step": 7230 + }, + { + "epoch": 2.32, + "learning_rate": 1.2652601635652483e-07, + "logits/generated": 4.100815296173096, + "logits/real": 2.0770721435546875, + "logps/generated": -1117.468017578125, + "logps/real": -454.64239501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.69239807128906, + "rewards/margins": 61.77238082885742, + "rewards/real": -8.920014381408691, + "step": 7240 + }, + { + "epoch": 2.32, + "learning_rate": 1.2593338864525305e-07, + "logits/generated": 3.8950488567352295, + "logits/real": 1.7128747701644897, + "logps/generated": -866.4058837890625, + "logps/real": -407.7037048339844, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/generated": -49.534793853759766, + "rewards/margins": 43.238182067871094, + "rewards/real": -6.296614646911621, + "step": 7250 + }, + { + "epoch": 2.32, + "learning_rate": 1.2534076093398126e-07, + "logits/generated": 3.5611443519592285, + "logits/real": 1.8351194858551025, + "logps/generated": -830.7393798828125, + "logps/real": -351.31988525390625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/generated": -46.701263427734375, + "rewards/margins": 41.20185470581055, + "rewards/real": -5.499411582946777, + "step": 7260 + }, + { + "epoch": 2.33, + "learning_rate": 1.247481332227095e-07, + "logits/generated": 3.4794883728027344, + "logits/real": 1.7529456615447998, + "logps/generated": -1048.803955078125, + "logps/real": -391.748779296875, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/generated": -61.122528076171875, + "rewards/margins": 54.686805725097656, + "rewards/real": -6.435726165771484, + "step": 7270 + }, + { + "epoch": 2.33, + "learning_rate": 1.241555055114377e-07, + "logits/generated": 3.7687296867370605, + "logits/real": 1.7407106161117554, + "logps/generated": -948.267578125, + "logps/real": -408.15948486328125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/generated": -54.70806884765625, + "rewards/margins": 49.158287048339844, + "rewards/real": -5.549779415130615, + "step": 7280 + }, + { + "epoch": 2.33, + "learning_rate": 1.2356287780016594e-07, + "logits/generated": 4.017646789550781, + "logits/real": 1.896411657333374, + "logps/generated": -1000.8963012695312, + "logps/real": -381.00531005859375, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/generated": -61.927223205566406, + "rewards/margins": 55.34486770629883, + "rewards/real": -6.5823540687561035, + "step": 7290 + }, + { + "epoch": 2.34, + "learning_rate": 1.2297025008889415e-07, + "logits/generated": 3.857715606689453, + "logits/real": 1.914616346359253, + "logps/generated": -916.0032958984375, + "logps/real": -407.76373291015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -54.48760223388672, + "rewards/margins": 48.77091598510742, + "rewards/real": -5.716687202453613, + "step": 7300 + }, + { + "epoch": 2.34, + "learning_rate": 1.2237762237762237e-07, + "logits/generated": 3.556292772293091, + "logits/real": 1.2310831546783447, + "logps/generated": -949.7063598632812, + "logps/real": -416.2879333496094, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/generated": -56.6829833984375, + "rewards/margins": 50.5594482421875, + "rewards/real": -6.12353515625, + "step": 7310 + }, + { + "epoch": 2.34, + "learning_rate": 1.2178499466635058e-07, + "logits/generated": 3.877579927444458, + "logits/real": 2.2572357654571533, + "logps/generated": -892.0955810546875, + "logps/real": -380.8299865722656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -53.075401306152344, + "rewards/margins": 46.75433349609375, + "rewards/real": -6.321071147918701, + "step": 7320 + }, + { + "epoch": 2.35, + "learning_rate": 1.2119236695507882e-07, + "logits/generated": 3.684466600418091, + "logits/real": 1.6927614212036133, + "logps/generated": -926.1092529296875, + "logps/real": -363.82403564453125, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/generated": -54.74932098388672, + "rewards/margins": 49.10929870605469, + "rewards/real": -5.640023231506348, + "step": 7330 + }, + { + "epoch": 2.35, + "learning_rate": 1.2059973924380704e-07, + "logits/generated": 4.153500556945801, + "logits/real": 1.8311598300933838, + "logps/generated": -945.724609375, + "logps/real": -385.7518005371094, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -54.987701416015625, + "rewards/margins": 49.47273254394531, + "rewards/real": -5.514973163604736, + "step": 7340 + }, + { + "epoch": 2.35, + "learning_rate": 1.2000711153253525e-07, + "logits/generated": 3.6209239959716797, + "logits/real": 1.6371666193008423, + "logps/generated": -824.1793212890625, + "logps/real": -388.419189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -45.33180236816406, + "rewards/margins": 40.07854461669922, + "rewards/real": -5.25325870513916, + "step": 7350 + }, + { + "epoch": 2.36, + "learning_rate": 1.1941448382126347e-07, + "logits/generated": 3.3920371532440186, + "logits/real": 1.992281198501587, + "logps/generated": -894.01025390625, + "logps/real": -359.991943359375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/generated": -52.777183532714844, + "rewards/margins": 48.64227294921875, + "rewards/real": -4.134913444519043, + "step": 7360 + }, + { + "epoch": 2.36, + "learning_rate": 1.188218561099917e-07, + "logits/generated": 3.483046293258667, + "logits/real": 1.4694679975509644, + "logps/generated": -884.4298706054688, + "logps/real": -386.8287658691406, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/generated": -52.43204879760742, + "rewards/margins": 46.55149459838867, + "rewards/real": -5.880550384521484, + "step": 7370 + }, + { + "epoch": 2.36, + "learning_rate": 1.1822922839871991e-07, + "logits/generated": 3.7342770099639893, + "logits/real": 1.8004376888275146, + "logps/generated": -912.0466918945312, + "logps/real": -356.5923156738281, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/generated": -53.13550567626953, + "rewards/margins": 48.5265007019043, + "rewards/real": -4.60900354385376, + "step": 7380 + }, + { + "epoch": 2.36, + "learning_rate": 1.1763660068744814e-07, + "logits/generated": 3.6718952655792236, + "logits/real": 1.7568000555038452, + "logps/generated": -857.4403076171875, + "logps/real": -346.9060363769531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -49.09733963012695, + "rewards/margins": 44.313941955566406, + "rewards/real": -4.7833943367004395, + "step": 7390 + }, + { + "epoch": 2.37, + "learning_rate": 1.1704397297617637e-07, + "logits/generated": 3.6978485584259033, + "logits/real": 1.942521333694458, + "logps/generated": -943.5989990234375, + "logps/real": -414.44232177734375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/generated": -55.790122985839844, + "rewards/margins": 49.674232482910156, + "rewards/real": -6.115891933441162, + "step": 7400 + }, + { + "epoch": 2.37, + "learning_rate": 1.1645134526490457e-07, + "logits/generated": 3.929466724395752, + "logits/real": 1.9601457118988037, + "logps/generated": -913.3077392578125, + "logps/real": -395.75665283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -52.7274169921875, + "rewards/margins": 47.70978927612305, + "rewards/real": -5.017629146575928, + "step": 7410 + }, + { + "epoch": 2.37, + "learning_rate": 1.158587175536328e-07, + "logits/generated": 3.940483570098877, + "logits/real": 1.827901840209961, + "logps/generated": -916.2366943359375, + "logps/real": -409.4593200683594, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/generated": -53.1384391784668, + "rewards/margins": 46.64787673950195, + "rewards/real": -6.490558624267578, + "step": 7420 + }, + { + "epoch": 2.38, + "learning_rate": 1.1526608984236103e-07, + "logits/generated": 3.3632397651672363, + "logits/real": 2.0485453605651855, + "logps/generated": -988.9066162109375, + "logps/real": -379.1597595214844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -59.874603271484375, + "rewards/margins": 54.2638053894043, + "rewards/real": -5.610796928405762, + "step": 7430 + }, + { + "epoch": 2.38, + "learning_rate": 1.1467346213108925e-07, + "logits/generated": 3.9321389198303223, + "logits/real": 1.7708828449249268, + "logps/generated": -944.1676635742188, + "logps/real": -427.54498291015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -55.71954345703125, + "rewards/margins": 50.34968948364258, + "rewards/real": -5.369858264923096, + "step": 7440 + }, + { + "epoch": 2.38, + "learning_rate": 1.1408083441981746e-07, + "logits/generated": 3.4171805381774902, + "logits/real": 1.8439222574234009, + "logps/generated": -954.8546752929688, + "logps/real": -375.6050720214844, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -56.99078369140625, + "rewards/margins": 52.356239318847656, + "rewards/real": -4.634544849395752, + "step": 7450 + }, + { + "epoch": 2.39, + "learning_rate": 1.1348820670854568e-07, + "logits/generated": 3.6472580432891846, + "logits/real": 2.0352094173431396, + "logps/generated": -938.6751098632812, + "logps/real": -366.61639404296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -55.14714813232422, + "rewards/margins": 49.33271408081055, + "rewards/real": -5.814435005187988, + "step": 7460 + }, + { + "epoch": 2.39, + "learning_rate": 1.128955789972739e-07, + "logits/generated": 3.1472151279449463, + "logits/real": 1.9693548679351807, + "logps/generated": -1045.47802734375, + "logps/real": -394.6998596191406, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -64.78861236572266, + "rewards/margins": 59.74689865112305, + "rewards/real": -5.041702747344971, + "step": 7470 + }, + { + "epoch": 2.39, + "learning_rate": 1.1230295128600213e-07, + "logits/generated": 4.379024505615234, + "logits/real": 2.2068142890930176, + "logps/generated": -1073.003662109375, + "logps/real": -342.42840576171875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/generated": -64.67329406738281, + "rewards/margins": 59.653419494628906, + "rewards/real": -5.019867897033691, + "step": 7480 + }, + { + "epoch": 2.4, + "learning_rate": 1.1171032357473035e-07, + "logits/generated": 4.089856147766113, + "logits/real": 2.041640281677246, + "logps/generated": -895.4403076171875, + "logps/real": -362.87530517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -52.54082107543945, + "rewards/margins": 46.44224166870117, + "rewards/real": -6.09857702255249, + "step": 7490 + }, + { + "epoch": 2.4, + "learning_rate": 1.1111769586345857e-07, + "logits/generated": 3.8495888710021973, + "logits/real": 1.9811060428619385, + "logps/generated": -947.15576171875, + "logps/real": -374.90264892578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -56.577735900878906, + "rewards/margins": 50.850772857666016, + "rewards/real": -5.726959705352783, + "step": 7500 + }, + { + "epoch": 2.4, + "learning_rate": 1.105250681521868e-07, + "logits/generated": 3.28678822517395, + "logits/real": 2.0364482402801514, + "logps/generated": -970.5263671875, + "logps/real": -397.9372863769531, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -58.57470703125, + "rewards/margins": 52.99140548706055, + "rewards/real": -5.583299160003662, + "step": 7510 + }, + { + "epoch": 2.41, + "learning_rate": 1.0993244044091501e-07, + "logits/generated": 3.6762795448303223, + "logits/real": 1.876082420349121, + "logps/generated": -1004.3568115234375, + "logps/real": -333.42950439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -61.16901397705078, + "rewards/margins": 55.99580764770508, + "rewards/real": -5.173208713531494, + "step": 7520 + }, + { + "epoch": 2.41, + "learning_rate": 1.0933981272964324e-07, + "logits/generated": 3.516345977783203, + "logits/real": 1.7883695363998413, + "logps/generated": -1011.6312255859375, + "logps/real": -423.02978515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -62.28319549560547, + "rewards/margins": 56.01143264770508, + "rewards/real": -6.271761894226074, + "step": 7530 + }, + { + "epoch": 2.41, + "learning_rate": 1.0874718501837145e-07, + "logits/generated": 3.5252366065979004, + "logits/real": 2.0527803897857666, + "logps/generated": -932.23974609375, + "logps/real": -385.8324279785156, + "loss": 0.0045, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -54.8437614440918, + "rewards/margins": 49.315486907958984, + "rewards/real": -5.528271198272705, + "step": 7540 + }, + { + "epoch": 2.42, + "learning_rate": 1.0815455730709967e-07, + "logits/generated": 2.9808735847473145, + "logits/real": 1.8349164724349976, + "logps/generated": -999.6062622070312, + "logps/real": -376.09893798828125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/generated": -59.0669059753418, + "rewards/margins": 53.1815299987793, + "rewards/real": -5.885369300842285, + "step": 7550 + }, + { + "epoch": 2.42, + "learning_rate": 1.075619295958279e-07, + "logits/generated": 3.4894936084747314, + "logits/real": 2.2562403678894043, + "logps/generated": -1022.0750122070312, + "logps/real": -373.09075927734375, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/generated": -60.48945236206055, + "rewards/margins": 55.06415939331055, + "rewards/real": -5.42529821395874, + "step": 7560 + }, + { + "epoch": 2.42, + "learning_rate": 1.0696930188455613e-07, + "logits/generated": 3.5014560222625732, + "logits/real": 2.2231686115264893, + "logps/generated": -1004.1393432617188, + "logps/real": -351.8412170410156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -58.90232467651367, + "rewards/margins": 53.25592041015625, + "rewards/real": -5.646405220031738, + "step": 7570 + }, + { + "epoch": 2.43, + "learning_rate": 1.0637667417328434e-07, + "logits/generated": 4.085561275482178, + "logits/real": 2.251117467880249, + "logps/generated": -879.97216796875, + "logps/real": -404.2364807128906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -50.447505950927734, + "rewards/margins": 43.98231506347656, + "rewards/real": -6.465188503265381, + "step": 7580 + }, + { + "epoch": 2.43, + "learning_rate": 1.0578404646201256e-07, + "logits/generated": 3.8189921379089355, + "logits/real": 2.151841640472412, + "logps/generated": -995.25927734375, + "logps/real": -385.592041015625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -58.707427978515625, + "rewards/margins": 53.73567581176758, + "rewards/real": -4.97175931930542, + "step": 7590 + }, + { + "epoch": 2.43, + "learning_rate": 1.0519141875074079e-07, + "logits/generated": 3.7049126625061035, + "logits/real": 1.8221962451934814, + "logps/generated": -974.4742431640625, + "logps/real": -368.1142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -58.956634521484375, + "rewards/margins": 53.39918899536133, + "rewards/real": -5.55744743347168, + "step": 7600 + }, + { + "epoch": 2.44, + "learning_rate": 1.04598791039469e-07, + "logits/generated": 4.49704647064209, + "logits/real": 2.193587303161621, + "logps/generated": -927.2984619140625, + "logps/real": -375.15283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -54.1307258605957, + "rewards/margins": 48.12715148925781, + "rewards/real": -6.003569602966309, + "step": 7610 + }, + { + "epoch": 2.44, + "learning_rate": 1.0400616332819723e-07, + "logits/generated": 3.3553459644317627, + "logits/real": 1.991625189781189, + "logps/generated": -951.2344970703125, + "logps/real": -379.1512756347656, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -57.45017623901367, + "rewards/margins": 51.63641357421875, + "rewards/real": -5.8137640953063965, + "step": 7620 + }, + { + "epoch": 2.44, + "learning_rate": 1.0341353561692543e-07, + "logits/generated": 3.7080891132354736, + "logits/real": 1.968544602394104, + "logps/generated": -979.7561645507812, + "logps/real": -386.4315490722656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -58.76336669921875, + "rewards/margins": 52.9942626953125, + "rewards/real": -5.769102096557617, + "step": 7630 + }, + { + "epoch": 2.44, + "learning_rate": 1.0282090790565366e-07, + "logits/generated": 3.7121453285217285, + "logits/real": 2.1421244144439697, + "logps/generated": -1032.6112060546875, + "logps/real": -422.10003662109375, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -62.85908889770508, + "rewards/margins": 55.61089324951172, + "rewards/real": -7.248189449310303, + "step": 7640 + }, + { + "epoch": 2.45, + "learning_rate": 1.0222828019438189e-07, + "logits/generated": 4.168065071105957, + "logits/real": 1.7530624866485596, + "logps/generated": -1050.153564453125, + "logps/real": -412.28839111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -63.661293029785156, + "rewards/margins": 57.603965759277344, + "rewards/real": -6.057323932647705, + "step": 7650 + }, + { + "epoch": 2.45, + "learning_rate": 1.016356524831101e-07, + "logits/generated": 4.1445746421813965, + "logits/real": 2.0660576820373535, + "logps/generated": -936.4821166992188, + "logps/real": -324.1432800292969, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -54.63880157470703, + "rewards/margins": 49.89611053466797, + "rewards/real": -4.742682933807373, + "step": 7660 + }, + { + "epoch": 2.45, + "learning_rate": 1.0104302477183832e-07, + "logits/generated": 3.9583544731140137, + "logits/real": 2.2555437088012695, + "logps/generated": -877.7376098632812, + "logps/real": -390.02728271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -50.91754913330078, + "rewards/margins": 44.96452713012695, + "rewards/real": -5.9530229568481445, + "step": 7670 + }, + { + "epoch": 2.46, + "learning_rate": 1.0045039706056655e-07, + "logits/generated": 3.6879119873046875, + "logits/real": 2.3297011852264404, + "logps/generated": -987.4342041015625, + "logps/real": -383.0220031738281, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/generated": -58.22246170043945, + "rewards/margins": 52.973426818847656, + "rewards/real": -5.249035358428955, + "step": 7680 + }, + { + "epoch": 2.46, + "learning_rate": 9.985776934929476e-08, + "logits/generated": 4.630789756774902, + "logits/real": 2.2359023094177246, + "logps/generated": -1017.9152221679688, + "logps/real": -372.4312438964844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -62.529258728027344, + "rewards/margins": 56.240379333496094, + "rewards/real": -6.288876056671143, + "step": 7690 + }, + { + "epoch": 2.46, + "learning_rate": 9.926514163802299e-08, + "logits/generated": 3.859633684158325, + "logits/real": 2.0333054065704346, + "logps/generated": -913.1114501953125, + "logps/real": -380.2625732421875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -56.29637908935547, + "rewards/margins": 51.327125549316406, + "rewards/real": -4.969260215759277, + "step": 7700 + }, + { + "epoch": 2.47, + "learning_rate": 9.867251392675122e-08, + "logits/generated": 3.7123591899871826, + "logits/real": 2.197134017944336, + "logps/generated": -1053.156982421875, + "logps/real": -429.6893005371094, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/generated": -64.27241516113281, + "rewards/margins": 56.07954788208008, + "rewards/real": -8.192865371704102, + "step": 7710 + }, + { + "epoch": 2.47, + "learning_rate": 9.807988621547942e-08, + "logits/generated": 4.180248260498047, + "logits/real": 2.1022727489471436, + "logps/generated": -951.1502075195312, + "logps/real": -434.37078857421875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/generated": -58.84419631958008, + "rewards/margins": 49.985015869140625, + "rewards/real": -8.859179496765137, + "step": 7720 + }, + { + "epoch": 2.47, + "learning_rate": 9.748725850420765e-08, + "logits/generated": 3.679893970489502, + "logits/real": 2.0825464725494385, + "logps/generated": -1107.1378173828125, + "logps/real": -381.36688232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -68.13761901855469, + "rewards/margins": 61.41798782348633, + "rewards/real": -6.719626426696777, + "step": 7730 + }, + { + "epoch": 2.48, + "learning_rate": 9.689463079293588e-08, + "logits/generated": 3.6188766956329346, + "logits/real": 2.2197442054748535, + "logps/generated": -897.60888671875, + "logps/real": -392.6204528808594, + "loss": 0.0066, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -52.57334518432617, + "rewards/margins": 43.87472915649414, + "rewards/real": -8.698624610900879, + "step": 7740 + }, + { + "epoch": 2.48, + "learning_rate": 9.63020030816641e-08, + "logits/generated": 3.9322972297668457, + "logits/real": 2.2569358348846436, + "logps/generated": -1035.963623046875, + "logps/real": -410.9815979003906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -63.803489685058594, + "rewards/margins": 55.50420379638672, + "rewards/real": -8.299293518066406, + "step": 7750 + }, + { + "epoch": 2.48, + "learning_rate": 9.570937537039231e-08, + "logits/generated": 4.188558578491211, + "logits/real": 2.2345645427703857, + "logps/generated": -1055.390380859375, + "logps/real": -375.19586181640625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -65.69261932373047, + "rewards/margins": 57.92228317260742, + "rewards/real": -7.770342826843262, + "step": 7760 + }, + { + "epoch": 2.49, + "learning_rate": 9.511674765912053e-08, + "logits/generated": 3.623384952545166, + "logits/real": 2.1524815559387207, + "logps/generated": -1005.8858642578125, + "logps/real": -343.5038146972656, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/generated": -61.33906936645508, + "rewards/margins": 55.49065017700195, + "rewards/real": -5.848414897918701, + "step": 7770 + }, + { + "epoch": 2.49, + "learning_rate": 9.452411994784876e-08, + "logits/generated": 4.0338263511657715, + "logits/real": 2.1665244102478027, + "logps/generated": -975.5823364257812, + "logps/real": -374.0013427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -58.512290954589844, + "rewards/margins": 52.536277770996094, + "rewards/real": -5.976012229919434, + "step": 7780 + }, + { + "epoch": 2.49, + "learning_rate": 9.393149223657698e-08, + "logits/generated": 4.073245048522949, + "logits/real": 1.9686634540557861, + "logps/generated": -992.0164794921875, + "logps/real": -363.16754150390625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/generated": -59.51947021484375, + "rewards/margins": 53.338470458984375, + "rewards/real": -6.180994987487793, + "step": 7790 + }, + { + "epoch": 2.5, + "learning_rate": 9.33388645253052e-08, + "logits/generated": 4.028234481811523, + "logits/real": 2.0553243160247803, + "logps/generated": -1040.82470703125, + "logps/real": -382.91851806640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -63.280296325683594, + "rewards/margins": 55.88728713989258, + "rewards/real": -7.393007755279541, + "step": 7800 + }, + { + "epoch": 2.5, + "learning_rate": 9.274623681403342e-08, + "logits/generated": 4.063427448272705, + "logits/real": 2.1127984523773193, + "logps/generated": -1055.404541015625, + "logps/real": -361.0789794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -65.79178619384766, + "rewards/margins": 58.767051696777344, + "rewards/real": -7.024729251861572, + "step": 7810 + }, + { + "epoch": 2.5, + "learning_rate": 9.215360910276164e-08, + "logits/generated": 3.2966866493225098, + "logits/real": 2.052476406097412, + "logps/generated": -918.8580322265625, + "logps/real": -417.35369873046875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -54.87522506713867, + "rewards/margins": 47.907630920410156, + "rewards/real": -6.967595100402832, + "step": 7820 + }, + { + "epoch": 2.51, + "learning_rate": 9.156098139148986e-08, + "logits/generated": 4.767776966094971, + "logits/real": 1.9159488677978516, + "logps/generated": -961.6583251953125, + "logps/real": -372.76019287109375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -57.94257736206055, + "rewards/margins": 51.2998161315918, + "rewards/real": -6.642757415771484, + "step": 7830 + }, + { + "epoch": 2.51, + "learning_rate": 9.096835368021809e-08, + "logits/generated": 3.3908581733703613, + "logits/real": 2.069702386856079, + "logps/generated": -914.8912963867188, + "logps/real": -385.53857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -53.78641891479492, + "rewards/margins": 46.73820877075195, + "rewards/real": -7.048209190368652, + "step": 7840 + }, + { + "epoch": 2.51, + "learning_rate": 9.03757259689463e-08, + "logits/generated": 3.3565869331359863, + "logits/real": 1.883155107498169, + "logps/generated": -957.8240356445312, + "logps/real": -392.32806396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -57.133270263671875, + "rewards/margins": 49.50084686279297, + "rewards/real": -7.632418155670166, + "step": 7850 + }, + { + "epoch": 2.52, + "learning_rate": 8.978309825767452e-08, + "logits/generated": 4.155790328979492, + "logits/real": 2.111621141433716, + "logps/generated": -995.3546142578125, + "logps/real": -368.88470458984375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/generated": -61.09455108642578, + "rewards/margins": 53.873817443847656, + "rewards/real": -7.220743656158447, + "step": 7860 + }, + { + "epoch": 2.52, + "learning_rate": 8.919047054640275e-08, + "logits/generated": 3.785146713256836, + "logits/real": 2.210160255432129, + "logps/generated": -969.7698974609375, + "logps/real": -407.4634094238281, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -57.033287048339844, + "rewards/margins": 49.69139862060547, + "rewards/real": -7.34189510345459, + "step": 7870 + }, + { + "epoch": 2.52, + "learning_rate": 8.859784283513098e-08, + "logits/generated": 3.6821417808532715, + "logits/real": 2.3126654624938965, + "logps/generated": -1092.364990234375, + "logps/real": -379.85687255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.31324005126953, + "rewards/margins": 62.71815872192383, + "rewards/real": -6.595080375671387, + "step": 7880 + }, + { + "epoch": 2.52, + "learning_rate": 8.800521512385919e-08, + "logits/generated": 4.2309370040893555, + "logits/real": 2.0690970420837402, + "logps/generated": -1044.2857666015625, + "logps/real": -356.1011047363281, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/generated": -63.82268142700195, + "rewards/margins": 57.819053649902344, + "rewards/real": -6.003624439239502, + "step": 7890 + }, + { + "epoch": 2.53, + "learning_rate": 8.741258741258741e-08, + "logits/generated": 3.7529544830322266, + "logits/real": 2.2485594749450684, + "logps/generated": -848.33740234375, + "logps/real": -392.252685546875, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -48.257568359375, + "rewards/margins": 41.74831771850586, + "rewards/real": -6.509249210357666, + "step": 7900 + }, + { + "epoch": 2.53, + "learning_rate": 8.681995970131564e-08, + "logits/generated": 3.6429622173309326, + "logits/real": 1.6829650402069092, + "logps/generated": -960.66064453125, + "logps/real": -374.3921813964844, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/generated": -57.94014358520508, + "rewards/margins": 52.40065383911133, + "rewards/real": -5.539486885070801, + "step": 7910 + }, + { + "epoch": 2.53, + "learning_rate": 8.622733199004385e-08, + "logits/generated": 3.404963970184326, + "logits/real": 1.7258421182632446, + "logps/generated": -930.3843994140625, + "logps/real": -406.9457702636719, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -55.154701232910156, + "rewards/margins": 49.09075164794922, + "rewards/real": -6.063943386077881, + "step": 7920 + }, + { + "epoch": 2.54, + "learning_rate": 8.563470427877208e-08, + "logits/generated": 3.465583086013794, + "logits/real": 1.9416353702545166, + "logps/generated": -966.3623046875, + "logps/real": -384.91851806640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -58.843910217285156, + "rewards/margins": 51.18348693847656, + "rewards/real": -7.6604204177856445, + "step": 7930 + }, + { + "epoch": 2.54, + "learning_rate": 8.504207656750028e-08, + "logits/generated": 3.683955430984497, + "logits/real": 1.7104041576385498, + "logps/generated": -976.37109375, + "logps/real": -405.4421081542969, + "loss": 0.0044, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -59.630859375, + "rewards/margins": 52.724388122558594, + "rewards/real": -6.9064764976501465, + "step": 7940 + }, + { + "epoch": 2.54, + "learning_rate": 8.444944885622851e-08, + "logits/generated": 3.2371838092803955, + "logits/real": 2.3724024295806885, + "logps/generated": -996.0675659179688, + "logps/real": -357.4671325683594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -59.767494201660156, + "rewards/margins": 53.17461013793945, + "rewards/real": -6.592886447906494, + "step": 7950 + }, + { + "epoch": 2.55, + "learning_rate": 8.385682114495674e-08, + "logits/generated": 3.7273597717285156, + "logits/real": 2.1956067085266113, + "logps/generated": -1034.621337890625, + "logps/real": -365.28460693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -63.633636474609375, + "rewards/margins": 56.25410079956055, + "rewards/real": -7.37954044342041, + "step": 7960 + }, + { + "epoch": 2.55, + "learning_rate": 8.326419343368495e-08, + "logits/generated": 3.5570411682128906, + "logits/real": 1.882310152053833, + "logps/generated": -911.0950927734375, + "logps/real": -381.05291748046875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -52.910743713378906, + "rewards/margins": 46.59660720825195, + "rewards/real": -6.314126491546631, + "step": 7970 + }, + { + "epoch": 2.55, + "learning_rate": 8.267156572241317e-08, + "logits/generated": 3.335761308670044, + "logits/real": 1.4432555437088013, + "logps/generated": -1008.70361328125, + "logps/real": -412.27496337890625, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/generated": -61.73523712158203, + "rewards/margins": 55.1703987121582, + "rewards/real": -6.564839839935303, + "step": 7980 + }, + { + "epoch": 2.56, + "learning_rate": 8.20789380111414e-08, + "logits/generated": 4.182002067565918, + "logits/real": 2.5581986904144287, + "logps/generated": -1070.162841796875, + "logps/real": -364.2648010253906, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/generated": -66.75981140136719, + "rewards/margins": 58.261871337890625, + "rewards/real": -8.497949600219727, + "step": 7990 + }, + { + "epoch": 2.56, + "learning_rate": 8.148631029986961e-08, + "logits/generated": 3.970362901687622, + "logits/real": 1.7085485458374023, + "logps/generated": -909.9697265625, + "logps/real": -420.698486328125, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/generated": -53.256507873535156, + "rewards/margins": 46.76911163330078, + "rewards/real": -6.48738956451416, + "step": 8000 + }, + { + "epoch": 2.56, + "learning_rate": 8.089368258859784e-08, + "logits/generated": 4.060704231262207, + "logits/real": 1.9212415218353271, + "logps/generated": -972.4220581054688, + "logps/real": -417.5872497558594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -59.375953674316406, + "rewards/margins": 52.89890670776367, + "rewards/real": -6.477044105529785, + "step": 8010 + }, + { + "epoch": 2.57, + "learning_rate": 8.030105487732607e-08, + "logits/generated": 3.519432544708252, + "logits/real": 2.1166985034942627, + "logps/generated": -876.7869873046875, + "logps/real": -398.08538818359375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -52.72206497192383, + "rewards/margins": 44.93832778930664, + "rewards/real": -7.783738136291504, + "step": 8020 + }, + { + "epoch": 2.57, + "learning_rate": 7.970842716605427e-08, + "logits/generated": 4.376236915588379, + "logits/real": 2.1150360107421875, + "logps/generated": -1027.4515380859375, + "logps/real": -388.5436096191406, + "loss": 0.0182, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -63.449058532714844, + "rewards/margins": 55.94439697265625, + "rewards/real": -7.504659175872803, + "step": 8030 + }, + { + "epoch": 2.57, + "learning_rate": 7.91157994547825e-08, + "logits/generated": 3.525820255279541, + "logits/real": 1.893680214881897, + "logps/generated": -1016.8726806640625, + "logps/real": -371.03125, + "loss": 0.0048, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -60.87693405151367, + "rewards/margins": 53.64154815673828, + "rewards/real": -7.235389709472656, + "step": 8040 + }, + { + "epoch": 2.58, + "learning_rate": 7.852317174351073e-08, + "logits/generated": 3.0457284450531006, + "logits/real": 1.5740561485290527, + "logps/generated": -916.3117065429688, + "logps/real": -406.9604797363281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -52.98957443237305, + "rewards/margins": 45.95235824584961, + "rewards/real": -7.037219047546387, + "step": 8050 + }, + { + "epoch": 2.58, + "learning_rate": 7.793054403223895e-08, + "logits/generated": 3.4558169841766357, + "logits/real": 1.8014957904815674, + "logps/generated": -915.0403442382812, + "logps/real": -447.24462890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -53.56114959716797, + "rewards/margins": 46.14393997192383, + "rewards/real": -7.417208671569824, + "step": 8060 + }, + { + "epoch": 2.58, + "learning_rate": 7.733791632096716e-08, + "logits/generated": 2.940850257873535, + "logits/real": 1.4414273500442505, + "logps/generated": -872.7760009765625, + "logps/real": -391.720703125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -49.339874267578125, + "rewards/margins": 43.12675094604492, + "rewards/real": -6.2131218910217285, + "step": 8070 + }, + { + "epoch": 2.59, + "learning_rate": 7.674528860969538e-08, + "logits/generated": 2.944129705429077, + "logits/real": 2.0338828563690186, + "logps/generated": -917.4520263671875, + "logps/real": -352.81646728515625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -53.3176155090332, + "rewards/margins": 47.27953338623047, + "rewards/real": -6.038083076477051, + "step": 8080 + }, + { + "epoch": 2.59, + "learning_rate": 7.61526608984236e-08, + "logits/generated": 3.909428119659424, + "logits/real": 1.7142349481582642, + "logps/generated": -950.1404418945312, + "logps/real": -337.119140625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -56.356048583984375, + "rewards/margins": 50.075618743896484, + "rewards/real": -6.280430793762207, + "step": 8090 + }, + { + "epoch": 2.59, + "learning_rate": 7.556003318715183e-08, + "logits/generated": 3.8688418865203857, + "logits/real": 1.6353342533111572, + "logps/generated": -1029.6884765625, + "logps/real": -378.70831298828125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/generated": -61.768348693847656, + "rewards/margins": 54.54430389404297, + "rewards/real": -7.224038600921631, + "step": 8100 + }, + { + "epoch": 2.6, + "learning_rate": 7.496740547588005e-08, + "logits/generated": 3.625739336013794, + "logits/real": 1.8834596872329712, + "logps/generated": -936.3740234375, + "logps/real": -369.0130920410156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -56.205955505371094, + "rewards/margins": 49.76278305053711, + "rewards/real": -6.44317102432251, + "step": 8110 + }, + { + "epoch": 2.6, + "learning_rate": 7.437477776460826e-08, + "logits/generated": 4.4700608253479, + "logits/real": 1.8153865337371826, + "logps/generated": -1135.7452392578125, + "logps/real": -387.67364501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.09259796142578, + "rewards/margins": 62.685211181640625, + "rewards/real": -7.407387733459473, + "step": 8120 + }, + { + "epoch": 2.6, + "learning_rate": 7.37821500533365e-08, + "logits/generated": 3.371682643890381, + "logits/real": 1.5304771661758423, + "logps/generated": -1005.3448486328125, + "logps/real": -396.2160339355469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -61.73991775512695, + "rewards/margins": 54.14348220825195, + "rewards/real": -7.596432685852051, + "step": 8130 + }, + { + "epoch": 2.6, + "learning_rate": 7.318952234206471e-08, + "logits/generated": 3.6937670707702637, + "logits/real": 1.5645238161087036, + "logps/generated": -1058.8162841796875, + "logps/real": -405.37615966796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -66.2863540649414, + "rewards/margins": 59.2757453918457, + "rewards/real": -7.010606288909912, + "step": 8140 + }, + { + "epoch": 2.61, + "learning_rate": 7.259689463079294e-08, + "logits/generated": 3.9854750633239746, + "logits/real": 1.6261682510375977, + "logps/generated": -990.65966796875, + "logps/real": -408.9169921875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -61.356956481933594, + "rewards/margins": 53.9902229309082, + "rewards/real": -7.366732120513916, + "step": 8150 + }, + { + "epoch": 2.61, + "learning_rate": 7.200426691952115e-08, + "logits/generated": 4.002991676330566, + "logits/real": 1.825396180152893, + "logps/generated": -1012.3742065429688, + "logps/real": -411.4058532714844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -60.14048385620117, + "rewards/margins": 53.03398513793945, + "rewards/real": -7.1064958572387695, + "step": 8160 + }, + { + "epoch": 2.61, + "learning_rate": 7.141163920824937e-08, + "logits/generated": 3.600146532058716, + "logits/real": 1.9582529067993164, + "logps/generated": -968.4308471679688, + "logps/real": -416.37945556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -57.93609619140625, + "rewards/margins": 49.888954162597656, + "rewards/real": -8.047143936157227, + "step": 8170 + }, + { + "epoch": 2.62, + "learning_rate": 7.08190114969776e-08, + "logits/generated": 3.9580321311950684, + "logits/real": 1.7216682434082031, + "logps/generated": -935.6194458007812, + "logps/real": -403.15704345703125, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -56.472373962402344, + "rewards/margins": 48.977821350097656, + "rewards/real": -7.494554042816162, + "step": 8180 + }, + { + "epoch": 2.62, + "learning_rate": 7.022638378570583e-08, + "logits/generated": 3.9833621978759766, + "logits/real": 1.8528293371200562, + "logps/generated": -929.1632690429688, + "logps/real": -435.58306884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -54.266265869140625, + "rewards/margins": 47.02381134033203, + "rewards/real": -7.242454528808594, + "step": 8190 + }, + { + "epoch": 2.62, + "learning_rate": 6.963375607443404e-08, + "logits/generated": 3.979447603225708, + "logits/real": 1.8997459411621094, + "logps/generated": -1097.211669921875, + "logps/real": -392.0874938964844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.32579040527344, + "rewards/margins": 62.917991638183594, + "rewards/real": -7.407802581787109, + "step": 8200 + }, + { + "epoch": 2.63, + "learning_rate": 6.904112836316226e-08, + "logits/generated": 3.4925377368927, + "logits/real": 1.8967807292938232, + "logps/generated": -867.1658935546875, + "logps/real": -375.30450439453125, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/generated": -51.6424674987793, + "rewards/margins": 43.996559143066406, + "rewards/real": -7.645912170410156, + "step": 8210 + }, + { + "epoch": 2.63, + "learning_rate": 6.844850065189047e-08, + "logits/generated": 3.8538784980773926, + "logits/real": 1.8840980529785156, + "logps/generated": -895.7223510742188, + "logps/real": -406.9442443847656, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/generated": -52.7965087890625, + "rewards/margins": 45.01344680786133, + "rewards/real": -7.7830610275268555, + "step": 8220 + }, + { + "epoch": 2.63, + "learning_rate": 6.78558729406187e-08, + "logits/generated": 3.669043779373169, + "logits/real": 1.752126693725586, + "logps/generated": -949.6334228515625, + "logps/real": -342.74896240234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -57.340171813964844, + "rewards/margins": 51.566688537597656, + "rewards/real": -5.773486137390137, + "step": 8230 + }, + { + "epoch": 2.64, + "learning_rate": 6.726324522934693e-08, + "logits/generated": 3.10225510597229, + "logits/real": 1.8635063171386719, + "logps/generated": -1024.183349609375, + "logps/real": -372.98455810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -63.195220947265625, + "rewards/margins": 56.8504753112793, + "rewards/real": -6.3447465896606445, + "step": 8240 + }, + { + "epoch": 2.64, + "learning_rate": 6.667061751807513e-08, + "logits/generated": 3.48057222366333, + "logits/real": 1.9885294437408447, + "logps/generated": -961.88330078125, + "logps/real": -385.88385009765625, + "loss": 0.0049, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -57.873268127441406, + "rewards/margins": 50.62914276123047, + "rewards/real": -7.244119167327881, + "step": 8250 + }, + { + "epoch": 2.64, + "learning_rate": 6.607798980680336e-08, + "logits/generated": 3.5142083168029785, + "logits/real": 2.191819667816162, + "logps/generated": -910.6423950195312, + "logps/real": -414.0364685058594, + "loss": 0.0318, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -54.2632942199707, + "rewards/margins": 46.317115783691406, + "rewards/real": -7.9461798667907715, + "step": 8260 + }, + { + "epoch": 2.65, + "learning_rate": 6.548536209553159e-08, + "logits/generated": 4.207846641540527, + "logits/real": 1.693935751914978, + "logps/generated": -955.6438598632812, + "logps/real": -400.48223876953125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/generated": -56.78264617919922, + "rewards/margins": 50.251136779785156, + "rewards/real": -6.531507968902588, + "step": 8270 + }, + { + "epoch": 2.65, + "learning_rate": 6.48927343842598e-08, + "logits/generated": 3.317568302154541, + "logits/real": 1.7609246969223022, + "logps/generated": -1018.4681396484375, + "logps/real": -376.03021240234375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -61.39073944091797, + "rewards/margins": 55.24901580810547, + "rewards/real": -6.141722679138184, + "step": 8280 + }, + { + "epoch": 2.65, + "learning_rate": 6.430010667298802e-08, + "logits/generated": 3.58172607421875, + "logits/real": 2.184634208679199, + "logps/generated": -884.3873901367188, + "logps/real": -399.60504150390625, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -52.401527404785156, + "rewards/margins": 46.67741394042969, + "rewards/real": -5.72410774230957, + "step": 8290 + }, + { + "epoch": 2.66, + "learning_rate": 6.370747896171625e-08, + "logits/generated": 4.151366233825684, + "logits/real": 2.0955371856689453, + "logps/generated": -941.9944458007812, + "logps/real": -357.70477294921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -57.915794372558594, + "rewards/margins": 51.085426330566406, + "rewards/real": -6.8303632736206055, + "step": 8300 + }, + { + "epoch": 2.66, + "learning_rate": 6.311485125044446e-08, + "logits/generated": 4.350826740264893, + "logits/real": 2.037896156311035, + "logps/generated": -894.1232299804688, + "logps/real": -352.6371765136719, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -52.48077392578125, + "rewards/margins": 46.843807220458984, + "rewards/real": -5.636960506439209, + "step": 8310 + }, + { + "epoch": 2.66, + "learning_rate": 6.252222353917269e-08, + "logits/generated": 3.375370740890503, + "logits/real": 1.5641025304794312, + "logps/generated": -920.6629028320312, + "logps/real": -429.950439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -53.04949188232422, + "rewards/margins": 45.768009185791016, + "rewards/real": -7.281485080718994, + "step": 8320 + }, + { + "epoch": 2.67, + "learning_rate": 6.192959582790091e-08, + "logits/generated": 3.415733814239502, + "logits/real": 2.0732007026672363, + "logps/generated": -944.77294921875, + "logps/real": -397.92083740234375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/generated": -54.639923095703125, + "rewards/margins": 47.84805679321289, + "rewards/real": -6.791872978210449, + "step": 8330 + }, + { + "epoch": 2.67, + "learning_rate": 6.133696811662914e-08, + "logits/generated": 3.229666233062744, + "logits/real": 1.9830280542373657, + "logps/generated": -946.1580200195312, + "logps/real": -447.03179931640625, + "loss": 0.0374, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -54.59568405151367, + "rewards/margins": 46.74303436279297, + "rewards/real": -7.852652549743652, + "step": 8340 + }, + { + "epoch": 2.67, + "learning_rate": 6.074434040535735e-08, + "logits/generated": 3.708914279937744, + "logits/real": 2.1419925689697266, + "logps/generated": -946.6541748046875, + "logps/real": -415.590576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -56.01914596557617, + "rewards/margins": 48.601951599121094, + "rewards/real": -7.417193412780762, + "step": 8350 + }, + { + "epoch": 2.68, + "learning_rate": 6.015171269408558e-08, + "logits/generated": 3.5551514625549316, + "logits/real": 2.1436784267425537, + "logps/generated": -957.1892700195312, + "logps/real": -359.36767578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -57.14799880981445, + "rewards/margins": 51.1831169128418, + "rewards/real": -5.964877128601074, + "step": 8360 + }, + { + "epoch": 2.68, + "learning_rate": 5.955908498281379e-08, + "logits/generated": 4.05293607711792, + "logits/real": 2.651482343673706, + "logps/generated": -912.5654296875, + "logps/real": -379.1618347167969, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/generated": -54.689910888671875, + "rewards/margins": 46.95143508911133, + "rewards/real": -7.738478660583496, + "step": 8370 + }, + { + "epoch": 2.68, + "learning_rate": 5.896645727154202e-08, + "logits/generated": 4.161120414733887, + "logits/real": 2.1185717582702637, + "logps/generated": -923.5693359375, + "logps/real": -418.39532470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -54.5535774230957, + "rewards/margins": 47.09696960449219, + "rewards/real": -7.456611633300781, + "step": 8380 + }, + { + "epoch": 2.68, + "learning_rate": 5.837382956027023e-08, + "logits/generated": 3.6960418224334717, + "logits/real": 2.034367561340332, + "logps/generated": -1019.1530151367188, + "logps/real": -405.61175537109375, + "loss": 0.0045, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -60.967247009277344, + "rewards/margins": 53.078208923339844, + "rewards/real": -7.889039039611816, + "step": 8390 + }, + { + "epoch": 2.69, + "learning_rate": 5.7781201848998455e-08, + "logits/generated": 3.299665927886963, + "logits/real": 1.9864240884780884, + "logps/generated": -1078.4036865234375, + "logps/real": -402.57269287109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -66.20504760742188, + "rewards/margins": 58.318260192871094, + "rewards/real": -7.886781215667725, + "step": 8400 + }, + { + "epoch": 2.69, + "learning_rate": 5.7188574137726684e-08, + "logits/generated": 3.879106044769287, + "logits/real": 2.1246488094329834, + "logps/generated": -965.0455932617188, + "logps/real": -399.2043151855469, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/generated": -56.458168029785156, + "rewards/margins": 49.10603713989258, + "rewards/real": -7.352133274078369, + "step": 8410 + }, + { + "epoch": 2.69, + "learning_rate": 5.65959464264549e-08, + "logits/generated": 3.6350479125976562, + "logits/real": 1.9505516290664673, + "logps/generated": -977.1804809570312, + "logps/real": -402.97857666015625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -56.70595169067383, + "rewards/margins": 49.58742904663086, + "rewards/real": -7.118523597717285, + "step": 8420 + }, + { + "epoch": 2.7, + "learning_rate": 5.600331871518312e-08, + "logits/generated": 3.839154005050659, + "logits/real": 1.7790920734405518, + "logps/generated": -1017.8132934570312, + "logps/real": -397.7266540527344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -61.7443962097168, + "rewards/margins": 54.29301834106445, + "rewards/real": -7.451376438140869, + "step": 8430 + }, + { + "epoch": 2.7, + "learning_rate": 5.5410691003911337e-08, + "logits/generated": 3.7217185497283936, + "logits/real": 2.226267099380493, + "logps/generated": -907.8714599609375, + "logps/real": -364.1994323730469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -54.056365966796875, + "rewards/margins": 47.113712310791016, + "rewards/real": -6.94265079498291, + "step": 8440 + }, + { + "epoch": 2.7, + "learning_rate": 5.4818063292639565e-08, + "logits/generated": 3.527050018310547, + "logits/real": 2.036137580871582, + "logps/generated": -947.5823364257812, + "logps/real": -409.0567626953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -56.482383728027344, + "rewards/margins": 48.5067253112793, + "rewards/real": -7.9756646156311035, + "step": 8450 + }, + { + "epoch": 2.71, + "learning_rate": 5.422543558136778e-08, + "logits/generated": 3.7611911296844482, + "logits/real": 1.8519208431243896, + "logps/generated": -901.5185546875, + "logps/real": -417.3238220214844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -54.371253967285156, + "rewards/margins": 47.136558532714844, + "rewards/real": -7.234696388244629, + "step": 8460 + }, + { + "epoch": 2.71, + "learning_rate": 5.3632807870096e-08, + "logits/generated": 3.4174492359161377, + "logits/real": 1.9680230617523193, + "logps/generated": -1063.694580078125, + "logps/real": -395.2354431152344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -65.83134460449219, + "rewards/margins": 58.03875732421875, + "rewards/real": -7.792585849761963, + "step": 8470 + }, + { + "epoch": 2.71, + "learning_rate": 5.3040180158824225e-08, + "logits/generated": 3.9970059394836426, + "logits/real": 2.3934884071350098, + "logps/generated": -1125.6826171875, + "logps/real": -379.00323486328125, + "loss": 0.004, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -71.78718566894531, + "rewards/margins": 63.51630783081055, + "rewards/real": -8.270886421203613, + "step": 8480 + }, + { + "epoch": 2.72, + "learning_rate": 5.2447552447552447e-08, + "logits/generated": 3.7725205421447754, + "logits/real": 1.9590591192245483, + "logps/generated": -1091.79150390625, + "logps/real": -390.5736389160156, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/generated": -65.3434829711914, + "rewards/margins": 56.868316650390625, + "rewards/real": -8.475166320800781, + "step": 8490 + }, + { + "epoch": 2.72, + "learning_rate": 5.185492473628066e-08, + "logits/generated": 3.661846160888672, + "logits/real": 2.312032699584961, + "logps/generated": -933.3740234375, + "logps/real": -411.44451904296875, + "loss": 0.0044, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -55.91740798950195, + "rewards/margins": 47.27542495727539, + "rewards/real": -8.641983032226562, + "step": 8500 + }, + { + "epoch": 2.72, + "learning_rate": 5.1262297025008884e-08, + "logits/generated": 3.465627670288086, + "logits/real": 2.2164180278778076, + "logps/generated": -1076.426025390625, + "logps/real": -389.19964599609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -67.2499771118164, + "rewards/margins": 59.05321502685547, + "rewards/real": -8.196765899658203, + "step": 8510 + }, + { + "epoch": 2.73, + "learning_rate": 5.066966931373711e-08, + "logits/generated": 4.173044681549072, + "logits/real": 1.8490111827850342, + "logps/generated": -1132.7874755859375, + "logps/real": -432.0376892089844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.95558166503906, + "rewards/margins": 62.96862030029297, + "rewards/real": -7.986954689025879, + "step": 8520 + }, + { + "epoch": 2.73, + "learning_rate": 5.007704160246533e-08, + "logits/generated": 4.139945030212402, + "logits/real": 2.526951313018799, + "logps/generated": -1054.085693359375, + "logps/real": -377.7095031738281, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -65.83952331542969, + "rewards/margins": 57.67792510986328, + "rewards/real": -8.161606788635254, + "step": 8530 + }, + { + "epoch": 2.73, + "learning_rate": 4.948441389119355e-08, + "logits/generated": 3.516796827316284, + "logits/real": 2.3811421394348145, + "logps/generated": -1006.6256103515625, + "logps/real": -362.2398986816406, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -59.41242218017578, + "rewards/margins": 51.91820526123047, + "rewards/real": -7.494215965270996, + "step": 8540 + }, + { + "epoch": 2.74, + "learning_rate": 4.889178617992177e-08, + "logits/generated": 3.9033493995666504, + "logits/real": 1.774997353553772, + "logps/generated": -972.8084716796875, + "logps/real": -427.7626037597656, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/generated": -56.33424758911133, + "rewards/margins": 48.570919036865234, + "rewards/real": -7.763323783874512, + "step": 8550 + }, + { + "epoch": 2.74, + "learning_rate": 4.8299158468649994e-08, + "logits/generated": 3.971569061279297, + "logits/real": 2.0908877849578857, + "logps/generated": -1032.648193359375, + "logps/real": -371.1000671386719, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/generated": -64.35382080078125, + "rewards/margins": 56.81888961791992, + "rewards/real": -7.534930229187012, + "step": 8560 + }, + { + "epoch": 2.74, + "learning_rate": 4.770653075737821e-08, + "logits/generated": 4.424673557281494, + "logits/real": 2.3915224075317383, + "logps/generated": -959.6032104492188, + "logps/real": -414.59918212890625, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -55.32141876220703, + "rewards/margins": 47.93350601196289, + "rewards/real": -7.387907981872559, + "step": 8570 + }, + { + "epoch": 2.75, + "learning_rate": 4.711390304610643e-08, + "logits/generated": 4.043940544128418, + "logits/real": 2.225125789642334, + "logps/generated": -865.1267700195312, + "logps/real": -404.07330322265625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -49.90807342529297, + "rewards/margins": 42.133445739746094, + "rewards/real": -7.774628639221191, + "step": 8580 + }, + { + "epoch": 2.75, + "learning_rate": 4.6521275334834654e-08, + "logits/generated": 4.111706733703613, + "logits/real": 2.0812008380889893, + "logps/generated": -1015.0589599609375, + "logps/real": -458.95703125, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -61.14873123168945, + "rewards/margins": 52.93597412109375, + "rewards/real": -8.21275520324707, + "step": 8590 + }, + { + "epoch": 2.75, + "learning_rate": 4.5928647623562876e-08, + "logits/generated": 3.711606502532959, + "logits/real": 2.339512586593628, + "logps/generated": -953.4273681640625, + "logps/real": -405.60662841796875, + "loss": 0.0025, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -57.3015022277832, + "rewards/margins": 49.46393966674805, + "rewards/real": -7.837560176849365, + "step": 8600 + }, + { + "epoch": 2.76, + "learning_rate": 4.53360199122911e-08, + "logits/generated": 3.7957377433776855, + "logits/real": 2.4589855670928955, + "logps/generated": -1112.1412353515625, + "logps/real": -392.3586120605469, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -68.88035583496094, + "rewards/margins": 59.36815643310547, + "rewards/real": -9.512189865112305, + "step": 8610 + }, + { + "epoch": 2.76, + "learning_rate": 4.474339220101932e-08, + "logits/generated": 3.2196178436279297, + "logits/real": 1.9958690404891968, + "logps/generated": -870.9078979492188, + "logps/real": -462.06121826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -50.91344451904297, + "rewards/margins": 42.71410369873047, + "rewards/real": -8.199337005615234, + "step": 8620 + }, + { + "epoch": 2.76, + "learning_rate": 4.415076448974754e-08, + "logits/generated": 3.8534913063049316, + "logits/real": 2.2192482948303223, + "logps/generated": -990.10986328125, + "logps/real": -397.8377380371094, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -60.09471893310547, + "rewards/margins": 53.13765335083008, + "rewards/real": -6.957066535949707, + "step": 8630 + }, + { + "epoch": 2.76, + "learning_rate": 4.355813677847576e-08, + "logits/generated": 3.7722702026367188, + "logits/real": 2.3620645999908447, + "logps/generated": -1003.5438232421875, + "logps/real": -389.4685363769531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -60.21651077270508, + "rewards/margins": 52.0927848815918, + "rewards/real": -8.12373161315918, + "step": 8640 + }, + { + "epoch": 2.77, + "learning_rate": 4.296550906720398e-08, + "logits/generated": 3.63665509223938, + "logits/real": 2.1879465579986572, + "logps/generated": -1004.0223388671875, + "logps/real": -360.257080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -61.52576446533203, + "rewards/margins": 54.45721435546875, + "rewards/real": -7.068545341491699, + "step": 8650 + }, + { + "epoch": 2.77, + "learning_rate": 4.23728813559322e-08, + "logits/generated": 3.8347854614257812, + "logits/real": 2.080061435699463, + "logps/generated": -911.7888793945312, + "logps/real": -376.4729919433594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -54.39191818237305, + "rewards/margins": 47.83964157104492, + "rewards/real": -6.552272796630859, + "step": 8660 + }, + { + "epoch": 2.77, + "learning_rate": 4.178025364466042e-08, + "logits/generated": 3.5550475120544434, + "logits/real": 2.0093512535095215, + "logps/generated": -983.3677978515625, + "logps/real": -421.22088623046875, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/generated": -59.86065673828125, + "rewards/margins": 52.51118087768555, + "rewards/real": -7.349481105804443, + "step": 8670 + }, + { + "epoch": 2.78, + "learning_rate": 4.118762593338864e-08, + "logits/generated": 3.9317879676818848, + "logits/real": 2.3645236492156982, + "logps/generated": -1096.2442626953125, + "logps/real": -377.3437194824219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.23106384277344, + "rewards/margins": 60.86164093017578, + "rewards/real": -8.369420051574707, + "step": 8680 + }, + { + "epoch": 2.78, + "learning_rate": 4.059499822211687e-08, + "logits/generated": 4.139974117279053, + "logits/real": 2.1882903575897217, + "logps/generated": -930.52197265625, + "logps/real": -407.3905944824219, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/generated": -56.66914749145508, + "rewards/margins": 48.477088928222656, + "rewards/real": -8.192062377929688, + "step": 8690 + }, + { + "epoch": 2.78, + "learning_rate": 4.000237051084508e-08, + "logits/generated": 4.2538299560546875, + "logits/real": 2.099956750869751, + "logps/generated": -1089.7821044921875, + "logps/real": -393.08734130859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -69.39396667480469, + "rewards/margins": 61.89726638793945, + "rewards/real": -7.496708869934082, + "step": 8700 + }, + { + "epoch": 2.79, + "learning_rate": 3.9409742799573305e-08, + "logits/generated": 4.161600589752197, + "logits/real": 1.944954514503479, + "logps/generated": -1037.1029052734375, + "logps/real": -436.84149169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -63.92815399169922, + "rewards/margins": 56.2886848449707, + "rewards/real": -7.639472961425781, + "step": 8710 + }, + { + "epoch": 2.79, + "learning_rate": 3.8817115088301533e-08, + "logits/generated": 3.7674357891082764, + "logits/real": 2.2307162284851074, + "logps/generated": -1004.86865234375, + "logps/real": -408.85260009765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -60.471038818359375, + "rewards/margins": 52.617469787597656, + "rewards/real": -7.853572845458984, + "step": 8720 + }, + { + "epoch": 2.79, + "learning_rate": 3.822448737702975e-08, + "logits/generated": 3.9538588523864746, + "logits/real": 2.351992607116699, + "logps/generated": -1198.9942626953125, + "logps/real": -407.2468566894531, + "loss": 0.0029, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -77.22299194335938, + "rewards/margins": 69.22731018066406, + "rewards/real": -7.995683193206787, + "step": 8730 + }, + { + "epoch": 2.8, + "learning_rate": 3.763185966575797e-08, + "logits/generated": 3.734550952911377, + "logits/real": 2.352360963821411, + "logps/generated": -969.0906372070312, + "logps/real": -395.86383056640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -58.753684997558594, + "rewards/margins": 51.309471130371094, + "rewards/real": -7.444212436676025, + "step": 8740 + }, + { + "epoch": 2.8, + "learning_rate": 3.7039231954486186e-08, + "logits/generated": 3.949242353439331, + "logits/real": 2.179558277130127, + "logps/generated": -986.6300048828125, + "logps/real": -360.14801025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -59.20587158203125, + "rewards/margins": 51.371360778808594, + "rewards/real": -7.834508419036865, + "step": 8750 + }, + { + "epoch": 2.8, + "learning_rate": 3.6446604243214415e-08, + "logits/generated": 4.206214904785156, + "logits/real": 2.566666603088379, + "logps/generated": -959.1954956054688, + "logps/real": -437.5762634277344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -58.717437744140625, + "rewards/margins": 49.8985481262207, + "rewards/real": -8.81889533996582, + "step": 8760 + }, + { + "epoch": 2.81, + "learning_rate": 3.585397653194263e-08, + "logits/generated": 4.577523231506348, + "logits/real": 2.33097505569458, + "logps/generated": -1163.2091064453125, + "logps/real": -381.373779296875, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -74.9511489868164, + "rewards/margins": 66.1609878540039, + "rewards/real": -8.790148735046387, + "step": 8770 + }, + { + "epoch": 2.81, + "learning_rate": 3.526134882067085e-08, + "logits/generated": 4.372532844543457, + "logits/real": 2.314779281616211, + "logps/generated": -991.7755126953125, + "logps/real": -387.3517761230469, + "loss": 0.0074, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -62.28171920776367, + "rewards/margins": 54.22426223754883, + "rewards/real": -8.05746078491211, + "step": 8780 + }, + { + "epoch": 2.81, + "learning_rate": 3.4668721109399074e-08, + "logits/generated": 3.949869155883789, + "logits/real": 2.473696231842041, + "logps/generated": -995.8297119140625, + "logps/real": -377.43212890625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -61.33977127075195, + "rewards/margins": 54.054656982421875, + "rewards/real": -7.2851152420043945, + "step": 8790 + }, + { + "epoch": 2.82, + "learning_rate": 3.4076093398127296e-08, + "logits/generated": 3.7317681312561035, + "logits/real": 2.186537265777588, + "logps/generated": -1112.25927734375, + "logps/real": -374.804931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -72.85594940185547, + "rewards/margins": 64.02801513671875, + "rewards/real": -8.827939987182617, + "step": 8800 + }, + { + "epoch": 2.82, + "learning_rate": 3.348346568685552e-08, + "logits/generated": 4.337752819061279, + "logits/real": 2.0777242183685303, + "logps/generated": -968.1364135742188, + "logps/real": -401.09771728515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -60.210594177246094, + "rewards/margins": 51.63945388793945, + "rewards/real": -8.571136474609375, + "step": 8810 + }, + { + "epoch": 2.82, + "learning_rate": 3.2890837975583734e-08, + "logits/generated": 3.9325528144836426, + "logits/real": 2.016432762145996, + "logps/generated": -1130.004638671875, + "logps/real": -402.5047302246094, + "loss": 0.0089, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -72.4874038696289, + "rewards/margins": 64.43824768066406, + "rewards/real": -8.04914379119873, + "step": 8820 + }, + { + "epoch": 2.83, + "learning_rate": 3.229821026431196e-08, + "logits/generated": 4.252560615539551, + "logits/real": 2.5569992065429688, + "logps/generated": -1086.0369873046875, + "logps/real": -417.00726318359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -69.71559143066406, + "rewards/margins": 60.46100616455078, + "rewards/real": -9.254584312438965, + "step": 8830 + }, + { + "epoch": 2.83, + "learning_rate": 3.170558255304018e-08, + "logits/generated": 4.101076126098633, + "logits/real": 2.5329222679138184, + "logps/generated": -1007.7769775390625, + "logps/real": -371.10723876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -62.38042449951172, + "rewards/margins": 54.435874938964844, + "rewards/real": -7.944557189941406, + "step": 8840 + }, + { + "epoch": 2.83, + "learning_rate": 3.11129548417684e-08, + "logits/generated": 4.133565902709961, + "logits/real": 2.0584716796875, + "logps/generated": -1027.533203125, + "logps/real": -421.91339111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -63.9887580871582, + "rewards/margins": 55.15087890625, + "rewards/real": -8.837879180908203, + "step": 8850 + }, + { + "epoch": 2.84, + "learning_rate": 3.052032713049662e-08, + "logits/generated": 3.8249733448028564, + "logits/real": 2.2053627967834473, + "logps/generated": -1117.9556884765625, + "logps/real": -410.8575134277344, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/generated": -68.75425720214844, + "rewards/margins": 60.45509719848633, + "rewards/real": -8.299158096313477, + "step": 8860 + }, + { + "epoch": 2.84, + "learning_rate": 2.9927699419224844e-08, + "logits/generated": 4.576693058013916, + "logits/real": 1.9404109716415405, + "logps/generated": -937.0902099609375, + "logps/real": -389.9156494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -55.26157760620117, + "rewards/margins": 46.93947982788086, + "rewards/real": -8.322099685668945, + "step": 8870 + }, + { + "epoch": 2.84, + "learning_rate": 2.9335071707953063e-08, + "logits/generated": 4.113223552703857, + "logits/real": 2.3892219066619873, + "logps/generated": -1015.8532104492188, + "logps/real": -362.73370361328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -61.505126953125, + "rewards/margins": 53.80535888671875, + "rewards/real": -7.699770450592041, + "step": 8880 + }, + { + "epoch": 2.84, + "learning_rate": 2.8742443996681285e-08, + "logits/generated": 4.111103534698486, + "logits/real": 2.273388624191284, + "logps/generated": -1063.7801513671875, + "logps/real": -408.4124450683594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -66.34366607666016, + "rewards/margins": 57.898590087890625, + "rewards/real": -8.44508171081543, + "step": 8890 + }, + { + "epoch": 2.85, + "learning_rate": 2.8149816285409503e-08, + "logits/generated": 4.130972862243652, + "logits/real": 2.298684597015381, + "logps/generated": -1081.184326171875, + "logps/real": -401.2452392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -67.52656555175781, + "rewards/margins": 58.92407989501953, + "rewards/real": -8.602482795715332, + "step": 8900 + }, + { + "epoch": 2.85, + "learning_rate": 2.7557188574137725e-08, + "logits/generated": 3.882814407348633, + "logits/real": 2.370919704437256, + "logps/generated": -1150.154052734375, + "logps/real": -383.59954833984375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -74.73722839355469, + "rewards/margins": 67.35450744628906, + "rewards/real": -7.3827223777771, + "step": 8910 + }, + { + "epoch": 2.85, + "learning_rate": 2.6964560862865947e-08, + "logits/generated": 4.083722114562988, + "logits/real": 2.3151917457580566, + "logps/generated": -955.68701171875, + "logps/real": -425.1591796875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/generated": -59.24004364013672, + "rewards/margins": 50.92094802856445, + "rewards/real": -8.319098472595215, + "step": 8920 + }, + { + "epoch": 2.86, + "learning_rate": 2.6371933151594166e-08, + "logits/generated": 4.148406028747559, + "logits/real": 2.6285533905029297, + "logps/generated": -990.8770751953125, + "logps/real": -418.384521484375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -60.00004959106445, + "rewards/margins": 51.72795486450195, + "rewards/real": -8.272089004516602, + "step": 8930 + }, + { + "epoch": 2.86, + "learning_rate": 2.5779305440322388e-08, + "logits/generated": 3.855437755584717, + "logits/real": 2.579738140106201, + "logps/generated": -1010.6025390625, + "logps/real": -381.7750549316406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -62.51027297973633, + "rewards/margins": 54.409332275390625, + "rewards/real": -8.100942611694336, + "step": 8940 + }, + { + "epoch": 2.86, + "learning_rate": 2.5186677729050607e-08, + "logits/generated": 3.730900526046753, + "logits/real": 2.5897955894470215, + "logps/generated": -1115.298583984375, + "logps/real": -409.9163513183594, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -70.0462646484375, + "rewards/margins": 61.09590530395508, + "rewards/real": -8.950363159179688, + "step": 8950 + }, + { + "epoch": 2.87, + "learning_rate": 2.459405001777883e-08, + "logits/generated": 4.09194278717041, + "logits/real": 1.8238937854766846, + "logps/generated": -1002.7340087890625, + "logps/real": -387.93414306640625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -60.72015380859375, + "rewards/margins": 54.630889892578125, + "rewards/real": -6.089266777038574, + "step": 8960 + }, + { + "epoch": 2.87, + "learning_rate": 2.4001422306507054e-08, + "logits/generated": 4.2426862716674805, + "logits/real": 2.1907787322998047, + "logps/generated": -947.5045776367188, + "logps/real": -446.52203369140625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/generated": -58.48749542236328, + "rewards/margins": 49.036041259765625, + "rewards/real": -9.451457977294922, + "step": 8970 + }, + { + "epoch": 2.87, + "learning_rate": 2.3408794595235273e-08, + "logits/generated": 3.7106690406799316, + "logits/real": 2.4908013343811035, + "logps/generated": -1102.080810546875, + "logps/real": -377.4757080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.90482330322266, + "rewards/margins": 61.837921142578125, + "rewards/real": -8.066899299621582, + "step": 8980 + }, + { + "epoch": 2.88, + "learning_rate": 2.2816166883963495e-08, + "logits/generated": 3.4732189178466797, + "logits/real": 2.1402063369750977, + "logps/generated": -1026.4468994140625, + "logps/real": -419.9058532714844, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -62.887428283691406, + "rewards/margins": 54.51227951049805, + "rewards/real": -8.375152587890625, + "step": 8990 + }, + { + "epoch": 2.88, + "learning_rate": 2.2223539172691714e-08, + "logits/generated": 3.951288938522339, + "logits/real": 1.9303228855133057, + "logps/generated": -1031.7862548828125, + "logps/real": -366.66424560546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -63.44452667236328, + "rewards/margins": 56.662071228027344, + "rewards/real": -6.7824554443359375, + "step": 9000 + }, + { + "epoch": 2.88, + "learning_rate": 2.1630911461419936e-08, + "logits/generated": 3.3687922954559326, + "logits/real": 2.513150691986084, + "logps/generated": -1048.0078125, + "logps/real": -411.56842041015625, + "loss": 0.0045, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -64.83985900878906, + "rewards/margins": 55.295440673828125, + "rewards/real": -9.544424057006836, + "step": 9010 + }, + { + "epoch": 2.89, + "learning_rate": 2.1038283750148154e-08, + "logits/generated": 4.0956807136535645, + "logits/real": 2.561434268951416, + "logps/generated": -1174.1719970703125, + "logps/real": -407.2411193847656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -75.4465560913086, + "rewards/margins": 67.99554443359375, + "rewards/real": -7.451010227203369, + "step": 9020 + }, + { + "epoch": 2.89, + "learning_rate": 2.0445656038876377e-08, + "logits/generated": 3.2312393188476562, + "logits/real": 2.3635706901550293, + "logps/generated": -1132.919677734375, + "logps/real": -415.96783447265625, + "loss": 0.0025, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -72.41719055175781, + "rewards/margins": 63.97467803955078, + "rewards/real": -8.442511558532715, + "step": 9030 + }, + { + "epoch": 2.89, + "learning_rate": 1.98530283276046e-08, + "logits/generated": 3.547464370727539, + "logits/real": 2.1589770317077637, + "logps/generated": -1229.614013671875, + "logps/real": -364.1869812011719, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -79.16869354248047, + "rewards/margins": 71.33837127685547, + "rewards/real": -7.830325126647949, + "step": 9040 + }, + { + "epoch": 2.9, + "learning_rate": 1.9260400616332817e-08, + "logits/generated": 4.280892848968506, + "logits/real": 2.2274768352508545, + "logps/generated": -977.6754150390625, + "logps/real": -428.25872802734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -60.16440963745117, + "rewards/margins": 51.583404541015625, + "rewards/real": -8.581003189086914, + "step": 9050 + }, + { + "epoch": 2.9, + "learning_rate": 1.866777290506104e-08, + "logits/generated": 3.8407740592956543, + "logits/real": 2.5023605823516846, + "logps/generated": -1122.100830078125, + "logps/real": -435.5116271972656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.32371520996094, + "rewards/margins": 63.18709182739258, + "rewards/real": -8.136619567871094, + "step": 9060 + }, + { + "epoch": 2.9, + "learning_rate": 1.807514519378926e-08, + "logits/generated": 4.040691375732422, + "logits/real": 2.2008137702941895, + "logps/generated": -1000.4134521484375, + "logps/real": -381.9537658691406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -62.254302978515625, + "rewards/margins": 54.66272735595703, + "rewards/real": -7.591574192047119, + "step": 9070 + }, + { + "epoch": 2.91, + "learning_rate": 1.7482517482517483e-08, + "logits/generated": 3.8093605041503906, + "logits/real": 2.327657699584961, + "logps/generated": -1024.83935546875, + "logps/real": -419.9295959472656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -63.84809494018555, + "rewards/margins": 54.47718048095703, + "rewards/real": -9.370905876159668, + "step": 9080 + }, + { + "epoch": 2.91, + "learning_rate": 1.6889889771245702e-08, + "logits/generated": 4.164027690887451, + "logits/real": 2.1145646572113037, + "logps/generated": -954.1112060546875, + "logps/real": -414.4833068847656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -57.67053985595703, + "rewards/margins": 49.82292938232422, + "rewards/real": -7.847611904144287, + "step": 9090 + }, + { + "epoch": 2.91, + "learning_rate": 1.6297262059973924e-08, + "logits/generated": 4.627682685852051, + "logits/real": 2.4523158073425293, + "logps/generated": -1147.0657958984375, + "logps/real": -402.86785888671875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/generated": -73.46512603759766, + "rewards/margins": 63.60245895385742, + "rewards/real": -9.862655639648438, + "step": 9100 + }, + { + "epoch": 2.92, + "learning_rate": 1.5704634348702146e-08, + "logits/generated": 4.3307390213012695, + "logits/real": 2.7241694927215576, + "logps/generated": -1024.493896484375, + "logps/real": -414.1810607910156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -64.47026062011719, + "rewards/margins": 53.803749084472656, + "rewards/real": -10.666508674621582, + "step": 9110 + }, + { + "epoch": 2.92, + "learning_rate": 1.5112006637430365e-08, + "logits/generated": 4.022457122802734, + "logits/real": 2.0878734588623047, + "logps/generated": -937.5765380859375, + "logps/real": -448.01654052734375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -54.977317810058594, + "rewards/margins": 47.14643478393555, + "rewards/real": -7.830883979797363, + "step": 9120 + }, + { + "epoch": 2.92, + "learning_rate": 1.4519378926158587e-08, + "logits/generated": 4.017087459564209, + "logits/real": 2.5776567459106445, + "logps/generated": -965.0074462890625, + "logps/real": -391.77374267578125, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -57.873931884765625, + "rewards/margins": 49.04096984863281, + "rewards/real": -8.83295726776123, + "step": 9130 + }, + { + "epoch": 2.92, + "learning_rate": 1.3926751214886807e-08, + "logits/generated": 4.196539402008057, + "logits/real": 2.2769038677215576, + "logps/generated": -883.5095825195312, + "logps/real": -425.8203125, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -51.294769287109375, + "rewards/margins": 43.60906982421875, + "rewards/real": -7.685697078704834, + "step": 9140 + }, + { + "epoch": 2.93, + "learning_rate": 1.333412350361503e-08, + "logits/generated": 4.753024101257324, + "logits/real": 2.3693277835845947, + "logps/generated": -1102.330810546875, + "logps/real": -407.6561279296875, + "loss": 0.0793, + "rewards/accuracies": 1.0, + "rewards/generated": -68.81044006347656, + "rewards/margins": 60.021881103515625, + "rewards/real": -8.788552284240723, + "step": 9150 + }, + { + "epoch": 2.93, + "learning_rate": 1.274149579234325e-08, + "logits/generated": 4.085358619689941, + "logits/real": 2.0097265243530273, + "logps/generated": -1012.0568237304688, + "logps/real": -457.06085205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -62.34282684326172, + "rewards/margins": 53.456886291503906, + "rewards/real": -8.885940551757812, + "step": 9160 + }, + { + "epoch": 2.93, + "learning_rate": 1.214886808107147e-08, + "logits/generated": 4.291256904602051, + "logits/real": 2.43717360496521, + "logps/generated": -1060.977294921875, + "logps/real": -424.43280029296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -66.03826141357422, + "rewards/margins": 57.78627395629883, + "rewards/real": -8.25198745727539, + "step": 9170 + }, + { + "epoch": 2.94, + "learning_rate": 1.155624036979969e-08, + "logits/generated": 4.13791036605835, + "logits/real": 2.2680563926696777, + "logps/generated": -1029.0772705078125, + "logps/real": -391.6253356933594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -63.715553283691406, + "rewards/margins": 55.91587448120117, + "rewards/real": -7.799679756164551, + "step": 9180 + }, + { + "epoch": 2.94, + "learning_rate": 1.0963612658527912e-08, + "logits/generated": 3.743330717086792, + "logits/real": 2.188157796859741, + "logps/generated": -1004.7896728515625, + "logps/real": -380.8592834472656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -61.408721923828125, + "rewards/margins": 53.28612518310547, + "rewards/real": -8.122594833374023, + "step": 9190 + }, + { + "epoch": 2.94, + "learning_rate": 1.0370984947256134e-08, + "logits/generated": 3.9687373638153076, + "logits/real": 2.5319037437438965, + "logps/generated": -975.3508911132812, + "logps/real": -369.2455139160156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -60.98682403564453, + "rewards/margins": 52.99494171142578, + "rewards/real": -7.991887092590332, + "step": 9200 + }, + { + "epoch": 2.95, + "learning_rate": 9.778357235984355e-09, + "logits/generated": 3.6886813640594482, + "logits/real": 2.2309367656707764, + "logps/generated": -988.72265625, + "logps/real": -423.3807678222656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -58.65971755981445, + "rewards/margins": 51.6239013671875, + "rewards/real": -7.035823822021484, + "step": 9210 + }, + { + "epoch": 2.95, + "learning_rate": 9.185729524712575e-09, + "logits/generated": 3.9481704235076904, + "logits/real": 1.865582823753357, + "logps/generated": -1115.6072998046875, + "logps/real": -436.29742431640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -71.58280944824219, + "rewards/margins": 63.30155563354492, + "rewards/real": -8.28125, + "step": 9220 + }, + { + "epoch": 2.95, + "learning_rate": 8.593101813440796e-09, + "logits/generated": 4.100003242492676, + "logits/real": 2.4489364624023438, + "logps/generated": -1035.8955078125, + "logps/real": -374.9039001464844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -62.61082077026367, + "rewards/margins": 54.9398078918457, + "rewards/real": -7.671013832092285, + "step": 9230 + }, + { + "epoch": 2.96, + "learning_rate": 8.000474102169016e-09, + "logits/generated": 4.383337497711182, + "logits/real": 2.721869945526123, + "logps/generated": -1061.5406494140625, + "logps/real": -418.4205627441406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -67.38023376464844, + "rewards/margins": 57.91606521606445, + "rewards/real": -9.464168548583984, + "step": 9240 + }, + { + "epoch": 2.96, + "learning_rate": 7.407846390897238e-09, + "logits/generated": 4.2132391929626465, + "logits/real": 2.0285253524780273, + "logps/generated": -1053.735107421875, + "logps/real": -380.25665283203125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/generated": -67.59648132324219, + "rewards/margins": 59.252655029296875, + "rewards/real": -8.343820571899414, + "step": 9250 + }, + { + "epoch": 2.96, + "learning_rate": 6.815218679625459e-09, + "logits/generated": 4.097989082336426, + "logits/real": 2.0763533115386963, + "logps/generated": -948.5368041992188, + "logps/real": -408.927001953125, + "loss": 0.0044, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -57.566383361816406, + "rewards/margins": 49.99573516845703, + "rewards/real": -7.570644378662109, + "step": 9260 + }, + { + "epoch": 2.97, + "learning_rate": 6.22259096835368e-09, + "logits/generated": 3.7166342735290527, + "logits/real": 2.241440773010254, + "logps/generated": -1085.999267578125, + "logps/real": -462.370361328125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/generated": -66.93648529052734, + "rewards/margins": 58.837615966796875, + "rewards/real": -8.098878860473633, + "step": 9270 + }, + { + "epoch": 2.97, + "learning_rate": 5.629963257081901e-09, + "logits/generated": 4.10945463180542, + "logits/real": 2.291794776916504, + "logps/generated": -950.1226806640625, + "logps/real": -354.39849853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -57.405418395996094, + "rewards/margins": 49.583351135253906, + "rewards/real": -7.822066307067871, + "step": 9280 + }, + { + "epoch": 2.97, + "learning_rate": 5.037335545810122e-09, + "logits/generated": 4.05794095993042, + "logits/real": 2.0275235176086426, + "logps/generated": -1017.9361572265625, + "logps/real": -413.9666442871094, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -61.35076141357422, + "rewards/margins": 53.6244010925293, + "rewards/real": -7.726365566253662, + "step": 9290 + }, + { + "epoch": 2.98, + "learning_rate": 4.444707834538343e-09, + "logits/generated": 3.8201382160186768, + "logits/real": 2.22807240486145, + "logps/generated": -1124.951416015625, + "logps/real": -389.0148010253906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -72.00746154785156, + "rewards/margins": 64.85371398925781, + "rewards/real": -7.153740882873535, + "step": 9300 + }, + { + "epoch": 2.98, + "learning_rate": 3.8520801232665634e-09, + "logits/generated": 3.948748826980591, + "logits/real": 2.387542247772217, + "logps/generated": -1052.9998779296875, + "logps/real": -437.97003173828125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/generated": -65.4850845336914, + "rewards/margins": 56.93280029296875, + "rewards/real": -8.552282333374023, + "step": 9310 + }, + { + "epoch": 2.98, + "learning_rate": 3.2594524119947846e-09, + "logits/generated": 3.9022059440612793, + "logits/real": 2.5131003856658936, + "logps/generated": -1026.8658447265625, + "logps/real": -355.9730224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -64.99104309082031, + "rewards/margins": 57.335411071777344, + "rewards/real": -7.655627250671387, + "step": 9320 + }, + { + "epoch": 2.99, + "learning_rate": 2.6668247007230054e-09, + "logits/generated": 3.8674044609069824, + "logits/real": 2.395481586456299, + "logps/generated": -957.7073974609375, + "logps/real": -383.9956970214844, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -58.217140197753906, + "rewards/margins": 49.42625045776367, + "rewards/real": -8.790895462036133, + "step": 9330 + }, + { + "epoch": 2.99, + "learning_rate": 2.0741969894512266e-09, + "logits/generated": 4.056595802307129, + "logits/real": 2.295872449874878, + "logps/generated": -1188.929931640625, + "logps/real": -401.1290588378906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -75.59089660644531, + "rewards/margins": 67.40728759765625, + "rewards/real": -8.18360424041748, + "step": 9340 + }, + { + "epoch": 2.99, + "learning_rate": 1.4815692781794476e-09, + "logits/generated": 3.5111021995544434, + "logits/real": 2.16282320022583, + "logps/generated": -1066.041259765625, + "logps/real": -386.13299560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -68.75672912597656, + "rewards/margins": 59.711891174316406, + "rewards/real": -9.044843673706055, + "step": 9350 + }, + { + "epoch": 3.0, + "learning_rate": 8.889415669076685e-10, + "logits/generated": 3.9372799396514893, + "logits/real": 2.3717198371887207, + "logps/generated": -1054.9495849609375, + "logps/real": -430.40264892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -65.59407043457031, + "rewards/margins": 57.335960388183594, + "rewards/real": -8.258099555969238, + "step": 9360 + }, + { + "epoch": 3.0, + "learning_rate": 2.963138556358895e-10, + "logits/generated": 3.690178394317627, + "logits/real": 2.565126657485962, + "logps/generated": -1029.033935546875, + "logps/real": -439.5101013183594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -62.89173126220703, + "rewards/margins": 53.2917594909668, + "rewards/real": -9.599966049194336, + "step": 9370 + }, + { + "epoch": 3.0, + "step": 9375, + "total_flos": 0.0, + "train_loss": 0.028198444762210128, + "train_runtime": 73703.5855, + "train_samples_per_second": 4.07, + "train_steps_per_second": 0.127 + } + ], + "logging_steps": 10, + "max_steps": 9375, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}