{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 4689, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.066098081023454e-09, "logits/generated": -1.6967991590499878, "logits/real": -2.120976448059082, "logps/generated": -370.8404846191406, "logps/real": -499.59478759765625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.0660980810234541e-08, "logits/generated": -1.8337050676345825, "logits/real": -1.9245092868804932, "logps/generated": -333.390380859375, "logps/real": -365.73236083984375, "loss": 0.6889, "rewards/accuracies": 0.4583333432674408, "rewards/generated": -0.0003336374065838754, "rewards/margins": -0.004248947836458683, "rewards/real": -0.004582585766911507, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.1321961620469082e-08, "logits/generated": -1.7639226913452148, "logits/real": -1.8259729146957397, "logps/generated": -349.0731201171875, "logps/real": -296.0833740234375, "loss": 0.62, "rewards/accuracies": 0.8374999761581421, "rewards/generated": -0.1328016072511673, "rewards/margins": 0.16884447634220123, "rewards/real": 0.03604286164045334, "step": 20 }, { "epoch": 0.02, "learning_rate": 3.1982942430703625e-08, "logits/generated": -1.7844903469085693, "logits/real": -1.8178746700286865, "logps/generated": -345.4620666503906, "logps/real": -322.0969543457031, "loss": 0.4722, "rewards/accuracies": 0.9375, "rewards/generated": -0.45160096883773804, "rewards/margins": 0.5608727931976318, "rewards/real": 0.10927174985408783, "step": 30 }, { "epoch": 0.03, "learning_rate": 4.2643923240938164e-08, "logits/generated": -1.7441495656967163, "logits/real": -1.7443422079086304, "logps/generated": -353.3804626464844, "logps/real": -315.62200927734375, "loss": 0.3243, "rewards/accuracies": 1.0, "rewards/generated": -0.9999493360519409, "rewards/margins": 1.3180910348892212, "rewards/real": 0.31814175844192505, "step": 40 }, { "epoch": 0.03, "learning_rate": 5.3304904051172704e-08, "logits/generated": -1.8026962280273438, "logits/real": -1.9527117013931274, "logps/generated": -321.531982421875, "logps/real": -302.255615234375, "loss": 0.2296, "rewards/accuracies": 1.0, "rewards/generated": -1.1298904418945312, "rewards/margins": 1.6732944250106812, "rewards/real": 0.543404221534729, "step": 50 }, { "epoch": 0.04, "learning_rate": 6.396588486140725e-08, "logits/generated": -1.779604196548462, "logits/real": -1.8982963562011719, "logps/generated": -350.3965759277344, "logps/real": -316.2660217285156, "loss": 0.1794, "rewards/accuracies": 1.0, "rewards/generated": -1.4884535074234009, "rewards/margins": 2.3272576332092285, "rewards/real": 0.8388041257858276, "step": 60 }, { "epoch": 0.04, "learning_rate": 7.462686567164178e-08, "logits/generated": -1.859035849571228, "logits/real": -1.9001390933990479, "logps/generated": -353.8606262207031, "logps/real": -311.6820983886719, "loss": 0.1373, "rewards/accuracies": 1.0, "rewards/generated": -1.713602066040039, "rewards/margins": 2.864536762237549, "rewards/real": 1.1509349346160889, "step": 70 }, { "epoch": 0.05, "learning_rate": 8.528784648187633e-08, "logits/generated": -1.8230819702148438, "logits/real": -1.9508922100067139, "logps/generated": -387.6493225097656, "logps/real": -323.36077880859375, "loss": 0.1155, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.1021273136138916, "rewards/margins": 3.4381821155548096, "rewards/real": 1.3360549211502075, "step": 80 }, { "epoch": 0.06, "learning_rate": 9.594882729211087e-08, "logits/generated": -1.7887563705444336, "logits/real": -1.876123070716858, "logps/generated": -385.62451171875, "logps/real": -290.9560852050781, "loss": 0.0792, "rewards/accuracies": 1.0, "rewards/generated": -2.636014699935913, "rewards/margins": 4.148850917816162, "rewards/real": 1.5128366947174072, "step": 90 }, { "epoch": 0.06, "learning_rate": 1.0660980810234541e-07, "logits/generated": -1.7992160320281982, "logits/real": -1.8133338689804077, "logps/generated": -353.61541748046875, "logps/real": -311.0178527832031, "loss": 0.0612, "rewards/accuracies": 1.0, "rewards/generated": -2.3523924350738525, "rewards/margins": 4.042506694793701, "rewards/real": 1.6901147365570068, "step": 100 }, { "epoch": 0.07, "learning_rate": 1.1727078891257995e-07, "logits/generated": -1.8330148458480835, "logits/real": -1.946874976158142, "logps/generated": -350.23895263671875, "logps/real": -340.55511474609375, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/generated": -2.413532257080078, "rewards/margins": 4.323392391204834, "rewards/real": 1.9098598957061768, "step": 110 }, { "epoch": 0.08, "learning_rate": 1.279317697228145e-07, "logits/generated": -1.8783290386199951, "logits/real": -1.9523353576660156, "logps/generated": -393.84991455078125, "logps/real": -329.712158203125, "loss": 0.0452, "rewards/accuracies": 1.0, "rewards/generated": -3.141986608505249, "rewards/margins": 4.949542045593262, "rewards/real": 1.80755615234375, "step": 120 }, { "epoch": 0.08, "learning_rate": 1.3859275053304903e-07, "logits/generated": -1.8550891876220703, "logits/real": -1.864587426185608, "logps/generated": -369.75726318359375, "logps/real": -279.78277587890625, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/generated": -3.097797393798828, "rewards/margins": 5.334259510040283, "rewards/real": 2.236462354660034, "step": 130 }, { "epoch": 0.09, "learning_rate": 1.4925373134328355e-07, "logits/generated": -1.8573582172393799, "logits/real": -1.9654550552368164, "logps/generated": -409.6256408691406, "logps/real": -318.17962646484375, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/generated": -4.087818622589111, "rewards/margins": 6.088145732879639, "rewards/real": 2.000326633453369, "step": 140 }, { "epoch": 0.1, "learning_rate": 1.5991471215351813e-07, "logits/generated": -1.8324253559112549, "logits/real": -1.948377013206482, "logps/generated": -378.8845520019531, "logps/real": -273.500244140625, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/generated": -3.9910550117492676, "rewards/margins": 6.294719696044922, "rewards/real": 2.3036649227142334, "step": 150 }, { "epoch": 0.1, "learning_rate": 1.7057569296375266e-07, "logits/generated": -1.902753472328186, "logits/real": -2.073312520980835, "logps/generated": -368.2873840332031, "logps/real": -320.0721740722656, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/generated": -3.6813464164733887, "rewards/margins": 6.048798561096191, "rewards/real": 2.3674519062042236, "step": 160 }, { "epoch": 0.11, "learning_rate": 1.8123667377398718e-07, "logits/generated": -1.8630282878875732, "logits/real": -1.9579652547836304, "logps/generated": -382.7325134277344, "logps/real": -277.5039978027344, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/generated": -3.9725914001464844, "rewards/margins": 6.733571529388428, "rewards/real": 2.7609806060791016, "step": 170 }, { "epoch": 0.12, "learning_rate": 1.9189765458422174e-07, "logits/generated": -1.8955347537994385, "logits/real": -1.946244478225708, "logps/generated": -408.6842346191406, "logps/real": -272.0397644042969, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/generated": -4.944246292114258, "rewards/margins": 7.5308051109313965, "rewards/real": 2.586559772491455, "step": 180 }, { "epoch": 0.12, "learning_rate": 2.025586353944563e-07, "logits/generated": -1.938311219215393, "logits/real": -1.985424280166626, "logps/generated": -393.00335693359375, "logps/real": -275.0752868652344, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/generated": -4.616616249084473, "rewards/margins": 7.231846809387207, "rewards/real": 2.615229606628418, "step": 190 }, { "epoch": 0.13, "learning_rate": 2.1321961620469082e-07, "logits/generated": -1.8322713375091553, "logits/real": -1.970806360244751, "logps/generated": -358.4288635253906, "logps/real": -276.014892578125, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/generated": -4.5019636154174805, "rewards/margins": 6.8791608810424805, "rewards/real": 2.3771979808807373, "step": 200 }, { "epoch": 0.13, "learning_rate": 2.2388059701492537e-07, "logits/generated": -1.921954870223999, "logits/real": -1.946009874343872, "logps/generated": -412.391845703125, "logps/real": -284.26971435546875, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/generated": -5.532212257385254, "rewards/margins": 7.887700080871582, "rewards/real": 2.355487823486328, "step": 210 }, { "epoch": 0.14, "learning_rate": 2.345415778251599e-07, "logits/generated": -1.8651634454727173, "logits/real": -1.9466516971588135, "logps/generated": -399.5992736816406, "logps/real": -290.73406982421875, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/generated": -5.614806175231934, "rewards/margins": 8.224471092224121, "rewards/real": 2.6096653938293457, "step": 220 }, { "epoch": 0.15, "learning_rate": 2.452025586353944e-07, "logits/generated": -1.8850021362304688, "logits/real": -1.9235738515853882, "logps/generated": -413.2723083496094, "logps/real": -278.3963928222656, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/generated": -5.726396560668945, "rewards/margins": 8.552070617675781, "rewards/real": 2.8256733417510986, "step": 230 }, { "epoch": 0.15, "learning_rate": 2.55863539445629e-07, "logits/generated": -1.91208016872406, "logits/real": -1.8840696811676025, "logps/generated": -398.0130615234375, "logps/real": -244.80349731445312, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/generated": -5.73783016204834, "rewards/margins": 8.567638397216797, "rewards/real": 2.829808473587036, "step": 240 }, { "epoch": 0.16, "learning_rate": 2.665245202558635e-07, "logits/generated": -1.7182989120483398, "logits/real": -1.8408054113388062, "logps/generated": -385.81988525390625, "logps/real": -270.5240478515625, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/generated": -5.677046775817871, "rewards/margins": 9.149064064025879, "rewards/real": 3.4720168113708496, "step": 250 }, { "epoch": 0.17, "learning_rate": 2.7718550106609805e-07, "logits/generated": -1.8625351190567017, "logits/real": -2.044628858566284, "logps/generated": -381.6638488769531, "logps/real": -290.4682922363281, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/generated": -5.585369110107422, "rewards/margins": 8.358131408691406, "rewards/real": 2.772761344909668, "step": 260 }, { "epoch": 0.17, "learning_rate": 2.878464818763326e-07, "logits/generated": -1.8034858703613281, "logits/real": -1.9378635883331299, "logps/generated": -424.35345458984375, "logps/real": -302.5401306152344, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/generated": -6.96203088760376, "rewards/margins": 10.229166030883789, "rewards/real": 3.2671356201171875, "step": 270 }, { "epoch": 0.18, "learning_rate": 2.985074626865671e-07, "logits/generated": -1.8104196786880493, "logits/real": -1.9590213298797607, "logps/generated": -396.7835388183594, "logps/real": -292.92779541015625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/generated": -6.59836483001709, "rewards/margins": 9.687570571899414, "rewards/real": 3.0892066955566406, "step": 280 }, { "epoch": 0.19, "learning_rate": 3.0916844349680174e-07, "logits/generated": -1.8324377536773682, "logits/real": -1.9503692388534546, "logps/generated": -437.5638122558594, "logps/real": -290.1336364746094, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/generated": -7.405195713043213, "rewards/margins": 10.31657600402832, "rewards/real": 2.91137957572937, "step": 290 }, { "epoch": 0.19, "learning_rate": 3.1982942430703626e-07, "logits/generated": -1.8387800455093384, "logits/real": -1.983649492263794, "logps/generated": -447.13348388671875, "logps/real": -301.80010986328125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/generated": -8.477107048034668, "rewards/margins": 11.132575988769531, "rewards/real": 2.6554691791534424, "step": 300 }, { "epoch": 0.2, "learning_rate": 3.304904051172708e-07, "logits/generated": -1.8579336404800415, "logits/real": -1.9354400634765625, "logps/generated": -418.286376953125, "logps/real": -299.4320373535156, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -8.015484809875488, "rewards/margins": 10.762389183044434, "rewards/real": 2.746904134750366, "step": 310 }, { "epoch": 0.2, "learning_rate": 3.411513859275053e-07, "logits/generated": -1.8565523624420166, "logits/real": -1.9314014911651611, "logps/generated": -467.3387145996094, "logps/real": -293.45037841796875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/generated": -9.601326942443848, "rewards/margins": 12.487638473510742, "rewards/real": 2.886310338973999, "step": 320 }, { "epoch": 0.21, "learning_rate": 3.5181236673773984e-07, "logits/generated": -1.734826683998108, "logits/real": -2.0361499786376953, "logps/generated": -404.30084228515625, "logps/real": -333.31341552734375, "loss": 0.0133, "rewards/accuracies": 0.987500011920929, "rewards/generated": -8.808723449707031, "rewards/margins": 10.949883460998535, "rewards/real": 2.141159772872925, "step": 330 }, { "epoch": 0.22, "learning_rate": 3.6247334754797437e-07, "logits/generated": -1.9974746704101562, "logits/real": -2.0462310314178467, "logps/generated": -381.44842529296875, "logps/real": -271.7964172363281, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/generated": -7.822106838226318, "rewards/margins": 10.79802131652832, "rewards/real": 2.9759135246276855, "step": 340 }, { "epoch": 0.22, "learning_rate": 3.7313432835820895e-07, "logits/generated": -1.9841034412384033, "logits/real": -2.104987382888794, "logps/generated": -450.67352294921875, "logps/real": -306.1277770996094, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -10.084294319152832, "rewards/margins": 12.300285339355469, "rewards/real": 2.215991258621216, "step": 350 }, { "epoch": 0.23, "learning_rate": 3.8379530916844347e-07, "logits/generated": -1.8056557178497314, "logits/real": -1.871917486190796, "logps/generated": -457.48919677734375, "logps/real": -302.74591064453125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/generated": -12.284451484680176, "rewards/margins": 14.20312213897705, "rewards/real": 1.9186710119247437, "step": 360 }, { "epoch": 0.24, "learning_rate": 3.9445628997867805e-07, "logits/generated": -1.8617709875106812, "logits/real": -1.884961485862732, "logps/generated": -441.7889099121094, "logps/real": -250.3365936279297, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -11.999165534973145, "rewards/margins": 14.89355754852295, "rewards/real": 2.8943910598754883, "step": 370 }, { "epoch": 0.24, "learning_rate": 4.051172707889126e-07, "logits/generated": -1.7924455404281616, "logits/real": -1.7877981662750244, "logps/generated": -480.76495361328125, "logps/real": -274.81890869140625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -13.672399520874023, "rewards/margins": 15.9432373046875, "rewards/real": 2.2708370685577393, "step": 380 }, { "epoch": 0.25, "learning_rate": 4.157782515991471e-07, "logits/generated": -1.7011951208114624, "logits/real": -1.8101356029510498, "logps/generated": -498.22021484375, "logps/real": -322.1400146484375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -14.660806655883789, "rewards/margins": 16.827938079833984, "rewards/real": 2.1671292781829834, "step": 390 }, { "epoch": 0.26, "learning_rate": 4.2643923240938163e-07, "logits/generated": -1.8577959537506104, "logits/real": -1.776587724685669, "logps/generated": -483.7767639160156, "logps/real": -283.22412109375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -14.044052124023438, "rewards/margins": 16.325794219970703, "rewards/real": 2.281740665435791, "step": 400 }, { "epoch": 0.26, "learning_rate": 4.371002132196162e-07, "logits/generated": -1.6916249990463257, "logits/real": -1.8904259204864502, "logps/generated": -454.96417236328125, "logps/real": -280.4781188964844, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -12.895904541015625, "rewards/margins": 15.247739791870117, "rewards/real": 2.3518359661102295, "step": 410 }, { "epoch": 0.27, "learning_rate": 4.4776119402985074e-07, "logits/generated": -1.8166958093643188, "logits/real": -1.9414608478546143, "logps/generated": -478.2059631347656, "logps/real": -320.0487976074219, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -13.441385269165039, "rewards/margins": 15.70519733428955, "rewards/real": 2.263813018798828, "step": 420 }, { "epoch": 0.28, "learning_rate": 4.5842217484008526e-07, "logits/generated": -1.7456238269805908, "logits/real": -1.8328170776367188, "logps/generated": -488.39434814453125, "logps/real": -287.6876525878906, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -13.702066421508789, "rewards/margins": 15.967256546020508, "rewards/real": 2.265190601348877, "step": 430 }, { "epoch": 0.28, "learning_rate": 4.690831556503198e-07, "logits/generated": -1.698526382446289, "logits/real": -1.8575769662857056, "logps/generated": -437.67498779296875, "logps/real": -296.1510314941406, "loss": 0.0117, "rewards/accuracies": 0.987500011920929, "rewards/generated": -11.22651481628418, "rewards/margins": 14.161250114440918, "rewards/real": 2.934735059738159, "step": 440 }, { "epoch": 0.29, "learning_rate": 4.797441364605543e-07, "logits/generated": -1.8656270503997803, "logits/real": -1.892998456954956, "logps/generated": -494.43011474609375, "logps/real": -322.17230224609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -14.140520095825195, "rewards/margins": 16.458148956298828, "rewards/real": 2.31762957572937, "step": 450 }, { "epoch": 0.29, "learning_rate": 4.904051172707888e-07, "logits/generated": -1.7699617147445679, "logits/real": -1.7789928913116455, "logps/generated": -524.9749755859375, "logps/real": -281.4552001953125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -16.719074249267578, "rewards/margins": 19.231693267822266, "rewards/real": 2.5126194953918457, "step": 460 }, { "epoch": 0.3, "learning_rate": 4.998815165876776e-07, "logits/generated": -1.784963607788086, "logits/real": -1.9442884922027588, "logps/generated": -479.30218505859375, "logps/real": -316.5262756347656, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -13.92908000946045, "rewards/margins": 15.868054389953613, "rewards/real": 1.9389728307724, "step": 470 }, { "epoch": 0.31, "learning_rate": 4.98696682464455e-07, "logits/generated": -1.747253656387329, "logits/real": -1.8168243169784546, "logps/generated": -495.6634216308594, "logps/real": -355.1924133300781, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -15.753562927246094, "rewards/margins": 17.310409545898438, "rewards/real": 1.5568463802337646, "step": 480 }, { "epoch": 0.31, "learning_rate": 4.975118483412322e-07, "logits/generated": -1.6186755895614624, "logits/real": -1.8190091848373413, "logps/generated": -530.9622802734375, "logps/real": -285.904296875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -18.155773162841797, "rewards/margins": 20.424976348876953, "rewards/real": 2.2692039012908936, "step": 490 }, { "epoch": 0.32, "learning_rate": 4.963270142180094e-07, "logits/generated": -1.7001310586929321, "logits/real": -2.018158435821533, "logps/generated": -510.54150390625, "logps/real": -323.7481994628906, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -17.524988174438477, "rewards/margins": 19.471864700317383, "rewards/real": 1.9468772411346436, "step": 500 }, { "epoch": 0.33, "learning_rate": 4.951421800947867e-07, "logits/generated": -1.9341545104980469, "logits/real": -2.5797524452209473, "logps/generated": -484.71014404296875, "logps/real": -341.454833984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -16.874191284179688, "rewards/margins": 18.175729751586914, "rewards/real": 1.301536202430725, "step": 510 }, { "epoch": 0.33, "learning_rate": 4.93957345971564e-07, "logits/generated": -1.9779363870620728, "logits/real": -2.6067821979522705, "logps/generated": -531.2057495117188, "logps/real": -266.19439697265625, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/generated": -18.99592399597168, "rewards/margins": 19.848346710205078, "rewards/real": 0.8524235486984253, "step": 520 }, { "epoch": 0.34, "learning_rate": 4.927725118483413e-07, "logits/generated": -2.3831493854522705, "logits/real": -2.561941623687744, "logps/generated": -491.9111328125, "logps/real": -255.21780395507812, "loss": 0.015, "rewards/accuracies": 0.987500011920929, "rewards/generated": -14.051923751831055, "rewards/margins": 17.133852005004883, "rewards/real": 3.0819263458251953, "step": 530 }, { "epoch": 0.35, "learning_rate": 4.915876777251184e-07, "logits/generated": -2.3821234703063965, "logits/real": -2.369266986846924, "logps/generated": -434.6355895996094, "logps/real": -273.24639892578125, "loss": 0.0252, "rewards/accuracies": 0.987500011920929, "rewards/generated": -9.59803581237793, "rewards/margins": 14.074541091918945, "rewards/real": 4.476504325866699, "step": 540 }, { "epoch": 0.35, "learning_rate": 4.904028436018957e-07, "logits/generated": -2.253922939300537, "logits/real": -2.404557466506958, "logps/generated": -441.468505859375, "logps/real": -238.59030151367188, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -11.487462043762207, "rewards/margins": 15.422927856445312, "rewards/real": 3.93546724319458, "step": 550 }, { "epoch": 0.36, "learning_rate": 4.892180094786729e-07, "logits/generated": -2.1230063438415527, "logits/real": -2.3471906185150146, "logps/generated": -480.81488037109375, "logps/real": -258.54400634765625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -13.306009292602539, "rewards/margins": 16.83803367614746, "rewards/real": 3.532025098800659, "step": 560 }, { "epoch": 0.36, "learning_rate": 4.880331753554502e-07, "logits/generated": -2.296109676361084, "logits/real": -2.39465069770813, "logps/generated": -481.37188720703125, "logps/real": -298.49066162109375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -14.222930908203125, "rewards/margins": 17.496868133544922, "rewards/real": 3.273935317993164, "step": 570 }, { "epoch": 0.37, "learning_rate": 4.868483412322275e-07, "logits/generated": -2.2604403495788574, "logits/real": -2.327944278717041, "logps/generated": -481.64093017578125, "logps/real": -298.69049072265625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -13.817489624023438, "rewards/margins": 17.34719467163086, "rewards/real": 3.5297064781188965, "step": 580 }, { "epoch": 0.38, "learning_rate": 4.856635071090047e-07, "logits/generated": -2.135833978652954, "logits/real": -2.331498622894287, "logps/generated": -483.1138610839844, "logps/real": -271.465576171875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -14.44117259979248, "rewards/margins": 17.601207733154297, "rewards/real": 3.1600329875946045, "step": 590 }, { "epoch": 0.38, "learning_rate": 4.84478672985782e-07, "logits/generated": -2.2497353553771973, "logits/real": -2.4269208908081055, "logps/generated": -531.066650390625, "logps/real": -275.12322998046875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -16.0179386138916, "rewards/margins": 18.803037643432617, "rewards/real": 2.785097599029541, "step": 600 }, { "epoch": 0.39, "learning_rate": 4.832938388625591e-07, "logits/generated": -2.1242854595184326, "logits/real": -2.3227486610412598, "logps/generated": -536.3455810546875, "logps/real": -250.3666534423828, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -18.37099838256836, "rewards/margins": 21.30178451538086, "rewards/real": 2.930786609649658, "step": 610 }, { "epoch": 0.4, "learning_rate": 4.821090047393365e-07, "logits/generated": -2.1415462493896484, "logits/real": -2.349158525466919, "logps/generated": -527.6783447265625, "logps/real": -294.6853942871094, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -18.106143951416016, "rewards/margins": 21.177995681762695, "rewards/real": 3.071850299835205, "step": 620 }, { "epoch": 0.4, "learning_rate": 4.809241706161137e-07, "logits/generated": -2.105536460876465, "logits/real": -2.4301748275756836, "logps/generated": -501.6485290527344, "logps/real": -319.03118896484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -16.382831573486328, "rewards/margins": 19.062252044677734, "rewards/real": 2.6794214248657227, "step": 630 }, { "epoch": 0.41, "learning_rate": 4.79739336492891e-07, "logits/generated": -2.0303921699523926, "logits/real": -2.2916502952575684, "logps/generated": -535.6454467773438, "logps/real": -306.72540283203125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -18.12425994873047, "rewards/margins": 20.604740142822266, "rewards/real": 2.4804821014404297, "step": 640 }, { "epoch": 0.42, "learning_rate": 4.785545023696682e-07, "logits/generated": -2.1498148441314697, "logits/real": -2.298567771911621, "logps/generated": -519.0200805664062, "logps/real": -287.62432861328125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -18.297164916992188, "rewards/margins": 21.54360580444336, "rewards/real": 3.2464375495910645, "step": 650 }, { "epoch": 0.42, "learning_rate": 4.773696682464455e-07, "logits/generated": -2.085416078567505, "logits/real": -2.2391746044158936, "logps/generated": -543.4639282226562, "logps/real": -265.80438232421875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -18.824047088623047, "rewards/margins": 22.030954360961914, "rewards/real": 3.206907272338867, "step": 660 }, { "epoch": 0.43, "learning_rate": 4.7618483412322273e-07, "logits/generated": -2.2138562202453613, "logits/real": -2.135565757751465, "logps/generated": -561.3629150390625, "logps/real": -312.9130554199219, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -19.822673797607422, "rewards/margins": 22.511886596679688, "rewards/real": 2.689213275909424, "step": 670 }, { "epoch": 0.44, "learning_rate": 4.7499999999999995e-07, "logits/generated": -2.141876697540283, "logits/real": -2.093463182449341, "logps/generated": -583.0494384765625, "logps/real": -246.857421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -21.667137145996094, "rewards/margins": 23.685245513916016, "rewards/real": 2.0181050300598145, "step": 680 }, { "epoch": 0.44, "learning_rate": 4.738151658767772e-07, "logits/generated": -2.140349864959717, "logits/real": -2.07167387008667, "logps/generated": -526.1077880859375, "logps/real": -289.44287109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -19.2884521484375, "rewards/margins": 22.26778793334961, "rewards/real": 2.9793362617492676, "step": 690 }, { "epoch": 0.45, "learning_rate": 4.726303317535545e-07, "logits/generated": -2.2514584064483643, "logits/real": -2.052227735519409, "logps/generated": -513.1663818359375, "logps/real": -304.800537109375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -17.734821319580078, "rewards/margins": 20.155948638916016, "rewards/real": 2.4211297035217285, "step": 700 }, { "epoch": 0.45, "learning_rate": 4.7144549763033177e-07, "logits/generated": -2.2347190380096436, "logits/real": -2.118921995162964, "logps/generated": -493.17669677734375, "logps/real": -285.1862487792969, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -16.90696144104004, "rewards/margins": 19.576229095458984, "rewards/real": 2.6692683696746826, "step": 710 }, { "epoch": 0.46, "learning_rate": 4.70260663507109e-07, "logits/generated": -2.104949474334717, "logits/real": -2.124640941619873, "logps/generated": -489.8534240722656, "logps/real": -291.00177001953125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -15.55286693572998, "rewards/margins": 19.4502010345459, "rewards/real": 3.8973336219787598, "step": 720 }, { "epoch": 0.47, "learning_rate": 4.690758293838862e-07, "logits/generated": -2.103487968444824, "logits/real": -2.0749001502990723, "logps/generated": -550.6735229492188, "logps/real": -276.70458984375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -19.296016693115234, "rewards/margins": 22.49779510498047, "rewards/real": 3.201781749725342, "step": 730 }, { "epoch": 0.47, "learning_rate": 4.678909952606635e-07, "logits/generated": -2.1646389961242676, "logits/real": -2.0404460430145264, "logps/generated": -539.2667846679688, "logps/real": -294.21875, "loss": 0.0055, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.744670867919922, "rewards/margins": 23.31588363647461, "rewards/real": 2.5712103843688965, "step": 740 }, { "epoch": 0.48, "learning_rate": 4.667061611374407e-07, "logits/generated": -1.887084722518921, "logits/real": -1.9593133926391602, "logps/generated": -517.9724731445312, "logps/real": -315.25091552734375, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/generated": -19.357559204101562, "rewards/margins": 20.01168441772461, "rewards/real": 0.6541249752044678, "step": 750 }, { "epoch": 0.49, "learning_rate": 4.65521327014218e-07, "logits/generated": -1.8494024276733398, "logits/real": -1.9312702417373657, "logps/generated": -541.5270385742188, "logps/real": -325.5600891113281, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -20.53277587890625, "rewards/margins": 20.788915634155273, "rewards/real": 0.25614097714424133, "step": 760 }, { "epoch": 0.49, "learning_rate": 4.6433649289099525e-07, "logits/generated": -1.7410087585449219, "logits/real": -1.9452238082885742, "logps/generated": -497.43682861328125, "logps/real": -329.633056640625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -17.951251983642578, "rewards/margins": 19.430788040161133, "rewards/real": 1.479535460472107, "step": 770 }, { "epoch": 0.5, "learning_rate": 4.631516587677725e-07, "logits/generated": -1.9552215337753296, "logits/real": -1.6955111026763916, "logps/generated": -600.9146728515625, "logps/real": -313.9770202636719, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -25.610576629638672, "rewards/margins": 25.478565216064453, "rewards/real": -0.13200750946998596, "step": 780 }, { "epoch": 0.51, "learning_rate": 4.6196682464454974e-07, "logits/generated": -1.663207769393921, "logits/real": -1.819706916809082, "logps/generated": -516.4954223632812, "logps/real": -312.1180725097656, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -19.421649932861328, "rewards/margins": 20.309907913208008, "rewards/real": 0.8882555961608887, "step": 790 }, { "epoch": 0.51, "learning_rate": 4.60781990521327e-07, "logits/generated": -1.7579116821289062, "logits/real": -1.9028571844100952, "logps/generated": -526.2158203125, "logps/real": -351.00909423828125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -20.696455001831055, "rewards/margins": 21.518768310546875, "rewards/real": 0.8223112225532532, "step": 800 }, { "epoch": 0.52, "learning_rate": 4.5959715639810423e-07, "logits/generated": -1.6531692743301392, "logits/real": -1.827567458152771, "logps/generated": -559.42578125, "logps/real": -336.23236083984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -22.543371200561523, "rewards/margins": 23.362524032592773, "rewards/real": 0.8191511034965515, "step": 810 }, { "epoch": 0.52, "learning_rate": 4.5841232227488145e-07, "logits/generated": -1.7309825420379639, "logits/real": -1.8326222896575928, "logps/generated": -544.16845703125, "logps/real": -310.6878356933594, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -21.207149505615234, "rewards/margins": 21.788557052612305, "rewards/real": 0.5814081430435181, "step": 820 }, { "epoch": 0.53, "learning_rate": 4.5722748815165873e-07, "logits/generated": -1.6808931827545166, "logits/real": -1.841680884361267, "logps/generated": -538.439453125, "logps/real": -345.60443115234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -19.732093811035156, "rewards/margins": 21.810070037841797, "rewards/real": 2.0779757499694824, "step": 830 }, { "epoch": 0.54, "learning_rate": 4.56042654028436e-07, "logits/generated": -1.686963438987732, "logits/real": -1.8044347763061523, "logps/generated": -580.0587768554688, "logps/real": -343.3204650878906, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -23.416744232177734, "rewards/margins": 24.399158477783203, "rewards/real": 0.9824095964431763, "step": 840 }, { "epoch": 0.54, "learning_rate": 4.5485781990521327e-07, "logits/generated": -1.7259963750839233, "logits/real": -1.8555939197540283, "logps/generated": -574.3632202148438, "logps/real": -295.0461730957031, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -22.370664596557617, "rewards/margins": 25.08205795288086, "rewards/real": 2.7113938331604004, "step": 850 }, { "epoch": 0.55, "learning_rate": 4.536729857819905e-07, "logits/generated": -1.83119797706604, "logits/real": -2.0709898471832275, "logps/generated": -465.01971435546875, "logps/real": -329.1356506347656, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -15.01301097869873, "rewards/margins": 19.038002014160156, "rewards/real": 4.024991035461426, "step": 860 }, { "epoch": 0.56, "learning_rate": 4.5248815165876776e-07, "logits/generated": -1.8352607488632202, "logits/real": -1.9555168151855469, "logps/generated": -527.7071533203125, "logps/real": -287.556884765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -17.499187469482422, "rewards/margins": 21.701129913330078, "rewards/real": 4.201944351196289, "step": 870 }, { "epoch": 0.56, "learning_rate": 4.5130331753554504e-07, "logits/generated": -1.7999629974365234, "logits/real": -1.846207857131958, "logps/generated": -553.356201171875, "logps/real": -229.6580047607422, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -20.74501609802246, "rewards/margins": 24.824995040893555, "rewards/real": 4.079979419708252, "step": 880 }, { "epoch": 0.57, "learning_rate": 4.5011848341232226e-07, "logits/generated": -1.7987083196640015, "logits/real": -1.8322045803070068, "logps/generated": -607.213134765625, "logps/real": -269.38751220703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -26.261367797851562, "rewards/margins": 27.604583740234375, "rewards/real": 1.3432180881500244, "step": 890 }, { "epoch": 0.58, "learning_rate": 4.489336492890995e-07, "logits/generated": -1.4970036745071411, "logits/real": -1.653838872909546, "logps/generated": -654.5391845703125, "logps/real": -267.154541015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -28.88338279724121, "rewards/margins": 30.190296173095703, "rewards/real": 1.3069137334823608, "step": 900 }, { "epoch": 0.58, "learning_rate": 4.4774881516587675e-07, "logits/generated": -1.6070550680160522, "logits/real": -1.7505422830581665, "logps/generated": -537.8560791015625, "logps/real": -250.5921173095703, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -22.91861343383789, "rewards/margins": 25.364459991455078, "rewards/real": 2.4458467960357666, "step": 910 }, { "epoch": 0.59, "learning_rate": 4.46563981042654e-07, "logits/generated": -1.6866306066513062, "logits/real": -1.7877496480941772, "logps/generated": -608.6903076171875, "logps/real": -282.3499755859375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -26.024982452392578, "rewards/margins": 28.0833740234375, "rewards/real": 2.058396577835083, "step": 920 }, { "epoch": 0.6, "learning_rate": 4.4537914691943124e-07, "logits/generated": -1.720298171043396, "logits/real": -1.8166612386703491, "logps/generated": -564.3702392578125, "logps/real": -298.9079284667969, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -24.169239044189453, "rewards/margins": 25.43836212158203, "rewards/real": 1.269120454788208, "step": 930 }, { "epoch": 0.6, "learning_rate": 4.441943127962085e-07, "logits/generated": -1.6699787378311157, "logits/real": -1.7776901721954346, "logps/generated": -626.137939453125, "logps/real": -348.00390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -26.181921005249023, "rewards/margins": 27.278514862060547, "rewards/real": 1.096596121788025, "step": 940 }, { "epoch": 0.61, "learning_rate": 4.430094786729858e-07, "logits/generated": -1.5043730735778809, "logits/real": -1.6088955402374268, "logps/generated": -602.513671875, "logps/real": -265.69769287109375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -26.079113006591797, "rewards/margins": 28.52200698852539, "rewards/real": 2.442892551422119, "step": 950 }, { "epoch": 0.61, "learning_rate": 4.4182464454976306e-07, "logits/generated": -1.4185912609100342, "logits/real": -1.6036865711212158, "logps/generated": -613.1339111328125, "logps/real": -259.897705078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -25.873516082763672, "rewards/margins": 28.42131996154785, "rewards/real": 2.5478055477142334, "step": 960 }, { "epoch": 0.62, "learning_rate": 4.4063981042654023e-07, "logits/generated": -1.4251104593276978, "logits/real": -1.6921848058700562, "logps/generated": -570.1812744140625, "logps/real": -357.4566955566406, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -23.946605682373047, "rewards/margins": 25.011821746826172, "rewards/real": 1.065213918685913, "step": 970 }, { "epoch": 0.63, "learning_rate": 4.394549763033175e-07, "logits/generated": -1.462538242340088, "logits/real": -1.6375154256820679, "logps/generated": -590.6793212890625, "logps/real": -315.7594909667969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -25.185800552368164, "rewards/margins": 27.19512367248535, "rewards/real": 2.009324550628662, "step": 980 }, { "epoch": 0.63, "learning_rate": 4.382701421800948e-07, "logits/generated": -1.461849570274353, "logits/real": -1.558199405670166, "logps/generated": -705.1229248046875, "logps/real": -296.019775390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -32.2374267578125, "rewards/margins": 33.167137145996094, "rewards/real": 0.9297075271606445, "step": 990 }, { "epoch": 0.64, "learning_rate": 4.37085308056872e-07, "logits/generated": -1.4287445545196533, "logits/real": -1.7073854207992554, "logps/generated": -552.3958129882812, "logps/real": -314.26409912109375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -23.174945831298828, "rewards/margins": 24.712303161621094, "rewards/real": 1.5373600721359253, "step": 1000 }, { "epoch": 0.65, "learning_rate": 4.3590047393364927e-07, "logits/generated": -1.237870454788208, "logits/real": -1.5853736400604248, "logps/generated": -615.478271484375, "logps/real": -293.41302490234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -28.30197525024414, "rewards/margins": 28.77528953552246, "rewards/real": 0.4733191430568695, "step": 1010 }, { "epoch": 0.65, "learning_rate": 4.3471563981042654e-07, "logits/generated": -1.4927613735198975, "logits/real": -1.6751525402069092, "logps/generated": -630.9479370117188, "logps/real": -320.72802734375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/generated": -26.875553131103516, "rewards/margins": 28.753032684326172, "rewards/real": 1.8774802684783936, "step": 1020 }, { "epoch": 0.66, "learning_rate": 4.335308056872038e-07, "logits/generated": -1.8911068439483643, "logits/real": -1.8962939977645874, "logps/generated": -512.6051635742188, "logps/real": -291.7500915527344, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -16.037860870361328, "rewards/margins": 20.70450210571289, "rewards/real": 4.666642189025879, "step": 1030 }, { "epoch": 0.67, "learning_rate": 4.32345971563981e-07, "logits/generated": -1.8473392724990845, "logits/real": -1.8625446557998657, "logps/generated": -494.9473571777344, "logps/real": -275.43255615234375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -16.360857009887695, "rewards/margins": 21.052448272705078, "rewards/real": 4.691592216491699, "step": 1040 }, { "epoch": 0.67, "learning_rate": 4.3116113744075825e-07, "logits/generated": -1.9104070663452148, "logits/real": -1.9039949178695679, "logps/generated": -512.19580078125, "logps/real": -283.31024169921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -17.456066131591797, "rewards/margins": 21.98530387878418, "rewards/real": 4.529239177703857, "step": 1050 }, { "epoch": 0.68, "learning_rate": 4.299763033175355e-07, "logits/generated": -1.8587678670883179, "logits/real": -1.881233811378479, "logps/generated": -484.9371032714844, "logps/real": -266.53228759765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -16.037864685058594, "rewards/margins": 20.32413101196289, "rewards/real": 4.2862653732299805, "step": 1060 }, { "epoch": 0.68, "learning_rate": 4.2879146919431274e-07, "logits/generated": -1.7572576999664307, "logits/real": -1.882163643836975, "logps/generated": -544.5855712890625, "logps/real": -322.8420104980469, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -18.69039535522461, "rewards/margins": 21.911693572998047, "rewards/real": 3.221299648284912, "step": 1070 }, { "epoch": 0.69, "learning_rate": 4.2760663507109e-07, "logits/generated": -1.7128732204437256, "logits/real": -1.8533213138580322, "logps/generated": -556.5257568359375, "logps/real": -309.1585693359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -19.579999923706055, "rewards/margins": 23.458864212036133, "rewards/real": 3.8788623809814453, "step": 1080 }, { "epoch": 0.7, "learning_rate": 4.264218009478673e-07, "logits/generated": -1.7375463247299194, "logits/real": -1.8108612298965454, "logps/generated": -541.38623046875, "logps/real": -264.068115234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -19.656707763671875, "rewards/margins": 23.355690002441406, "rewards/real": 3.6989810466766357, "step": 1090 }, { "epoch": 0.7, "learning_rate": 4.2523696682464456e-07, "logits/generated": -1.673644781112671, "logits/real": -1.8532874584197998, "logps/generated": -537.6918334960938, "logps/real": -335.14056396484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -20.450809478759766, "rewards/margins": 23.150554656982422, "rewards/real": 2.699744701385498, "step": 1100 }, { "epoch": 0.71, "learning_rate": 4.240521327014218e-07, "logits/generated": -1.6889768838882446, "logits/real": -1.8691993951797485, "logps/generated": -504.6331481933594, "logps/real": -287.95257568359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -18.386362075805664, "rewards/margins": 22.01326560974121, "rewards/real": 3.626904249191284, "step": 1110 }, { "epoch": 0.72, "learning_rate": 4.22867298578199e-07, "logits/generated": -1.5863233804702759, "logits/real": -1.7320009469985962, "logps/generated": -571.980224609375, "logps/real": -303.2630920410156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -22.643577575683594, "rewards/margins": 24.643146514892578, "rewards/real": 1.9995689392089844, "step": 1120 }, { "epoch": 0.72, "learning_rate": 4.216824644549763e-07, "logits/generated": -1.6582952737808228, "logits/real": -1.8881654739379883, "logps/generated": -556.7709350585938, "logps/real": -303.9754638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -22.5834903717041, "rewards/margins": 25.4108829498291, "rewards/real": 2.8273937702178955, "step": 1130 }, { "epoch": 0.73, "learning_rate": 4.2049763033175355e-07, "logits/generated": -1.6555780172348022, "logits/real": -1.8850457668304443, "logps/generated": -581.0602416992188, "logps/real": -351.2891845703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -22.79461097717285, "rewards/margins": 24.4954776763916, "rewards/real": 1.7008644342422485, "step": 1140 }, { "epoch": 0.74, "learning_rate": 4.1931279620853077e-07, "logits/generated": -1.6201865673065186, "logits/real": -1.8018090724945068, "logps/generated": -609.6699829101562, "logps/real": -313.0577087402344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -25.84810447692871, "rewards/margins": 27.298587799072266, "rewards/real": 1.4504833221435547, "step": 1150 }, { "epoch": 0.74, "learning_rate": 4.1812796208530804e-07, "logits/generated": -1.7342790365219116, "logits/real": -1.8374478816986084, "logps/generated": -549.2157592773438, "logps/real": -279.5001525878906, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -22.358470916748047, "rewards/margins": 23.83974838256836, "rewards/real": 1.4812744855880737, "step": 1160 }, { "epoch": 0.75, "learning_rate": 4.169431279620853e-07, "logits/generated": -1.9550073146820068, "logits/real": -2.0573925971984863, "logps/generated": -570.79150390625, "logps/real": -295.20391845703125, "loss": 0.0491, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.021099090576172, "rewards/margins": 25.061962127685547, "rewards/real": 0.04086267948150635, "step": 1170 }, { "epoch": 0.75, "learning_rate": 4.1575829383886253e-07, "logits/generated": -2.0269038677215576, "logits/real": -1.9945815801620483, "logps/generated": -698.2073364257812, "logps/real": -322.21905517578125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -34.974361419677734, "rewards/margins": 33.608219146728516, "rewards/real": -1.3661425113677979, "step": 1180 }, { "epoch": 0.76, "learning_rate": 4.145734597156398e-07, "logits/generated": -2.0520718097686768, "logits/real": -2.0008015632629395, "logps/generated": -731.3348388671875, "logps/real": -348.91510009765625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -38.1467170715332, "rewards/margins": 34.06203079223633, "rewards/real": -4.084687232971191, "step": 1190 }, { "epoch": 0.77, "learning_rate": 4.1338862559241703e-07, "logits/generated": -2.2981107234954834, "logits/real": -2.2072577476501465, "logps/generated": -648.4285888671875, "logps/real": -362.78875732421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -29.84146499633789, "rewards/margins": 26.762752532958984, "rewards/real": -3.0787107944488525, "step": 1200 }, { "epoch": 0.77, "learning_rate": 4.122037914691943e-07, "logits/generated": -2.267216682434082, "logits/real": -2.1433565616607666, "logps/generated": -635.2757568359375, "logps/real": -328.28741455078125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -27.74496841430664, "rewards/margins": 26.068294525146484, "rewards/real": -1.6766729354858398, "step": 1210 }, { "epoch": 0.78, "learning_rate": 4.110189573459715e-07, "logits/generated": -2.369476079940796, "logits/real": -2.120939016342163, "logps/generated": -581.2291259765625, "logps/real": -282.62274169921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -24.897258758544922, "rewards/margins": 24.88361167907715, "rewards/real": -0.013645303435623646, "step": 1220 }, { "epoch": 0.79, "learning_rate": 4.098341232227488e-07, "logits/generated": -2.305145025253296, "logits/real": -2.0979654788970947, "logps/generated": -560.5052490234375, "logps/real": -313.28009033203125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -23.427352905273438, "rewards/margins": 23.26708221435547, "rewards/real": -0.16027137637138367, "step": 1230 }, { "epoch": 0.79, "learning_rate": 4.0864928909952607e-07, "logits/generated": -2.1186015605926514, "logits/real": -2.185734748840332, "logps/generated": -582.2239990234375, "logps/real": -334.1194152832031, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -23.52047348022461, "rewards/margins": 24.60202980041504, "rewards/real": 1.0815545320510864, "step": 1240 }, { "epoch": 0.8, "learning_rate": 4.074644549763033e-07, "logits/generated": -2.286837100982666, "logits/real": -2.014310359954834, "logps/generated": -566.97216796875, "logps/real": -277.69964599609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -23.525135040283203, "rewards/margins": 25.032989501953125, "rewards/real": 1.5078563690185547, "step": 1250 }, { "epoch": 0.81, "learning_rate": 4.0627962085308056e-07, "logits/generated": -2.1133038997650146, "logits/real": -2.0989034175872803, "logps/generated": -622.9395751953125, "logps/real": -370.51837158203125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -27.865087509155273, "rewards/margins": 26.2805233001709, "rewards/real": -1.584564447402954, "step": 1260 }, { "epoch": 0.81, "learning_rate": 4.0509478672985783e-07, "logits/generated": -2.301830530166626, "logits/real": -2.0627927780151367, "logps/generated": -649.766357421875, "logps/real": -332.054443359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -30.23395347595215, "rewards/margins": 27.423236846923828, "rewards/real": -2.8107187747955322, "step": 1270 }, { "epoch": 0.82, "learning_rate": 4.0390995260663505e-07, "logits/generated": -2.3979790210723877, "logits/real": -2.248131036758423, "logps/generated": -628.7196044921875, "logps/real": -383.232177734375, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/generated": -28.031352996826172, "rewards/margins": 25.04072380065918, "rewards/real": -2.9906303882598877, "step": 1280 }, { "epoch": 0.83, "learning_rate": 4.0272511848341227e-07, "logits/generated": -2.785176992416382, "logits/real": -2.3389904499053955, "logps/generated": -546.8567504882812, "logps/real": -375.5636291503906, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -21.581762313842773, "rewards/margins": 21.785717010498047, "rewards/real": 0.20395250618457794, "step": 1290 }, { "epoch": 0.83, "learning_rate": 4.0154028436018954e-07, "logits/generated": -2.795858383178711, "logits/real": -2.425846815109253, "logps/generated": -574.3978271484375, "logps/real": -382.6394348144531, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -23.64377212524414, "rewards/margins": 22.34258270263672, "rewards/real": -1.3011887073516846, "step": 1300 }, { "epoch": 0.84, "learning_rate": 4.003554502369668e-07, "logits/generated": -2.547922134399414, "logits/real": -2.317718029022217, "logps/generated": -584.1044921875, "logps/real": -305.9288024902344, "loss": 0.0202, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.52315330505371, "rewards/margins": 23.982017517089844, "rewards/real": 0.45886069536209106, "step": 1310 }, { "epoch": 0.84, "learning_rate": 3.991706161137441e-07, "logits/generated": -2.1883223056793213, "logits/real": -2.244220733642578, "logps/generated": -521.2716674804688, "logps/real": -302.87200927734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -18.600849151611328, "rewards/margins": 22.98223114013672, "rewards/real": 4.381381511688232, "step": 1320 }, { "epoch": 0.85, "learning_rate": 3.979857819905213e-07, "logits/generated": -2.32015061378479, "logits/real": -2.147599458694458, "logps/generated": -526.2401123046875, "logps/real": -310.9039001464844, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -18.004907608032227, "rewards/margins": 22.190221786499023, "rewards/real": 4.185314178466797, "step": 1330 }, { "epoch": 0.86, "learning_rate": 3.968009478672986e-07, "logits/generated": -2.3661770820617676, "logits/real": -2.156507968902588, "logps/generated": -505.3841247558594, "logps/real": -307.3638000488281, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/generated": -16.244041442871094, "rewards/margins": 21.656387329101562, "rewards/real": 5.4123454093933105, "step": 1340 }, { "epoch": 0.86, "learning_rate": 3.9561611374407585e-07, "logits/generated": -2.0529398918151855, "logits/real": -2.15185809135437, "logps/generated": -487.58599853515625, "logps/real": -235.55545043945312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -16.13335418701172, "rewards/margins": 21.287761688232422, "rewards/real": 5.1544084548950195, "step": 1350 }, { "epoch": 0.87, "learning_rate": 3.94431279620853e-07, "logits/generated": -2.091001033782959, "logits/real": -2.0580759048461914, "logps/generated": -533.045654296875, "logps/real": -292.6197814941406, "loss": 0.0454, "rewards/accuracies": 1.0, "rewards/generated": -17.37051010131836, "rewards/margins": 21.942045211791992, "rewards/real": 4.571534633636475, "step": 1360 }, { "epoch": 0.88, "learning_rate": 3.932464454976303e-07, "logits/generated": -2.1968071460723877, "logits/real": -2.022338390350342, "logps/generated": -538.3050537109375, "logps/real": -256.47332763671875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -18.308841705322266, "rewards/margins": 22.015789031982422, "rewards/real": 3.7069435119628906, "step": 1370 }, { "epoch": 0.88, "learning_rate": 3.9206161137440757e-07, "logits/generated": -1.8904855251312256, "logits/real": -2.021427869796753, "logps/generated": -509.0938415527344, "logps/real": -261.49835205078125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -16.933731079101562, "rewards/margins": 20.786678314208984, "rewards/real": 3.85294771194458, "step": 1380 }, { "epoch": 0.89, "learning_rate": 3.9087677725118484e-07, "logits/generated": -1.9848442077636719, "logits/real": -2.0338735580444336, "logps/generated": -584.007568359375, "logps/real": -298.0301818847656, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -22.632495880126953, "rewards/margins": 25.756046295166016, "rewards/real": 3.123551845550537, "step": 1390 }, { "epoch": 0.9, "learning_rate": 3.8969194312796206e-07, "logits/generated": -2.0216996669769287, "logits/real": -2.122128963470459, "logps/generated": -607.7069702148438, "logps/real": -264.29412841796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -24.96695327758789, "rewards/margins": 28.326374053955078, "rewards/real": 3.3594181537628174, "step": 1400 }, { "epoch": 0.9, "learning_rate": 3.8850710900473933e-07, "logits/generated": -2.027808904647827, "logits/real": -2.08689546585083, "logps/generated": -596.6235961914062, "logps/real": -321.3527526855469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -25.19536590576172, "rewards/margins": 27.47714614868164, "rewards/real": 2.2817790508270264, "step": 1410 }, { "epoch": 0.91, "learning_rate": 3.873222748815166e-07, "logits/generated": -1.6834685802459717, "logits/real": -2.207247734069824, "logps/generated": -526.8784790039062, "logps/real": -278.8542785644531, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -20.474117279052734, "rewards/margins": 23.46799087524414, "rewards/real": 2.9938743114471436, "step": 1420 }, { "epoch": 0.91, "learning_rate": 3.8613744075829377e-07, "logits/generated": -1.8503425121307373, "logits/real": -2.1495347023010254, "logps/generated": -558.7788696289062, "logps/real": -290.7303466796875, "loss": 0.0032, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.188411712646484, "rewards/margins": 26.19222640991211, "rewards/real": 3.003814220428467, "step": 1430 }, { "epoch": 0.92, "learning_rate": 3.8495260663507104e-07, "logits/generated": -1.4719184637069702, "logits/real": -2.053095579147339, "logps/generated": -566.8973388671875, "logps/real": -316.41973876953125, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/generated": -21.69980812072754, "rewards/margins": 24.978700637817383, "rewards/real": 3.278891086578369, "step": 1440 }, { "epoch": 0.93, "learning_rate": 3.837677725118483e-07, "logits/generated": -1.857731819152832, "logits/real": -1.9685068130493164, "logps/generated": -536.6964721679688, "logps/real": -285.66973876953125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -20.253883361816406, "rewards/margins": 24.264108657836914, "rewards/real": 4.010226249694824, "step": 1450 }, { "epoch": 0.93, "learning_rate": 3.825829383886256e-07, "logits/generated": -1.8522886037826538, "logits/real": -1.9278554916381836, "logps/generated": -563.2208251953125, "logps/real": -301.377685546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -21.675525665283203, "rewards/margins": 25.396894454956055, "rewards/real": 3.7213714122772217, "step": 1460 }, { "epoch": 0.94, "learning_rate": 3.813981042654028e-07, "logits/generated": -1.6780341863632202, "logits/real": -2.0288522243499756, "logps/generated": -553.4193115234375, "logps/real": -242.666015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -21.265884399414062, "rewards/margins": 25.918964385986328, "rewards/real": 4.653078556060791, "step": 1470 }, { "epoch": 0.95, "learning_rate": 3.802132701421801e-07, "logits/generated": -1.8964929580688477, "logits/real": -1.9505048990249634, "logps/generated": -539.747314453125, "logps/real": -306.03814697265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -19.95806312561035, "rewards/margins": 23.623302459716797, "rewards/real": 3.665241241455078, "step": 1480 }, { "epoch": 0.95, "learning_rate": 3.7902843601895736e-07, "logits/generated": -1.5237603187561035, "logits/real": -1.518192172050476, "logps/generated": -537.8590087890625, "logps/real": -300.8917236328125, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/generated": -19.73358917236328, "rewards/margins": 23.096216201782227, "rewards/real": 3.36262583732605, "step": 1490 }, { "epoch": 0.96, "learning_rate": 3.778436018957346e-07, "logits/generated": -1.5329254865646362, "logits/real": -1.430698037147522, "logps/generated": -476.19158935546875, "logps/real": -282.7904052734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -15.295829772949219, "rewards/margins": 20.918052673339844, "rewards/real": 5.622222423553467, "step": 1500 }, { "epoch": 0.97, "learning_rate": 3.766587677725118e-07, "logits/generated": -1.133873701095581, "logits/real": -1.5278857946395874, "logps/generated": -538.281982421875, "logps/real": -272.65142822265625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -17.780771255493164, "rewards/margins": 22.087909698486328, "rewards/real": 4.307138919830322, "step": 1510 }, { "epoch": 0.97, "learning_rate": 3.7547393364928907e-07, "logits/generated": -1.3727457523345947, "logits/real": -1.5002796649932861, "logps/generated": -530.1630249023438, "logps/real": -264.24151611328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -18.06686019897461, "rewards/margins": 22.706050872802734, "rewards/real": 4.639190196990967, "step": 1520 }, { "epoch": 0.98, "learning_rate": 3.7428909952606634e-07, "logits/generated": -1.5642679929733276, "logits/real": -1.4811842441558838, "logps/generated": -516.3220825195312, "logps/real": -261.48809814453125, "loss": 0.0053, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.3123836517334, "rewards/margins": 22.604694366455078, "rewards/real": 5.292308330535889, "step": 1530 }, { "epoch": 0.99, "learning_rate": 3.7310426540284356e-07, "logits/generated": -1.5852844715118408, "logits/real": -1.811846137046814, "logps/generated": -536.3941040039062, "logps/real": -300.4720153808594, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -19.475051879882812, "rewards/margins": 24.125896453857422, "rewards/real": 4.65084171295166, "step": 1540 }, { "epoch": 0.99, "learning_rate": 3.7191943127962083e-07, "logits/generated": -1.5384024381637573, "logits/real": -1.8978979587554932, "logps/generated": -473.53802490234375, "logps/real": -262.2735290527344, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -15.992510795593262, "rewards/margins": 20.536054611206055, "rewards/real": 4.543545722961426, "step": 1550 }, { "epoch": 1.0, "learning_rate": 3.707345971563981e-07, "logits/generated": -1.5666875839233398, "logits/real": -1.718675971031189, "logps/generated": -507.5492248535156, "logps/real": -248.30184936523438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -16.896936416625977, "rewards/margins": 22.22998809814453, "rewards/real": 5.333051681518555, "step": 1560 }, { "epoch": 1.0, "learning_rate": 3.695497630331754e-07, "logits/generated": -1.825209617614746, "logits/real": -1.8174819946289062, "logps/generated": -506.86712646484375, "logps/real": -271.7018737792969, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -17.314891815185547, "rewards/margins": 21.846303939819336, "rewards/real": 4.5314106941223145, "step": 1570 }, { "epoch": 1.01, "learning_rate": 3.683649289099526e-07, "logits/generated": -1.7880823612213135, "logits/real": -1.7753536701202393, "logps/generated": -507.07794189453125, "logps/real": -286.667236328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -16.956701278686523, "rewards/margins": 21.935237884521484, "rewards/real": 4.9785356521606445, "step": 1580 }, { "epoch": 1.02, "learning_rate": 3.671800947867298e-07, "logits/generated": -1.954362154006958, "logits/real": -1.646762490272522, "logps/generated": -518.947021484375, "logps/real": -273.3876953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -18.16889190673828, "rewards/margins": 23.879365921020508, "rewards/real": 5.710473537445068, "step": 1590 }, { "epoch": 1.02, "learning_rate": 3.659952606635071e-07, "logits/generated": -1.8228673934936523, "logits/real": -1.5472794771194458, "logps/generated": -552.9884033203125, "logps/real": -279.546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -19.966049194335938, "rewards/margins": 24.874601364135742, "rewards/real": 4.908552169799805, "step": 1600 }, { "epoch": 1.03, "learning_rate": 3.648104265402843e-07, "logits/generated": -1.915820837020874, "logits/real": -1.8832050561904907, "logps/generated": -520.4482421875, "logps/real": -285.5626525878906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -16.817371368408203, "rewards/margins": 22.492910385131836, "rewards/real": 5.675540447235107, "step": 1610 }, { "epoch": 1.04, "learning_rate": 3.636255924170616e-07, "logits/generated": -1.7189966440200806, "logits/real": -1.6540539264678955, "logps/generated": -530.4070434570312, "logps/real": -272.47015380859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -18.999343872070312, "rewards/margins": 24.16018295288086, "rewards/real": 5.160836219787598, "step": 1620 }, { "epoch": 1.04, "learning_rate": 3.6244075829383886e-07, "logits/generated": -1.6190435886383057, "logits/real": -1.8065773248672485, "logps/generated": -486.86993408203125, "logps/real": -250.3603057861328, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -16.41383171081543, "rewards/margins": 22.118118286132812, "rewards/real": 5.704287528991699, "step": 1630 }, { "epoch": 1.05, "learning_rate": 3.6125592417061613e-07, "logits/generated": -2.123809337615967, "logits/real": -1.734910249710083, "logps/generated": -588.7273559570312, "logps/real": -282.72662353515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -20.60115623474121, "rewards/margins": 25.60028648376465, "rewards/real": 4.999133586883545, "step": 1640 }, { "epoch": 1.06, "learning_rate": 3.6007109004739335e-07, "logits/generated": -1.7661174535751343, "logits/real": -1.7345225811004639, "logps/generated": -512.4077758789062, "logps/real": -290.98876953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -17.130510330200195, "rewards/margins": 21.595611572265625, "rewards/real": 4.465102195739746, "step": 1650 }, { "epoch": 1.06, "learning_rate": 3.588862559241706e-07, "logits/generated": -1.8894002437591553, "logits/real": -1.6541759967803955, "logps/generated": -515.79443359375, "logps/real": -284.05267333984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -18.652957916259766, "rewards/margins": 23.29606056213379, "rewards/real": 4.643100738525391, "step": 1660 }, { "epoch": 1.07, "learning_rate": 3.5770142180094784e-07, "logits/generated": -1.6802387237548828, "logits/real": -1.8646351099014282, "logps/generated": -513.9190673828125, "logps/real": -272.0225524902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -17.706335067749023, "rewards/margins": 23.3486385345459, "rewards/real": 5.64230489730835, "step": 1670 }, { "epoch": 1.07, "learning_rate": 3.5651658767772506e-07, "logits/generated": -1.6976789236068726, "logits/real": -1.8117681741714478, "logps/generated": -501.4063415527344, "logps/real": -272.6081848144531, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -17.715633392333984, "rewards/margins": 23.162193298339844, "rewards/real": 5.446557998657227, "step": 1680 }, { "epoch": 1.08, "learning_rate": 3.5533175355450234e-07, "logits/generated": -2.062446355819702, "logits/real": -1.80794358253479, "logps/generated": -593.0606079101562, "logps/real": -245.48672485351562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -23.42470932006836, "rewards/margins": 27.74930763244629, "rewards/real": 4.324599266052246, "step": 1690 }, { "epoch": 1.09, "learning_rate": 3.541469194312796e-07, "logits/generated": -1.854752779006958, "logits/real": -1.775160789489746, "logps/generated": -556.636962890625, "logps/real": -262.096923828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -21.30320930480957, "rewards/margins": 25.73041343688965, "rewards/real": 4.427203178405762, "step": 1700 }, { "epoch": 1.09, "learning_rate": 3.529620853080569e-07, "logits/generated": -1.9429004192352295, "logits/real": -1.9279718399047852, "logps/generated": -563.5242919921875, "logps/real": -288.07464599609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -21.423830032348633, "rewards/margins": 25.440364837646484, "rewards/real": 4.016537189483643, "step": 1710 }, { "epoch": 1.1, "learning_rate": 3.517772511848341e-07, "logits/generated": -1.8811416625976562, "logits/real": -1.9327713251113892, "logps/generated": -533.6873779296875, "logps/real": -268.5288391113281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -20.842792510986328, "rewards/margins": 24.459157943725586, "rewards/real": 3.616368055343628, "step": 1720 }, { "epoch": 1.11, "learning_rate": 3.505924170616114e-07, "logits/generated": -1.7853578329086304, "logits/real": -1.8555301427841187, "logps/generated": -615.3096313476562, "logps/real": -269.9205017089844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -25.6888427734375, "rewards/margins": 28.536468505859375, "rewards/real": 2.8476245403289795, "step": 1730 }, { "epoch": 1.11, "learning_rate": 3.4940758293838865e-07, "logits/generated": -1.9660966396331787, "logits/real": -1.9446995258331299, "logps/generated": -534.47607421875, "logps/real": -259.787353515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -20.871959686279297, "rewards/margins": 25.104110717773438, "rewards/real": 4.232152938842773, "step": 1740 }, { "epoch": 1.12, "learning_rate": 3.482227488151658e-07, "logits/generated": -2.066843271255493, "logits/real": -1.8329108953475952, "logps/generated": -561.7112426757812, "logps/real": -294.5611267089844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -22.66440773010254, "rewards/margins": 26.745464324951172, "rewards/real": 4.081055641174316, "step": 1750 }, { "epoch": 1.13, "learning_rate": 3.470379146919431e-07, "logits/generated": -1.8721287250518799, "logits/real": -1.9046977758407593, "logps/generated": -529.6755981445312, "logps/real": -310.81158447265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -21.434810638427734, "rewards/margins": 25.175006866455078, "rewards/real": 3.740194797515869, "step": 1760 }, { "epoch": 1.13, "learning_rate": 3.4585308056872036e-07, "logits/generated": -1.825221300125122, "logits/real": -1.8675514459609985, "logps/generated": -570.296630859375, "logps/real": -258.2331237792969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -22.523569107055664, "rewards/margins": 27.103931427001953, "rewards/real": 4.580359935760498, "step": 1770 }, { "epoch": 1.14, "learning_rate": 3.4466824644549763e-07, "logits/generated": -1.7838608026504517, "logits/real": -1.8900954723358154, "logps/generated": -529.5694580078125, "logps/real": -267.7345886230469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -20.94317054748535, "rewards/margins": 24.32746124267578, "rewards/real": 3.384288787841797, "step": 1780 }, { "epoch": 1.15, "learning_rate": 3.4348341232227485e-07, "logits/generated": -2.0214123725891113, "logits/real": -1.8234527111053467, "logps/generated": -567.062744140625, "logps/real": -302.21771240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -22.83649444580078, "rewards/margins": 26.55861473083496, "rewards/real": 3.7221240997314453, "step": 1790 }, { "epoch": 1.15, "learning_rate": 3.422985781990521e-07, "logits/generated": -1.8804458379745483, "logits/real": -1.8793761730194092, "logps/generated": -590.5481567382812, "logps/real": -291.43756103515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -23.374038696289062, "rewards/margins": 27.00777244567871, "rewards/real": 3.633733034133911, "step": 1800 }, { "epoch": 1.16, "learning_rate": 3.411137440758294e-07, "logits/generated": -1.6631559133529663, "logits/real": -2.0263304710388184, "logps/generated": -507.7372131347656, "logps/real": -290.40435791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -19.839054107666016, "rewards/margins": 23.84964370727539, "rewards/real": 4.010588645935059, "step": 1810 }, { "epoch": 1.16, "learning_rate": 3.3992890995260667e-07, "logits/generated": -1.9576873779296875, "logits/real": -1.8209441900253296, "logps/generated": -563.102294921875, "logps/real": -269.094482421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -22.641881942749023, "rewards/margins": 27.298757553100586, "rewards/real": 4.656874656677246, "step": 1820 }, { "epoch": 1.17, "learning_rate": 3.3874407582938384e-07, "logits/generated": -1.9633119106292725, "logits/real": -1.986977219581604, "logps/generated": -554.8736572265625, "logps/real": -324.02032470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -22.392162322998047, "rewards/margins": 26.17142105102539, "rewards/real": 3.779256820678711, "step": 1830 }, { "epoch": 1.18, "learning_rate": 3.375592417061611e-07, "logits/generated": -1.8018850088119507, "logits/real": -1.9439207315444946, "logps/generated": -547.8863525390625, "logps/real": -308.1502685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -21.06063461303711, "rewards/margins": 24.50295639038086, "rewards/real": 3.4423205852508545, "step": 1840 }, { "epoch": 1.18, "learning_rate": 3.363744075829384e-07, "logits/generated": -1.6846554279327393, "logits/real": -1.8810228109359741, "logps/generated": -560.669189453125, "logps/real": -291.79937744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -22.370153427124023, "rewards/margins": 25.938251495361328, "rewards/real": 3.5680956840515137, "step": 1850 }, { "epoch": 1.19, "learning_rate": 3.351895734597156e-07, "logits/generated": -1.5178916454315186, "logits/real": -1.8470585346221924, "logps/generated": -577.6918334960938, "logps/real": -271.9674072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -23.935344696044922, "rewards/margins": 27.875802993774414, "rewards/real": 3.9404540061950684, "step": 1860 }, { "epoch": 1.2, "learning_rate": 3.340047393364929e-07, "logits/generated": -1.6082442998886108, "logits/real": -1.8646762371063232, "logps/generated": -606.97802734375, "logps/real": -271.8335266113281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -24.671606063842773, "rewards/margins": 27.991058349609375, "rewards/real": 3.319448471069336, "step": 1870 }, { "epoch": 1.2, "learning_rate": 3.3281990521327015e-07, "logits/generated": -1.880443811416626, "logits/real": -1.9807466268539429, "logps/generated": -549.1322021484375, "logps/real": -339.1936950683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -22.289169311523438, "rewards/margins": 25.542722702026367, "rewards/real": 3.253552198410034, "step": 1880 }, { "epoch": 1.21, "learning_rate": 3.316350710900474e-07, "logits/generated": -2.0523343086242676, "logits/real": -1.8144581317901611, "logps/generated": -597.450439453125, "logps/real": -279.53912353515625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -25.03638458251953, "rewards/margins": 28.15945053100586, "rewards/real": 3.12306809425354, "step": 1890 }, { "epoch": 1.22, "learning_rate": 3.304502369668246e-07, "logits/generated": -2.064744472503662, "logits/real": -1.8931026458740234, "logps/generated": -582.7722778320312, "logps/real": -315.5670471191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -23.94598388671875, "rewards/margins": 27.76973533630371, "rewards/real": 3.823754072189331, "step": 1900 }, { "epoch": 1.22, "learning_rate": 3.2926540284360186e-07, "logits/generated": -1.7517362833023071, "logits/real": -2.0231666564941406, "logps/generated": -570.6331787109375, "logps/real": -321.64105224609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -23.200817108154297, "rewards/margins": 26.99349594116211, "rewards/real": 3.79267954826355, "step": 1910 }, { "epoch": 1.23, "learning_rate": 3.2808056872037913e-07, "logits/generated": -1.8343127965927124, "logits/real": -2.0571393966674805, "logps/generated": -547.771240234375, "logps/real": -312.111083984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -21.2086181640625, "rewards/margins": 24.756610870361328, "rewards/real": 3.54799222946167, "step": 1920 }, { "epoch": 1.23, "learning_rate": 3.2689573459715635e-07, "logits/generated": -1.9270051717758179, "logits/real": -1.8062372207641602, "logps/generated": -565.4856567382812, "logps/real": -301.0167541503906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -23.189224243164062, "rewards/margins": 27.335012435913086, "rewards/real": 4.145788669586182, "step": 1930 }, { "epoch": 1.24, "learning_rate": 3.2571090047393363e-07, "logits/generated": -1.8131685256958008, "logits/real": -2.0428571701049805, "logps/generated": -590.5784301757812, "logps/real": -266.54241943359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -24.80461311340332, "rewards/margins": 29.08700180053711, "rewards/real": 4.282387733459473, "step": 1940 }, { "epoch": 1.25, "learning_rate": 3.245260663507109e-07, "logits/generated": -1.9174648523330688, "logits/real": -1.8358278274536133, "logps/generated": -582.83935546875, "logps/real": -287.4928283691406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -23.553165435791016, "rewards/margins": 26.586658477783203, "rewards/real": 3.033494234085083, "step": 1950 }, { "epoch": 1.25, "learning_rate": 3.2334123222748817e-07, "logits/generated": -1.8878023624420166, "logits/real": -1.9368507862091064, "logps/generated": -585.2578125, "logps/real": -253.0170135498047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.012441635131836, "rewards/margins": 27.32438087463379, "rewards/real": 3.311936616897583, "step": 1960 }, { "epoch": 1.26, "learning_rate": 3.221563981042654e-07, "logits/generated": -1.9772708415985107, "logits/real": -1.8338463306427002, "logps/generated": -573.5317993164062, "logps/real": -306.5912170410156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -23.974407196044922, "rewards/margins": 27.267017364501953, "rewards/real": 3.2926125526428223, "step": 1970 }, { "epoch": 1.27, "learning_rate": 3.209715639810426e-07, "logits/generated": -1.7102349996566772, "logits/real": -1.9159860610961914, "logps/generated": -618.8963623046875, "logps/real": -279.89013671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -26.11549949645996, "rewards/margins": 29.651748657226562, "rewards/real": 3.5362460613250732, "step": 1980 }, { "epoch": 1.27, "learning_rate": 3.197867298578199e-07, "logits/generated": -1.6864392757415771, "logits/real": -1.8802299499511719, "logps/generated": -616.2498168945312, "logps/real": -275.2442932128906, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -26.633068084716797, "rewards/margins": 30.36293601989746, "rewards/real": 3.729863405227661, "step": 1990 }, { "epoch": 1.28, "learning_rate": 3.186018957345971e-07, "logits/generated": -1.8381693363189697, "logits/real": -1.8384807109832764, "logps/generated": -585.6036376953125, "logps/real": -273.29888916015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -24.226926803588867, "rewards/margins": 27.738134384155273, "rewards/real": 3.5112099647521973, "step": 2000 }, { "epoch": 1.29, "learning_rate": 3.174170616113744e-07, "logits/generated": -1.6463654041290283, "logits/real": -1.977158546447754, "logps/generated": -502.1932678222656, "logps/real": -264.587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -20.546627044677734, "rewards/margins": 24.989709854125977, "rewards/real": 4.443085193634033, "step": 2010 }, { "epoch": 1.29, "learning_rate": 3.1623222748815165e-07, "logits/generated": -1.6322021484375, "logits/real": -1.8763443231582642, "logps/generated": -571.6034545898438, "logps/real": -261.6669616699219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -23.86781120300293, "rewards/margins": 27.432903289794922, "rewards/real": 3.5650887489318848, "step": 2020 }, { "epoch": 1.3, "learning_rate": 3.150473933649289e-07, "logits/generated": -1.8191667795181274, "logits/real": -2.0057833194732666, "logps/generated": -617.8831176757812, "logps/real": -298.8994445800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.405025482177734, "rewards/margins": 30.166950225830078, "rewards/real": 2.761923313140869, "step": 2030 }, { "epoch": 1.31, "learning_rate": 3.1386255924170614e-07, "logits/generated": -2.024550199508667, "logits/real": -1.8825534582138062, "logps/generated": -592.9784545898438, "logps/real": -306.79779052734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -25.554508209228516, "rewards/margins": 28.323190689086914, "rewards/real": 2.7686805725097656, "step": 2040 }, { "epoch": 1.31, "learning_rate": 3.126777251184834e-07, "logits/generated": -1.8739843368530273, "logits/real": -1.8097909688949585, "logps/generated": -620.1505126953125, "logps/real": -293.37298583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.66128158569336, "rewards/margins": 29.812353134155273, "rewards/real": 3.1510748863220215, "step": 2050 }, { "epoch": 1.32, "learning_rate": 3.1149289099526064e-07, "logits/generated": -1.7223927974700928, "logits/real": -1.808201789855957, "logps/generated": -636.3043823242188, "logps/real": -315.7878112792969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.204065322875977, "rewards/margins": 30.67660903930664, "rewards/real": 3.4725449085235596, "step": 2060 }, { "epoch": 1.32, "learning_rate": 3.103080568720379e-07, "logits/generated": -1.6901743412017822, "logits/real": -1.8218104839324951, "logps/generated": -578.0655517578125, "logps/real": -273.15093994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.49802589416504, "rewards/margins": 29.08624267578125, "rewards/real": 3.5882136821746826, "step": 2070 }, { "epoch": 1.33, "learning_rate": 3.0912322274881513e-07, "logits/generated": -1.7015644311904907, "logits/real": -1.8814846277236938, "logps/generated": -584.56298828125, "logps/real": -336.11920166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.091106414794922, "rewards/margins": 28.29192543029785, "rewards/real": 3.200817823410034, "step": 2080 }, { "epoch": 1.34, "learning_rate": 3.079383886255924e-07, "logits/generated": -1.8347034454345703, "logits/real": -1.8702787160873413, "logps/generated": -616.7276611328125, "logps/real": -293.54827880859375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -27.113285064697266, "rewards/margins": 29.179180145263672, "rewards/real": 2.06589674949646, "step": 2090 }, { "epoch": 1.34, "learning_rate": 3.067535545023697e-07, "logits/generated": -1.7410576343536377, "logits/real": -1.8008630275726318, "logps/generated": -661.8153076171875, "logps/real": -305.880615234375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -32.84894943237305, "rewards/margins": 31.93963050842285, "rewards/real": -0.9093185663223267, "step": 2100 }, { "epoch": 1.35, "learning_rate": 3.055687203791469e-07, "logits/generated": -1.897117018699646, "logits/real": -1.8135017156600952, "logps/generated": -716.5050659179688, "logps/real": -370.1857604980469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -36.69164276123047, "rewards/margins": 34.37704849243164, "rewards/real": -2.314600944519043, "step": 2110 }, { "epoch": 1.36, "learning_rate": 3.0438388625592417e-07, "logits/generated": -1.9421539306640625, "logits/real": -1.9636482000350952, "logps/generated": -605.3670043945312, "logps/real": -331.5389709472656, "loss": 0.0058, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.606775283813477, "rewards/margins": 29.530101776123047, "rewards/real": 0.9233258962631226, "step": 2120 }, { "epoch": 1.36, "learning_rate": 3.0319905213270144e-07, "logits/generated": -1.3943746089935303, "logits/real": -1.6971073150634766, "logps/generated": -596.7127685546875, "logps/real": -293.3782653808594, "loss": 0.0249, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.32013702392578, "rewards/margins": 25.084566116333008, "rewards/real": 1.7644271850585938, "step": 2130 }, { "epoch": 1.37, "learning_rate": 3.0201421800947866e-07, "logits/generated": -1.3084129095077515, "logits/real": -1.3234128952026367, "logps/generated": -531.8546142578125, "logps/real": -316.83709716796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -19.36252784729004, "rewards/margins": 20.09618377685547, "rewards/real": 0.7336557507514954, "step": 2140 }, { "epoch": 1.38, "learning_rate": 3.008293838862559e-07, "logits/generated": -1.228764533996582, "logits/real": -1.395554780960083, "logps/generated": -538.2271728515625, "logps/real": -304.032470703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -19.07200813293457, "rewards/margins": 20.8383846282959, "rewards/real": 1.7663767337799072, "step": 2150 }, { "epoch": 1.38, "learning_rate": 2.9964454976303315e-07, "logits/generated": -1.3817543983459473, "logits/real": -1.4019010066986084, "logps/generated": -543.9381103515625, "logps/real": -314.1099853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -19.616451263427734, "rewards/margins": 21.924898147583008, "rewards/real": 2.3084473609924316, "step": 2160 }, { "epoch": 1.39, "learning_rate": 2.984597156398104e-07, "logits/generated": -1.068721055984497, "logits/real": -1.4888819456100464, "logps/generated": -550.9620361328125, "logps/real": -305.05035400390625, "loss": 0.017, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.184024810791016, "rewards/margins": 23.065868377685547, "rewards/real": 1.8818422555923462, "step": 2170 }, { "epoch": 1.39, "learning_rate": 2.9727488151658765e-07, "logits/generated": -1.153241753578186, "logits/real": -1.5276806354522705, "logps/generated": -582.9771118164062, "logps/real": -362.70574951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.368785858154297, "rewards/margins": 25.78885269165039, "rewards/real": 0.4200686812400818, "step": 2180 }, { "epoch": 1.4, "learning_rate": 2.960900473933649e-07, "logits/generated": -0.956773579120636, "logits/real": -1.400811791419983, "logps/generated": -588.4277954101562, "logps/real": -276.4454650878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.368419647216797, "rewards/margins": 27.114978790283203, "rewards/real": 1.7465556859970093, "step": 2190 }, { "epoch": 1.41, "learning_rate": 2.949052132701422e-07, "logits/generated": -1.220428705215454, "logits/real": -1.4437458515167236, "logps/generated": -621.8016357421875, "logps/real": -308.28143310546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -26.380334854125977, "rewards/margins": 27.834918975830078, "rewards/real": 1.4545824527740479, "step": 2200 }, { "epoch": 1.41, "learning_rate": 2.9372037914691946e-07, "logits/generated": -1.2705103158950806, "logits/real": -1.4929685592651367, "logps/generated": -644.2764892578125, "logps/real": -304.3802185058594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -29.42551612854004, "rewards/margins": 30.917144775390625, "rewards/real": 1.4916292428970337, "step": 2210 }, { "epoch": 1.42, "learning_rate": 2.9253554502369663e-07, "logits/generated": -1.1418436765670776, "logits/real": -1.4128586053848267, "logps/generated": -602.6786499023438, "logps/real": -270.84246826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.513574600219727, "rewards/margins": 28.531085968017578, "rewards/real": 2.0175089836120605, "step": 2220 }, { "epoch": 1.43, "learning_rate": 2.913507109004739e-07, "logits/generated": -1.0388188362121582, "logits/real": -1.4454652070999146, "logps/generated": -640.0425415039062, "logps/real": -315.54766845703125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -28.77117347717285, "rewards/margins": 30.052099227905273, "rewards/real": 1.280925989151001, "step": 2230 }, { "epoch": 1.43, "learning_rate": 2.901658767772512e-07, "logits/generated": -1.6473445892333984, "logits/real": -1.5472307205200195, "logps/generated": -539.6859130859375, "logps/real": -280.5103454589844, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -23.28512954711914, "rewards/margins": 24.63581085205078, "rewards/real": 1.3506834506988525, "step": 2240 }, { "epoch": 1.44, "learning_rate": 2.889810426540284e-07, "logits/generated": -1.882515549659729, "logits/real": -1.6729192733764648, "logps/generated": -595.6201171875, "logps/real": -317.88507080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.310232162475586, "rewards/margins": 26.0330810546875, "rewards/real": 1.722848892211914, "step": 2250 }, { "epoch": 1.45, "learning_rate": 2.8779620853080567e-07, "logits/generated": -1.6899830102920532, "logits/real": -1.8054288625717163, "logps/generated": -537.6807861328125, "logps/real": -298.7416687011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -22.164093017578125, "rewards/margins": 23.843181610107422, "rewards/real": 1.679089903831482, "step": 2260 }, { "epoch": 1.45, "learning_rate": 2.8661137440758294e-07, "logits/generated": -1.740455985069275, "logits/real": -1.7120296955108643, "logps/generated": -608.4501342773438, "logps/real": -346.88946533203125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -24.7525634765625, "rewards/margins": 26.00356674194336, "rewards/real": 1.2510055303573608, "step": 2270 }, { "epoch": 1.46, "learning_rate": 2.854265402843602e-07, "logits/generated": -1.8606593608856201, "logits/real": -1.6387965679168701, "logps/generated": -598.9221801757812, "logps/real": -313.3849792480469, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -24.296688079833984, "rewards/margins": 26.203506469726562, "rewards/real": 1.9068161249160767, "step": 2280 }, { "epoch": 1.47, "learning_rate": 2.842417061611374e-07, "logits/generated": -1.7343709468841553, "logits/real": -1.847726583480835, "logps/generated": -542.1185302734375, "logps/real": -318.7292785644531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -19.835063934326172, "rewards/margins": 22.733062744140625, "rewards/real": 2.8979992866516113, "step": 2290 }, { "epoch": 1.47, "learning_rate": 2.8305687203791465e-07, "logits/generated": -1.6484702825546265, "logits/real": -1.8238489627838135, "logps/generated": -546.9429931640625, "logps/real": -301.6571960449219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -20.158252716064453, "rewards/margins": 23.31367301940918, "rewards/real": 3.155423641204834, "step": 2300 }, { "epoch": 1.48, "learning_rate": 2.8187203791469193e-07, "logits/generated": -1.6703771352767944, "logits/real": -1.7239478826522827, "logps/generated": -543.681640625, "logps/real": -293.0142822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -20.65591049194336, "rewards/margins": 24.258466720581055, "rewards/real": 3.6025567054748535, "step": 2310 }, { "epoch": 1.48, "learning_rate": 2.806872037914692e-07, "logits/generated": -1.7382898330688477, "logits/real": -1.6915886402130127, "logps/generated": -573.1199951171875, "logps/real": -318.62677001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -22.412513732910156, "rewards/margins": 25.633304595947266, "rewards/real": 3.220787525177002, "step": 2320 }, { "epoch": 1.49, "learning_rate": 2.795023696682464e-07, "logits/generated": -1.8029648065567017, "logits/real": -1.6964941024780273, "logps/generated": -577.6658935546875, "logps/real": -240.9466094970703, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -21.12717628479004, "rewards/margins": 24.653339385986328, "rewards/real": 3.526162624359131, "step": 2330 }, { "epoch": 1.5, "learning_rate": 2.783175355450237e-07, "logits/generated": -1.7998230457305908, "logits/real": -1.8626855611801147, "logps/generated": -518.8606567382812, "logps/real": -285.0722961425781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -19.61860466003418, "rewards/margins": 22.572162628173828, "rewards/real": 2.9535579681396484, "step": 2340 }, { "epoch": 1.5, "learning_rate": 2.7713270142180097e-07, "logits/generated": -1.7941243648529053, "logits/real": -1.765020728111267, "logps/generated": -559.3449096679688, "logps/real": -292.52874755859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -21.82884979248047, "rewards/margins": 24.59012794494629, "rewards/real": 2.7612757682800293, "step": 2350 }, { "epoch": 1.51, "learning_rate": 2.759478672985782e-07, "logits/generated": -1.4995002746582031, "logits/real": -1.7280094623565674, "logps/generated": -542.0667724609375, "logps/real": -265.61456298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -20.489727020263672, "rewards/margins": 23.495431900024414, "rewards/real": 3.005704164505005, "step": 2360 }, { "epoch": 1.52, "learning_rate": 2.747630331753554e-07, "logits/generated": -1.6669187545776367, "logits/real": -1.7655481100082397, "logps/generated": -537.923583984375, "logps/real": -288.6806945800781, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -20.93265724182129, "rewards/margins": 24.955150604248047, "rewards/real": 4.022492408752441, "step": 2370 }, { "epoch": 1.52, "learning_rate": 2.735781990521327e-07, "logits/generated": -1.6863346099853516, "logits/real": -1.6862386465072632, "logps/generated": -587.3677368164062, "logps/real": -274.9526062011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -21.90387725830078, "rewards/margins": 25.57526206970215, "rewards/real": 3.6713855266571045, "step": 2380 }, { "epoch": 1.53, "learning_rate": 2.7239336492890995e-07, "logits/generated": -1.6835005283355713, "logits/real": -1.6754205226898193, "logps/generated": -525.8923950195312, "logps/real": -260.65625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -20.118059158325195, "rewards/margins": 24.25434684753418, "rewards/real": 4.136286735534668, "step": 2390 }, { "epoch": 1.54, "learning_rate": 2.7120853080568717e-07, "logits/generated": -1.7202863693237305, "logits/real": -1.7276380062103271, "logps/generated": -544.0513916015625, "logps/real": -257.2022705078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -20.765392303466797, "rewards/margins": 24.455705642700195, "rewards/real": 3.690312623977661, "step": 2400 }, { "epoch": 1.54, "learning_rate": 2.7002369668246444e-07, "logits/generated": -1.90768563747406, "logits/real": -1.7088896036148071, "logps/generated": -562.185791015625, "logps/real": -296.63421630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -21.622730255126953, "rewards/margins": 24.928424835205078, "rewards/real": 3.3056907653808594, "step": 2410 }, { "epoch": 1.55, "learning_rate": 2.688388625592417e-07, "logits/generated": -1.502138376235962, "logits/real": -1.7395317554473877, "logps/generated": -518.5991821289062, "logps/real": -298.8111267089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -19.619686126708984, "rewards/margins": 22.98178482055664, "rewards/real": 3.3620963096618652, "step": 2420 }, { "epoch": 1.55, "learning_rate": 2.6765402843601894e-07, "logits/generated": -1.629591941833496, "logits/real": -1.72141432762146, "logps/generated": -531.663818359375, "logps/real": -253.68130493164062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -20.92636489868164, "rewards/margins": 24.698654174804688, "rewards/real": 3.7722859382629395, "step": 2430 }, { "epoch": 1.56, "learning_rate": 2.664691943127962e-07, "logits/generated": -1.879055380821228, "logits/real": -1.7535613775253296, "logps/generated": -539.4899291992188, "logps/real": -314.13934326171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -20.46742820739746, "rewards/margins": 23.038652420043945, "rewards/real": 2.571223020553589, "step": 2440 }, { "epoch": 1.57, "learning_rate": 2.6528436018957343e-07, "logits/generated": -1.8296849727630615, "logits/real": -1.605936050415039, "logps/generated": -566.1544799804688, "logps/real": -230.4271697998047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -22.756004333496094, "rewards/margins": 26.74905014038086, "rewards/real": 3.9930431842803955, "step": 2450 }, { "epoch": 1.57, "learning_rate": 2.640995260663507e-07, "logits/generated": -1.6583645343780518, "logits/real": -1.799968957901001, "logps/generated": -585.8865966796875, "logps/real": -313.61785888671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -23.51220703125, "rewards/margins": 26.34749984741211, "rewards/real": 2.8352932929992676, "step": 2460 }, { "epoch": 1.58, "learning_rate": 2.629146919431279e-07, "logits/generated": -1.622989296913147, "logits/real": -1.6733757257461548, "logps/generated": -561.3709716796875, "logps/real": -303.838134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -22.487499237060547, "rewards/margins": 24.434743881225586, "rewards/real": 1.9472436904907227, "step": 2470 }, { "epoch": 1.59, "learning_rate": 2.617298578199052e-07, "logits/generated": -1.7829269170761108, "logits/real": -1.6524391174316406, "logps/generated": -592.2250366210938, "logps/real": -302.03204345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -23.640161514282227, "rewards/margins": 26.890300750732422, "rewards/real": 3.25014066696167, "step": 2480 }, { "epoch": 1.59, "learning_rate": 2.6054502369668247e-07, "logits/generated": -1.7160056829452515, "logits/real": -1.678972840309143, "logps/generated": -566.9390258789062, "logps/real": -279.036376953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -22.70218849182129, "rewards/margins": 25.37218475341797, "rewards/real": 2.6699976921081543, "step": 2490 }, { "epoch": 1.6, "learning_rate": 2.5936018957345974e-07, "logits/generated": -1.5256998538970947, "logits/real": -1.6813528537750244, "logps/generated": -575.34619140625, "logps/real": -272.69378662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -23.272594451904297, "rewards/margins": 26.237014770507812, "rewards/real": 2.964425563812256, "step": 2500 }, { "epoch": 1.61, "learning_rate": 2.5817535545023696e-07, "logits/generated": -1.5834429264068604, "logits/real": -1.686923623085022, "logps/generated": -552.1434326171875, "logps/real": -323.09906005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -21.91162109375, "rewards/margins": 24.901554107666016, "rewards/real": 2.9899332523345947, "step": 2510 }, { "epoch": 1.61, "learning_rate": 2.5699052132701423e-07, "logits/generated": -1.6007649898529053, "logits/real": -1.6745460033416748, "logps/generated": -545.5789794921875, "logps/real": -261.6502380371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -21.642099380493164, "rewards/margins": 24.076602935791016, "rewards/real": 2.4345054626464844, "step": 2520 }, { "epoch": 1.62, "learning_rate": 2.5580568720379145e-07, "logits/generated": -1.5265768766403198, "logits/real": -1.6884973049163818, "logps/generated": -558.7433471679688, "logps/real": -308.2214660644531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -22.215543746948242, "rewards/margins": 24.414316177368164, "rewards/real": 2.198770046234131, "step": 2530 }, { "epoch": 1.63, "learning_rate": 2.5462085308056867e-07, "logits/generated": -1.5399916172027588, "logits/real": -1.837083101272583, "logps/generated": -564.5648193359375, "logps/real": -309.82147216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -22.15167236328125, "rewards/margins": 24.972917556762695, "rewards/real": 2.821242332458496, "step": 2540 }, { "epoch": 1.63, "learning_rate": 2.5343601895734595e-07, "logits/generated": -1.8350512981414795, "logits/real": -1.6069164276123047, "logps/generated": -596.8106079101562, "logps/real": -262.0002136230469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -23.985454559326172, "rewards/margins": 26.541339874267578, "rewards/real": 2.555886745452881, "step": 2550 }, { "epoch": 1.64, "learning_rate": 2.522511848341232e-07, "logits/generated": -1.5803308486938477, "logits/real": -1.6225595474243164, "logps/generated": -580.0052490234375, "logps/real": -256.22113037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -23.678485870361328, "rewards/margins": 26.053295135498047, "rewards/real": 2.3748135566711426, "step": 2560 }, { "epoch": 1.64, "learning_rate": 2.510663507109005e-07, "logits/generated": -1.5564558506011963, "logits/real": -1.8049567937850952, "logps/generated": -550.60302734375, "logps/real": -325.60467529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -21.83065414428711, "rewards/margins": 23.404865264892578, "rewards/real": 1.574207067489624, "step": 2570 }, { "epoch": 1.65, "learning_rate": 2.498815165876777e-07, "logits/generated": -1.7282562255859375, "logits/real": -1.8445695638656616, "logps/generated": -521.8512573242188, "logps/real": -325.5350646972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -20.430896759033203, "rewards/margins": 23.056838989257812, "rewards/real": 2.6259427070617676, "step": 2580 }, { "epoch": 1.66, "learning_rate": 2.48696682464455e-07, "logits/generated": -1.690407156944275, "logits/real": -1.8025366067886353, "logps/generated": -530.1163330078125, "logps/real": -341.3690185546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -20.848060607910156, "rewards/margins": 23.671215057373047, "rewards/real": 2.823157787322998, "step": 2590 }, { "epoch": 1.66, "learning_rate": 2.475118483412322e-07, "logits/generated": -1.7176539897918701, "logits/real": -1.6664135456085205, "logps/generated": -591.307373046875, "logps/real": -284.5378723144531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -23.69329833984375, "rewards/margins": 25.997201919555664, "rewards/real": 2.3039019107818604, "step": 2600 }, { "epoch": 1.67, "learning_rate": 2.463270142180095e-07, "logits/generated": -1.637500524520874, "logits/real": -1.666680097579956, "logps/generated": -578.0511474609375, "logps/real": -306.49853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -22.837610244750977, "rewards/margins": 25.457876205444336, "rewards/real": 2.6202683448791504, "step": 2610 }, { "epoch": 1.68, "learning_rate": 2.451421800947867e-07, "logits/generated": -1.6651175022125244, "logits/real": -1.6444517374038696, "logps/generated": -581.2926635742188, "logps/real": -298.8360290527344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -23.60995864868164, "rewards/margins": 26.311880111694336, "rewards/real": 2.7019219398498535, "step": 2620 }, { "epoch": 1.68, "learning_rate": 2.4395734597156397e-07, "logits/generated": -1.709985375404358, "logits/real": -1.6348882913589478, "logps/generated": -632.7125854492188, "logps/real": -234.1561279296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -26.975143432617188, "rewards/margins": 29.971363067626953, "rewards/real": 2.996216297149658, "step": 2630 }, { "epoch": 1.69, "learning_rate": 2.4277251184834124e-07, "logits/generated": -1.4963634014129639, "logits/real": -1.7173879146575928, "logps/generated": -608.433349609375, "logps/real": -298.2292175292969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -26.566797256469727, "rewards/margins": 28.99003028869629, "rewards/real": 2.423236846923828, "step": 2640 }, { "epoch": 1.7, "learning_rate": 2.4158767772511846e-07, "logits/generated": -1.5703046321868896, "logits/real": -1.6707559823989868, "logps/generated": -576.9781494140625, "logps/real": -322.95428466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.956544876098633, "rewards/margins": 26.741918563842773, "rewards/real": 1.7853772640228271, "step": 2650 }, { "epoch": 1.7, "learning_rate": 2.4040284360189573e-07, "logits/generated": -1.4444162845611572, "logits/real": -1.5661994218826294, "logps/generated": -576.7459716796875, "logps/real": -319.0561828613281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.719341278076172, "rewards/margins": 27.756183624267578, "rewards/real": 3.03684139251709, "step": 2660 }, { "epoch": 1.71, "learning_rate": 2.39218009478673e-07, "logits/generated": -1.8646812438964844, "logits/real": -1.5148258209228516, "logps/generated": -566.437255859375, "logps/real": -281.23040771484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -25.121912002563477, "rewards/margins": 27.168109893798828, "rewards/real": 2.0461976528167725, "step": 2670 }, { "epoch": 1.71, "learning_rate": 2.3803317535545023e-07, "logits/generated": -1.378333330154419, "logits/real": -1.5609514713287354, "logps/generated": -619.1652221679688, "logps/real": -347.43536376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.790395736694336, "rewards/margins": 27.905914306640625, "rewards/real": 1.115519642829895, "step": 2680 }, { "epoch": 1.72, "learning_rate": 2.3684834123222747e-07, "logits/generated": -1.36008882522583, "logits/real": -1.6660432815551758, "logps/generated": -561.6815795898438, "logps/real": -293.2484130859375, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/generated": -23.929176330566406, "rewards/margins": 26.499109268188477, "rewards/real": 2.569932460784912, "step": 2690 }, { "epoch": 1.73, "learning_rate": 2.3566350710900475e-07, "logits/generated": -1.504368543624878, "logits/real": -1.9206537008285522, "logps/generated": -488.05645751953125, "logps/real": -287.9764404296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -18.00989532470703, "rewards/margins": 22.635772705078125, "rewards/real": 4.625874996185303, "step": 2700 }, { "epoch": 1.73, "learning_rate": 2.3447867298578197e-07, "logits/generated": -1.4794145822525024, "logits/real": -1.7442691326141357, "logps/generated": -561.9444580078125, "logps/real": -247.56051635742188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -21.199321746826172, "rewards/margins": 25.599380493164062, "rewards/real": 4.400059223175049, "step": 2710 }, { "epoch": 1.74, "learning_rate": 2.3329383886255924e-07, "logits/generated": -1.6442312002182007, "logits/real": -1.769339919090271, "logps/generated": -551.6962280273438, "logps/real": -257.5611572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -20.487812042236328, "rewards/margins": 24.571170806884766, "rewards/real": 4.0833587646484375, "step": 2720 }, { "epoch": 1.75, "learning_rate": 2.3210900473933649e-07, "logits/generated": -1.592903971672058, "logits/real": -1.850720763206482, "logps/generated": -531.4888916015625, "logps/real": -253.69613647460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -20.362010955810547, "rewards/margins": 24.975704193115234, "rewards/real": 4.613691329956055, "step": 2730 }, { "epoch": 1.75, "learning_rate": 2.3092417061611373e-07, "logits/generated": -1.3967700004577637, "logits/real": -1.8371734619140625, "logps/generated": -573.7647705078125, "logps/real": -279.97320556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -23.700817108154297, "rewards/margins": 26.90018653869629, "rewards/real": 3.199371814727783, "step": 2740 }, { "epoch": 1.76, "learning_rate": 2.2973933649289098e-07, "logits/generated": -1.4378259181976318, "logits/real": -1.7589184045791626, "logps/generated": -573.2015380859375, "logps/real": -272.30865478515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -22.328006744384766, "rewards/margins": 25.927379608154297, "rewards/real": 3.5993778705596924, "step": 2750 }, { "epoch": 1.77, "learning_rate": 2.2855450236966822e-07, "logits/generated": -1.4664353132247925, "logits/real": -1.6138193607330322, "logps/generated": -635.1412353515625, "logps/real": -250.72994995117188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -26.56173324584961, "rewards/margins": 29.27005386352539, "rewards/real": 2.7083113193511963, "step": 2760 }, { "epoch": 1.77, "learning_rate": 2.273696682464455e-07, "logits/generated": -1.5649610757827759, "logits/real": -1.6842445135116577, "logps/generated": -598.2295532226562, "logps/real": -249.21609497070312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -24.869779586791992, "rewards/margins": 28.59454917907715, "rewards/real": 3.7247700691223145, "step": 2770 }, { "epoch": 1.78, "learning_rate": 2.2618483412322272e-07, "logits/generated": -1.411211371421814, "logits/real": -1.7182838916778564, "logps/generated": -573.9729614257812, "logps/real": -315.7457580566406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -23.289600372314453, "rewards/margins": 26.97934913635254, "rewards/real": 3.689749240875244, "step": 2780 }, { "epoch": 1.79, "learning_rate": 2.25e-07, "logits/generated": -1.5727872848510742, "logits/real": -1.671972632408142, "logps/generated": -595.99853515625, "logps/real": -285.1081848144531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.40205955505371, "rewards/margins": 27.199880599975586, "rewards/real": 2.797821044921875, "step": 2790 }, { "epoch": 1.79, "learning_rate": 2.2381516587677724e-07, "logits/generated": -1.652439832687378, "logits/real": -1.8253790140151978, "logps/generated": -546.5967407226562, "logps/real": -259.60369873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -22.855207443237305, "rewards/margins": 25.357242584228516, "rewards/real": 2.502035617828369, "step": 2800 }, { "epoch": 1.8, "learning_rate": 2.226303317535545e-07, "logits/generated": -1.3320658206939697, "logits/real": -1.6522868871688843, "logps/generated": -582.7369384765625, "logps/real": -282.2769470214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -23.61196517944336, "rewards/margins": 27.05718994140625, "rewards/real": 3.4452216625213623, "step": 2810 }, { "epoch": 1.8, "learning_rate": 2.2144549763033173e-07, "logits/generated": -1.6425540447235107, "logits/real": -1.7631067037582397, "logps/generated": -575.66796875, "logps/real": -297.32672119140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -24.034452438354492, "rewards/margins": 27.03037452697754, "rewards/real": 2.995924234390259, "step": 2820 }, { "epoch": 1.81, "learning_rate": 2.20260663507109e-07, "logits/generated": -1.5740302801132202, "logits/real": -1.8250007629394531, "logps/generated": -606.4158325195312, "logps/real": -306.7263488769531, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -25.581768035888672, "rewards/margins": 27.989206314086914, "rewards/real": 2.407435655593872, "step": 2830 }, { "epoch": 1.82, "learning_rate": 2.1907582938388625e-07, "logits/generated": -1.649320363998413, "logits/real": -1.846474051475525, "logps/generated": -605.8023071289062, "logps/real": -280.73626708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.99935531616211, "rewards/margins": 28.14822006225586, "rewards/real": 2.1488640308380127, "step": 2840 }, { "epoch": 1.82, "learning_rate": 2.178909952606635e-07, "logits/generated": -1.5549839735031128, "logits/real": -1.6120601892471313, "logps/generated": -611.2003173828125, "logps/real": -278.5312194824219, "loss": 0.0074, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.52594566345215, "rewards/margins": 29.042705535888672, "rewards/real": 0.516758382320404, "step": 2850 }, { "epoch": 1.83, "learning_rate": 2.1670616113744074e-07, "logits/generated": -1.415512204170227, "logits/real": -1.561818242073059, "logps/generated": -668.7357177734375, "logps/real": -279.74737548828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -32.04682159423828, "rewards/margins": 31.371353149414062, "rewards/real": -0.6754702925682068, "step": 2860 }, { "epoch": 1.84, "learning_rate": 2.15521327014218e-07, "logits/generated": -1.423190712928772, "logits/real": -1.6400619745254517, "logps/generated": -664.2633056640625, "logps/real": -325.55865478515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -31.040599822998047, "rewards/margins": 29.51907730102539, "rewards/real": -1.5215250253677368, "step": 2870 }, { "epoch": 1.84, "learning_rate": 2.1433649289099526e-07, "logits/generated": -1.573998212814331, "logits/real": -1.6320655345916748, "logps/generated": -703.1906127929688, "logps/real": -382.8506774902344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.363338470458984, "rewards/margins": 32.05171585083008, "rewards/real": -1.3116222620010376, "step": 2880 }, { "epoch": 1.85, "learning_rate": 2.131516587677725e-07, "logits/generated": -1.1965372562408447, "logits/real": -1.7842414379119873, "logps/generated": -620.596923828125, "logps/real": -386.71148681640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -29.108715057373047, "rewards/margins": 28.015487670898438, "rewards/real": -1.0932306051254272, "step": 2890 }, { "epoch": 1.86, "learning_rate": 2.1196682464454975e-07, "logits/generated": -1.526530146598816, "logits/real": -1.8163702487945557, "logps/generated": -668.0113525390625, "logps/real": -387.68609619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -31.37652587890625, "rewards/margins": 30.585922241210938, "rewards/real": -0.7905967831611633, "step": 2900 }, { "epoch": 1.86, "learning_rate": 2.10781990521327e-07, "logits/generated": -1.4351173639297485, "logits/real": -1.7728666067123413, "logps/generated": -653.0692138671875, "logps/real": -356.24114990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -30.085880279541016, "rewards/margins": 29.810476303100586, "rewards/real": -0.2754055857658386, "step": 2910 }, { "epoch": 1.87, "learning_rate": 2.0959715639810427e-07, "logits/generated": -1.5915038585662842, "logits/real": -1.7488971948623657, "logps/generated": -654.3868408203125, "logps/real": -363.7198181152344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -30.631240844726562, "rewards/margins": 29.563976287841797, "rewards/real": -1.0672632455825806, "step": 2920 }, { "epoch": 1.87, "learning_rate": 2.0841232227488152e-07, "logits/generated": -1.351778507232666, "logits/real": -1.757230520248413, "logps/generated": -648.4581909179688, "logps/real": -365.08612060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.61551284790039, "rewards/margins": 28.9033145904541, "rewards/real": -0.712199330329895, "step": 2930 }, { "epoch": 1.88, "learning_rate": 2.0722748815165874e-07, "logits/generated": -1.6047985553741455, "logits/real": -1.8345510959625244, "logps/generated": -591.2472534179688, "logps/real": -301.5292663574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.9030704498291, "rewards/margins": 28.29180335998535, "rewards/real": 0.3887341022491455, "step": 2940 }, { "epoch": 1.89, "learning_rate": 2.06042654028436e-07, "logits/generated": -1.4556996822357178, "logits/real": -1.6542593240737915, "logps/generated": -675.2671508789062, "logps/real": -324.229248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.171680450439453, "rewards/margins": 30.4880428314209, "rewards/real": -0.6836373209953308, "step": 2950 }, { "epoch": 1.89, "learning_rate": 2.0485781990521326e-07, "logits/generated": -1.4190815687179565, "logits/real": -1.7127935886383057, "logps/generated": -594.9680786132812, "logps/real": -330.93756103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.250925064086914, "rewards/margins": 26.68577003479004, "rewards/real": -0.5651555061340332, "step": 2960 }, { "epoch": 1.9, "learning_rate": 2.0367298578199053e-07, "logits/generated": -1.3954211473464966, "logits/real": -1.7968018054962158, "logps/generated": -646.9190673828125, "logps/real": -373.4801940917969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -30.02557945251465, "rewards/margins": 29.03072738647461, "rewards/real": -0.9948552250862122, "step": 2970 }, { "epoch": 1.91, "learning_rate": 2.0248815165876775e-07, "logits/generated": -1.444711685180664, "logits/real": -1.7315549850463867, "logps/generated": -644.6848754882812, "logps/real": -375.57135009765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -29.646875381469727, "rewards/margins": 28.820363998413086, "rewards/real": -0.8265080451965332, "step": 2980 }, { "epoch": 1.91, "learning_rate": 2.0130331753554502e-07, "logits/generated": -1.6923658847808838, "logits/real": -1.9737755060195923, "logps/generated": -544.4427490234375, "logps/real": -273.3411560058594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -21.941570281982422, "rewards/margins": 24.66128158569336, "rewards/real": 2.7197117805480957, "step": 2990 }, { "epoch": 1.92, "learning_rate": 2.0011848341232227e-07, "logits/generated": -1.5422388315200806, "logits/real": -1.918755292892456, "logps/generated": -544.2753295898438, "logps/real": -266.0784912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -21.313146591186523, "rewards/margins": 24.147686004638672, "rewards/real": 2.8345439434051514, "step": 3000 }, { "epoch": 1.93, "learning_rate": 1.9893364928909952e-07, "logits/generated": -1.5944409370422363, "logits/real": -1.947425127029419, "logps/generated": -556.9723510742188, "logps/real": -276.3074951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -22.075267791748047, "rewards/margins": 24.565574645996094, "rewards/real": 2.4903082847595215, "step": 3010 }, { "epoch": 1.93, "learning_rate": 1.9774881516587676e-07, "logits/generated": -1.6972744464874268, "logits/real": -1.8330917358398438, "logps/generated": -586.40087890625, "logps/real": -236.2491912841797, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -23.290576934814453, "rewards/margins": 27.51314926147461, "rewards/real": 4.222574234008789, "step": 3020 }, { "epoch": 1.94, "learning_rate": 1.96563981042654e-07, "logits/generated": -1.2628109455108643, "logits/real": -1.5660161972045898, "logps/generated": -599.4718017578125, "logps/real": -367.8059387207031, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/generated": -24.92922592163086, "rewards/margins": 26.454294204711914, "rewards/real": 1.5250660181045532, "step": 3030 }, { "epoch": 1.94, "learning_rate": 1.9537914691943128e-07, "logits/generated": -0.427297979593277, "logits/real": -1.0060253143310547, "logps/generated": -601.1849975585938, "logps/real": -309.7470703125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -26.890268325805664, "rewards/margins": 28.041290283203125, "rewards/real": 1.15102219581604, "step": 3040 }, { "epoch": 1.95, "learning_rate": 1.9419431279620853e-07, "logits/generated": -0.8232213258743286, "logits/real": -0.875298798084259, "logps/generated": -633.26953125, "logps/real": -303.58538818359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -29.64840316772461, "rewards/margins": 29.389022827148438, "rewards/real": -0.25938329100608826, "step": 3050 }, { "epoch": 1.96, "learning_rate": 1.9300947867298577e-07, "logits/generated": -0.5571356415748596, "logits/real": -1.1288228034973145, "logps/generated": -570.4361572265625, "logps/real": -328.63299560546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -25.28021240234375, "rewards/margins": 26.095117568969727, "rewards/real": 0.8149039149284363, "step": 3060 }, { "epoch": 1.96, "learning_rate": 1.9182464454976302e-07, "logits/generated": -0.514862060546875, "logits/real": -1.217932105064392, "logps/generated": -635.936767578125, "logps/real": -326.1944885253906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -28.763599395751953, "rewards/margins": 29.159343719482422, "rewards/real": 0.3957473337650299, "step": 3070 }, { "epoch": 1.97, "learning_rate": 1.906398104265403e-07, "logits/generated": -0.7496210932731628, "logits/real": -1.0620176792144775, "logps/generated": -674.8850708007812, "logps/real": -292.51666259765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -30.62062644958496, "rewards/margins": 31.120975494384766, "rewards/real": 0.5003488659858704, "step": 3080 }, { "epoch": 1.98, "learning_rate": 1.8945497630331754e-07, "logits/generated": -0.7094641923904419, "logits/real": -1.248439073562622, "logps/generated": -603.7174072265625, "logps/real": -308.1839599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.94955825805664, "rewards/margins": 27.84343910217285, "rewards/real": 0.8938802480697632, "step": 3090 }, { "epoch": 1.98, "learning_rate": 1.8827014218009476e-07, "logits/generated": -1.1065939664840698, "logits/real": -1.3366183042526245, "logps/generated": -572.4763793945312, "logps/real": -325.922119140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -26.0872745513916, "rewards/margins": 26.916667938232422, "rewards/real": 0.8293918371200562, "step": 3100 }, { "epoch": 1.99, "learning_rate": 1.8708530805687203e-07, "logits/generated": -0.9775465130805969, "logits/real": -1.2585813999176025, "logps/generated": -643.0035400390625, "logps/real": -307.4635009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.379985809326172, "rewards/margins": 29.811519622802734, "rewards/real": 1.431530237197876, "step": 3110 }, { "epoch": 2.0, "learning_rate": 1.8590047393364928e-07, "logits/generated": -1.2372281551361084, "logits/real": -1.2516653537750244, "logps/generated": -613.3665771484375, "logps/real": -339.9015808105469, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -27.32230567932129, "rewards/margins": 27.04740333557129, "rewards/real": -0.2749008238315582, "step": 3120 }, { "epoch": 2.0, "learning_rate": 1.8471563981042655e-07, "logits/generated": -0.9734998941421509, "logits/real": -1.3104711771011353, "logps/generated": -552.6294555664062, "logps/real": -326.0587158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -23.91358757019043, "rewards/margins": 25.37361717224121, "rewards/real": 1.460028886795044, "step": 3130 }, { "epoch": 2.01, "learning_rate": 1.8353080568720377e-07, "logits/generated": -1.0094778537750244, "logits/real": -1.2244881391525269, "logps/generated": -614.523681640625, "logps/real": -322.91363525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.17666244506836, "rewards/margins": 29.085453033447266, "rewards/real": 0.9087867736816406, "step": 3140 }, { "epoch": 2.02, "learning_rate": 1.8234597156398104e-07, "logits/generated": -1.036756992340088, "logits/real": -1.1284120082855225, "logps/generated": -616.7979736328125, "logps/real": -283.32757568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.345693588256836, "rewards/margins": 29.235576629638672, "rewards/real": 1.8898861408233643, "step": 3150 }, { "epoch": 2.02, "learning_rate": 1.811611374407583e-07, "logits/generated": -0.8595845103263855, "logits/real": -1.355563998222351, "logps/generated": -576.180908203125, "logps/real": -316.4331970214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.81147003173828, "rewards/margins": 26.70420265197754, "rewards/real": 1.8927299976348877, "step": 3160 }, { "epoch": 2.03, "learning_rate": 1.7997630331753554e-07, "logits/generated": -0.8919976949691772, "logits/real": -1.2751586437225342, "logps/generated": -578.2721557617188, "logps/real": -283.2034912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.054473876953125, "rewards/margins": 27.355371475219727, "rewards/real": 2.3008973598480225, "step": 3170 }, { "epoch": 2.03, "learning_rate": 1.7879146919431278e-07, "logits/generated": -1.1588859558105469, "logits/real": -1.4349632263183594, "logps/generated": -569.14892578125, "logps/real": -382.6138610839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.063751220703125, "rewards/margins": 25.135150909423828, "rewards/real": 1.0714004039764404, "step": 3180 }, { "epoch": 2.04, "learning_rate": 1.7760663507109003e-07, "logits/generated": -0.9971303939819336, "logits/real": -1.304377555847168, "logps/generated": -563.427490234375, "logps/real": -372.6128845214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.147062301635742, "rewards/margins": 25.164873123168945, "rewards/real": 1.017809271812439, "step": 3190 }, { "epoch": 2.05, "learning_rate": 1.764218009478673e-07, "logits/generated": -1.1013610363006592, "logits/real": -1.262295126914978, "logps/generated": -600.2496337890625, "logps/real": -293.4721984863281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.5828800201416, "rewards/margins": 27.238534927368164, "rewards/real": 1.6556533575057983, "step": 3200 }, { "epoch": 2.05, "learning_rate": 1.7523696682464452e-07, "logits/generated": -0.9266592860221863, "logits/real": -1.3672444820404053, "logps/generated": -612.4976806640625, "logps/real": -332.46380615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.804758071899414, "rewards/margins": 26.852863311767578, "rewards/real": 1.0480997562408447, "step": 3210 }, { "epoch": 2.06, "learning_rate": 1.740521327014218e-07, "logits/generated": -0.9361955523490906, "logits/real": -1.3824563026428223, "logps/generated": -598.46630859375, "logps/real": -332.8088073730469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.769994735717773, "rewards/margins": 26.847116470336914, "rewards/real": 1.0771228075027466, "step": 3220 }, { "epoch": 2.07, "learning_rate": 1.7286729857819904e-07, "logits/generated": -0.985366940498352, "logits/real": -1.1709678173065186, "logps/generated": -616.9738159179688, "logps/real": -262.79107666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.542694091796875, "rewards/margins": 28.219512939453125, "rewards/real": 0.6768158078193665, "step": 3230 }, { "epoch": 2.07, "learning_rate": 1.7168246445497631e-07, "logits/generated": -1.1985851526260376, "logits/real": -1.531078815460205, "logps/generated": -625.8180541992188, "logps/real": -294.8016052246094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -28.276691436767578, "rewards/margins": 29.496532440185547, "rewards/real": 1.2198398113250732, "step": 3240 }, { "epoch": 2.08, "learning_rate": 1.7049763033175353e-07, "logits/generated": -1.2943631410598755, "logits/real": -1.387758493423462, "logps/generated": -613.4132080078125, "logps/real": -298.59088134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.818401336669922, "rewards/margins": 29.149784088134766, "rewards/real": 1.3313796520233154, "step": 3250 }, { "epoch": 2.09, "learning_rate": 1.693127962085308e-07, "logits/generated": -1.0421658754348755, "logits/real": -1.6923692226409912, "logps/generated": -564.6715698242188, "logps/real": -368.30938720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -23.875778198242188, "rewards/margins": 25.0933895111084, "rewards/real": 1.2176152467727661, "step": 3260 }, { "epoch": 2.09, "learning_rate": 1.6812796208530805e-07, "logits/generated": -1.0774108171463013, "logits/real": -1.4840654134750366, "logps/generated": -629.8865356445312, "logps/real": -337.7097473144531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.065006256103516, "rewards/margins": 30.08737564086914, "rewards/real": 1.0223655700683594, "step": 3270 }, { "epoch": 2.1, "learning_rate": 1.669431279620853e-07, "logits/generated": -0.8973947763442993, "logits/real": -1.5233434438705444, "logps/generated": -554.3973999023438, "logps/real": -307.70880126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.43309783935547, "rewards/margins": 25.756237030029297, "rewards/real": 1.3231401443481445, "step": 3280 }, { "epoch": 2.1, "learning_rate": 1.6575829383886255e-07, "logits/generated": -1.200847864151001, "logits/real": -1.5147438049316406, "logps/generated": -633.1802978515625, "logps/real": -331.6228332519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.447185516357422, "rewards/margins": 28.465160369873047, "rewards/real": 1.0179722309112549, "step": 3290 }, { "epoch": 2.11, "learning_rate": 1.645734597156398e-07, "logits/generated": -1.3566243648529053, "logits/real": -1.4795764684677124, "logps/generated": -626.9256591796875, "logps/real": -325.57037353515625, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.60080909729004, "rewards/margins": 30.459680557250977, "rewards/real": 1.8588762283325195, "step": 3300 }, { "epoch": 2.12, "learning_rate": 1.6338862559241706e-07, "logits/generated": -1.0033454895019531, "logits/real": -1.476757287979126, "logps/generated": -601.5350341796875, "logps/real": -299.1015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.275096893310547, "rewards/margins": 29.220190048217773, "rewards/real": 1.9450889825820923, "step": 3310 }, { "epoch": 2.12, "learning_rate": 1.622037914691943e-07, "logits/generated": -1.2685123682022095, "logits/real": -1.3022490739822388, "logps/generated": -611.9641723632812, "logps/real": -268.879150390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.80234718322754, "rewards/margins": 28.989404678344727, "rewards/real": 1.187056303024292, "step": 3320 }, { "epoch": 2.13, "learning_rate": 1.6101895734597156e-07, "logits/generated": -1.049785852432251, "logits/real": -1.5218408107757568, "logps/generated": -573.6893310546875, "logps/real": -278.3086853027344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -24.665767669677734, "rewards/margins": 26.660511016845703, "rewards/real": 1.9947429895401, "step": 3330 }, { "epoch": 2.14, "learning_rate": 1.598341232227488e-07, "logits/generated": -1.2419451475143433, "logits/real": -1.5786956548690796, "logps/generated": -559.7239990234375, "logps/real": -295.64447021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.051359176635742, "rewards/margins": 25.4386043548584, "rewards/real": 1.3872464895248413, "step": 3340 }, { "epoch": 2.14, "learning_rate": 1.5864928909952605e-07, "logits/generated": -1.2660870552062988, "logits/real": -1.5404198169708252, "logps/generated": -620.8883666992188, "logps/real": -299.25079345703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.736989974975586, "rewards/margins": 28.408885955810547, "rewards/real": 0.6718959212303162, "step": 3350 }, { "epoch": 2.15, "learning_rate": 1.5746445497630332e-07, "logits/generated": -1.5147781372070312, "logits/real": -1.6086689233779907, "logps/generated": -633.5145874023438, "logps/real": -305.75982666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.531085968017578, "rewards/margins": 30.69515037536621, "rewards/real": 2.1640682220458984, "step": 3360 }, { "epoch": 2.16, "learning_rate": 1.5627962085308054e-07, "logits/generated": -1.3760803937911987, "logits/real": -1.5995023250579834, "logps/generated": -622.7931518554688, "logps/real": -309.18524169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.779796600341797, "rewards/margins": 29.4649715423584, "rewards/real": 1.685172438621521, "step": 3370 }, { "epoch": 2.16, "learning_rate": 1.5509478672985782e-07, "logits/generated": -1.2081844806671143, "logits/real": -1.6825498342514038, "logps/generated": -585.5609130859375, "logps/real": -303.3153991699219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.359302520751953, "rewards/margins": 27.032634735107422, "rewards/real": 1.673335313796997, "step": 3380 }, { "epoch": 2.17, "learning_rate": 1.5390995260663506e-07, "logits/generated": -1.2355971336364746, "logits/real": -1.6540902853012085, "logps/generated": -600.400634765625, "logps/real": -289.13104248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.316720962524414, "rewards/margins": 28.519821166992188, "rewards/real": 2.2030978202819824, "step": 3390 }, { "epoch": 2.18, "learning_rate": 1.5272511848341233e-07, "logits/generated": -1.3537260293960571, "logits/real": -1.5167725086212158, "logps/generated": -657.4996948242188, "logps/real": -292.13690185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.545461654663086, "rewards/margins": 30.9395751953125, "rewards/real": 1.394112229347229, "step": 3400 }, { "epoch": 2.18, "learning_rate": 1.5154028436018955e-07, "logits/generated": -1.1476203203201294, "logits/real": -1.6213958263397217, "logps/generated": -586.9888916015625, "logps/real": -302.6298522949219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.03945541381836, "rewards/margins": 26.6746768951416, "rewards/real": 1.6352207660675049, "step": 3410 }, { "epoch": 2.19, "learning_rate": 1.5035545023696683e-07, "logits/generated": -1.4069702625274658, "logits/real": -1.696362853050232, "logps/generated": -639.28173828125, "logps/real": -319.3243408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.550344467163086, "rewards/margins": 29.582622528076172, "rewards/real": 1.0322760343551636, "step": 3420 }, { "epoch": 2.19, "learning_rate": 1.4917061611374407e-07, "logits/generated": -1.4195467233657837, "logits/real": -1.597637414932251, "logps/generated": -605.4683837890625, "logps/real": -303.27606201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.668222427368164, "rewards/margins": 27.46142578125, "rewards/real": 0.7932027578353882, "step": 3430 }, { "epoch": 2.2, "learning_rate": 1.4798578199052132e-07, "logits/generated": -1.0041682720184326, "logits/real": -1.6599080562591553, "logps/generated": -570.593505859375, "logps/real": -321.76470947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.676586151123047, "rewards/margins": 26.52446937561035, "rewards/real": 1.847882866859436, "step": 3440 }, { "epoch": 2.21, "learning_rate": 1.4680094786729857e-07, "logits/generated": -1.2987579107284546, "logits/real": -1.702481985092163, "logps/generated": -634.9268798828125, "logps/real": -321.74114990234375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -29.65555191040039, "rewards/margins": 30.214004516601562, "rewards/real": 0.5584555268287659, "step": 3450 }, { "epoch": 2.21, "learning_rate": 1.456161137440758e-07, "logits/generated": -1.4205043315887451, "logits/real": -1.7378380298614502, "logps/generated": -632.03271484375, "logps/real": -287.05926513671875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -29.272380828857422, "rewards/margins": 31.304676055908203, "rewards/real": 2.032296657562256, "step": 3460 }, { "epoch": 2.22, "learning_rate": 1.4443127962085309e-07, "logits/generated": -1.4744882583618164, "logits/real": -1.7412210702896118, "logps/generated": -590.938232421875, "logps/real": -304.86236572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.62759017944336, "rewards/margins": 27.75132179260254, "rewards/real": 3.123731851577759, "step": 3470 }, { "epoch": 2.23, "learning_rate": 1.4324644549763033e-07, "logits/generated": -1.2393379211425781, "logits/real": -1.6715360879898071, "logps/generated": -604.7275390625, "logps/real": -273.6032409667969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.18609046936035, "rewards/margins": 28.818084716796875, "rewards/real": 2.631998300552368, "step": 3480 }, { "epoch": 2.23, "learning_rate": 1.4206161137440758e-07, "logits/generated": -1.434203863143921, "logits/real": -1.650731086730957, "logps/generated": -635.7976684570312, "logps/real": -304.21490478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.737253189086914, "rewards/margins": 30.1563720703125, "rewards/real": 2.4191203117370605, "step": 3490 }, { "epoch": 2.24, "learning_rate": 1.4087677725118482e-07, "logits/generated": -1.4407684803009033, "logits/real": -1.6330921649932861, "logps/generated": -602.897705078125, "logps/real": -267.2117919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.354284286499023, "rewards/margins": 28.067819595336914, "rewards/real": 1.7135359048843384, "step": 3500 }, { "epoch": 2.25, "learning_rate": 1.396919431279621e-07, "logits/generated": -1.331176519393921, "logits/real": -1.674940824508667, "logps/generated": -600.8941650390625, "logps/real": -318.4970397949219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.208206176757812, "rewards/margins": 27.928924560546875, "rewards/real": 2.7207229137420654, "step": 3510 }, { "epoch": 2.25, "learning_rate": 1.3850710900473934e-07, "logits/generated": -1.4939625263214111, "logits/real": -1.6055580377578735, "logps/generated": -609.1090087890625, "logps/real": -319.76141357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.091556549072266, "rewards/margins": 29.48468017578125, "rewards/real": 3.3931243419647217, "step": 3520 }, { "epoch": 2.26, "learning_rate": 1.3732227488151656e-07, "logits/generated": -1.542456030845642, "logits/real": -1.5522772073745728, "logps/generated": -661.2096557617188, "logps/real": -270.73358154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.621795654296875, "rewards/margins": 31.113056182861328, "rewards/real": 2.491257905960083, "step": 3530 }, { "epoch": 2.26, "learning_rate": 1.3613744075829384e-07, "logits/generated": -1.4491689205169678, "logits/real": -1.6961276531219482, "logps/generated": -590.4296875, "logps/real": -313.83453369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.028005599975586, "rewards/margins": 27.955068588256836, "rewards/real": 2.9270637035369873, "step": 3540 }, { "epoch": 2.27, "learning_rate": 1.3495260663507108e-07, "logits/generated": -1.6204688549041748, "logits/real": -1.6379458904266357, "logps/generated": -570.3190307617188, "logps/real": -286.086669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.588516235351562, "rewards/margins": 27.206829071044922, "rewards/real": 2.618311643600464, "step": 3550 }, { "epoch": 2.28, "learning_rate": 1.3376777251184836e-07, "logits/generated": -1.3369686603546143, "logits/real": -1.6755695343017578, "logps/generated": -575.3544921875, "logps/real": -299.4778747558594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.92896270751953, "rewards/margins": 27.78573226928711, "rewards/real": 2.856771945953369, "step": 3560 }, { "epoch": 2.28, "learning_rate": 1.3258293838862558e-07, "logits/generated": -1.441577434539795, "logits/real": -1.5940086841583252, "logps/generated": -577.3253784179688, "logps/real": -295.81829833984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -24.420175552368164, "rewards/margins": 26.896053314208984, "rewards/real": 2.4758784770965576, "step": 3570 }, { "epoch": 2.29, "learning_rate": 1.3139810426540285e-07, "logits/generated": -1.3817598819732666, "logits/real": -1.5028915405273438, "logps/generated": -610.8389282226562, "logps/real": -312.4098815917969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.545740127563477, "rewards/margins": 27.793224334716797, "rewards/real": 1.247484803199768, "step": 3580 }, { "epoch": 2.3, "learning_rate": 1.302132701421801e-07, "logits/generated": -1.5594265460968018, "logits/real": -1.5127546787261963, "logps/generated": -608.08544921875, "logps/real": -269.8175048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.948135375976562, "rewards/margins": 30.119991302490234, "rewards/real": 3.1718521118164062, "step": 3590 }, { "epoch": 2.3, "learning_rate": 1.2902843601895734e-07, "logits/generated": -1.156783103942871, "logits/real": -1.5360536575317383, "logps/generated": -613.1757202148438, "logps/real": -307.3021545410156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.381826400756836, "rewards/margins": 29.390579223632812, "rewards/real": 2.008751392364502, "step": 3600 }, { "epoch": 2.31, "learning_rate": 1.278436018957346e-07, "logits/generated": -1.4847004413604736, "logits/real": -1.547949194908142, "logps/generated": -565.8986206054688, "logps/real": -313.78973388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.971221923828125, "rewards/margins": 27.48362159729004, "rewards/real": 2.5123953819274902, "step": 3610 }, { "epoch": 2.32, "learning_rate": 1.2665876777251183e-07, "logits/generated": -1.2976679801940918, "logits/real": -1.5460495948791504, "logps/generated": -579.7281494140625, "logps/real": -269.75341796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.806621551513672, "rewards/margins": 27.99404525756836, "rewards/real": 2.187427043914795, "step": 3620 }, { "epoch": 2.32, "learning_rate": 1.254739336492891e-07, "logits/generated": -1.4941840171813965, "logits/real": -1.451777696609497, "logps/generated": -638.77587890625, "logps/real": -305.6056823730469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.411884307861328, "rewards/margins": 29.89568519592285, "rewards/real": 1.4838016033172607, "step": 3630 }, { "epoch": 2.33, "learning_rate": 1.2428909952606635e-07, "logits/generated": -1.5942895412445068, "logits/real": -1.5046836137771606, "logps/generated": -611.2850341796875, "logps/real": -313.16949462890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.082693099975586, "rewards/margins": 29.100727081298828, "rewards/real": 2.018031597137451, "step": 3640 }, { "epoch": 2.34, "learning_rate": 1.231042654028436e-07, "logits/generated": -1.520900011062622, "logits/real": -1.5740916728973389, "logps/generated": -575.7242431640625, "logps/real": -314.3369445800781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -25.075252532958984, "rewards/margins": 27.779926300048828, "rewards/real": 2.7046732902526855, "step": 3650 }, { "epoch": 2.34, "learning_rate": 1.2191943127962085e-07, "logits/generated": -1.322203516960144, "logits/real": -1.6699758768081665, "logps/generated": -553.3631591796875, "logps/real": -292.0386962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -23.2994327545166, "rewards/margins": 26.353412628173828, "rewards/real": 3.053981304168701, "step": 3660 }, { "epoch": 2.35, "learning_rate": 1.207345971563981e-07, "logits/generated": -1.3638218641281128, "logits/real": -1.5092731714248657, "logps/generated": -617.5296020507812, "logps/real": -289.15185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.00275230407715, "rewards/margins": 29.480844497680664, "rewards/real": 2.4780898094177246, "step": 3670 }, { "epoch": 2.35, "learning_rate": 1.1954976303317534e-07, "logits/generated": -1.2839560508728027, "logits/real": -1.6789973974227905, "logps/generated": -585.281982421875, "logps/real": -344.36383056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.927204132080078, "rewards/margins": 26.435100555419922, "rewards/real": 1.5078972578048706, "step": 3680 }, { "epoch": 2.36, "learning_rate": 1.183649289099526e-07, "logits/generated": -1.2949450016021729, "logits/real": -1.3947932720184326, "logps/generated": -601.6485595703125, "logps/real": -291.6730651855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.565074920654297, "rewards/margins": 28.953100204467773, "rewards/real": 2.388024091720581, "step": 3690 }, { "epoch": 2.37, "learning_rate": 1.1718009478672986e-07, "logits/generated": -1.1880407333374023, "logits/real": -1.4357839822769165, "logps/generated": -598.7822875976562, "logps/real": -284.7917175292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.5440616607666, "rewards/margins": 29.608556747436523, "rewards/real": 3.0644936561584473, "step": 3700 }, { "epoch": 2.37, "learning_rate": 1.159952606635071e-07, "logits/generated": -1.574135661125183, "logits/real": -1.4538342952728271, "logps/generated": -650.85400390625, "logps/real": -298.19183349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.524139404296875, "rewards/margins": 30.310344696044922, "rewards/real": 1.7862030267715454, "step": 3710 }, { "epoch": 2.38, "learning_rate": 1.1481042654028436e-07, "logits/generated": -1.330590009689331, "logits/real": -1.489278793334961, "logps/generated": -613.0658569335938, "logps/real": -291.6548767089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.12112045288086, "rewards/margins": 29.043224334716797, "rewards/real": 1.9221032857894897, "step": 3720 }, { "epoch": 2.39, "learning_rate": 1.136255924170616e-07, "logits/generated": -1.5007903575897217, "logits/real": -1.472931981086731, "logps/generated": -589.7188720703125, "logps/real": -307.58294677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.387720108032227, "rewards/margins": 28.846736907958984, "rewards/real": 2.459017515182495, "step": 3730 }, { "epoch": 2.39, "learning_rate": 1.1244075829383886e-07, "logits/generated": -1.6159569025039673, "logits/real": -1.4722641706466675, "logps/generated": -585.0252685546875, "logps/real": -323.477294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.178213119506836, "rewards/margins": 27.594324111938477, "rewards/real": 2.416109561920166, "step": 3740 }, { "epoch": 2.4, "learning_rate": 1.112559241706161e-07, "logits/generated": -1.1531771421432495, "logits/real": -1.6213915348052979, "logps/generated": -611.3297119140625, "logps/real": -339.04681396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.570720672607422, "rewards/margins": 27.91266441345215, "rewards/real": 2.3419454097747803, "step": 3750 }, { "epoch": 2.41, "learning_rate": 1.1007109004739336e-07, "logits/generated": -1.3176251649856567, "logits/real": -1.4967129230499268, "logps/generated": -608.16748046875, "logps/real": -298.4925231933594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.648395538330078, "rewards/margins": 29.315967559814453, "rewards/real": 2.6675775051116943, "step": 3760 }, { "epoch": 2.41, "learning_rate": 1.0888625592417061e-07, "logits/generated": -1.439353346824646, "logits/real": -1.3691593408584595, "logps/generated": -667.100341796875, "logps/real": -301.4292297363281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -30.206029891967773, "rewards/margins": 32.040687561035156, "rewards/real": 1.8346564769744873, "step": 3770 }, { "epoch": 2.42, "learning_rate": 1.0770142180094787e-07, "logits/generated": -1.5442355871200562, "logits/real": -1.4819167852401733, "logps/generated": -629.334228515625, "logps/real": -290.92889404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.574254989624023, "rewards/margins": 30.4357967376709, "rewards/real": 1.86154305934906, "step": 3780 }, { "epoch": 2.42, "learning_rate": 1.0651658767772511e-07, "logits/generated": -1.2547924518585205, "logits/real": -1.4876326322555542, "logps/generated": -640.6787719726562, "logps/real": -318.9430236816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.206369400024414, "rewards/margins": 29.790756225585938, "rewards/real": 1.5843837261199951, "step": 3790 }, { "epoch": 2.43, "learning_rate": 1.0533175355450237e-07, "logits/generated": -1.6805248260498047, "logits/real": -1.3248226642608643, "logps/generated": -655.8458251953125, "logps/real": -298.82037353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.976150512695312, "rewards/margins": 32.64189910888672, "rewards/real": 2.6657474040985107, "step": 3800 }, { "epoch": 2.44, "learning_rate": 1.0414691943127962e-07, "logits/generated": -1.4990580081939697, "logits/real": -1.3765968084335327, "logps/generated": -620.1951904296875, "logps/real": -304.34722900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.144628524780273, "rewards/margins": 29.235424041748047, "rewards/real": 2.0907957553863525, "step": 3810 }, { "epoch": 2.44, "learning_rate": 1.0296208530805687e-07, "logits/generated": -1.442575216293335, "logits/real": -1.526241660118103, "logps/generated": -626.4366455078125, "logps/real": -305.3477783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.773239135742188, "rewards/margins": 30.12148666381836, "rewards/real": 2.3482470512390137, "step": 3820 }, { "epoch": 2.45, "learning_rate": 1.0177725118483411e-07, "logits/generated": -1.1325329542160034, "logits/real": -1.6365556716918945, "logps/generated": -555.10791015625, "logps/real": -326.60675048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.147449493408203, "rewards/margins": 25.698932647705078, "rewards/real": 1.5514819622039795, "step": 3830 }, { "epoch": 2.46, "learning_rate": 1.0059241706161137e-07, "logits/generated": -1.6170870065689087, "logits/real": -1.3925259113311768, "logps/generated": -654.4223022460938, "logps/real": -307.74847412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.72853660583496, "rewards/margins": 30.62699317932129, "rewards/real": 0.8984566926956177, "step": 3840 }, { "epoch": 2.46, "learning_rate": 9.940758293838862e-08, "logits/generated": -1.2467596530914307, "logits/real": -1.2930128574371338, "logps/generated": -692.4020385742188, "logps/real": -265.07720947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.33624267578125, "rewards/margins": 32.83263397216797, "rewards/real": 1.496396780014038, "step": 3850 }, { "epoch": 2.47, "learning_rate": 9.822274881516588e-08, "logits/generated": -1.1992278099060059, "logits/real": -1.488287329673767, "logps/generated": -644.56396484375, "logps/real": -294.7264404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.09539794921875, "rewards/margins": 30.719635009765625, "rewards/real": 1.6242326498031616, "step": 3860 }, { "epoch": 2.48, "learning_rate": 9.703791469194312e-08, "logits/generated": -1.3583210706710815, "logits/real": -1.5180120468139648, "logps/generated": -587.4781494140625, "logps/real": -310.59600830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.057281494140625, "rewards/margins": 29.170948028564453, "rewards/real": 2.1136634349823, "step": 3870 }, { "epoch": 2.48, "learning_rate": 9.585308056872038e-08, "logits/generated": -1.0821243524551392, "logits/real": -1.5974535942077637, "logps/generated": -608.9141845703125, "logps/real": -336.1514587402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.943729400634766, "rewards/margins": 28.42697525024414, "rewards/real": 1.483243465423584, "step": 3880 }, { "epoch": 2.49, "learning_rate": 9.466824644549763e-08, "logits/generated": -1.353389024734497, "logits/real": -1.5304534435272217, "logps/generated": -612.4140014648438, "logps/real": -311.79327392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.42991065979004, "rewards/margins": 30.651296615600586, "rewards/real": 2.2213876247406006, "step": 3890 }, { "epoch": 2.5, "learning_rate": 9.348341232227488e-08, "logits/generated": -1.5256273746490479, "logits/real": -1.5510895252227783, "logps/generated": -619.5974731445312, "logps/real": -350.5033874511719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.461856842041016, "rewards/margins": 28.82666015625, "rewards/real": 1.3648051023483276, "step": 3900 }, { "epoch": 2.5, "learning_rate": 9.229857819905212e-08, "logits/generated": -1.3603798151016235, "logits/real": -1.4141087532043457, "logps/generated": -622.8385009765625, "logps/real": -260.6363830566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.754039764404297, "rewards/margins": 30.439599990844727, "rewards/real": 2.6855628490448, "step": 3910 }, { "epoch": 2.51, "learning_rate": 9.111374407582938e-08, "logits/generated": -1.3342504501342773, "logits/real": -1.5273411273956299, "logps/generated": -645.1162109375, "logps/real": -263.3978271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.165130615234375, "rewards/margins": 31.719181060791016, "rewards/real": 3.554048538208008, "step": 3920 }, { "epoch": 2.51, "learning_rate": 8.992890995260663e-08, "logits/generated": -1.4757992029190063, "logits/real": -1.634913444519043, "logps/generated": -617.9478759765625, "logps/real": -314.14691162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.34640884399414, "rewards/margins": 30.251697540283203, "rewards/real": 2.905287981033325, "step": 3930 }, { "epoch": 2.52, "learning_rate": 8.874407582938389e-08, "logits/generated": -1.3466564416885376, "logits/real": -1.5977327823638916, "logps/generated": -622.6076049804688, "logps/real": -284.58502197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.513778686523438, "rewards/margins": 31.245441436767578, "rewards/real": 2.7316622734069824, "step": 3940 }, { "epoch": 2.53, "learning_rate": 8.755924170616114e-08, "logits/generated": -1.5380483865737915, "logits/real": -1.7335315942764282, "logps/generated": -588.06982421875, "logps/real": -308.33282470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -24.619550704956055, "rewards/margins": 27.09970474243164, "rewards/real": 2.480154037475586, "step": 3950 }, { "epoch": 2.53, "learning_rate": 8.63744075829384e-08, "logits/generated": -1.6097685098648071, "logits/real": -1.6136796474456787, "logps/generated": -657.5408935546875, "logps/real": -284.61810302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.70947265625, "rewards/margins": 31.561779022216797, "rewards/real": 2.852311611175537, "step": 3960 }, { "epoch": 2.54, "learning_rate": 8.518957345971564e-08, "logits/generated": -1.4339498281478882, "logits/real": -1.7437279224395752, "logps/generated": -584.6334228515625, "logps/real": -270.9336853027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -25.41365623474121, "rewards/margins": 28.195552825927734, "rewards/real": 2.781895399093628, "step": 3970 }, { "epoch": 2.55, "learning_rate": 8.40047393364929e-08, "logits/generated": -1.2891197204589844, "logits/real": -1.7689502239227295, "logps/generated": -588.9421997070312, "logps/real": -325.373291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.317241668701172, "rewards/margins": 28.019933700561523, "rewards/real": 1.7026898860931396, "step": 3980 }, { "epoch": 2.55, "learning_rate": 8.281990521327013e-08, "logits/generated": -1.5776088237762451, "logits/real": -1.588118314743042, "logps/generated": -660.5469360351562, "logps/real": -273.999755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -30.123193740844727, "rewards/margins": 32.533695220947266, "rewards/real": 2.41049861907959, "step": 3990 }, { "epoch": 2.56, "learning_rate": 8.163507109004738e-08, "logits/generated": -1.5306708812713623, "logits/real": -1.6259679794311523, "logps/generated": -639.56298828125, "logps/real": -343.0978088378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.859777450561523, "rewards/margins": 29.583415985107422, "rewards/real": 1.7236419916152954, "step": 4000 }, { "epoch": 2.57, "learning_rate": 8.045023696682464e-08, "logits/generated": -1.4984711408615112, "logits/real": -1.6365811824798584, "logps/generated": -625.7791748046875, "logps/real": -301.8245544433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.093109130859375, "rewards/margins": 30.440902709960938, "rewards/real": 2.347792387008667, "step": 4010 }, { "epoch": 2.57, "learning_rate": 7.926540284360189e-08, "logits/generated": -1.5656770467758179, "logits/real": -1.7118957042694092, "logps/generated": -602.1696166992188, "logps/real": -290.41668701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.142230987548828, "rewards/margins": 29.259593963623047, "rewards/real": 2.1173670291900635, "step": 4020 }, { "epoch": 2.58, "learning_rate": 7.808056872037915e-08, "logits/generated": -1.35243821144104, "logits/real": -1.5588561296463013, "logps/generated": -645.7506103515625, "logps/real": -301.26165771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.24430274963379, "rewards/margins": 30.519649505615234, "rewards/real": 1.2753461599349976, "step": 4030 }, { "epoch": 2.58, "learning_rate": 7.689573459715639e-08, "logits/generated": -1.5671889781951904, "logits/real": -1.6195242404937744, "logps/generated": -576.3297119140625, "logps/real": -281.00543212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.25125503540039, "rewards/margins": 28.661273956298828, "rewards/real": 2.4100213050842285, "step": 4040 }, { "epoch": 2.59, "learning_rate": 7.571090047393365e-08, "logits/generated": -1.384068250656128, "logits/real": -1.5691883563995361, "logps/generated": -633.6173706054688, "logps/real": -302.5428161621094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.69685935974121, "rewards/margins": 31.717281341552734, "rewards/real": 3.020423412322998, "step": 4050 }, { "epoch": 2.6, "learning_rate": 7.45260663507109e-08, "logits/generated": -1.2888624668121338, "logits/real": -1.674544095993042, "logps/generated": -571.7451782226562, "logps/real": -274.7939147949219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.30230712890625, "rewards/margins": 28.19662094116211, "rewards/real": 1.8943134546279907, "step": 4060 }, { "epoch": 2.6, "learning_rate": 7.334123222748814e-08, "logits/generated": -1.357360601425171, "logits/real": -1.6954199075698853, "logps/generated": -610.406494140625, "logps/real": -282.0722961425781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.547073364257812, "rewards/margins": 30.05575180053711, "rewards/real": 1.5086801052093506, "step": 4070 }, { "epoch": 2.61, "learning_rate": 7.215639810426539e-08, "logits/generated": -1.3595961332321167, "logits/real": -1.5079976320266724, "logps/generated": -631.2000732421875, "logps/real": -248.12158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.73921775817871, "rewards/margins": 31.44719886779785, "rewards/real": 2.707982063293457, "step": 4080 }, { "epoch": 2.62, "learning_rate": 7.097156398104265e-08, "logits/generated": -1.2010899782180786, "logits/real": -1.526855230331421, "logps/generated": -676.9327392578125, "logps/real": -274.713623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.303884506225586, "rewards/margins": 33.11503219604492, "rewards/real": 1.8111457824707031, "step": 4090 }, { "epoch": 2.62, "learning_rate": 6.97867298578199e-08, "logits/generated": -1.382359504699707, "logits/real": -1.6755861043930054, "logps/generated": -634.60791015625, "logps/real": -349.86358642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.610187530517578, "rewards/margins": 30.647531509399414, "rewards/real": 2.0373458862304688, "step": 4100 }, { "epoch": 2.63, "learning_rate": 6.860189573459716e-08, "logits/generated": -1.2465829849243164, "logits/real": -1.4604103565216064, "logps/generated": -647.5206298828125, "logps/real": -309.0945129394531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.66015625, "rewards/margins": 31.68243408203125, "rewards/real": 2.022277355194092, "step": 4110 }, { "epoch": 2.64, "learning_rate": 6.74170616113744e-08, "logits/generated": -1.3056268692016602, "logits/real": -1.4986435174942017, "logps/generated": -685.72509765625, "logps/real": -317.60296630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.90750503540039, "rewards/margins": 32.936302185058594, "rewards/real": 1.02879798412323, "step": 4120 }, { "epoch": 2.64, "learning_rate": 6.623222748815166e-08, "logits/generated": -1.4349414110183716, "logits/real": -1.3471622467041016, "logps/generated": -644.2572021484375, "logps/real": -292.59356689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.81112289428711, "rewards/margins": 31.459768295288086, "rewards/real": 1.6486486196517944, "step": 4130 }, { "epoch": 2.65, "learning_rate": 6.504739336492891e-08, "logits/generated": -1.3193347454071045, "logits/real": -1.4731448888778687, "logps/generated": -676.6284790039062, "logps/real": -278.0181579589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -32.47968292236328, "rewards/margins": 34.45676803588867, "rewards/real": 1.9770870208740234, "step": 4140 }, { "epoch": 2.66, "learning_rate": 6.386255924170615e-08, "logits/generated": -1.4920405149459839, "logits/real": -1.5615254640579224, "logps/generated": -666.9974365234375, "logps/real": -260.97607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.99616050720215, "rewards/margins": 34.06662368774414, "rewards/real": 2.0704617500305176, "step": 4150 }, { "epoch": 2.66, "learning_rate": 6.26777251184834e-08, "logits/generated": -1.3855369091033936, "logits/real": -1.456311821937561, "logps/generated": -605.8585205078125, "logps/real": -314.4039001464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.266433715820312, "rewards/margins": 30.011083602905273, "rewards/real": 1.74465012550354, "step": 4160 }, { "epoch": 2.67, "learning_rate": 6.149289099526066e-08, "logits/generated": -1.4731667041778564, "logits/real": -1.5671155452728271, "logps/generated": -602.9185791015625, "logps/real": -313.18890380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.158035278320312, "rewards/margins": 29.691242218017578, "rewards/real": 1.533206820487976, "step": 4170 }, { "epoch": 2.67, "learning_rate": 6.030805687203791e-08, "logits/generated": -1.4378567934036255, "logits/real": -1.5310169458389282, "logps/generated": -612.8416748046875, "logps/real": -331.9561767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.270498275756836, "rewards/margins": 29.416418075561523, "rewards/real": 1.145920991897583, "step": 4180 }, { "epoch": 2.68, "learning_rate": 5.912322274881516e-08, "logits/generated": -1.3654181957244873, "logits/real": -1.5570844411849976, "logps/generated": -638.9073486328125, "logps/real": -313.94573974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.935832977294922, "rewards/margins": 30.70108413696289, "rewards/real": 1.7652504444122314, "step": 4190 }, { "epoch": 2.69, "learning_rate": 5.793838862559241e-08, "logits/generated": -1.2255719900131226, "logits/real": -1.6582701206207275, "logps/generated": -676.3460083007812, "logps/real": -256.4339904785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.53457260131836, "rewards/margins": 33.86653518676758, "rewards/real": 2.331956624984741, "step": 4200 }, { "epoch": 2.69, "learning_rate": 5.6753554502369666e-08, "logits/generated": -1.321560025215149, "logits/real": -1.535123586654663, "logps/generated": -633.9786376953125, "logps/real": -316.16729736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.52561378479004, "rewards/margins": 31.2910099029541, "rewards/real": 1.765394926071167, "step": 4210 }, { "epoch": 2.7, "learning_rate": 5.556872037914691e-08, "logits/generated": -1.1623280048370361, "logits/real": -1.4255826473236084, "logps/generated": -636.1403198242188, "logps/real": -278.0563049316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -30.736618041992188, "rewards/margins": 31.96358871459961, "rewards/real": 1.2269766330718994, "step": 4220 }, { "epoch": 2.71, "learning_rate": 5.4383886255924165e-08, "logits/generated": -1.7334274053573608, "logits/real": -1.3684265613555908, "logps/generated": -691.8567504882812, "logps/real": -281.8096618652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.39247131347656, "rewards/margins": 35.019840240478516, "rewards/real": 1.6273695230484009, "step": 4230 }, { "epoch": 2.71, "learning_rate": 5.319905213270142e-08, "logits/generated": -1.4239901304244995, "logits/real": -1.427843451499939, "logps/generated": -697.7626953125, "logps/real": -342.26702880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.1848030090332, "rewards/margins": 33.265724182128906, "rewards/real": 0.08092144876718521, "step": 4240 }, { "epoch": 2.72, "learning_rate": 5.201421800947867e-08, "logits/generated": -1.3704322576522827, "logits/real": -1.4075593948364258, "logps/generated": -689.48876953125, "logps/real": -274.60333251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -32.80956268310547, "rewards/margins": 35.31214141845703, "rewards/real": 2.5025734901428223, "step": 4250 }, { "epoch": 2.73, "learning_rate": 5.082938388625592e-08, "logits/generated": -1.57331383228302, "logits/real": -1.4759578704833984, "logps/generated": -668.5193481445312, "logps/real": -323.9053955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -32.33473587036133, "rewards/margins": 33.67780303955078, "rewards/real": 1.3430638313293457, "step": 4260 }, { "epoch": 2.73, "learning_rate": 4.964454976303317e-08, "logits/generated": -1.6090238094329834, "logits/real": -1.5114948749542236, "logps/generated": -628.6146240234375, "logps/real": -331.726806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.931575775146484, "rewards/margins": 30.34956932067871, "rewards/real": 0.417994886636734, "step": 4270 }, { "epoch": 2.74, "learning_rate": 4.845971563981042e-08, "logits/generated": -1.3274040222167969, "logits/real": -1.4224097728729248, "logps/generated": -665.4379272460938, "logps/real": -342.1637878417969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.560409545898438, "rewards/margins": 32.40532684326172, "rewards/real": 0.8449224233627319, "step": 4280 }, { "epoch": 2.74, "learning_rate": 4.7274881516587676e-08, "logits/generated": -1.2782102823257446, "logits/real": -1.4752721786499023, "logps/generated": -665.552978515625, "logps/real": -311.4815368652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.476404190063477, "rewards/margins": 32.4752311706543, "rewards/real": 0.9988266229629517, "step": 4290 }, { "epoch": 2.75, "learning_rate": 4.609004739336492e-08, "logits/generated": -1.5071719884872437, "logits/real": -1.4472278356552124, "logps/generated": -654.7091064453125, "logps/real": -304.1969299316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.26534080505371, "rewards/margins": 32.228668212890625, "rewards/real": 0.9633296132087708, "step": 4300 }, { "epoch": 2.76, "learning_rate": 4.4905213270142176e-08, "logits/generated": -1.4495041370391846, "logits/real": -1.3939712047576904, "logps/generated": -639.5325317382812, "logps/real": -304.6444091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -30.296783447265625, "rewards/margins": 31.05965805053711, "rewards/real": 0.7628771662712097, "step": 4310 }, { "epoch": 2.76, "learning_rate": 4.372037914691943e-08, "logits/generated": -1.6121094226837158, "logits/real": -1.4426803588867188, "logps/generated": -646.7779541015625, "logps/real": -295.48101806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -30.99783706665039, "rewards/margins": 32.061744689941406, "rewards/real": 1.0639095306396484, "step": 4320 }, { "epoch": 2.77, "learning_rate": 4.253554502369668e-08, "logits/generated": -1.4167978763580322, "logits/real": -1.4320820569992065, "logps/generated": -634.4563598632812, "logps/real": -302.04510498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -30.510427474975586, "rewards/margins": 30.8123722076416, "rewards/real": 0.3019457757472992, "step": 4330 }, { "epoch": 2.78, "learning_rate": 4.135071090047393e-08, "logits/generated": -1.3798516988754272, "logits/real": -1.5143741369247437, "logps/generated": -614.4584350585938, "logps/real": -318.1557922363281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.524084091186523, "rewards/margins": 30.829952239990234, "rewards/real": 1.3058717250823975, "step": 4340 }, { "epoch": 2.78, "learning_rate": 4.016587677725118e-08, "logits/generated": -1.2446117401123047, "logits/real": -1.295252799987793, "logps/generated": -679.209716796875, "logps/real": -255.12344360351562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.41288375854492, "rewards/margins": 35.52049255371094, "rewards/real": 2.107609987258911, "step": 4350 }, { "epoch": 2.79, "learning_rate": 3.8981042654028434e-08, "logits/generated": -1.244602084159851, "logits/real": -1.5427472591400146, "logps/generated": -659.0145874023438, "logps/real": -333.52227783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.68132972717285, "rewards/margins": 32.54865646362305, "rewards/real": 0.8673309087753296, "step": 4360 }, { "epoch": 2.8, "learning_rate": 3.779620853080569e-08, "logits/generated": -1.563761591911316, "logits/real": -1.4602763652801514, "logps/generated": -659.84814453125, "logps/real": -325.8492736816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.91091537475586, "rewards/margins": 32.29651641845703, "rewards/real": 0.38559943437576294, "step": 4370 }, { "epoch": 2.8, "learning_rate": 3.661137440758294e-08, "logits/generated": -1.562272548675537, "logits/real": -1.2975503206253052, "logps/generated": -676.2703857421875, "logps/real": -274.28497314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.26447296142578, "rewards/margins": 33.826171875, "rewards/real": 0.5616960525512695, "step": 4380 }, { "epoch": 2.81, "learning_rate": 3.5426540284360186e-08, "logits/generated": -1.2435789108276367, "logits/real": -1.368475079536438, "logps/generated": -702.6434326171875, "logps/real": -307.38519287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -34.470088958740234, "rewards/margins": 35.56832504272461, "rewards/real": 1.0982415676116943, "step": 4390 }, { "epoch": 2.82, "learning_rate": 3.424170616113744e-08, "logits/generated": -1.341025948524475, "logits/real": -1.500510573387146, "logps/generated": -663.18115234375, "logps/real": -308.29083251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.19363021850586, "rewards/margins": 32.23539352416992, "rewards/real": 1.0417646169662476, "step": 4400 }, { "epoch": 2.82, "learning_rate": 3.305687203791469e-08, "logits/generated": -1.3434271812438965, "logits/real": -1.5266337394714355, "logps/generated": -688.38525390625, "logps/real": -302.8508605957031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.6732177734375, "rewards/margins": 34.55495834350586, "rewards/real": 0.8817389607429504, "step": 4410 }, { "epoch": 2.83, "learning_rate": 3.1872037914691945e-08, "logits/generated": -1.299809455871582, "logits/real": -1.5283958911895752, "logps/generated": -656.4718627929688, "logps/real": -302.70330810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.77448081970215, "rewards/margins": 32.886390686035156, "rewards/real": 1.1119110584259033, "step": 4420 }, { "epoch": 2.83, "learning_rate": 3.068720379146919e-08, "logits/generated": -1.2048507928848267, "logits/real": -1.536926507949829, "logps/generated": -664.2643432617188, "logps/real": -283.11236572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -32.95454788208008, "rewards/margins": 34.54044723510742, "rewards/real": 1.585902452468872, "step": 4430 }, { "epoch": 2.84, "learning_rate": 2.9502369668246444e-08, "logits/generated": -1.3344858884811401, "logits/real": -1.537445306777954, "logps/generated": -638.8182983398438, "logps/real": -298.3970031738281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.375442504882812, "rewards/margins": 32.65177917480469, "rewards/real": 1.2763371467590332, "step": 4440 }, { "epoch": 2.85, "learning_rate": 2.8317535545023697e-08, "logits/generated": -1.3904526233673096, "logits/real": -1.5245314836502075, "logps/generated": -652.20361328125, "logps/real": -306.90478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.721105575561523, "rewards/margins": 33.16910171508789, "rewards/real": 1.4479949474334717, "step": 4450 }, { "epoch": 2.85, "learning_rate": 2.7132701421800947e-08, "logits/generated": -1.5399653911590576, "logits/real": -1.4249584674835205, "logps/generated": -672.1215209960938, "logps/real": -309.1695861816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -32.100067138671875, "rewards/margins": 33.0582389831543, "rewards/real": 0.9581753015518188, "step": 4460 }, { "epoch": 2.86, "learning_rate": 2.59478672985782e-08, "logits/generated": -1.1655782461166382, "logits/real": -1.5251566171646118, "logps/generated": -640.125, "logps/real": -310.0162658691406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.95326805114746, "rewards/margins": 31.368066787719727, "rewards/real": 1.4147999286651611, "step": 4470 }, { "epoch": 2.87, "learning_rate": 2.476303317535545e-08, "logits/generated": -1.172010064125061, "logits/real": -1.4067021608352661, "logps/generated": -666.5604858398438, "logps/real": -299.98040771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -32.463706970214844, "rewards/margins": 34.147499084472656, "rewards/real": 1.6837971210479736, "step": 4480 }, { "epoch": 2.87, "learning_rate": 2.3578199052132702e-08, "logits/generated": -1.190029263496399, "logits/real": -1.6297121047973633, "logps/generated": -612.0564575195312, "logps/real": -320.60369873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.345834732055664, "rewards/margins": 30.781299591064453, "rewards/real": 1.4354665279388428, "step": 4490 }, { "epoch": 2.88, "learning_rate": 2.239336492890995e-08, "logits/generated": -1.1696540117263794, "logits/real": -1.6007992029190063, "logps/generated": -653.1607666015625, "logps/real": -303.2743225097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.995059967041016, "rewards/margins": 33.59009552001953, "rewards/real": 1.5950433015823364, "step": 4500 }, { "epoch": 2.89, "learning_rate": 2.1208530805687202e-08, "logits/generated": -1.485703706741333, "logits/real": -1.4554283618927002, "logps/generated": -724.2576293945312, "logps/real": -299.9760437011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -34.655555725097656, "rewards/margins": 35.341522216796875, "rewards/real": 0.6859633326530457, "step": 4510 }, { "epoch": 2.89, "learning_rate": 2.002369668246445e-08, "logits/generated": -1.2735097408294678, "logits/real": -1.5833942890167236, "logps/generated": -677.175048828125, "logps/real": -311.3417053222656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -32.748931884765625, "rewards/margins": 33.196983337402344, "rewards/real": 0.44805091619491577, "step": 4520 }, { "epoch": 2.9, "learning_rate": 1.8838862559241704e-08, "logits/generated": -1.3113911151885986, "logits/real": -1.5203197002410889, "logps/generated": -657.1876220703125, "logps/real": -291.06573486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.46697998046875, "rewards/margins": 32.98638153076172, "rewards/real": 1.5193980932235718, "step": 4530 }, { "epoch": 2.9, "learning_rate": 1.7654028436018954e-08, "logits/generated": -1.3836846351623535, "logits/real": -1.481992483139038, "logps/generated": -686.9658203125, "logps/real": -320.5834655761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.389808654785156, "rewards/margins": 34.02761459350586, "rewards/real": 0.6378093957901001, "step": 4540 }, { "epoch": 2.91, "learning_rate": 1.6469194312796207e-08, "logits/generated": -1.3578197956085205, "logits/real": -1.570059061050415, "logps/generated": -617.54541015625, "logps/real": -316.9815368652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.717233657836914, "rewards/margins": 30.001474380493164, "rewards/real": 0.2842446267604828, "step": 4550 }, { "epoch": 2.92, "learning_rate": 1.528436018957346e-08, "logits/generated": -1.0007681846618652, "logits/real": -1.5491087436676025, "logps/generated": -626.058349609375, "logps/real": -322.63885498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.955204010009766, "rewards/margins": 30.372121810913086, "rewards/real": 0.4169168472290039, "step": 4560 }, { "epoch": 2.92, "learning_rate": 1.409952606635071e-08, "logits/generated": -1.311229944229126, "logits/real": -1.5780706405639648, "logps/generated": -663.3609619140625, "logps/real": -298.3038024902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -32.45374298095703, "rewards/margins": 32.698936462402344, "rewards/real": 0.24519185721874237, "step": 4570 }, { "epoch": 2.93, "learning_rate": 1.2914691943127961e-08, "logits/generated": -1.044814109802246, "logits/real": -1.6311661005020142, "logps/generated": -641.4202880859375, "logps/real": -279.78692626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -32.022247314453125, "rewards/margins": 34.16607666015625, "rewards/real": 2.1438281536102295, "step": 4580 }, { "epoch": 2.94, "learning_rate": 1.1729857819905212e-08, "logits/generated": -1.4656040668487549, "logits/real": -1.5026826858520508, "logps/generated": -721.88818359375, "logps/real": -334.41729736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -35.15550231933594, "rewards/margins": 35.81855010986328, "rewards/real": 0.6630504727363586, "step": 4590 }, { "epoch": 2.94, "learning_rate": 1.0545023696682464e-08, "logits/generated": -1.3504948616027832, "logits/real": -1.4049073457717896, "logps/generated": -686.4686889648438, "logps/real": -305.3426208496094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -34.25273513793945, "rewards/margins": 34.83153533935547, "rewards/real": 0.57881098985672, "step": 4600 }, { "epoch": 2.95, "learning_rate": 9.360189573459715e-09, "logits/generated": -1.3581933975219727, "logits/real": -1.4892122745513916, "logps/generated": -695.0593872070312, "logps/real": -302.53668212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.506256103515625, "rewards/margins": 35.15620040893555, "rewards/real": 1.6499462127685547, "step": 4610 }, { "epoch": 2.96, "learning_rate": 8.175355450236966e-09, "logits/generated": -1.326404333114624, "logits/real": -1.554738163948059, "logps/generated": -623.974609375, "logps/real": -320.1224365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.265674591064453, "rewards/margins": 30.284282684326172, "rewards/real": 1.018613576889038, "step": 4620 }, { "epoch": 2.96, "learning_rate": 6.990521327014218e-09, "logits/generated": -1.3114399909973145, "logits/real": -1.6282100677490234, "logps/generated": -608.7630004882812, "logps/real": -289.3063659667969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -30.360219955444336, "rewards/margins": 31.15984535217285, "rewards/real": 0.7996245622634888, "step": 4630 }, { "epoch": 2.97, "learning_rate": 5.805687203791469e-09, "logits/generated": -1.5137460231781006, "logits/real": -1.4329473972320557, "logps/generated": -700.6799926757812, "logps/real": -319.1806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -34.5382080078125, "rewards/margins": 34.899410247802734, "rewards/real": 0.361206591129303, "step": 4640 }, { "epoch": 2.98, "learning_rate": 4.62085308056872e-09, "logits/generated": -1.3561103343963623, "logits/real": -1.4579746723175049, "logps/generated": -704.6149291992188, "logps/real": -319.0995788574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -34.43476867675781, "rewards/margins": 34.943641662597656, "rewards/real": 0.5088704824447632, "step": 4650 }, { "epoch": 2.98, "learning_rate": 3.4360189573459714e-09, "logits/generated": -1.2713743448257446, "logits/real": -1.6401087045669556, "logps/generated": -697.460205078125, "logps/real": -367.7886962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.62900924682617, "rewards/margins": 33.49079895019531, "rewards/real": -0.13821372389793396, "step": 4660 }, { "epoch": 2.99, "learning_rate": 2.2511848341232227e-09, "logits/generated": -1.000103235244751, "logits/real": -1.5592067241668701, "logps/generated": -652.8013916015625, "logps/real": -282.12872314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -32.3394889831543, "rewards/margins": 33.116920471191406, "rewards/real": 0.7774264216423035, "step": 4670 }, { "epoch": 2.99, "learning_rate": 1.0663507109004738e-09, "logits/generated": -1.109363079071045, "logits/real": -1.4930717945098877, "logps/generated": -675.4696044921875, "logps/real": -347.3270263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.0277099609375, "rewards/margins": 33.342735290527344, "rewards/real": 0.3150270879268646, "step": 4680 }, { "epoch": 3.0, "step": 4689, "total_flos": 0.0, "train_loss": 0.008253485002314443, "train_runtime": 37056.5761, "train_samples_per_second": 4.048, "train_steps_per_second": 0.127 } ], "logging_steps": 10, "max_steps": 4689, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }