diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 1.0, + "epoch": 2.0, "eval_steps": 500, - "global_step": 4039, + "global_step": 8078, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -44436,6 +44436,44435 @@ "loss": 0.278, "rejected_geometric_mean": -3.0530829429626465, "step": 4039 + }, + { + "chosen_geometric_mean": -1.032576084136963, + "epoch": 1.0, + "grad_norm": 12.5, + "learning_rate": 2.5038938911543724e-06, + "log_odds": 6.236103057861328, + "log_odds_ratio": -0.1057060956954956, + "loss": 0.2669, + "rejected_geometric_mean": -6.887216567993164, + "step": 4040 + }, + { + "chosen_geometric_mean": -0.9852403998374939, + "epoch": 1.0, + "grad_norm": 3.15625, + "learning_rate": 2.5029204188823864e-06, + "log_odds": 3.9882471561431885, + "log_odds_ratio": -0.12486662715673447, + "loss": 0.3069, + "rejected_geometric_mean": -4.5984721183776855, + "step": 4041 + }, + { + "chosen_geometric_mean": -1.0373573303222656, + "epoch": 1.0, + "grad_norm": 5.0, + "learning_rate": 2.501946946167594e-06, + "log_odds": 3.44156813621521, + "log_odds_ratio": -0.2094205617904663, + "loss": 0.2342, + "rejected_geometric_mean": -4.124581336975098, + "step": 4042 + }, + { + "chosen_geometric_mean": -0.9225136041641235, + "epoch": 1.0, + "grad_norm": 27.0, + "learning_rate": 2.5009734731575978e-06, + "log_odds": 2.7462143898010254, + "log_odds_ratio": -0.1938311606645584, + "loss": 0.2642, + "rejected_geometric_mean": -3.3063180446624756, + "step": 4043 + }, + { + "chosen_geometric_mean": -1.0822197198867798, + "epoch": 1.0, + "grad_norm": 2.25, + "learning_rate": 2.5e-06, + "log_odds": 7.494528770446777, + "log_odds_ratio": -0.002591225318610668, + "loss": 0.2736, + "rejected_geometric_mean": -8.126688957214355, + "step": 4044 + }, + { + "chosen_geometric_mean": -1.04124915599823, + "epoch": 1.0, + "grad_norm": 74.5, + "learning_rate": 2.4990265268424026e-06, + "log_odds": 13.407389640808105, + "log_odds_ratio": -0.11087391525506973, + "loss": 0.2705, + "rejected_geometric_mean": -14.077622413635254, + "step": 4045 + }, + { + "chosen_geometric_mean": -0.8627548217773438, + "epoch": 1.0, + "grad_norm": 3.78125, + "learning_rate": 2.4980530538324065e-06, + "log_odds": 1.622637391090393, + "log_odds_ratio": -0.41980278491973877, + "loss": 0.2919, + "rejected_geometric_mean": -2.290034770965576, + "step": 4046 + }, + { + "chosen_geometric_mean": -0.8965572118759155, + "epoch": 1.0, + "grad_norm": 11.875, + "learning_rate": 2.497079581117614e-06, + "log_odds": 3.6886377334594727, + "log_odds_ratio": -0.1456298828125, + "loss": 0.2622, + "rejected_geometric_mean": -4.15746545791626, + "step": 4047 + }, + { + "chosen_geometric_mean": -0.8712268471717834, + "epoch": 1.0, + "grad_norm": 5.875, + "learning_rate": 2.4961061088456276e-06, + "log_odds": 4.1989030838012695, + "log_odds_ratio": -0.3798523545265198, + "loss": 0.2496, + "rejected_geometric_mean": -4.751762390136719, + "step": 4048 + }, + { + "chosen_geometric_mean": -0.9312019348144531, + "epoch": 1.0, + "grad_norm": 3.4375, + "learning_rate": 2.495132637164049e-06, + "log_odds": 4.923520088195801, + "log_odds_ratio": -0.02399747632443905, + "loss": 0.2677, + "rejected_geometric_mean": -5.3385009765625, + "step": 4049 + }, + { + "chosen_geometric_mean": -0.8416034579277039, + "epoch": 1.0, + "grad_norm": 29.125, + "learning_rate": 2.494159166220479e-06, + "log_odds": 2.943739414215088, + "log_odds_ratio": -0.1809787005186081, + "loss": 0.306, + "rejected_geometric_mean": -3.3637051582336426, + "step": 4050 + }, + { + "chosen_geometric_mean": -0.9368268251419067, + "epoch": 1.0, + "grad_norm": 43.25, + "learning_rate": 2.4931856961625197e-06, + "log_odds": 4.274911880493164, + "log_odds_ratio": -0.08413659036159515, + "loss": 0.3191, + "rejected_geometric_mean": -4.739860534667969, + "step": 4051 + }, + { + "chosen_geometric_mean": -0.7819088101387024, + "epoch": 1.0, + "grad_norm": 6.6875, + "learning_rate": 2.492212227137772e-06, + "log_odds": 9.876632690429688, + "log_odds_ratio": -0.005625968798995018, + "loss": 0.2541, + "rejected_geometric_mean": -10.016488075256348, + "step": 4052 + }, + { + "chosen_geometric_mean": -0.9730819463729858, + "epoch": 1.0, + "grad_norm": 14.5, + "learning_rate": 2.491238759293837e-06, + "log_odds": 7.1616363525390625, + "log_odds_ratio": -0.010909154079854488, + "loss": 0.3177, + "rejected_geometric_mean": -7.654173851013184, + "step": 4053 + }, + { + "chosen_geometric_mean": -1.085881233215332, + "epoch": 1.0, + "grad_norm": 5.5625, + "learning_rate": 2.4902652927783174e-06, + "log_odds": 3.805521249771118, + "log_odds_ratio": -0.15023577213287354, + "loss": 0.2686, + "rejected_geometric_mean": -4.564700126647949, + "step": 4054 + }, + { + "chosen_geometric_mean": -1.0073119401931763, + "epoch": 1.0, + "grad_norm": 7.6875, + "learning_rate": 2.489291827738813e-06, + "log_odds": 5.495388507843018, + "log_odds_ratio": -0.289874792098999, + "loss": 0.2783, + "rejected_geometric_mean": -6.249464988708496, + "step": 4055 + }, + { + "chosen_geometric_mean": -0.8572990894317627, + "epoch": 1.0, + "grad_norm": 20.625, + "learning_rate": 2.488318364322925e-06, + "log_odds": 2.0767345428466797, + "log_odds_ratio": -0.26790928840637207, + "loss": 0.2869, + "rejected_geometric_mean": -2.5841054916381836, + "step": 4056 + }, + { + "chosen_geometric_mean": -0.9774379730224609, + "epoch": 1.0, + "grad_norm": 23.875, + "learning_rate": 2.4873449026782527e-06, + "log_odds": 4.20993709564209, + "log_odds_ratio": -0.29258567094802856, + "loss": 0.2686, + "rejected_geometric_mean": -4.8788957595825195, + "step": 4057 + }, + { + "chosen_geometric_mean": -0.824551522731781, + "epoch": 1.0, + "grad_norm": 3.53125, + "learning_rate": 2.486371442952396e-06, + "log_odds": 3.6478209495544434, + "log_odds_ratio": -0.12657296657562256, + "loss": 0.2712, + "rejected_geometric_mean": -3.9622936248779297, + "step": 4058 + }, + { + "chosen_geometric_mean": -1.0603785514831543, + "epoch": 1.0, + "grad_norm": 27.375, + "learning_rate": 2.4853979852929575e-06, + "log_odds": 5.4022417068481445, + "log_odds_ratio": -0.1579105108976364, + "loss": 0.2815, + "rejected_geometric_mean": -6.100971698760986, + "step": 4059 + }, + { + "chosen_geometric_mean": -1.060446858406067, + "epoch": 1.01, + "grad_norm": 15.375, + "learning_rate": 2.484424529847534e-06, + "log_odds": 6.8320088386535645, + "log_odds_ratio": -0.20699436962604523, + "loss": 0.3067, + "rejected_geometric_mean": -7.567814350128174, + "step": 4060 + }, + { + "chosen_geometric_mean": -0.9825919270515442, + "epoch": 1.01, + "grad_norm": 1.9140625, + "learning_rate": 2.483451076763726e-06, + "log_odds": 7.3580708503723145, + "log_odds_ratio": -0.12933394312858582, + "loss": 0.2509, + "rejected_geometric_mean": -7.873027801513672, + "step": 4061 + }, + { + "chosen_geometric_mean": -0.6917474865913391, + "epoch": 1.01, + "grad_norm": 16.25, + "learning_rate": 2.482477626189132e-06, + "log_odds": 3.1554012298583984, + "log_odds_ratio": -0.04355010762810707, + "loss": 0.2869, + "rejected_geometric_mean": -3.178518295288086, + "step": 4062 + }, + { + "chosen_geometric_mean": -0.9068239331245422, + "epoch": 1.01, + "grad_norm": 10.25, + "learning_rate": 2.481504178271351e-06, + "log_odds": 1.9460999965667725, + "log_odds_ratio": -0.26015594601631165, + "loss": 0.2663, + "rejected_geometric_mean": -2.51008677482605, + "step": 4063 + }, + { + "chosen_geometric_mean": -1.0707310438156128, + "epoch": 1.01, + "grad_norm": 17.75, + "learning_rate": 2.4805307331579786e-06, + "log_odds": 4.616857051849365, + "log_odds_ratio": -0.10755431652069092, + "loss": 0.2645, + "rejected_geometric_mean": -5.306272506713867, + "step": 4064 + }, + { + "chosen_geometric_mean": -1.0274155139923096, + "epoch": 1.01, + "grad_norm": 5.4375, + "learning_rate": 2.479557290996616e-06, + "log_odds": 7.461104393005371, + "log_odds_ratio": -0.0011419845977798104, + "loss": 0.234, + "rejected_geometric_mean": -8.032247543334961, + "step": 4065 + }, + { + "chosen_geometric_mean": -0.9821656346321106, + "epoch": 1.01, + "grad_norm": 2.21875, + "learning_rate": 2.4785838519348587e-06, + "log_odds": 6.086422920227051, + "log_odds_ratio": -0.09640153497457504, + "loss": 0.2676, + "rejected_geometric_mean": -6.6447062492370605, + "step": 4066 + }, + { + "chosen_geometric_mean": -0.8807372450828552, + "epoch": 1.01, + "grad_norm": 38.75, + "learning_rate": 2.477610416120303e-06, + "log_odds": 6.714106559753418, + "log_odds_ratio": -0.030937986448407173, + "loss": 0.2521, + "rejected_geometric_mean": -7.0430402755737305, + "step": 4067 + }, + { + "chosen_geometric_mean": -1.1970384120941162, + "epoch": 1.01, + "grad_norm": 29.25, + "learning_rate": 2.476636983700546e-06, + "log_odds": 6.198063373565674, + "log_odds_ratio": -0.10806506872177124, + "loss": 0.3217, + "rejected_geometric_mean": -7.054513931274414, + "step": 4068 + }, + { + "chosen_geometric_mean": -0.9723073244094849, + "epoch": 1.01, + "grad_norm": 1.9609375, + "learning_rate": 2.4756635548231826e-06, + "log_odds": 2.2322769165039062, + "log_odds_ratio": -0.2651270031929016, + "loss": 0.2488, + "rejected_geometric_mean": -2.906937599182129, + "step": 4069 + }, + { + "chosen_geometric_mean": -1.0041974782943726, + "epoch": 1.01, + "grad_norm": 6.28125, + "learning_rate": 2.474690129635809e-06, + "log_odds": 3.8182168006896973, + "log_odds_ratio": -0.17720046639442444, + "loss": 0.3151, + "rejected_geometric_mean": -4.454788684844971, + "step": 4070 + }, + { + "chosen_geometric_mean": -0.9324648380279541, + "epoch": 1.01, + "grad_norm": 7.78125, + "learning_rate": 2.473716708286019e-06, + "log_odds": 7.80345344543457, + "log_odds_ratio": -0.006270714104175568, + "loss": 0.2268, + "rejected_geometric_mean": -8.222935676574707, + "step": 4071 + }, + { + "chosen_geometric_mean": -1.0329657793045044, + "epoch": 1.01, + "grad_norm": 2.0625, + "learning_rate": 2.472743290921407e-06, + "log_odds": 2.152337074279785, + "log_odds_ratio": -0.37860485911369324, + "loss": 0.2865, + "rejected_geometric_mean": -2.9976229667663574, + "step": 4072 + }, + { + "chosen_geometric_mean": -1.0327513217926025, + "epoch": 1.01, + "grad_norm": 29.0, + "learning_rate": 2.471769877689567e-06, + "log_odds": 3.5900702476501465, + "log_odds_ratio": -0.12252770364284515, + "loss": 0.2733, + "rejected_geometric_mean": -4.231071949005127, + "step": 4073 + }, + { + "chosen_geometric_mean": -1.2585783004760742, + "epoch": 1.01, + "grad_norm": 2.78125, + "learning_rate": 2.4707964687380906e-06, + "log_odds": 3.2610490322113037, + "log_odds_ratio": -0.12780827283859253, + "loss": 0.3015, + "rejected_geometric_mean": -4.2333221435546875, + "step": 4074 + }, + { + "chosen_geometric_mean": -0.8251069784164429, + "epoch": 1.01, + "grad_norm": 2.046875, + "learning_rate": 2.4698230642145702e-06, + "log_odds": 4.877774715423584, + "log_odds_ratio": -0.24186469614505768, + "loss": 0.3034, + "rejected_geometric_mean": -5.33580207824707, + "step": 4075 + }, + { + "chosen_geometric_mean": -0.941246509552002, + "epoch": 1.01, + "grad_norm": 8.9375, + "learning_rate": 2.468849664266598e-06, + "log_odds": 2.655892848968506, + "log_odds_ratio": -0.18407569825649261, + "loss": 0.2188, + "rejected_geometric_mean": -3.227970600128174, + "step": 4076 + }, + { + "chosen_geometric_mean": -0.7609311938285828, + "epoch": 1.01, + "grad_norm": 15.0, + "learning_rate": 2.467876269041764e-06, + "log_odds": 4.656299114227295, + "log_odds_ratio": -0.2954305410385132, + "loss": 0.2742, + "rejected_geometric_mean": -5.045105934143066, + "step": 4077 + }, + { + "chosen_geometric_mean": -1.1358892917633057, + "epoch": 1.01, + "grad_norm": 8.25, + "learning_rate": 2.466902878687659e-06, + "log_odds": 1.9095286130905151, + "log_odds_ratio": -0.2471218705177307, + "loss": 0.2998, + "rejected_geometric_mean": -2.7480435371398926, + "step": 4078 + }, + { + "chosen_geometric_mean": -1.0318106412887573, + "epoch": 1.01, + "grad_norm": 2.9375, + "learning_rate": 2.4659294933518724e-06, + "log_odds": 2.3482701778411865, + "log_odds_ratio": -0.2323789745569229, + "loss": 0.2531, + "rejected_geometric_mean": -3.0405123233795166, + "step": 4079 + }, + { + "chosen_geometric_mean": -1.0364632606506348, + "epoch": 1.01, + "grad_norm": 21.125, + "learning_rate": 2.464956113181992e-06, + "log_odds": 1.9300315380096436, + "log_odds_ratio": -0.33392706513404846, + "loss": 0.2968, + "rejected_geometric_mean": -2.7153687477111816, + "step": 4080 + }, + { + "chosen_geometric_mean": -1.1072956323623657, + "epoch": 1.01, + "grad_norm": 2.5, + "learning_rate": 2.463982738325606e-06, + "log_odds": 7.174313545227051, + "log_odds_ratio": -0.09602756053209305, + "loss": 0.2529, + "rejected_geometric_mean": -7.862322807312012, + "step": 4081 + }, + { + "chosen_geometric_mean": -1.085841178894043, + "epoch": 1.01, + "grad_norm": 2.125, + "learning_rate": 2.4630093689303023e-06, + "log_odds": 2.211392879486084, + "log_odds_ratio": -0.19952666759490967, + "loss": 0.226, + "rejected_geometric_mean": -2.968748092651367, + "step": 4082 + }, + { + "chosen_geometric_mean": -0.8175736665725708, + "epoch": 1.01, + "grad_norm": 2.296875, + "learning_rate": 2.462036005143665e-06, + "log_odds": 3.8668253421783447, + "log_odds_ratio": -0.22376111149787903, + "loss": 0.2246, + "rejected_geometric_mean": -4.283326625823975, + "step": 4083 + }, + { + "chosen_geometric_mean": -1.0108113288879395, + "epoch": 1.01, + "grad_norm": 1.953125, + "learning_rate": 2.4610626471132815e-06, + "log_odds": 2.657975673675537, + "log_odds_ratio": -0.2166605293750763, + "loss": 0.2354, + "rejected_geometric_mean": -3.352141857147217, + "step": 4084 + }, + { + "chosen_geometric_mean": -1.1058309078216553, + "epoch": 1.01, + "grad_norm": 2.15625, + "learning_rate": 2.4600892949867344e-06, + "log_odds": 5.041900634765625, + "log_odds_ratio": -0.14813198149204254, + "loss": 0.2656, + "rejected_geometric_mean": -5.791479110717773, + "step": 4085 + }, + { + "chosen_geometric_mean": -0.956976056098938, + "epoch": 1.01, + "grad_norm": 18.25, + "learning_rate": 2.459115948911609e-06, + "log_odds": 10.435335159301758, + "log_odds_ratio": -0.0018386133015155792, + "loss": 0.2608, + "rejected_geometric_mean": -10.887320518493652, + "step": 4086 + }, + { + "chosen_geometric_mean": -1.050078272819519, + "epoch": 1.01, + "grad_norm": 3.171875, + "learning_rate": 2.458142609035487e-06, + "log_odds": 10.808710098266602, + "log_odds_ratio": -0.09652449190616608, + "loss": 0.2955, + "rejected_geometric_mean": -11.483241081237793, + "step": 4087 + }, + { + "chosen_geometric_mean": -0.9106979370117188, + "epoch": 1.01, + "grad_norm": 5.21875, + "learning_rate": 2.45716927550595e-06, + "log_odds": 7.111710071563721, + "log_odds_ratio": -0.017651213333010674, + "loss": 0.2801, + "rejected_geometric_mean": -7.504660129547119, + "step": 4088 + }, + { + "chosen_geometric_mean": -1.0509618520736694, + "epoch": 1.01, + "grad_norm": 33.0, + "learning_rate": 2.4561959484705797e-06, + "log_odds": 6.426477909088135, + "log_odds_ratio": -0.019853778183460236, + "loss": 0.3315, + "rejected_geometric_mean": -7.052201271057129, + "step": 4089 + }, + { + "chosen_geometric_mean": -1.0532153844833374, + "epoch": 1.01, + "grad_norm": 8.75, + "learning_rate": 2.4552226280769544e-06, + "log_odds": 11.435300827026367, + "log_odds_ratio": -0.16227206587791443, + "loss": 0.2888, + "rejected_geometric_mean": -12.076589584350586, + "step": 4090 + }, + { + "chosen_geometric_mean": -0.9860643148422241, + "epoch": 1.01, + "grad_norm": 34.75, + "learning_rate": 2.454249314472653e-06, + "log_odds": 3.666450023651123, + "log_odds_ratio": -0.3506086468696594, + "loss": 0.3464, + "rejected_geometric_mean": -4.390811920166016, + "step": 4091 + }, + { + "chosen_geometric_mean": -0.7714091539382935, + "epoch": 1.01, + "grad_norm": 3.890625, + "learning_rate": 2.453276007805254e-06, + "log_odds": 4.98291540145874, + "log_odds_ratio": -0.08420901000499725, + "loss": 0.2474, + "rejected_geometric_mean": -5.200064182281494, + "step": 4092 + }, + { + "chosen_geometric_mean": -1.2957735061645508, + "epoch": 1.01, + "grad_norm": 32.0, + "learning_rate": 2.4523027082223346e-06, + "log_odds": 3.1112449169158936, + "log_odds_ratio": -0.27142658829689026, + "loss": 0.3118, + "rejected_geometric_mean": -4.100571155548096, + "step": 4093 + }, + { + "chosen_geometric_mean": -1.0401854515075684, + "epoch": 1.01, + "grad_norm": 5.4375, + "learning_rate": 2.451329415871469e-06, + "log_odds": 5.8935065269470215, + "log_odds_ratio": -0.19632871448993683, + "loss": 0.2575, + "rejected_geometric_mean": -6.615365028381348, + "step": 4094 + }, + { + "chosen_geometric_mean": -1.6536407470703125, + "epoch": 1.01, + "grad_norm": 32.25, + "learning_rate": 2.450356130900232e-06, + "log_odds": 11.12424087524414, + "log_odds_ratio": -0.01989477314054966, + "loss": 0.3207, + "rejected_geometric_mean": -12.503125190734863, + "step": 4095 + }, + { + "chosen_geometric_mean": -1.3880709409713745, + "epoch": 1.01, + "grad_norm": 15.375, + "learning_rate": 2.449382853456197e-06, + "log_odds": 3.714289665222168, + "log_odds_ratio": -0.27403610944747925, + "loss": 0.3418, + "rejected_geometric_mean": -4.919970512390137, + "step": 4096 + }, + { + "chosen_geometric_mean": -1.093536615371704, + "epoch": 1.01, + "grad_norm": 36.0, + "learning_rate": 2.448409583686937e-06, + "log_odds": 2.2799606323242188, + "log_odds_ratio": -0.5480943918228149, + "loss": 0.2883, + "rejected_geometric_mean": -3.1143863201141357, + "step": 4097 + }, + { + "chosen_geometric_mean": -0.8813204169273376, + "epoch": 1.01, + "grad_norm": 3.515625, + "learning_rate": 2.4474363217400226e-06, + "log_odds": 3.4850668907165527, + "log_odds_ratio": -0.12472474575042725, + "loss": 0.2597, + "rejected_geometric_mean": -3.9166903495788574, + "step": 4098 + }, + { + "chosen_geometric_mean": -1.1435580253601074, + "epoch": 1.01, + "grad_norm": 3.46875, + "learning_rate": 2.446463067763023e-06, + "log_odds": 6.814829349517822, + "log_odds_ratio": -0.057106148451566696, + "loss": 0.2837, + "rejected_geometric_mean": -7.583918571472168, + "step": 4099 + }, + { + "chosen_geometric_mean": -0.8927726745605469, + "epoch": 1.02, + "grad_norm": 1.953125, + "learning_rate": 2.445489821903508e-06, + "log_odds": 10.779716491699219, + "log_odds_ratio": -0.0037550190463662148, + "loss": 0.262, + "rejected_geometric_mean": -11.122394561767578, + "step": 4100 + }, + { + "chosen_geometric_mean": -2.265416145324707, + "epoch": 1.02, + "grad_norm": 41.0, + "learning_rate": 2.4445165843090443e-06, + "log_odds": 6.295352935791016, + "log_odds_ratio": -0.05396007001399994, + "loss": 0.3046, + "rejected_geometric_mean": -8.222820281982422, + "step": 4101 + }, + { + "chosen_geometric_mean": -0.8614225387573242, + "epoch": 1.02, + "grad_norm": 7.625, + "learning_rate": 2.4435433551271975e-06, + "log_odds": 5.164654731750488, + "log_odds_ratio": -0.061557646840810776, + "loss": 0.256, + "rejected_geometric_mean": -5.513949871063232, + "step": 4102 + }, + { + "chosen_geometric_mean": -1.087679147720337, + "epoch": 1.02, + "grad_norm": 3.21875, + "learning_rate": 2.4425701345055342e-06, + "log_odds": 2.6666886806488037, + "log_odds_ratio": -0.2804887890815735, + "loss": 0.2532, + "rejected_geometric_mean": -3.528554677963257, + "step": 4103 + }, + { + "chosen_geometric_mean": -1.0399304628372192, + "epoch": 1.02, + "grad_norm": 28.875, + "learning_rate": 2.4415969225916166e-06, + "log_odds": 2.1095402240753174, + "log_odds_ratio": -0.369994193315506, + "loss": 0.3329, + "rejected_geometric_mean": -2.9286017417907715, + "step": 4104 + }, + { + "chosen_geometric_mean": -0.9595988392829895, + "epoch": 1.02, + "grad_norm": 3.203125, + "learning_rate": 2.4406237195330083e-06, + "log_odds": 9.28968620300293, + "log_odds_ratio": -0.10687068104743958, + "loss": 0.2952, + "rejected_geometric_mean": -9.78636646270752, + "step": 4105 + }, + { + "chosen_geometric_mean": -0.7723495364189148, + "epoch": 1.02, + "grad_norm": 4.96875, + "learning_rate": 2.4396505254772685e-06, + "log_odds": 10.186222076416016, + "log_odds_ratio": -0.002864719135686755, + "loss": 0.2252, + "rejected_geometric_mean": -10.340181350708008, + "step": 4106 + }, + { + "chosen_geometric_mean": -1.1294916868209839, + "epoch": 1.02, + "grad_norm": 4.0625, + "learning_rate": 2.4386773405719565e-06, + "log_odds": 5.5004472732543945, + "log_odds_ratio": -0.013295117765665054, + "loss": 0.2325, + "rejected_geometric_mean": -6.210745334625244, + "step": 4107 + }, + { + "chosen_geometric_mean": -1.0239901542663574, + "epoch": 1.02, + "grad_norm": 4.21875, + "learning_rate": 2.437704164964634e-06, + "log_odds": 9.79537582397461, + "log_odds_ratio": -0.06321559101343155, + "loss": 0.2819, + "rejected_geometric_mean": -10.397411346435547, + "step": 4108 + }, + { + "chosen_geometric_mean": -1.084986925125122, + "epoch": 1.02, + "grad_norm": 5.65625, + "learning_rate": 2.436730998802854e-06, + "log_odds": 14.664180755615234, + "log_odds_ratio": -0.0001438246836187318, + "loss": 0.2338, + "rejected_geometric_mean": -15.325910568237305, + "step": 4109 + }, + { + "chosen_geometric_mean": -0.8500440716743469, + "epoch": 1.02, + "grad_norm": 4.21875, + "learning_rate": 2.435757842234174e-06, + "log_odds": 3.46797776222229, + "log_odds_ratio": -0.2850722670555115, + "loss": 0.2508, + "rejected_geometric_mean": -3.9919095039367676, + "step": 4110 + }, + { + "chosen_geometric_mean": -0.9574741125106812, + "epoch": 1.02, + "grad_norm": 31.875, + "learning_rate": 2.4347846954061464e-06, + "log_odds": 5.443019390106201, + "log_odds_ratio": -0.16401995718479156, + "loss": 0.3442, + "rejected_geometric_mean": -6.03977632522583, + "step": 4111 + }, + { + "chosen_geometric_mean": -1.0424867868423462, + "epoch": 1.02, + "grad_norm": 6.4375, + "learning_rate": 2.4338115584663256e-06, + "log_odds": 5.526619911193848, + "log_odds_ratio": -0.0497957244515419, + "loss": 0.2424, + "rejected_geometric_mean": -6.141599655151367, + "step": 4112 + }, + { + "chosen_geometric_mean": -1.1665300130844116, + "epoch": 1.02, + "grad_norm": 12.1875, + "learning_rate": 2.4328384315622596e-06, + "log_odds": 5.950192451477051, + "log_odds_ratio": -0.11281286180019379, + "loss": 0.2613, + "rejected_geometric_mean": -6.781169891357422, + "step": 4113 + }, + { + "chosen_geometric_mean": -0.8295322060585022, + "epoch": 1.02, + "grad_norm": 59.0, + "learning_rate": 2.4318653148415007e-06, + "log_odds": 4.864276885986328, + "log_odds_ratio": -0.1593114137649536, + "loss": 0.287, + "rejected_geometric_mean": -5.243992805480957, + "step": 4114 + }, + { + "chosen_geometric_mean": -1.0106637477874756, + "epoch": 1.02, + "grad_norm": 9.375, + "learning_rate": 2.4308922084515956e-06, + "log_odds": 8.399429321289062, + "log_odds_ratio": -0.011132875457406044, + "loss": 0.2274, + "rejected_geometric_mean": -8.953076362609863, + "step": 4115 + }, + { + "chosen_geometric_mean": -1.2244508266448975, + "epoch": 1.02, + "grad_norm": 12.375, + "learning_rate": 2.4299191125400903e-06, + "log_odds": 5.933377265930176, + "log_odds_ratio": -0.03811435401439667, + "loss": 0.276, + "rejected_geometric_mean": -6.763876914978027, + "step": 4116 + }, + { + "chosen_geometric_mean": -1.069981575012207, + "epoch": 1.02, + "grad_norm": 48.25, + "learning_rate": 2.42894602725453e-06, + "log_odds": 3.7425220012664795, + "log_odds_ratio": -0.33164897561073303, + "loss": 0.277, + "rejected_geometric_mean": -4.516233444213867, + "step": 4117 + }, + { + "chosen_geometric_mean": -0.9321835041046143, + "epoch": 1.02, + "grad_norm": 2.40625, + "learning_rate": 2.4279729527424566e-06, + "log_odds": 2.632903814315796, + "log_odds_ratio": -0.19399704039096832, + "loss": 0.2552, + "rejected_geometric_mean": -3.1994144916534424, + "step": 4118 + }, + { + "chosen_geometric_mean": -1.0967462062835693, + "epoch": 1.02, + "grad_norm": 7.90625, + "learning_rate": 2.426999889151413e-06, + "log_odds": 3.6905386447906494, + "log_odds_ratio": -0.22188234329223633, + "loss": 0.3219, + "rejected_geometric_mean": -4.504354000091553, + "step": 4119 + }, + { + "chosen_geometric_mean": -1.0046802759170532, + "epoch": 1.02, + "grad_norm": 2.640625, + "learning_rate": 2.426026836628939e-06, + "log_odds": 1.359316110610962, + "log_odds_ratio": -0.4249616861343384, + "loss": 0.2861, + "rejected_geometric_mean": -2.1686525344848633, + "step": 4120 + }, + { + "chosen_geometric_mean": -0.930744469165802, + "epoch": 1.02, + "grad_norm": 9.0625, + "learning_rate": 2.425053795322572e-06, + "log_odds": 4.456335067749023, + "log_odds_ratio": -0.15904635190963745, + "loss": 0.2898, + "rejected_geometric_mean": -4.933312892913818, + "step": 4121 + }, + { + "chosen_geometric_mean": -1.0715141296386719, + "epoch": 1.02, + "grad_norm": 3.0, + "learning_rate": 2.424080765379849e-06, + "log_odds": 1.8790379762649536, + "log_odds_ratio": -0.31351611018180847, + "loss": 0.2301, + "rejected_geometric_mean": -2.7218661308288574, + "step": 4122 + }, + { + "chosen_geometric_mean": -1.1361119747161865, + "epoch": 1.02, + "grad_norm": 7.5, + "learning_rate": 2.423107746948304e-06, + "log_odds": 2.0303478240966797, + "log_odds_ratio": -0.382843017578125, + "loss": 0.2666, + "rejected_geometric_mean": -2.989816904067993, + "step": 4123 + }, + { + "chosen_geometric_mean": -0.9746403694152832, + "epoch": 1.02, + "grad_norm": 29.0, + "learning_rate": 2.4221347401754695e-06, + "log_odds": 4.667475700378418, + "log_odds_ratio": -0.019156593829393387, + "loss": 0.2861, + "rejected_geometric_mean": -5.168614864349365, + "step": 4124 + }, + { + "chosen_geometric_mean": -1.0672602653503418, + "epoch": 1.02, + "grad_norm": 20.75, + "learning_rate": 2.421161745208878e-06, + "log_odds": 5.2143049240112305, + "log_odds_ratio": -0.25055521726608276, + "loss": 0.262, + "rejected_geometric_mean": -6.003803730010986, + "step": 4125 + }, + { + "chosen_geometric_mean": -0.8986666798591614, + "epoch": 1.02, + "grad_norm": 2.109375, + "learning_rate": 2.4201887621960587e-06, + "log_odds": 5.720343589782715, + "log_odds_ratio": -0.25363805890083313, + "loss": 0.2641, + "rejected_geometric_mean": -6.241567134857178, + "step": 4126 + }, + { + "chosen_geometric_mean": -1.0772888660430908, + "epoch": 1.02, + "grad_norm": 2.28125, + "learning_rate": 2.419215791284539e-06, + "log_odds": 4.494674205780029, + "log_odds_ratio": -0.06367422640323639, + "loss": 0.2179, + "rejected_geometric_mean": -5.1824212074279785, + "step": 4127 + }, + { + "chosen_geometric_mean": -1.1569385528564453, + "epoch": 1.02, + "grad_norm": 8.75, + "learning_rate": 2.4182428326218437e-06, + "log_odds": 7.142346382141113, + "log_odds_ratio": -0.1698089987039566, + "loss": 0.2598, + "rejected_geometric_mean": -7.946963310241699, + "step": 4128 + }, + { + "chosen_geometric_mean": -1.149315595626831, + "epoch": 1.02, + "grad_norm": 28.75, + "learning_rate": 2.417269886355497e-06, + "log_odds": 3.2588050365448, + "log_odds_ratio": -0.3449159860610962, + "loss": 0.2819, + "rejected_geometric_mean": -4.202543258666992, + "step": 4129 + }, + { + "chosen_geometric_mean": -0.9193463921546936, + "epoch": 1.02, + "grad_norm": 1.9296875, + "learning_rate": 2.4162969526330225e-06, + "log_odds": 9.091978073120117, + "log_odds_ratio": -0.011035620234906673, + "loss": 0.2495, + "rejected_geometric_mean": -9.48751449584961, + "step": 4130 + }, + { + "chosen_geometric_mean": -0.8762140870094299, + "epoch": 1.02, + "grad_norm": 1.9609375, + "learning_rate": 2.4153240316019393e-06, + "log_odds": 3.4824023246765137, + "log_odds_ratio": -0.14690814912319183, + "loss": 0.2208, + "rejected_geometric_mean": -3.8942742347717285, + "step": 4131 + }, + { + "chosen_geometric_mean": -0.9606154561042786, + "epoch": 1.02, + "grad_norm": 2.046875, + "learning_rate": 2.4143511234097653e-06, + "log_odds": 3.8898258209228516, + "log_odds_ratio": -0.149440735578537, + "loss": 0.2751, + "rejected_geometric_mean": -4.4618916511535645, + "step": 4132 + }, + { + "chosen_geometric_mean": -1.2356294393539429, + "epoch": 1.02, + "grad_norm": 4.53125, + "learning_rate": 2.4133782282040166e-06, + "log_odds": 5.827523708343506, + "log_odds_ratio": -0.08492179960012436, + "loss": 0.2654, + "rejected_geometric_mean": -6.738334655761719, + "step": 4133 + }, + { + "chosen_geometric_mean": -0.9784294366836548, + "epoch": 1.02, + "grad_norm": 2.328125, + "learning_rate": 2.412405346132208e-06, + "log_odds": 6.238846302032471, + "log_odds_ratio": -0.1052030548453331, + "loss": 0.2678, + "rejected_geometric_mean": -6.8260698318481445, + "step": 4134 + }, + { + "chosen_geometric_mean": -0.963284969329834, + "epoch": 1.02, + "grad_norm": 12.375, + "learning_rate": 2.4114324773418505e-06, + "log_odds": 3.7131359577178955, + "log_odds_ratio": -0.22962434589862823, + "loss": 0.3118, + "rejected_geometric_mean": -4.320764064788818, + "step": 4135 + }, + { + "chosen_geometric_mean": -1.0083884000778198, + "epoch": 1.02, + "grad_norm": 31.25, + "learning_rate": 2.4104596219804567e-06, + "log_odds": 3.707455635070801, + "log_odds_ratio": -0.14567682147026062, + "loss": 0.2666, + "rejected_geometric_mean": -4.315249443054199, + "step": 4136 + }, + { + "chosen_geometric_mean": -1.0399432182312012, + "epoch": 1.02, + "grad_norm": 3.515625, + "learning_rate": 2.4094867801955328e-06, + "log_odds": 5.088918685913086, + "log_odds_ratio": -0.008471949957311153, + "loss": 0.2126, + "rejected_geometric_mean": -5.692198753356934, + "step": 4137 + }, + { + "chosen_geometric_mean": -0.959205150604248, + "epoch": 1.02, + "grad_norm": 12.1875, + "learning_rate": 2.408513952134587e-06, + "log_odds": 3.0061535835266113, + "log_odds_ratio": -0.23922790586948395, + "loss": 0.2697, + "rejected_geometric_mean": -3.5782511234283447, + "step": 4138 + }, + { + "chosen_geometric_mean": -1.10860013961792, + "epoch": 1.02, + "grad_norm": 3.640625, + "learning_rate": 2.407541137945121e-06, + "log_odds": 4.445407390594482, + "log_odds_ratio": -0.1182405948638916, + "loss": 0.2787, + "rejected_geometric_mean": -5.208262920379639, + "step": 4139 + }, + { + "chosen_geometric_mean": -0.9645581245422363, + "epoch": 1.03, + "grad_norm": 13.125, + "learning_rate": 2.406568337774637e-06, + "log_odds": 6.925860404968262, + "log_odds_ratio": -0.11206578463315964, + "loss": 0.2822, + "rejected_geometric_mean": -7.417013645172119, + "step": 4140 + }, + { + "chosen_geometric_mean": -0.821159839630127, + "epoch": 1.03, + "grad_norm": 4.4375, + "learning_rate": 2.405595551770637e-06, + "log_odds": 2.840385675430298, + "log_odds_ratio": -0.08912665396928787, + "loss": 0.2336, + "rejected_geometric_mean": -3.1439785957336426, + "step": 4141 + }, + { + "chosen_geometric_mean": -0.9358105659484863, + "epoch": 1.03, + "grad_norm": 17.25, + "learning_rate": 2.404622780080617e-06, + "log_odds": 5.995820999145508, + "log_odds_ratio": -0.00459929509088397, + "loss": 0.2278, + "rejected_geometric_mean": -6.412734031677246, + "step": 4142 + }, + { + "chosen_geometric_mean": -0.9317271113395691, + "epoch": 1.03, + "grad_norm": 2.734375, + "learning_rate": 2.4036500228520734e-06, + "log_odds": 5.297292709350586, + "log_odds_ratio": -0.1443096101284027, + "loss": 0.2371, + "rejected_geometric_mean": -5.808916091918945, + "step": 4143 + }, + { + "chosen_geometric_mean": -1.093625783920288, + "epoch": 1.03, + "grad_norm": 2.765625, + "learning_rate": 2.4026772802324983e-06, + "log_odds": 3.2014734745025635, + "log_odds_ratio": -0.09525994956493378, + "loss": 0.2693, + "rejected_geometric_mean": -3.9191696643829346, + "step": 4144 + }, + { + "chosen_geometric_mean": -0.977484941482544, + "epoch": 1.03, + "grad_norm": 2.15625, + "learning_rate": 2.401704552369385e-06, + "log_odds": 3.399280548095703, + "log_odds_ratio": -0.1687585711479187, + "loss": 0.2582, + "rejected_geometric_mean": -3.9733123779296875, + "step": 4145 + }, + { + "chosen_geometric_mean": -1.0812798738479614, + "epoch": 1.03, + "grad_norm": 4.0, + "learning_rate": 2.400731839410219e-06, + "log_odds": 1.444443941116333, + "log_odds_ratio": -0.34272417426109314, + "loss": 0.2873, + "rejected_geometric_mean": -2.3001201152801514, + "step": 4146 + }, + { + "chosen_geometric_mean": -0.7616859674453735, + "epoch": 1.03, + "grad_norm": 17.25, + "learning_rate": 2.3997591415024902e-06, + "log_odds": 11.37112808227539, + "log_odds_ratio": -0.15882179141044617, + "loss": 0.2693, + "rejected_geometric_mean": -11.674161911010742, + "step": 4147 + }, + { + "chosen_geometric_mean": -1.0971076488494873, + "epoch": 1.03, + "grad_norm": 7.4375, + "learning_rate": 2.398786458793682e-06, + "log_odds": 3.594637155532837, + "log_odds_ratio": -0.20859578251838684, + "loss": 0.2559, + "rejected_geometric_mean": -4.383378028869629, + "step": 4148 + }, + { + "chosen_geometric_mean": -1.035150170326233, + "epoch": 1.03, + "grad_norm": 3.625, + "learning_rate": 2.397813791431276e-06, + "log_odds": 4.795741081237793, + "log_odds_ratio": -0.09224223345518112, + "loss": 0.2241, + "rejected_geometric_mean": -5.397989273071289, + "step": 4149 + }, + { + "chosen_geometric_mean": -1.0757982730865479, + "epoch": 1.03, + "grad_norm": 19.5, + "learning_rate": 2.396841139562752e-06, + "log_odds": 3.31885027885437, + "log_odds_ratio": -0.30230164527893066, + "loss": 0.3267, + "rejected_geometric_mean": -4.137886047363281, + "step": 4150 + }, + { + "chosen_geometric_mean": -1.0308666229248047, + "epoch": 1.03, + "grad_norm": 2.796875, + "learning_rate": 2.395868503335587e-06, + "log_odds": 5.0465192794799805, + "log_odds_ratio": -0.07005332410335541, + "loss": 0.2422, + "rejected_geometric_mean": -5.649436950683594, + "step": 4151 + }, + { + "chosen_geometric_mean": -1.5234181880950928, + "epoch": 1.03, + "grad_norm": 21.625, + "learning_rate": 2.3948958828972567e-06, + "log_odds": 6.747418403625488, + "log_odds_ratio": -0.29361292719841003, + "loss": 0.2767, + "rejected_geometric_mean": -8.09635066986084, + "step": 4152 + }, + { + "chosen_geometric_mean": -0.890708327293396, + "epoch": 1.03, + "grad_norm": 2.390625, + "learning_rate": 2.3939232783952336e-06, + "log_odds": 3.9449524879455566, + "log_odds_ratio": -0.14245566725730896, + "loss": 0.2261, + "rejected_geometric_mean": -4.394620895385742, + "step": 4153 + }, + { + "chosen_geometric_mean": -1.170823097229004, + "epoch": 1.03, + "grad_norm": 29.75, + "learning_rate": 2.3929506899769877e-06, + "log_odds": 3.8251540660858154, + "log_odds_ratio": -0.2583025395870209, + "loss": 0.2924, + "rejected_geometric_mean": -4.676087379455566, + "step": 4154 + }, + { + "chosen_geometric_mean": -1.3261257410049438, + "epoch": 1.03, + "grad_norm": 44.25, + "learning_rate": 2.3919781177899872e-06, + "log_odds": 6.954912185668945, + "log_odds_ratio": -0.009611543267965317, + "loss": 0.3105, + "rejected_geometric_mean": -7.901501178741455, + "step": 4155 + }, + { + "chosen_geometric_mean": -0.9590448141098022, + "epoch": 1.03, + "grad_norm": 3.71875, + "learning_rate": 2.3910055619816966e-06, + "log_odds": 5.673028945922852, + "log_odds_ratio": -0.06064898520708084, + "loss": 0.2323, + "rejected_geometric_mean": -6.1771039962768555, + "step": 4156 + }, + { + "chosen_geometric_mean": -0.9624530076980591, + "epoch": 1.03, + "grad_norm": 3.421875, + "learning_rate": 2.3900330226995786e-06, + "log_odds": 1.9359309673309326, + "log_odds_ratio": -0.18559205532073975, + "loss": 0.2384, + "rejected_geometric_mean": -2.516561269760132, + "step": 4157 + }, + { + "chosen_geometric_mean": -0.9201478362083435, + "epoch": 1.03, + "grad_norm": 4.90625, + "learning_rate": 2.3890605000910946e-06, + "log_odds": 10.215784072875977, + "log_odds_ratio": -0.0007389324018731713, + "loss": 0.2888, + "rejected_geometric_mean": -10.612753868103027, + "step": 4158 + }, + { + "chosen_geometric_mean": -0.9363408088684082, + "epoch": 1.03, + "grad_norm": 19.625, + "learning_rate": 2.3880879943037015e-06, + "log_odds": 5.625199317932129, + "log_odds_ratio": -0.2399873435497284, + "loss": 0.296, + "rejected_geometric_mean": -6.236469268798828, + "step": 4159 + }, + { + "chosen_geometric_mean": -1.004982590675354, + "epoch": 1.03, + "grad_norm": 2.21875, + "learning_rate": 2.387115505484855e-06, + "log_odds": 7.08013391494751, + "log_odds_ratio": -0.05731431394815445, + "loss": 0.2713, + "rejected_geometric_mean": -7.594874382019043, + "step": 4160 + }, + { + "chosen_geometric_mean": -0.9418231248855591, + "epoch": 1.03, + "grad_norm": 3.140625, + "learning_rate": 2.3861430337820084e-06, + "log_odds": 4.585811614990234, + "log_odds_ratio": -0.30091261863708496, + "loss": 0.2844, + "rejected_geometric_mean": -5.248771667480469, + "step": 4161 + }, + { + "chosen_geometric_mean": -0.903039813041687, + "epoch": 1.03, + "grad_norm": 9.875, + "learning_rate": 2.385170579342609e-06, + "log_odds": 4.806095123291016, + "log_odds_ratio": -0.26429882645606995, + "loss": 0.2794, + "rejected_geometric_mean": -5.390941143035889, + "step": 4162 + }, + { + "chosen_geometric_mean": -0.9060953259468079, + "epoch": 1.03, + "grad_norm": 2.296875, + "learning_rate": 2.384198142314107e-06, + "log_odds": 9.4428071975708, + "log_odds_ratio": -0.054190900176763535, + "loss": 0.2506, + "rejected_geometric_mean": -9.87306022644043, + "step": 4163 + }, + { + "chosen_geometric_mean": -0.9499926567077637, + "epoch": 1.03, + "grad_norm": 5.75, + "learning_rate": 2.383225722843947e-06, + "log_odds": 4.6347246170043945, + "log_odds_ratio": -0.1512947529554367, + "loss": 0.2761, + "rejected_geometric_mean": -5.210719585418701, + "step": 4164 + }, + { + "chosen_geometric_mean": -1.0925688743591309, + "epoch": 1.03, + "grad_norm": 4.71875, + "learning_rate": 2.38225332107957e-06, + "log_odds": 2.1476964950561523, + "log_odds_ratio": -0.3466789722442627, + "loss": 0.254, + "rejected_geometric_mean": -2.9641222953796387, + "step": 4165 + }, + { + "chosen_geometric_mean": -1.0310583114624023, + "epoch": 1.03, + "grad_norm": 25.125, + "learning_rate": 2.3812809371684157e-06, + "log_odds": 11.794047355651855, + "log_odds_ratio": -0.004238212015479803, + "loss": 0.3044, + "rejected_geometric_mean": -12.349753379821777, + "step": 4166 + }, + { + "chosen_geometric_mean": -0.894486665725708, + "epoch": 1.03, + "grad_norm": 4.40625, + "learning_rate": 2.3803085712579204e-06, + "log_odds": 6.531561851501465, + "log_odds_ratio": -0.24111902713775635, + "loss": 0.2453, + "rejected_geometric_mean": -7.070481300354004, + "step": 4167 + }, + { + "chosen_geometric_mean": -0.8800004720687866, + "epoch": 1.03, + "grad_norm": 3.5, + "learning_rate": 2.37933622349552e-06, + "log_odds": 3.644700050354004, + "log_odds_ratio": -0.3444140553474426, + "loss": 0.2566, + "rejected_geometric_mean": -4.236724853515625, + "step": 4168 + }, + { + "chosen_geometric_mean": -1.11698579788208, + "epoch": 1.03, + "grad_norm": 47.0, + "learning_rate": 2.3783638940286438e-06, + "log_odds": 5.287091255187988, + "log_odds_ratio": -0.25337764620780945, + "loss": 0.2372, + "rejected_geometric_mean": -6.061650276184082, + "step": 4169 + }, + { + "chosen_geometric_mean": -1.013028621673584, + "epoch": 1.03, + "grad_norm": 2.5625, + "learning_rate": 2.3773915830047213e-06, + "log_odds": 7.5916924476623535, + "log_odds_ratio": -0.1690911054611206, + "loss": 0.2518, + "rejected_geometric_mean": -8.243692398071289, + "step": 4170 + }, + { + "chosen_geometric_mean": -1.0567543506622314, + "epoch": 1.03, + "grad_norm": 7.84375, + "learning_rate": 2.376419290571178e-06, + "log_odds": 6.360910415649414, + "log_odds_ratio": -0.3285728991031647, + "loss": 0.2791, + "rejected_geometric_mean": -7.190565586090088, + "step": 4171 + }, + { + "chosen_geometric_mean": -0.8058913350105286, + "epoch": 1.03, + "grad_norm": 6.6875, + "learning_rate": 2.375447016875437e-06, + "log_odds": 4.535360813140869, + "log_odds_ratio": -0.07407382875680923, + "loss": 0.2402, + "rejected_geometric_mean": -4.788016319274902, + "step": 4172 + }, + { + "chosen_geometric_mean": -0.8576314449310303, + "epoch": 1.03, + "grad_norm": 2.34375, + "learning_rate": 2.374474762064917e-06, + "log_odds": 4.567502975463867, + "log_odds_ratio": -0.2044646441936493, + "loss": 0.255, + "rejected_geometric_mean": -5.028105735778809, + "step": 4173 + }, + { + "chosen_geometric_mean": -1.2853314876556396, + "epoch": 1.03, + "grad_norm": 4.25, + "learning_rate": 2.373502526287037e-06, + "log_odds": 6.563918113708496, + "log_odds_ratio": -0.3564279079437256, + "loss": 0.2407, + "rejected_geometric_mean": -7.617671966552734, + "step": 4174 + }, + { + "chosen_geometric_mean": -1.2174193859100342, + "epoch": 1.03, + "grad_norm": 50.5, + "learning_rate": 2.37253030968921e-06, + "log_odds": 1.098503589630127, + "log_odds_ratio": -0.3355062007904053, + "loss": 0.3085, + "rejected_geometric_mean": -2.123430013656616, + "step": 4175 + }, + { + "chosen_geometric_mean": -0.8776988983154297, + "epoch": 1.03, + "grad_norm": 12.625, + "learning_rate": 2.3715581124188476e-06, + "log_odds": 10.261385917663574, + "log_odds_ratio": -0.12170626223087311, + "loss": 0.2112, + "rejected_geometric_mean": -10.629107475280762, + "step": 4176 + }, + { + "chosen_geometric_mean": -1.103510856628418, + "epoch": 1.03, + "grad_norm": 18.0, + "learning_rate": 2.370585934623359e-06, + "log_odds": 6.9234819412231445, + "log_odds_ratio": -0.24358610808849335, + "loss": 0.2997, + "rejected_geometric_mean": -7.680202484130859, + "step": 4177 + }, + { + "chosen_geometric_mean": -0.8628763556480408, + "epoch": 1.03, + "grad_norm": 19.0, + "learning_rate": 2.3696137764501486e-06, + "log_odds": 3.5951361656188965, + "log_odds_ratio": -0.16465474665164948, + "loss": 0.3245, + "rejected_geometric_mean": -4.031758785247803, + "step": 4178 + }, + { + "chosen_geometric_mean": -1.0669450759887695, + "epoch": 1.03, + "grad_norm": 2.9375, + "learning_rate": 2.36864163804662e-06, + "log_odds": 3.492516279220581, + "log_odds_ratio": -0.1799163818359375, + "loss": 0.2692, + "rejected_geometric_mean": -4.241564750671387, + "step": 4179 + }, + { + "chosen_geometric_mean": -0.9523168802261353, + "epoch": 1.03, + "grad_norm": 3.796875, + "learning_rate": 2.3676695195601724e-06, + "log_odds": 2.6355183124542236, + "log_odds_ratio": -0.28161096572875977, + "loss": 0.2793, + "rejected_geometric_mean": -3.2575693130493164, + "step": 4180 + }, + { + "chosen_geometric_mean": -0.892032265663147, + "epoch": 1.04, + "grad_norm": 4.6875, + "learning_rate": 2.366697421138202e-06, + "log_odds": 10.789326667785645, + "log_odds_ratio": -0.007780277170240879, + "loss": 0.2552, + "rejected_geometric_mean": -11.128862380981445, + "step": 4181 + }, + { + "chosen_geometric_mean": -1.032715082168579, + "epoch": 1.04, + "grad_norm": 33.5, + "learning_rate": 2.365725342928102e-06, + "log_odds": 1.6111345291137695, + "log_odds_ratio": -0.323518842458725, + "loss": 0.2952, + "rejected_geometric_mean": -2.4351906776428223, + "step": 4182 + }, + { + "chosen_geometric_mean": -1.2540006637573242, + "epoch": 1.04, + "grad_norm": 7.59375, + "learning_rate": 2.364753285077263e-06, + "log_odds": 0.6659332513809204, + "log_odds_ratio": -0.4226588010787964, + "loss": 0.2549, + "rejected_geometric_mean": -1.764399528503418, + "step": 4183 + }, + { + "chosen_geometric_mean": -0.9594517946243286, + "epoch": 1.04, + "grad_norm": 2.078125, + "learning_rate": 2.3637812477330723e-06, + "log_odds": 7.0393195152282715, + "log_odds_ratio": -0.12989689409732819, + "loss": 0.2724, + "rejected_geometric_mean": -7.5949506759643555, + "step": 4184 + }, + { + "chosen_geometric_mean": -0.9896678924560547, + "epoch": 1.04, + "grad_norm": 2.71875, + "learning_rate": 2.3628092310429143e-06, + "log_odds": 9.280929565429688, + "log_odds_ratio": -0.1555788218975067, + "loss": 0.3054, + "rejected_geometric_mean": -9.888837814331055, + "step": 4185 + }, + { + "chosen_geometric_mean": -1.182334542274475, + "epoch": 1.04, + "grad_norm": 7.59375, + "learning_rate": 2.3618372351541706e-06, + "log_odds": 2.3659825325012207, + "log_odds_ratio": -0.334617018699646, + "loss": 0.3174, + "rejected_geometric_mean": -3.3263041973114014, + "step": 4186 + }, + { + "chosen_geometric_mean": -0.8813014626502991, + "epoch": 1.04, + "grad_norm": 3.84375, + "learning_rate": 2.3608652602142183e-06, + "log_odds": 0.746884286403656, + "log_odds_ratio": -0.4563756585121155, + "loss": 0.2619, + "rejected_geometric_mean": -1.4710626602172852, + "step": 4187 + }, + { + "chosen_geometric_mean": -0.8504233360290527, + "epoch": 1.04, + "grad_norm": 12.3125, + "learning_rate": 2.359893306370432e-06, + "log_odds": 8.823047637939453, + "log_odds_ratio": -0.1730334609746933, + "loss": 0.2881, + "rejected_geometric_mean": -9.274492263793945, + "step": 4188 + }, + { + "chosen_geometric_mean": -0.9063699841499329, + "epoch": 1.04, + "grad_norm": 6.8125, + "learning_rate": 2.358921373770183e-06, + "log_odds": 3.2259535789489746, + "log_odds_ratio": -0.11614502966403961, + "loss": 0.2749, + "rejected_geometric_mean": -3.6807994842529297, + "step": 4189 + }, + { + "chosen_geometric_mean": -1.2654036283493042, + "epoch": 1.04, + "grad_norm": 16.875, + "learning_rate": 2.3579494625608417e-06, + "log_odds": 10.23824405670166, + "log_odds_ratio": -0.04117932170629501, + "loss": 0.3249, + "rejected_geometric_mean": -11.129667282104492, + "step": 4190 + }, + { + "chosen_geometric_mean": -1.095844030380249, + "epoch": 1.04, + "grad_norm": 31.5, + "learning_rate": 2.356977572889771e-06, + "log_odds": 3.7006914615631104, + "log_odds_ratio": -0.19351230561733246, + "loss": 0.3133, + "rejected_geometric_mean": -4.430495738983154, + "step": 4191 + }, + { + "chosen_geometric_mean": -1.2299948930740356, + "epoch": 1.04, + "grad_norm": 10.1875, + "learning_rate": 2.356005704904333e-06, + "log_odds": 2.870445728302002, + "log_odds_ratio": -0.3545786440372467, + "loss": 0.2895, + "rejected_geometric_mean": -3.8382363319396973, + "step": 4192 + }, + { + "chosen_geometric_mean": -0.9663941860198975, + "epoch": 1.04, + "grad_norm": 1.9453125, + "learning_rate": 2.3550338587518867e-06, + "log_odds": 8.352949142456055, + "log_odds_ratio": -0.003337586298584938, + "loss": 0.2732, + "rejected_geometric_mean": -8.83566665649414, + "step": 4193 + }, + { + "chosen_geometric_mean": -1.0049614906311035, + "epoch": 1.04, + "grad_norm": 11.4375, + "learning_rate": 2.354062034579788e-06, + "log_odds": 6.319828987121582, + "log_odds_ratio": -0.1735733151435852, + "loss": 0.252, + "rejected_geometric_mean": -7.005871772766113, + "step": 4194 + }, + { + "chosen_geometric_mean": -1.0870845317840576, + "epoch": 1.04, + "grad_norm": 11.25, + "learning_rate": 2.353090232535386e-06, + "log_odds": 6.460178375244141, + "log_odds_ratio": -0.05593719333410263, + "loss": 0.2454, + "rejected_geometric_mean": -7.132511138916016, + "step": 4195 + }, + { + "chosen_geometric_mean": -1.2037111520767212, + "epoch": 1.04, + "grad_norm": 12.375, + "learning_rate": 2.3521184527660324e-06, + "log_odds": 2.1235783100128174, + "log_odds_ratio": -0.5668442845344543, + "loss": 0.2924, + "rejected_geometric_mean": -3.1543073654174805, + "step": 4196 + }, + { + "chosen_geometric_mean": -0.9477239847183228, + "epoch": 1.04, + "grad_norm": 73.5, + "learning_rate": 2.3511466954190716e-06, + "log_odds": 3.2514073848724365, + "log_odds_ratio": -0.2503562867641449, + "loss": 0.2554, + "rejected_geometric_mean": -3.8728811740875244, + "step": 4197 + }, + { + "chosen_geometric_mean": -0.706287145614624, + "epoch": 1.04, + "grad_norm": 7.1875, + "learning_rate": 2.350174960641844e-06, + "log_odds": 5.4919843673706055, + "log_odds_ratio": -0.28084737062454224, + "loss": 0.2301, + "rejected_geometric_mean": -5.793200492858887, + "step": 4198 + }, + { + "chosen_geometric_mean": -1.0797882080078125, + "epoch": 1.04, + "grad_norm": 9.0, + "learning_rate": 2.349203248581689e-06, + "log_odds": 8.86776065826416, + "log_odds_ratio": -0.06360470503568649, + "loss": 0.2891, + "rejected_geometric_mean": -9.561490058898926, + "step": 4199 + }, + { + "chosen_geometric_mean": -0.9906471371650696, + "epoch": 1.04, + "grad_norm": 3.203125, + "learning_rate": 2.3482315593859413e-06, + "log_odds": 1.2385756969451904, + "log_odds_ratio": -0.3113327622413635, + "loss": 0.275, + "rejected_geometric_mean": -1.9597564935684204, + "step": 4200 + }, + { + "chosen_geometric_mean": -0.9918308258056641, + "epoch": 1.04, + "grad_norm": 3.046875, + "learning_rate": 2.347259893201932e-06, + "log_odds": 2.283344268798828, + "log_odds_ratio": -0.2598229646682739, + "loss": 0.2885, + "rejected_geometric_mean": -2.969217538833618, + "step": 4201 + }, + { + "chosen_geometric_mean": -0.9769467115402222, + "epoch": 1.04, + "grad_norm": 20.125, + "learning_rate": 2.3462882501769895e-06, + "log_odds": 6.543452262878418, + "log_odds_ratio": -0.24102826416492462, + "loss": 0.2734, + "rejected_geometric_mean": -7.178926467895508, + "step": 4202 + }, + { + "chosen_geometric_mean": -0.9170401096343994, + "epoch": 1.04, + "grad_norm": 2.4375, + "learning_rate": 2.3453166304584377e-06, + "log_odds": 6.342313289642334, + "log_odds_ratio": -0.1211387887597084, + "loss": 0.2787, + "rejected_geometric_mean": -6.843654632568359, + "step": 4203 + }, + { + "chosen_geometric_mean": -1.1123082637786865, + "epoch": 1.04, + "grad_norm": 2.0, + "learning_rate": 2.3443450341935985e-06, + "log_odds": 1.6477751731872559, + "log_odds_ratio": -0.2422773540019989, + "loss": 0.248, + "rejected_geometric_mean": -2.4900708198547363, + "step": 4204 + }, + { + "chosen_geometric_mean": -1.033480167388916, + "epoch": 1.04, + "grad_norm": 3.375, + "learning_rate": 2.3433734615297883e-06, + "log_odds": 4.598457336425781, + "log_odds_ratio": -0.02987787127494812, + "loss": 0.2863, + "rejected_geometric_mean": -5.18329381942749, + "step": 4205 + }, + { + "chosen_geometric_mean": -0.8597010374069214, + "epoch": 1.04, + "grad_norm": 4.09375, + "learning_rate": 2.34240191261432e-06, + "log_odds": 2.9231371879577637, + "log_odds_ratio": -0.13093478977680206, + "loss": 0.2565, + "rejected_geometric_mean": -3.2983124256134033, + "step": 4206 + }, + { + "chosen_geometric_mean": -0.9272626638412476, + "epoch": 1.04, + "grad_norm": 2.3125, + "learning_rate": 2.3414303875945053e-06, + "log_odds": 1.9934673309326172, + "log_odds_ratio": -0.33790266513824463, + "loss": 0.2626, + "rejected_geometric_mean": -2.691276788711548, + "step": 4207 + }, + { + "chosen_geometric_mean": -1.0013964176177979, + "epoch": 1.04, + "grad_norm": 2.625, + "learning_rate": 2.34045888661765e-06, + "log_odds": 0.5985135436058044, + "log_odds_ratio": -0.4780910611152649, + "loss": 0.2773, + "rejected_geometric_mean": -1.451125979423523, + "step": 4208 + }, + { + "chosen_geometric_mean": -0.7235850095748901, + "epoch": 1.04, + "grad_norm": 2.203125, + "learning_rate": 2.3394874098310572e-06, + "log_odds": 2.8960235118865967, + "log_odds_ratio": -0.3114050030708313, + "loss": 0.23, + "rejected_geometric_mean": -3.251518964767456, + "step": 4209 + }, + { + "chosen_geometric_mean": -1.1370043754577637, + "epoch": 1.04, + "grad_norm": 5.4375, + "learning_rate": 2.338515957382026e-06, + "log_odds": 1.1687417030334473, + "log_odds_ratio": -0.3159506618976593, + "loss": 0.2343, + "rejected_geometric_mean": -2.0588364601135254, + "step": 4210 + }, + { + "chosen_geometric_mean": -1.3315743207931519, + "epoch": 1.04, + "grad_norm": 8.625, + "learning_rate": 2.3375445294178512e-06, + "log_odds": 6.618674278259277, + "log_odds_ratio": -0.018364455550909042, + "loss": 0.2523, + "rejected_geometric_mean": -7.590151309967041, + "step": 4211 + }, + { + "chosen_geometric_mean": -0.9875956773757935, + "epoch": 1.04, + "grad_norm": 3.59375, + "learning_rate": 2.3365731260858264e-06, + "log_odds": 1.2201988697052002, + "log_odds_ratio": -0.36842161417007446, + "loss": 0.3041, + "rejected_geometric_mean": -1.9331939220428467, + "step": 4212 + }, + { + "chosen_geometric_mean": -0.8300297260284424, + "epoch": 1.04, + "grad_norm": 3.671875, + "learning_rate": 2.3356017475332384e-06, + "log_odds": 2.8924574851989746, + "log_odds_ratio": -0.34430059790611267, + "loss": 0.2299, + "rejected_geometric_mean": -3.3811357021331787, + "step": 4213 + }, + { + "chosen_geometric_mean": -1.2071017026901245, + "epoch": 1.04, + "grad_norm": 2.3125, + "learning_rate": 2.334630393907372e-06, + "log_odds": 3.9194226264953613, + "log_odds_ratio": -0.1603650450706482, + "loss": 0.2691, + "rejected_geometric_mean": -4.819204807281494, + "step": 4214 + }, + { + "chosen_geometric_mean": -1.0227487087249756, + "epoch": 1.04, + "grad_norm": 15.875, + "learning_rate": 2.3336590653555068e-06, + "log_odds": 5.586034774780273, + "log_odds_ratio": -0.07432036846876144, + "loss": 0.2432, + "rejected_geometric_mean": -6.186919212341309, + "step": 4215 + }, + { + "chosen_geometric_mean": -1.0768883228302002, + "epoch": 1.04, + "grad_norm": 10.5, + "learning_rate": 2.33268776202492e-06, + "log_odds": 6.182198524475098, + "log_odds_ratio": -0.25439125299453735, + "loss": 0.2382, + "rejected_geometric_mean": -6.963968276977539, + "step": 4216 + }, + { + "chosen_geometric_mean": -0.9819960594177246, + "epoch": 1.04, + "grad_norm": 4.34375, + "learning_rate": 2.3317164840628843e-06, + "log_odds": 5.5441765785217285, + "log_odds_ratio": -0.09598830342292786, + "loss": 0.2783, + "rejected_geometric_mean": -6.113998889923096, + "step": 4217 + }, + { + "chosen_geometric_mean": -1.0395557880401611, + "epoch": 1.04, + "grad_norm": 7.84375, + "learning_rate": 2.3307452316166695e-06, + "log_odds": 5.525967121124268, + "log_odds_ratio": -0.19476400315761566, + "loss": 0.2546, + "rejected_geometric_mean": -6.178213119506836, + "step": 4218 + }, + { + "chosen_geometric_mean": -1.000434160232544, + "epoch": 1.04, + "grad_norm": 36.0, + "learning_rate": 2.329774004833541e-06, + "log_odds": 7.270537853240967, + "log_odds_ratio": -0.02352980338037014, + "loss": 0.2592, + "rejected_geometric_mean": -7.821619033813477, + "step": 4219 + }, + { + "chosen_geometric_mean": -1.0256659984588623, + "epoch": 1.04, + "grad_norm": 2.140625, + "learning_rate": 2.3288028038607593e-06, + "log_odds": 5.394190311431885, + "log_odds_ratio": -0.17351877689361572, + "loss": 0.3068, + "rejected_geometric_mean": -6.0758957862854, + "step": 4220 + }, + { + "chosen_geometric_mean": -0.8896760940551758, + "epoch": 1.05, + "grad_norm": 6.0, + "learning_rate": 2.3278316288455815e-06, + "log_odds": 3.1195919513702393, + "log_odds_ratio": -0.34800100326538086, + "loss": 0.298, + "rejected_geometric_mean": -3.6762094497680664, + "step": 4221 + }, + { + "chosen_geometric_mean": -1.4286134243011475, + "epoch": 1.05, + "grad_norm": 18.375, + "learning_rate": 2.3268604799352603e-06, + "log_odds": 7.306347846984863, + "log_odds_ratio": -0.21637505292892456, + "loss": 0.2714, + "rejected_geometric_mean": -8.524384498596191, + "step": 4222 + }, + { + "chosen_geometric_mean": -0.9942537546157837, + "epoch": 1.05, + "grad_norm": 6.5625, + "learning_rate": 2.3258893572770483e-06, + "log_odds": 4.052276611328125, + "log_odds_ratio": -0.24675783514976501, + "loss": 0.2433, + "rejected_geometric_mean": -4.6708455085754395, + "step": 4223 + }, + { + "chosen_geometric_mean": -1.1354131698608398, + "epoch": 1.05, + "grad_norm": 23.125, + "learning_rate": 2.3249182610181884e-06, + "log_odds": 2.3917620182037354, + "log_odds_ratio": -0.22091138362884521, + "loss": 0.2872, + "rejected_geometric_mean": -3.255617380142212, + "step": 4224 + }, + { + "chosen_geometric_mean": -0.9089659452438354, + "epoch": 1.05, + "grad_norm": 3.328125, + "learning_rate": 2.323947191305923e-06, + "log_odds": 3.859480381011963, + "log_odds_ratio": -0.07426053285598755, + "loss": 0.2721, + "rejected_geometric_mean": -4.258335590362549, + "step": 4225 + }, + { + "chosen_geometric_mean": -1.104048490524292, + "epoch": 1.05, + "grad_norm": 20.25, + "learning_rate": 2.3229761482874892e-06, + "log_odds": 1.608622670173645, + "log_odds_ratio": -0.3985742926597595, + "loss": 0.2937, + "rejected_geometric_mean": -2.524172306060791, + "step": 4226 + }, + { + "chosen_geometric_mean": -1.5533639192581177, + "epoch": 1.05, + "grad_norm": 22.75, + "learning_rate": 2.322005132110121e-06, + "log_odds": 7.759736061096191, + "log_odds_ratio": -0.23222702741622925, + "loss": 0.2991, + "rejected_geometric_mean": -9.135679244995117, + "step": 4227 + }, + { + "chosen_geometric_mean": -1.467218279838562, + "epoch": 1.05, + "grad_norm": 14.625, + "learning_rate": 2.3210341429210457e-06, + "log_odds": 6.593285083770752, + "log_odds_ratio": -0.23488639295101166, + "loss": 0.2694, + "rejected_geometric_mean": -7.776627063751221, + "step": 4228 + }, + { + "chosen_geometric_mean": -1.048844337463379, + "epoch": 1.05, + "grad_norm": 11.6875, + "learning_rate": 2.320063180867491e-06, + "log_odds": 6.208270072937012, + "log_odds_ratio": -0.02109573408961296, + "loss": 0.2539, + "rejected_geometric_mean": -6.836411952972412, + "step": 4229 + }, + { + "chosen_geometric_mean": -0.8075402975082397, + "epoch": 1.05, + "grad_norm": 3.53125, + "learning_rate": 2.3190922460966786e-06, + "log_odds": 10.854391098022461, + "log_odds_ratio": -0.1463061273097992, + "loss": 0.2586, + "rejected_geometric_mean": -11.134522438049316, + "step": 4230 + }, + { + "chosen_geometric_mean": -1.1230506896972656, + "epoch": 1.05, + "grad_norm": 3.890625, + "learning_rate": 2.318121338755823e-06, + "log_odds": 5.785499572753906, + "log_odds_ratio": -0.22448503971099854, + "loss": 0.258, + "rejected_geometric_mean": -6.66154670715332, + "step": 4231 + }, + { + "chosen_geometric_mean": -1.052651286125183, + "epoch": 1.05, + "grad_norm": 7.46875, + "learning_rate": 2.3171504589921387e-06, + "log_odds": 7.1151509284973145, + "log_odds_ratio": -0.1540350466966629, + "loss": 0.2888, + "rejected_geometric_mean": -7.823963165283203, + "step": 4232 + }, + { + "chosen_geometric_mean": -0.834723949432373, + "epoch": 1.05, + "grad_norm": 9.375, + "learning_rate": 2.316179606952833e-06, + "log_odds": 4.578155040740967, + "log_odds_ratio": -0.117684006690979, + "loss": 0.3051, + "rejected_geometric_mean": -4.894704818725586, + "step": 4233 + }, + { + "chosen_geometric_mean": -0.9954173564910889, + "epoch": 1.05, + "grad_norm": 4.28125, + "learning_rate": 2.315208782785112e-06, + "log_odds": 4.174385070800781, + "log_odds_ratio": -0.03202594816684723, + "loss": 0.2335, + "rejected_geometric_mean": -4.706693649291992, + "step": 4234 + }, + { + "chosen_geometric_mean": -1.0251432657241821, + "epoch": 1.05, + "grad_norm": 2.890625, + "learning_rate": 2.3142379866361753e-06, + "log_odds": 8.880661964416504, + "log_odds_ratio": -0.018013229593634605, + "loss": 0.2795, + "rejected_geometric_mean": -9.45272445678711, + "step": 4235 + }, + { + "chosen_geometric_mean": -0.8871970176696777, + "epoch": 1.05, + "grad_norm": 4.5625, + "learning_rate": 2.313267218653219e-06, + "log_odds": 11.154302597045898, + "log_odds_ratio": -0.031302716583013535, + "loss": 0.242, + "rejected_geometric_mean": -11.53024959564209, + "step": 4236 + }, + { + "chosen_geometric_mean": -1.175649881362915, + "epoch": 1.05, + "grad_norm": 6.3125, + "learning_rate": 2.312296478983435e-06, + "log_odds": 6.973315715789795, + "log_odds_ratio": -0.1918468177318573, + "loss": 0.262, + "rejected_geometric_mean": -7.879688262939453, + "step": 4237 + }, + { + "chosen_geometric_mean": -0.8759075403213501, + "epoch": 1.05, + "grad_norm": 4.34375, + "learning_rate": 2.3113257677740096e-06, + "log_odds": 3.643836498260498, + "log_odds_ratio": -0.11935579031705856, + "loss": 0.2468, + "rejected_geometric_mean": -4.057977676391602, + "step": 4238 + }, + { + "chosen_geometric_mean": -0.983523964881897, + "epoch": 1.05, + "grad_norm": 2.578125, + "learning_rate": 2.310355085172127e-06, + "log_odds": 2.4656991958618164, + "log_odds_ratio": -0.18794478476047516, + "loss": 0.2736, + "rejected_geometric_mean": -3.0874035358428955, + "step": 4239 + }, + { + "chosen_geometric_mean": -1.003584861755371, + "epoch": 1.05, + "grad_norm": 3.109375, + "learning_rate": 2.3093844313249653e-06, + "log_odds": 3.0864334106445312, + "log_odds_ratio": -0.28398430347442627, + "loss": 0.2745, + "rejected_geometric_mean": -3.7755918502807617, + "step": 4240 + }, + { + "chosen_geometric_mean": -1.036428689956665, + "epoch": 1.05, + "grad_norm": 4.59375, + "learning_rate": 2.3084138063797e-06, + "log_odds": 7.9373459815979, + "log_odds_ratio": -0.07440471649169922, + "loss": 0.2768, + "rejected_geometric_mean": -8.573721885681152, + "step": 4241 + }, + { + "chosen_geometric_mean": -0.9395809173583984, + "epoch": 1.05, + "grad_norm": 6.125, + "learning_rate": 2.3074432104835e-06, + "log_odds": 5.656293869018555, + "log_odds_ratio": -0.0696721151471138, + "loss": 0.2158, + "rejected_geometric_mean": -6.1359357833862305, + "step": 4242 + }, + { + "chosen_geometric_mean": -1.193474531173706, + "epoch": 1.05, + "grad_norm": 1.9375, + "learning_rate": 2.306472643783532e-06, + "log_odds": 4.345091819763184, + "log_odds_ratio": -0.10390613973140717, + "loss": 0.236, + "rejected_geometric_mean": -5.2241058349609375, + "step": 4243 + }, + { + "chosen_geometric_mean": -0.9917660355567932, + "epoch": 1.05, + "grad_norm": 34.25, + "learning_rate": 2.3055021064269554e-06, + "log_odds": 6.61509370803833, + "log_odds_ratio": -0.015548331663012505, + "loss": 0.2841, + "rejected_geometric_mean": -7.114955902099609, + "step": 4244 + }, + { + "chosen_geometric_mean": -0.9391850233078003, + "epoch": 1.05, + "grad_norm": 2.03125, + "learning_rate": 2.304531598560929e-06, + "log_odds": 5.6167755126953125, + "log_odds_ratio": -0.2161710411310196, + "loss": 0.2437, + "rejected_geometric_mean": -6.121232986450195, + "step": 4245 + }, + { + "chosen_geometric_mean": -1.2664527893066406, + "epoch": 1.05, + "grad_norm": 16.25, + "learning_rate": 2.3035611203326047e-06, + "log_odds": 6.889745712280273, + "log_odds_ratio": -0.12000231444835663, + "loss": 0.2642, + "rejected_geometric_mean": -7.83488130569458, + "step": 4246 + }, + { + "chosen_geometric_mean": -1.216726303100586, + "epoch": 1.05, + "grad_norm": 12.5625, + "learning_rate": 2.3025906718891296e-06, + "log_odds": 13.854501724243164, + "log_odds_ratio": -0.0008484335849061608, + "loss": 0.2615, + "rejected_geometric_mean": -14.663949012756348, + "step": 4247 + }, + { + "chosen_geometric_mean": -1.0606577396392822, + "epoch": 1.05, + "grad_norm": 3.953125, + "learning_rate": 2.3016202533776473e-06, + "log_odds": 7.828944206237793, + "log_odds_ratio": -0.17592176795005798, + "loss": 0.2626, + "rejected_geometric_mean": -8.52018928527832, + "step": 4248 + }, + { + "chosen_geometric_mean": -1.1423643827438354, + "epoch": 1.05, + "grad_norm": 28.75, + "learning_rate": 2.3006498649452956e-06, + "log_odds": 4.890952110290527, + "log_odds_ratio": -0.13317479193210602, + "loss": 0.2802, + "rejected_geometric_mean": -5.620099067687988, + "step": 4249 + }, + { + "chosen_geometric_mean": -1.0358291864395142, + "epoch": 1.05, + "grad_norm": 3.40625, + "learning_rate": 2.29967950673921e-06, + "log_odds": 8.904603004455566, + "log_odds_ratio": -0.17040082812309265, + "loss": 0.2947, + "rejected_geometric_mean": -9.486597061157227, + "step": 4250 + }, + { + "chosen_geometric_mean": -1.1141765117645264, + "epoch": 1.05, + "grad_norm": 44.5, + "learning_rate": 2.29870917890652e-06, + "log_odds": 1.655869483947754, + "log_odds_ratio": -0.25706571340560913, + "loss": 0.2943, + "rejected_geometric_mean": -2.47456955909729, + "step": 4251 + }, + { + "chosen_geometric_mean": -0.9398065209388733, + "epoch": 1.05, + "grad_norm": 30.0, + "learning_rate": 2.29773888159435e-06, + "log_odds": 6.087693691253662, + "log_odds_ratio": -0.17524267733097076, + "loss": 0.2645, + "rejected_geometric_mean": -6.649273872375488, + "step": 4252 + }, + { + "chosen_geometric_mean": -0.9117569327354431, + "epoch": 1.05, + "grad_norm": 3.265625, + "learning_rate": 2.2967686149498213e-06, + "log_odds": 3.7317492961883545, + "log_odds_ratio": -0.290735125541687, + "loss": 0.2207, + "rejected_geometric_mean": -4.378667831420898, + "step": 4253 + }, + { + "chosen_geometric_mean": -1.073034405708313, + "epoch": 1.05, + "grad_norm": 29.375, + "learning_rate": 2.295798379120048e-06, + "log_odds": 6.707568168640137, + "log_odds_ratio": -0.17958995699882507, + "loss": 0.276, + "rejected_geometric_mean": -7.4532856941223145, + "step": 4254 + }, + { + "chosen_geometric_mean": -0.7913874387741089, + "epoch": 1.05, + "grad_norm": 17.25, + "learning_rate": 2.294828174252142e-06, + "log_odds": 2.4952785968780518, + "log_odds_ratio": -0.3778810501098633, + "loss": 0.2426, + "rejected_geometric_mean": -3.0127646923065186, + "step": 4255 + }, + { + "chosen_geometric_mean": -0.9135435819625854, + "epoch": 1.05, + "grad_norm": 53.25, + "learning_rate": 2.29385800049321e-06, + "log_odds": 4.387606620788574, + "log_odds_ratio": -0.19544190168380737, + "loss": 0.2988, + "rejected_geometric_mean": -4.957859992980957, + "step": 4256 + }, + { + "chosen_geometric_mean": -0.8769859075546265, + "epoch": 1.05, + "grad_norm": 21.75, + "learning_rate": 2.292887857990354e-06, + "log_odds": 5.313839912414551, + "log_odds_ratio": -0.08946552127599716, + "loss": 0.3062, + "rejected_geometric_mean": -5.637350082397461, + "step": 4257 + }, + { + "chosen_geometric_mean": -1.6346889734268188, + "epoch": 1.05, + "grad_norm": 15.25, + "learning_rate": 2.2919177468906694e-06, + "log_odds": 3.3249692916870117, + "log_odds_ratio": -0.08801768720149994, + "loss": 0.2697, + "rejected_geometric_mean": -4.636495590209961, + "step": 4258 + }, + { + "chosen_geometric_mean": -1.02848482131958, + "epoch": 1.05, + "grad_norm": 2.390625, + "learning_rate": 2.2909476673412494e-06, + "log_odds": 7.203619956970215, + "log_odds_ratio": -0.1419641524553299, + "loss": 0.3194, + "rejected_geometric_mean": -7.842708587646484, + "step": 4259 + }, + { + "chosen_geometric_mean": -0.9185779690742493, + "epoch": 1.05, + "grad_norm": 10.5, + "learning_rate": 2.289977619489181e-06, + "log_odds": 4.241687774658203, + "log_odds_ratio": -0.25009405612945557, + "loss": 0.2528, + "rejected_geometric_mean": -4.8020219802856445, + "step": 4260 + }, + { + "chosen_geometric_mean": -1.0147731304168701, + "epoch": 1.05, + "grad_norm": 29.375, + "learning_rate": 2.289007603481547e-06, + "log_odds": 12.58687686920166, + "log_odds_ratio": -0.17655529081821442, + "loss": 0.3011, + "rejected_geometric_mean": -13.244955062866211, + "step": 4261 + }, + { + "chosen_geometric_mean": -1.131103754043579, + "epoch": 1.06, + "grad_norm": 2.78125, + "learning_rate": 2.2880376194654253e-06, + "log_odds": 5.2626848220825195, + "log_odds_ratio": -0.03452257439494133, + "loss": 0.2545, + "rejected_geometric_mean": -6.015377521514893, + "step": 4262 + }, + { + "chosen_geometric_mean": -1.0542116165161133, + "epoch": 1.06, + "grad_norm": 3.484375, + "learning_rate": 2.287067667587888e-06, + "log_odds": 5.413730144500732, + "log_odds_ratio": -0.14736592769622803, + "loss": 0.2596, + "rejected_geometric_mean": -6.093740940093994, + "step": 4263 + }, + { + "chosen_geometric_mean": -0.8930776119232178, + "epoch": 1.06, + "grad_norm": 5.25, + "learning_rate": 2.2860977479960036e-06, + "log_odds": 9.456341743469238, + "log_odds_ratio": -0.006706062704324722, + "loss": 0.1982, + "rejected_geometric_mean": -9.825626373291016, + "step": 4264 + }, + { + "chosen_geometric_mean": -1.0182645320892334, + "epoch": 1.06, + "grad_norm": 6.53125, + "learning_rate": 2.2851278608368353e-06, + "log_odds": 9.87111759185791, + "log_odds_ratio": -0.12136591970920563, + "loss": 0.3143, + "rejected_geometric_mean": -10.522122383117676, + "step": 4265 + }, + { + "chosen_geometric_mean": -1.452085256576538, + "epoch": 1.06, + "grad_norm": 31.25, + "learning_rate": 2.2841580062574404e-06, + "log_odds": 2.028547763824463, + "log_odds_ratio": -0.12890635430812836, + "loss": 0.2861, + "rejected_geometric_mean": -3.214449644088745, + "step": 4266 + }, + { + "chosen_geometric_mean": -1.0064826011657715, + "epoch": 1.06, + "grad_norm": 2.296875, + "learning_rate": 2.283188184404873e-06, + "log_odds": 6.615546226501465, + "log_odds_ratio": -0.13894005119800568, + "loss": 0.3146, + "rejected_geometric_mean": -7.294861793518066, + "step": 4267 + }, + { + "chosen_geometric_mean": -0.784382700920105, + "epoch": 1.06, + "grad_norm": 2.03125, + "learning_rate": 2.2822183954261815e-06, + "log_odds": 9.682110786437988, + "log_odds_ratio": -0.028600318357348442, + "loss": 0.2412, + "rejected_geometric_mean": -9.85293960571289, + "step": 4268 + }, + { + "chosen_geometric_mean": -0.747053861618042, + "epoch": 1.06, + "grad_norm": 23.375, + "learning_rate": 2.2812486394684094e-06, + "log_odds": 4.800622940063477, + "log_odds_ratio": -0.11256958544254303, + "loss": 0.2683, + "rejected_geometric_mean": -4.988664627075195, + "step": 4269 + }, + { + "chosen_geometric_mean": -0.8967317938804626, + "epoch": 1.06, + "grad_norm": 13.5625, + "learning_rate": 2.280278916678594e-06, + "log_odds": 5.740938663482666, + "log_odds_ratio": -0.1309950351715088, + "loss": 0.2834, + "rejected_geometric_mean": -6.188875198364258, + "step": 4270 + }, + { + "chosen_geometric_mean": -0.9970337152481079, + "epoch": 1.06, + "grad_norm": 20.0, + "learning_rate": 2.2793092272037674e-06, + "log_odds": 1.8281505107879639, + "log_odds_ratio": -0.36978593468666077, + "loss": 0.2607, + "rejected_geometric_mean": -2.6231117248535156, + "step": 4271 + }, + { + "chosen_geometric_mean": -1.0869877338409424, + "epoch": 1.06, + "grad_norm": 4.34375, + "learning_rate": 2.2783395711909614e-06, + "log_odds": 3.597461700439453, + "log_odds_ratio": -0.22173134982585907, + "loss": 0.2596, + "rejected_geometric_mean": -4.390629291534424, + "step": 4272 + }, + { + "chosen_geometric_mean": -0.7117723226547241, + "epoch": 1.06, + "grad_norm": 2.546875, + "learning_rate": 2.2773699487871967e-06, + "log_odds": 8.007476806640625, + "log_odds_ratio": -0.0039753089658916, + "loss": 0.2592, + "rejected_geometric_mean": -8.031888961791992, + "step": 4273 + }, + { + "chosen_geometric_mean": -0.9991330504417419, + "epoch": 1.06, + "grad_norm": 2.859375, + "learning_rate": 2.2764003601394915e-06, + "log_odds": 2.8819899559020996, + "log_odds_ratio": -0.2892853021621704, + "loss": 0.2582, + "rejected_geometric_mean": -3.6084280014038086, + "step": 4274 + }, + { + "chosen_geometric_mean": -1.0495225191116333, + "epoch": 1.06, + "grad_norm": 1.875, + "learning_rate": 2.2754308053948593e-06, + "log_odds": 4.1643147468566895, + "log_odds_ratio": -0.12336641550064087, + "loss": 0.2357, + "rejected_geometric_mean": -4.853590965270996, + "step": 4275 + }, + { + "chosen_geometric_mean": -1.1271724700927734, + "epoch": 1.06, + "grad_norm": 3.1875, + "learning_rate": 2.274461284700308e-06, + "log_odds": 9.885412216186523, + "log_odds_ratio": -0.15638765692710876, + "loss": 0.2642, + "rejected_geometric_mean": -10.727391242980957, + "step": 4276 + }, + { + "chosen_geometric_mean": -0.9206528067588806, + "epoch": 1.06, + "grad_norm": 2.328125, + "learning_rate": 2.2734917982028377e-06, + "log_odds": 10.681836128234863, + "log_odds_ratio": -0.2065843939781189, + "loss": 0.2682, + "rejected_geometric_mean": -11.255108833312988, + "step": 4277 + }, + { + "chosen_geometric_mean": -1.2643535137176514, + "epoch": 1.06, + "grad_norm": 17.375, + "learning_rate": 2.2725223460494496e-06, + "log_odds": 8.703058242797852, + "log_odds_ratio": -0.1866074502468109, + "loss": 0.2826, + "rejected_geometric_mean": -9.676934242248535, + "step": 4278 + }, + { + "chosen_geometric_mean": -1.1614104509353638, + "epoch": 1.06, + "grad_norm": 3.03125, + "learning_rate": 2.271552928387134e-06, + "log_odds": 9.090503692626953, + "log_odds_ratio": -0.1341625601053238, + "loss": 0.2648, + "rejected_geometric_mean": -9.939123153686523, + "step": 4279 + }, + { + "chosen_geometric_mean": -1.2537081241607666, + "epoch": 1.06, + "grad_norm": 10.9375, + "learning_rate": 2.2705835453628785e-06, + "log_odds": 3.592245578765869, + "log_odds_ratio": -0.17178350687026978, + "loss": 0.2111, + "rejected_geometric_mean": -4.550485134124756, + "step": 4280 + }, + { + "chosen_geometric_mean": -0.934499979019165, + "epoch": 1.06, + "grad_norm": 7.0, + "learning_rate": 2.2696141971236634e-06, + "log_odds": 6.799452781677246, + "log_odds_ratio": -0.11558695137500763, + "loss": 0.264, + "rejected_geometric_mean": -7.2517194747924805, + "step": 4281 + }, + { + "chosen_geometric_mean": -1.0130292177200317, + "epoch": 1.06, + "grad_norm": 3.484375, + "learning_rate": 2.268644883816466e-06, + "log_odds": 10.609516143798828, + "log_odds_ratio": -0.0642690360546112, + "loss": 0.2576, + "rejected_geometric_mean": -11.20553207397461, + "step": 4282 + }, + { + "chosen_geometric_mean": -0.8437203764915466, + "epoch": 1.06, + "grad_norm": 4.125, + "learning_rate": 2.2676756055882583e-06, + "log_odds": 9.91994571685791, + "log_odds_ratio": -0.011121556162834167, + "loss": 0.2325, + "rejected_geometric_mean": -10.200143814086914, + "step": 4283 + }, + { + "chosen_geometric_mean": -1.0599169731140137, + "epoch": 1.06, + "grad_norm": 10.75, + "learning_rate": 2.2667063625860056e-06, + "log_odds": 7.58042573928833, + "log_odds_ratio": -0.11515883356332779, + "loss": 0.2496, + "rejected_geometric_mean": -8.271615028381348, + "step": 4284 + }, + { + "chosen_geometric_mean": -0.9370942711830139, + "epoch": 1.06, + "grad_norm": 3.703125, + "learning_rate": 2.2657371549566683e-06, + "log_odds": 5.988990306854248, + "log_odds_ratio": -0.029581289738416672, + "loss": 0.2746, + "rejected_geometric_mean": -6.41915225982666, + "step": 4285 + }, + { + "chosen_geometric_mean": -1.1142239570617676, + "epoch": 1.06, + "grad_norm": 2.25, + "learning_rate": 2.264767982847202e-06, + "log_odds": 1.5664944648742676, + "log_odds_ratio": -0.3758246898651123, + "loss": 0.295, + "rejected_geometric_mean": -2.456068515777588, + "step": 4286 + }, + { + "chosen_geometric_mean": -1.1519626379013062, + "epoch": 1.06, + "grad_norm": 3.140625, + "learning_rate": 2.2637988464045553e-06, + "log_odds": 5.2533979415893555, + "log_odds_ratio": -0.22638127207756042, + "loss": 0.3057, + "rejected_geometric_mean": -6.138075828552246, + "step": 4287 + }, + { + "chosen_geometric_mean": -1.0871001482009888, + "epoch": 1.06, + "grad_norm": 18.625, + "learning_rate": 2.2628297457756725e-06, + "log_odds": 10.500262260437012, + "log_odds_ratio": -0.04587197303771973, + "loss": 0.2721, + "rejected_geometric_mean": -11.179905891418457, + "step": 4288 + }, + { + "chosen_geometric_mean": -1.183004379272461, + "epoch": 1.06, + "grad_norm": 21.375, + "learning_rate": 2.2618606811074943e-06, + "log_odds": 9.712821006774902, + "log_odds_ratio": -0.0025441008619964123, + "loss": 0.2421, + "rejected_geometric_mean": -10.526042938232422, + "step": 4289 + }, + { + "chosen_geometric_mean": -0.9440648555755615, + "epoch": 1.06, + "grad_norm": 2.40625, + "learning_rate": 2.260891652546953e-06, + "log_odds": 1.8175122737884521, + "log_odds_ratio": -0.3348690867424011, + "loss": 0.2367, + "rejected_geometric_mean": -2.5026285648345947, + "step": 4290 + }, + { + "chosen_geometric_mean": -1.135657548904419, + "epoch": 1.06, + "grad_norm": 10.5, + "learning_rate": 2.2599226602409764e-06, + "log_odds": 4.315862655639648, + "log_odds_ratio": -0.2664485573768616, + "loss": 0.2947, + "rejected_geometric_mean": -5.193108081817627, + "step": 4291 + }, + { + "chosen_geometric_mean": -0.8896905779838562, + "epoch": 1.06, + "grad_norm": 7.21875, + "learning_rate": 2.2589537043364877e-06, + "log_odds": 6.896529674530029, + "log_odds_ratio": -0.144358828663826, + "loss": 0.2634, + "rejected_geometric_mean": -7.317521095275879, + "step": 4292 + }, + { + "chosen_geometric_mean": -1.2133042812347412, + "epoch": 1.06, + "grad_norm": 9.9375, + "learning_rate": 2.2579847849804026e-06, + "log_odds": 8.001713752746582, + "log_odds_ratio": -0.07697658985853195, + "loss": 0.2988, + "rejected_geometric_mean": -8.883512496948242, + "step": 4293 + }, + { + "chosen_geometric_mean": -1.034184455871582, + "epoch": 1.06, + "grad_norm": 14.375, + "learning_rate": 2.2570159023196343e-06, + "log_odds": 10.343096733093262, + "log_odds_ratio": -0.1647869050502777, + "loss": 0.2647, + "rejected_geometric_mean": -10.980401992797852, + "step": 4294 + }, + { + "chosen_geometric_mean": -1.3175311088562012, + "epoch": 1.06, + "grad_norm": 14.125, + "learning_rate": 2.2560470565010883e-06, + "log_odds": 5.853330612182617, + "log_odds_ratio": -0.1696183681488037, + "loss": 0.2965, + "rejected_geometric_mean": -6.866710186004639, + "step": 4295 + }, + { + "chosen_geometric_mean": -1.1285451650619507, + "epoch": 1.06, + "grad_norm": 10.9375, + "learning_rate": 2.2550782476716634e-06, + "log_odds": 7.614518165588379, + "log_odds_ratio": -0.1651260256767273, + "loss": 0.2368, + "rejected_geometric_mean": -8.43781852722168, + "step": 4296 + }, + { + "chosen_geometric_mean": -0.7489595413208008, + "epoch": 1.06, + "grad_norm": 4.40625, + "learning_rate": 2.2541094759782556e-06, + "log_odds": 8.348313331604004, + "log_odds_ratio": -0.09538151323795319, + "loss": 0.2692, + "rejected_geometric_mean": -8.502180099487305, + "step": 4297 + }, + { + "chosen_geometric_mean": -1.035733699798584, + "epoch": 1.06, + "grad_norm": 21.375, + "learning_rate": 2.2531407415677534e-06, + "log_odds": 8.875182151794434, + "log_odds_ratio": -0.10726375132799149, + "loss": 0.2891, + "rejected_geometric_mean": -9.444181442260742, + "step": 4298 + }, + { + "chosen_geometric_mean": -0.9773391485214233, + "epoch": 1.06, + "grad_norm": 7.625, + "learning_rate": 2.25217204458704e-06, + "log_odds": 15.148445129394531, + "log_odds_ratio": -0.03851417079567909, + "loss": 0.2447, + "rejected_geometric_mean": -15.67593002319336, + "step": 4299 + }, + { + "chosen_geometric_mean": -0.7855061292648315, + "epoch": 1.06, + "grad_norm": 9.3125, + "learning_rate": 2.251203385182994e-06, + "log_odds": 1.8806754350662231, + "log_odds_ratio": -0.29888230562210083, + "loss": 0.2888, + "rejected_geometric_mean": -2.307650327682495, + "step": 4300 + }, + { + "chosen_geometric_mean": -0.969443678855896, + "epoch": 1.06, + "grad_norm": 7.34375, + "learning_rate": 2.2502347635024867e-06, + "log_odds": 11.458434104919434, + "log_odds_ratio": -0.08594975620508194, + "loss": 0.2459, + "rejected_geometric_mean": -11.991698265075684, + "step": 4301 + }, + { + "chosen_geometric_mean": -1.0268833637237549, + "epoch": 1.07, + "grad_norm": 4.21875, + "learning_rate": 2.249266179692385e-06, + "log_odds": 5.798920631408691, + "log_odds_ratio": -0.014830345287919044, + "loss": 0.2754, + "rejected_geometric_mean": -6.377278804779053, + "step": 4302 + }, + { + "chosen_geometric_mean": -0.8516654372215271, + "epoch": 1.07, + "grad_norm": 3.3125, + "learning_rate": 2.248297633899549e-06, + "log_odds": 4.886160373687744, + "log_odds_ratio": -0.1987384408712387, + "loss": 0.31, + "rejected_geometric_mean": -5.313684463500977, + "step": 4303 + }, + { + "chosen_geometric_mean": -1.0949198007583618, + "epoch": 1.07, + "grad_norm": 26.25, + "learning_rate": 2.247329126270832e-06, + "log_odds": 5.012423515319824, + "log_odds_ratio": -0.052550457417964935, + "loss": 0.2687, + "rejected_geometric_mean": -5.718541622161865, + "step": 4304 + }, + { + "chosen_geometric_mean": -1.1160826683044434, + "epoch": 1.07, + "grad_norm": 14.25, + "learning_rate": 2.2463606569530863e-06, + "log_odds": 4.074758529663086, + "log_odds_ratio": -0.3136308491230011, + "loss": 0.3582, + "rejected_geometric_mean": -4.920581340789795, + "step": 4305 + }, + { + "chosen_geometric_mean": -1.4304392337799072, + "epoch": 1.07, + "grad_norm": 9.375, + "learning_rate": 2.245392226093153e-06, + "log_odds": 8.444194793701172, + "log_odds_ratio": -0.033442422747612, + "loss": 0.2722, + "rejected_geometric_mean": -9.568687438964844, + "step": 4306 + }, + { + "chosen_geometric_mean": -1.0217353105545044, + "epoch": 1.07, + "grad_norm": 29.625, + "learning_rate": 2.2444238338378698e-06, + "log_odds": 5.661379814147949, + "log_odds_ratio": -0.27131906151771545, + "loss": 0.2115, + "rejected_geometric_mean": -6.393226623535156, + "step": 4307 + }, + { + "chosen_geometric_mean": -0.8365740180015564, + "epoch": 1.07, + "grad_norm": 2.78125, + "learning_rate": 2.243455480334068e-06, + "log_odds": 4.523228168487549, + "log_odds_ratio": -0.16988840699195862, + "loss": 0.2646, + "rejected_geometric_mean": -4.921003341674805, + "step": 4308 + }, + { + "chosen_geometric_mean": -0.7130446434020996, + "epoch": 1.07, + "grad_norm": 17.25, + "learning_rate": 2.242487165728574e-06, + "log_odds": 5.121499538421631, + "log_odds_ratio": -0.09443742036819458, + "loss": 0.2483, + "rejected_geometric_mean": -5.226149559020996, + "step": 4309 + }, + { + "chosen_geometric_mean": -0.9028325080871582, + "epoch": 1.07, + "grad_norm": 1.90625, + "learning_rate": 2.2415188901682057e-06, + "log_odds": 7.147252559661865, + "log_odds_ratio": -0.1502632200717926, + "loss": 0.2119, + "rejected_geometric_mean": -7.606598377227783, + "step": 4310 + }, + { + "chosen_geometric_mean": -0.9189599752426147, + "epoch": 1.07, + "grad_norm": 13.0, + "learning_rate": 2.240550653799779e-06, + "log_odds": 6.324605941772461, + "log_odds_ratio": -0.20405247807502747, + "loss": 0.2754, + "rejected_geometric_mean": -6.874966144561768, + "step": 4311 + }, + { + "chosen_geometric_mean": -0.9334778785705566, + "epoch": 1.07, + "grad_norm": 2.265625, + "learning_rate": 2.2395824567701016e-06, + "log_odds": 2.475672960281372, + "log_odds_ratio": -0.34206530451774597, + "loss": 0.2588, + "rejected_geometric_mean": -3.1630332469940186, + "step": 4312 + }, + { + "chosen_geometric_mean": -0.9837983846664429, + "epoch": 1.07, + "grad_norm": 3.53125, + "learning_rate": 2.2386142992259743e-06, + "log_odds": 6.598973751068115, + "log_odds_ratio": -0.13982073962688446, + "loss": 0.2669, + "rejected_geometric_mean": -7.1642537117004395, + "step": 4313 + }, + { + "chosen_geometric_mean": -1.0357154607772827, + "epoch": 1.07, + "grad_norm": 2.265625, + "learning_rate": 2.2376461813141934e-06, + "log_odds": 11.839537620544434, + "log_odds_ratio": -0.020876044407486916, + "loss": 0.2444, + "rejected_geometric_mean": -12.388871192932129, + "step": 4314 + }, + { + "chosen_geometric_mean": -0.9593029022216797, + "epoch": 1.07, + "grad_norm": 5.0625, + "learning_rate": 2.2366781031815484e-06, + "log_odds": 2.8147759437561035, + "log_odds_ratio": -0.38325291872024536, + "loss": 0.2469, + "rejected_geometric_mean": -3.5678818225860596, + "step": 4315 + }, + { + "chosen_geometric_mean": -0.9114375114440918, + "epoch": 1.07, + "grad_norm": 35.5, + "learning_rate": 2.2357100649748245e-06, + "log_odds": 4.144908428192139, + "log_odds_ratio": -0.11007081717252731, + "loss": 0.2926, + "rejected_geometric_mean": -4.611717224121094, + "step": 4316 + }, + { + "chosen_geometric_mean": -0.9442387223243713, + "epoch": 1.07, + "grad_norm": 17.5, + "learning_rate": 2.2347420668407987e-06, + "log_odds": 3.7588918209075928, + "log_odds_ratio": -0.08366156369447708, + "loss": 0.2433, + "rejected_geometric_mean": -4.2392683029174805, + "step": 4317 + }, + { + "chosen_geometric_mean": -0.9259575009346008, + "epoch": 1.07, + "grad_norm": 2.125, + "learning_rate": 2.2337741089262426e-06, + "log_odds": 4.678023815155029, + "log_odds_ratio": -0.3001847267150879, + "loss": 0.2534, + "rejected_geometric_mean": -5.322814464569092, + "step": 4318 + }, + { + "chosen_geometric_mean": -1.053490400314331, + "epoch": 1.07, + "grad_norm": 19.625, + "learning_rate": 2.2328061913779225e-06, + "log_odds": 13.517892837524414, + "log_odds_ratio": -0.1432911455631256, + "loss": 0.3003, + "rejected_geometric_mean": -14.212926864624023, + "step": 4319 + }, + { + "chosen_geometric_mean": -0.9317715167999268, + "epoch": 1.07, + "grad_norm": 11.375, + "learning_rate": 2.231838314342597e-06, + "log_odds": 9.563389778137207, + "log_odds_ratio": -0.10960038006305695, + "loss": 0.2714, + "rejected_geometric_mean": -10.027023315429688, + "step": 4320 + }, + { + "chosen_geometric_mean": -0.8680472373962402, + "epoch": 1.07, + "grad_norm": 2.0625, + "learning_rate": 2.2308704779670195e-06, + "log_odds": 1.2485648393630981, + "log_odds_ratio": -0.4458470344543457, + "loss": 0.2709, + "rejected_geometric_mean": -1.9297384023666382, + "step": 4321 + }, + { + "chosen_geometric_mean": -1.0909523963928223, + "epoch": 1.07, + "grad_norm": 2.59375, + "learning_rate": 2.2299026823979384e-06, + "log_odds": 8.341279029846191, + "log_odds_ratio": -0.052183255553245544, + "loss": 0.2353, + "rejected_geometric_mean": -9.052460670471191, + "step": 4322 + }, + { + "chosen_geometric_mean": -1.0324978828430176, + "epoch": 1.07, + "grad_norm": 14.375, + "learning_rate": 2.228934927782094e-06, + "log_odds": 1.090012788772583, + "log_odds_ratio": -0.29628250002861023, + "loss": 0.2761, + "rejected_geometric_mean": -1.8601926565170288, + "step": 4323 + }, + { + "chosen_geometric_mean": -0.8323901891708374, + "epoch": 1.07, + "grad_norm": 17.625, + "learning_rate": 2.227967214266221e-06, + "log_odds": 9.47590446472168, + "log_odds_ratio": -0.061265863478183746, + "loss": 0.288, + "rejected_geometric_mean": -9.767377853393555, + "step": 4324 + }, + { + "chosen_geometric_mean": -1.192151427268982, + "epoch": 1.07, + "grad_norm": 4.78125, + "learning_rate": 2.2269995419970488e-06, + "log_odds": 10.662006378173828, + "log_odds_ratio": -0.1580115407705307, + "loss": 0.2644, + "rejected_geometric_mean": -11.532683372497559, + "step": 4325 + }, + { + "chosen_geometric_mean": -1.1603258848190308, + "epoch": 1.07, + "grad_norm": 28.75, + "learning_rate": 2.2260319111212974e-06, + "log_odds": 7.855038642883301, + "log_odds_ratio": -0.1167798638343811, + "loss": 0.2782, + "rejected_geometric_mean": -8.671881675720215, + "step": 4326 + }, + { + "chosen_geometric_mean": -1.4750936031341553, + "epoch": 1.07, + "grad_norm": 34.0, + "learning_rate": 2.225064321785686e-06, + "log_odds": 9.129176139831543, + "log_odds_ratio": -0.45736461877822876, + "loss": 0.3954, + "rejected_geometric_mean": -10.602585792541504, + "step": 4327 + }, + { + "chosen_geometric_mean": -0.9597817659378052, + "epoch": 1.07, + "grad_norm": 26.25, + "learning_rate": 2.2240967741369233e-06, + "log_odds": 3.428861618041992, + "log_odds_ratio": -0.14281964302062988, + "loss": 0.2696, + "rejected_geometric_mean": -3.945094108581543, + "step": 4328 + }, + { + "chosen_geometric_mean": -1.05489182472229, + "epoch": 1.07, + "grad_norm": 9.4375, + "learning_rate": 2.223129268321712e-06, + "log_odds": 7.851469039916992, + "log_odds_ratio": -0.03328718617558479, + "loss": 0.2897, + "rejected_geometric_mean": -8.488365173339844, + "step": 4329 + }, + { + "chosen_geometric_mean": -0.8748541474342346, + "epoch": 1.07, + "grad_norm": 13.6875, + "learning_rate": 2.222161804486749e-06, + "log_odds": 4.2629194259643555, + "log_odds_ratio": -0.11292549222707748, + "loss": 0.2504, + "rejected_geometric_mean": -4.643446445465088, + "step": 4330 + }, + { + "chosen_geometric_mean": -0.8787368535995483, + "epoch": 1.07, + "grad_norm": 4.375, + "learning_rate": 2.2211943827787254e-06, + "log_odds": 4.344097137451172, + "log_odds_ratio": -0.06654128432273865, + "loss": 0.2468, + "rejected_geometric_mean": -4.693821907043457, + "step": 4331 + }, + { + "chosen_geometric_mean": -1.0520623922348022, + "epoch": 1.07, + "grad_norm": 48.0, + "learning_rate": 2.2202270033443267e-06, + "log_odds": 3.8072268962860107, + "log_odds_ratio": -0.281701922416687, + "loss": 0.2574, + "rejected_geometric_mean": -4.602923393249512, + "step": 4332 + }, + { + "chosen_geometric_mean": -0.8717262744903564, + "epoch": 1.07, + "grad_norm": 6.1875, + "learning_rate": 2.2192596663302294e-06, + "log_odds": 10.682565689086914, + "log_odds_ratio": -0.19658571481704712, + "loss": 0.2581, + "rejected_geometric_mean": -11.151923179626465, + "step": 4333 + }, + { + "chosen_geometric_mean": -1.295117974281311, + "epoch": 1.07, + "grad_norm": 3.9375, + "learning_rate": 2.2182923718831064e-06, + "log_odds": 6.042016983032227, + "log_odds_ratio": -0.26708918809890747, + "loss": 0.2943, + "rejected_geometric_mean": -7.119984149932861, + "step": 4334 + }, + { + "chosen_geometric_mean": -0.9375753402709961, + "epoch": 1.07, + "grad_norm": 2.046875, + "learning_rate": 2.2173251201496216e-06, + "log_odds": 3.361914873123169, + "log_odds_ratio": -0.2650792598724365, + "loss": 0.2345, + "rejected_geometric_mean": -3.9590680599212646, + "step": 4335 + }, + { + "chosen_geometric_mean": -1.1151707172393799, + "epoch": 1.07, + "grad_norm": 5.0625, + "learning_rate": 2.2163579112764334e-06, + "log_odds": 6.477543354034424, + "log_odds_ratio": -0.14581772685050964, + "loss": 0.2408, + "rejected_geometric_mean": -7.2648844718933105, + "step": 4336 + }, + { + "chosen_geometric_mean": -0.918267011642456, + "epoch": 1.07, + "grad_norm": 2.0625, + "learning_rate": 2.215390745410194e-06, + "log_odds": 4.15162467956543, + "log_odds_ratio": -0.3171809911727905, + "loss": 0.2932, + "rejected_geometric_mean": -4.804804801940918, + "step": 4337 + }, + { + "chosen_geometric_mean": -0.7696547508239746, + "epoch": 1.07, + "grad_norm": 4.59375, + "learning_rate": 2.21442362269755e-06, + "log_odds": 3.958111524581909, + "log_odds_ratio": -0.19700020551681519, + "loss": 0.2783, + "rejected_geometric_mean": -4.268960952758789, + "step": 4338 + }, + { + "chosen_geometric_mean": -1.230104923248291, + "epoch": 1.07, + "grad_norm": 7.96875, + "learning_rate": 2.2134565432851397e-06, + "log_odds": 2.9797773361206055, + "log_odds_ratio": -0.1833629608154297, + "loss": 0.2424, + "rejected_geometric_mean": -3.91921329498291, + "step": 4339 + }, + { + "chosen_geometric_mean": -1.2416625022888184, + "epoch": 1.07, + "grad_norm": 5.375, + "learning_rate": 2.212489507319596e-06, + "log_odds": 9.353989601135254, + "log_odds_ratio": -0.010941133834421635, + "loss": 0.3147, + "rejected_geometric_mean": -10.255596160888672, + "step": 4340 + }, + { + "chosen_geometric_mean": -0.9394949674606323, + "epoch": 1.07, + "grad_norm": 3.734375, + "learning_rate": 2.211522514947544e-06, + "log_odds": 4.489791393280029, + "log_odds_ratio": -0.1503717452287674, + "loss": 0.2949, + "rejected_geometric_mean": -4.979059219360352, + "step": 4341 + }, + { + "chosen_geometric_mean": -1.1503190994262695, + "epoch": 1.08, + "grad_norm": 13.6875, + "learning_rate": 2.2105555663156024e-06, + "log_odds": 8.314273834228516, + "log_odds_ratio": -0.0690770223736763, + "loss": 0.2151, + "rejected_geometric_mean": -9.123469352722168, + "step": 4342 + }, + { + "chosen_geometric_mean": -1.0878028869628906, + "epoch": 1.08, + "grad_norm": 3.28125, + "learning_rate": 2.209588661570386e-06, + "log_odds": 6.47135591506958, + "log_odds_ratio": -0.09219525754451752, + "loss": 0.2505, + "rejected_geometric_mean": -7.2004313468933105, + "step": 4343 + }, + { + "chosen_geometric_mean": -1.0349022150039673, + "epoch": 1.08, + "grad_norm": 4.75, + "learning_rate": 2.2086218008584994e-06, + "log_odds": 5.076650142669678, + "log_odds_ratio": -0.20891301333904266, + "loss": 0.3556, + "rejected_geometric_mean": -5.723118782043457, + "step": 4344 + }, + { + "chosen_geometric_mean": -0.9433372020721436, + "epoch": 1.08, + "grad_norm": 70.5, + "learning_rate": 2.207654984326542e-06, + "log_odds": 13.061488151550293, + "log_odds_ratio": -0.055995404720306396, + "loss": 0.2397, + "rejected_geometric_mean": -13.50387191772461, + "step": 4345 + }, + { + "chosen_geometric_mean": -0.9133092761039734, + "epoch": 1.08, + "grad_norm": 35.5, + "learning_rate": 2.2066882121211062e-06, + "log_odds": 11.180685043334961, + "log_odds_ratio": -0.07835902273654938, + "loss": 0.2632, + "rejected_geometric_mean": -11.623043060302734, + "step": 4346 + }, + { + "chosen_geometric_mean": -1.448167324066162, + "epoch": 1.08, + "grad_norm": 27.0, + "learning_rate": 2.2057214843887783e-06, + "log_odds": 8.886054039001465, + "log_odds_ratio": -0.19394686818122864, + "loss": 0.301, + "rejected_geometric_mean": -10.087906837463379, + "step": 4347 + }, + { + "chosen_geometric_mean": -0.974823534488678, + "epoch": 1.08, + "grad_norm": 23.0, + "learning_rate": 2.204754801276136e-06, + "log_odds": 12.49048137664795, + "log_odds_ratio": -0.046568721532821655, + "loss": 0.2922, + "rejected_geometric_mean": -13.009949684143066, + "step": 4348 + }, + { + "chosen_geometric_mean": -1.0498400926589966, + "epoch": 1.08, + "grad_norm": 3.328125, + "learning_rate": 2.2037881629297538e-06, + "log_odds": 1.6411980390548706, + "log_odds_ratio": -0.24835461378097534, + "loss": 0.2867, + "rejected_geometric_mean": -2.369204044342041, + "step": 4349 + }, + { + "chosen_geometric_mean": -1.157806158065796, + "epoch": 1.08, + "grad_norm": 59.0, + "learning_rate": 2.2028215694961962e-06, + "log_odds": 7.277351379394531, + "log_odds_ratio": -0.3156551122665405, + "loss": 0.2704, + "rejected_geometric_mean": -8.202985763549805, + "step": 4350 + }, + { + "chosen_geometric_mean": -1.1127872467041016, + "epoch": 1.08, + "grad_norm": 9.0, + "learning_rate": 2.201855021122023e-06, + "log_odds": 11.009631156921387, + "log_odds_ratio": -0.003060669871047139, + "loss": 0.2425, + "rejected_geometric_mean": -11.69494342803955, + "step": 4351 + }, + { + "chosen_geometric_mean": -0.964489221572876, + "epoch": 1.08, + "grad_norm": 9.0, + "learning_rate": 2.2008885179537846e-06, + "log_odds": 6.665999412536621, + "log_odds_ratio": -0.19497984647750854, + "loss": 0.2428, + "rejected_geometric_mean": -7.276634693145752, + "step": 4352 + }, + { + "chosen_geometric_mean": -1.025580883026123, + "epoch": 1.08, + "grad_norm": 6.90625, + "learning_rate": 2.1999220601380256e-06, + "log_odds": 3.5151054859161377, + "log_odds_ratio": -0.3444264531135559, + "loss": 0.2951, + "rejected_geometric_mean": -4.282384872436523, + "step": 4353 + }, + { + "chosen_geometric_mean": -0.9521561861038208, + "epoch": 1.08, + "grad_norm": 4.90625, + "learning_rate": 2.1989556478212867e-06, + "log_odds": 0.9419647455215454, + "log_odds_ratio": -0.4093751907348633, + "loss": 0.2688, + "rejected_geometric_mean": -1.6865770816802979, + "step": 4354 + }, + { + "chosen_geometric_mean": -0.8165477514266968, + "epoch": 1.08, + "grad_norm": 4.90625, + "learning_rate": 2.1979892811500976e-06, + "log_odds": 7.67488956451416, + "log_odds_ratio": -0.24842935800552368, + "loss": 0.2561, + "rejected_geometric_mean": -8.151854515075684, + "step": 4355 + }, + { + "chosen_geometric_mean": -0.7287441492080688, + "epoch": 1.08, + "grad_norm": 17.0, + "learning_rate": 2.197022960270983e-06, + "log_odds": 8.39431095123291, + "log_odds_ratio": -0.30560892820358276, + "loss": 0.2522, + "rejected_geometric_mean": -8.71373176574707, + "step": 4356 + }, + { + "chosen_geometric_mean": -1.0202637910842896, + "epoch": 1.08, + "grad_norm": 6.40625, + "learning_rate": 2.19605668533046e-06, + "log_odds": 5.362788677215576, + "log_odds_ratio": -0.16367366909980774, + "loss": 0.3244, + "rejected_geometric_mean": -6.005012512207031, + "step": 4357 + }, + { + "chosen_geometric_mean": -1.1318144798278809, + "epoch": 1.08, + "grad_norm": 6.28125, + "learning_rate": 2.19509045647504e-06, + "log_odds": 2.695338249206543, + "log_odds_ratio": -0.30458784103393555, + "loss": 0.274, + "rejected_geometric_mean": -3.613013744354248, + "step": 4358 + }, + { + "chosen_geometric_mean": -0.9355777502059937, + "epoch": 1.08, + "grad_norm": 2.078125, + "learning_rate": 2.1941242738512242e-06, + "log_odds": 3.26137113571167, + "log_odds_ratio": -0.14981739223003387, + "loss": 0.259, + "rejected_geometric_mean": -3.794755458831787, + "step": 4359 + }, + { + "chosen_geometric_mean": -1.0692760944366455, + "epoch": 1.08, + "grad_norm": 1.6953125, + "learning_rate": 2.1931581376055118e-06, + "log_odds": 4.291703701019287, + "log_odds_ratio": -0.2973763644695282, + "loss": 0.2187, + "rejected_geometric_mean": -5.060616493225098, + "step": 4360 + }, + { + "chosen_geometric_mean": -1.094616174697876, + "epoch": 1.08, + "grad_norm": 3.296875, + "learning_rate": 2.1921920478843913e-06, + "log_odds": 13.543407440185547, + "log_odds_ratio": -0.041850678622722626, + "loss": 0.2923, + "rejected_geometric_mean": -14.236656188964844, + "step": 4361 + }, + { + "chosen_geometric_mean": -1.0726990699768066, + "epoch": 1.08, + "grad_norm": 1.9296875, + "learning_rate": 2.1912260048343446e-06, + "log_odds": 7.141548156738281, + "log_odds_ratio": -0.1294267773628235, + "loss": 0.2152, + "rejected_geometric_mean": -7.867470741271973, + "step": 4362 + }, + { + "chosen_geometric_mean": -0.808674156665802, + "epoch": 1.08, + "grad_norm": 1.9453125, + "learning_rate": 2.1902600086018476e-06, + "log_odds": 6.156194686889648, + "log_odds_ratio": -0.2244056910276413, + "loss": 0.2519, + "rejected_geometric_mean": -6.524427890777588, + "step": 4363 + }, + { + "chosen_geometric_mean": -0.9009338021278381, + "epoch": 1.08, + "grad_norm": 2.609375, + "learning_rate": 2.1892940593333677e-06, + "log_odds": 1.4224780797958374, + "log_odds_ratio": -0.3039197623729706, + "loss": 0.2689, + "rejected_geometric_mean": -2.004556894302368, + "step": 4364 + }, + { + "chosen_geometric_mean": -0.9410046935081482, + "epoch": 1.08, + "grad_norm": 32.25, + "learning_rate": 2.188328157175367e-06, + "log_odds": 2.417055130004883, + "log_odds_ratio": -0.2500542402267456, + "loss": 0.3257, + "rejected_geometric_mean": -3.067950487136841, + "step": 4365 + }, + { + "chosen_geometric_mean": -0.7568567991256714, + "epoch": 1.08, + "grad_norm": 8.5, + "learning_rate": 2.187362302274299e-06, + "log_odds": 8.492032051086426, + "log_odds_ratio": -0.2133273333311081, + "loss": 0.2314, + "rejected_geometric_mean": -8.768580436706543, + "step": 4366 + }, + { + "chosen_geometric_mean": -1.0871127843856812, + "epoch": 1.08, + "grad_norm": 3.234375, + "learning_rate": 2.1863964947766108e-06, + "log_odds": 12.055549621582031, + "log_odds_ratio": -0.05903603509068489, + "loss": 0.2351, + "rejected_geometric_mean": -12.728668212890625, + "step": 4367 + }, + { + "chosen_geometric_mean": -1.0190836191177368, + "epoch": 1.08, + "grad_norm": 2.296875, + "learning_rate": 2.185430734828742e-06, + "log_odds": 6.378055095672607, + "log_odds_ratio": -0.020698128268122673, + "loss": 0.2339, + "rejected_geometric_mean": -6.93797492980957, + "step": 4368 + }, + { + "chosen_geometric_mean": -1.1331303119659424, + "epoch": 1.08, + "grad_norm": 3.8125, + "learning_rate": 2.1844650225771237e-06, + "log_odds": 2.692737579345703, + "log_odds_ratio": -0.4000051021575928, + "loss": 0.2539, + "rejected_geometric_mean": -3.6820077896118164, + "step": 4369 + }, + { + "chosen_geometric_mean": -0.847488284111023, + "epoch": 1.08, + "grad_norm": 1.8359375, + "learning_rate": 2.183499358168182e-06, + "log_odds": 12.640522003173828, + "log_odds_ratio": -0.034076374024152756, + "loss": 0.2431, + "rejected_geometric_mean": -12.915303230285645, + "step": 4370 + }, + { + "chosen_geometric_mean": -0.9534295797348022, + "epoch": 1.08, + "grad_norm": 6.1875, + "learning_rate": 2.1825337417483356e-06, + "log_odds": 7.07035493850708, + "log_odds_ratio": -0.23287822306156158, + "loss": 0.2595, + "rejected_geometric_mean": -7.6281585693359375, + "step": 4371 + }, + { + "chosen_geometric_mean": -0.8829809427261353, + "epoch": 1.08, + "grad_norm": 25.875, + "learning_rate": 2.181568173463994e-06, + "log_odds": 13.82707691192627, + "log_odds_ratio": -0.16382957994937897, + "loss": 0.2654, + "rejected_geometric_mean": -14.289349555969238, + "step": 4372 + }, + { + "chosen_geometric_mean": -0.8728548288345337, + "epoch": 1.08, + "grad_norm": 26.25, + "learning_rate": 2.180602653461561e-06, + "log_odds": 4.948078155517578, + "log_odds_ratio": -0.2054106891155243, + "loss": 0.2809, + "rejected_geometric_mean": -5.390206336975098, + "step": 4373 + }, + { + "chosen_geometric_mean": -1.1414098739624023, + "epoch": 1.08, + "grad_norm": 8.3125, + "learning_rate": 2.1796371818874325e-06, + "log_odds": 10.225198745727539, + "log_odds_ratio": -0.007792860269546509, + "loss": 0.2754, + "rejected_geometric_mean": -10.917993545532227, + "step": 4374 + }, + { + "chosen_geometric_mean": -0.804347574710846, + "epoch": 1.08, + "grad_norm": 6.75, + "learning_rate": 2.178671758887997e-06, + "log_odds": 4.371761798858643, + "log_odds_ratio": -0.2822961211204529, + "loss": 0.2284, + "rejected_geometric_mean": -4.745703220367432, + "step": 4375 + }, + { + "chosen_geometric_mean": -1.4148449897766113, + "epoch": 1.08, + "grad_norm": 40.75, + "learning_rate": 2.177706384609636e-06, + "log_odds": 10.770858764648438, + "log_odds_ratio": -0.13991782069206238, + "loss": 0.2679, + "rejected_geometric_mean": -11.932838439941406, + "step": 4376 + }, + { + "chosen_geometric_mean": -0.9272980690002441, + "epoch": 1.08, + "grad_norm": 29.5, + "learning_rate": 2.1767410591987243e-06, + "log_odds": 12.63885498046875, + "log_odds_ratio": -0.13248084485530853, + "loss": 0.392, + "rejected_geometric_mean": -13.11561393737793, + "step": 4377 + }, + { + "chosen_geometric_mean": -0.8164241909980774, + "epoch": 1.08, + "grad_norm": 4.21875, + "learning_rate": 2.175775782801627e-06, + "log_odds": 7.348127365112305, + "log_odds_ratio": -0.12765736877918243, + "loss": 0.2337, + "rejected_geometric_mean": -7.691952705383301, + "step": 4378 + }, + { + "chosen_geometric_mean": -0.8510432243347168, + "epoch": 1.08, + "grad_norm": 3.3125, + "learning_rate": 2.1748105555647035e-06, + "log_odds": 7.274925231933594, + "log_odds_ratio": -0.0014582063304260373, + "loss": 0.2645, + "rejected_geometric_mean": -7.556856632232666, + "step": 4379 + }, + { + "chosen_geometric_mean": -1.1378697156906128, + "epoch": 1.08, + "grad_norm": 9.375, + "learning_rate": 2.1738453776343053e-06, + "log_odds": 11.913854598999023, + "log_odds_ratio": -0.0003172180731780827, + "loss": 0.273, + "rejected_geometric_mean": -12.647439956665039, + "step": 4380 + }, + { + "chosen_geometric_mean": -0.946987509727478, + "epoch": 1.08, + "grad_norm": 1.7578125, + "learning_rate": 2.1728802491567767e-06, + "log_odds": 14.514144897460938, + "log_odds_ratio": -0.0001740166189847514, + "loss": 0.2276, + "rejected_geometric_mean": -14.964924812316895, + "step": 4381 + }, + { + "chosen_geometric_mean": -0.8891327381134033, + "epoch": 1.08, + "grad_norm": 38.0, + "learning_rate": 2.171915170278455e-06, + "log_odds": 12.117569923400879, + "log_odds_ratio": -0.16493533551692963, + "loss": 0.2693, + "rejected_geometric_mean": -12.619086265563965, + "step": 4382 + }, + { + "chosen_geometric_mean": -0.9308913350105286, + "epoch": 1.09, + "grad_norm": 52.75, + "learning_rate": 2.170950141145669e-06, + "log_odds": 3.23360013961792, + "log_odds_ratio": -0.23691880702972412, + "loss": 0.3257, + "rejected_geometric_mean": -3.8093314170837402, + "step": 4383 + }, + { + "chosen_geometric_mean": -1.449303150177002, + "epoch": 1.09, + "grad_norm": 14.25, + "learning_rate": 2.1699851619047403e-06, + "log_odds": 15.093286514282227, + "log_odds_ratio": -0.08675305545330048, + "loss": 0.2926, + "rejected_geometric_mean": -16.267873764038086, + "step": 4384 + }, + { + "chosen_geometric_mean": -0.8696516752243042, + "epoch": 1.09, + "grad_norm": 42.0, + "learning_rate": 2.169020232701982e-06, + "log_odds": 7.733453750610352, + "log_odds_ratio": -0.1410478949546814, + "loss": 0.2431, + "rejected_geometric_mean": -8.170705795288086, + "step": 4385 + }, + { + "chosen_geometric_mean": -0.9690297245979309, + "epoch": 1.09, + "grad_norm": 8.0625, + "learning_rate": 2.1680553536837006e-06, + "log_odds": 6.568597793579102, + "log_odds_ratio": -0.03176551312208176, + "loss": 0.2814, + "rejected_geometric_mean": -7.039105415344238, + "step": 4386 + }, + { + "chosen_geometric_mean": -1.204627275466919, + "epoch": 1.09, + "grad_norm": 16.375, + "learning_rate": 2.1670905249961966e-06, + "log_odds": 1.7899569272994995, + "log_odds_ratio": -0.17845194041728973, + "loss": 0.2707, + "rejected_geometric_mean": -2.707958698272705, + "step": 4387 + }, + { + "chosen_geometric_mean": -0.9420483112335205, + "epoch": 1.09, + "grad_norm": 2.078125, + "learning_rate": 2.16612574678576e-06, + "log_odds": 7.497082710266113, + "log_odds_ratio": -0.28518402576446533, + "loss": 0.2357, + "rejected_geometric_mean": -8.190698623657227, + "step": 4388 + }, + { + "chosen_geometric_mean": -1.1438992023468018, + "epoch": 1.09, + "grad_norm": 31.5, + "learning_rate": 2.1651610191986747e-06, + "log_odds": 8.615616798400879, + "log_odds_ratio": -0.21637795865535736, + "loss": 0.2872, + "rejected_geometric_mean": -9.486865997314453, + "step": 4389 + }, + { + "chosen_geometric_mean": -1.0898914337158203, + "epoch": 1.09, + "grad_norm": 6.1875, + "learning_rate": 2.164196342381216e-06, + "log_odds": 3.803926467895508, + "log_odds_ratio": -0.32543861865997314, + "loss": 0.2747, + "rejected_geometric_mean": -4.6646599769592285, + "step": 4390 + }, + { + "chosen_geometric_mean": -1.0187939405441284, + "epoch": 1.09, + "grad_norm": 11.8125, + "learning_rate": 2.1632317164796533e-06, + "log_odds": 5.628644943237305, + "log_odds_ratio": -0.012076282873749733, + "loss": 0.2866, + "rejected_geometric_mean": -6.178013801574707, + "step": 4391 + }, + { + "chosen_geometric_mean": -0.926052987575531, + "epoch": 1.09, + "grad_norm": 2.4375, + "learning_rate": 2.162267141640244e-06, + "log_odds": 4.477517604827881, + "log_odds_ratio": -0.15455308556556702, + "loss": 0.2457, + "rejected_geometric_mean": -4.988646984100342, + "step": 4392 + }, + { + "chosen_geometric_mean": -0.8714168667793274, + "epoch": 1.09, + "grad_norm": 4.03125, + "learning_rate": 2.1613026180092444e-06, + "log_odds": 5.216503143310547, + "log_odds_ratio": -0.09478700160980225, + "loss": 0.2282, + "rejected_geometric_mean": -5.580948829650879, + "step": 4393 + }, + { + "chosen_geometric_mean": -1.0001217126846313, + "epoch": 1.09, + "grad_norm": 20.0, + "learning_rate": 2.1603381457328983e-06, + "log_odds": 12.036036491394043, + "log_odds_ratio": -0.001115111168473959, + "loss": 0.2551, + "rejected_geometric_mean": -12.511555671691895, + "step": 4394 + }, + { + "chosen_geometric_mean": -0.9109258651733398, + "epoch": 1.09, + "grad_norm": 27.5, + "learning_rate": 2.159373724957442e-06, + "log_odds": 9.570517539978027, + "log_odds_ratio": -0.10672750324010849, + "loss": 0.2654, + "rejected_geometric_mean": -10.021472930908203, + "step": 4395 + }, + { + "chosen_geometric_mean": -0.9450293779373169, + "epoch": 1.09, + "grad_norm": 4.34375, + "learning_rate": 2.1584093558291056e-06, + "log_odds": 11.46656608581543, + "log_odds_ratio": -0.1236383393406868, + "loss": 0.2823, + "rejected_geometric_mean": -12.013717651367188, + "step": 4396 + }, + { + "chosen_geometric_mean": -1.129127860069275, + "epoch": 1.09, + "grad_norm": 20.0, + "learning_rate": 2.1574450384941095e-06, + "log_odds": 5.77900505065918, + "log_odds_ratio": -0.33676791191101074, + "loss": 0.2737, + "rejected_geometric_mean": -6.715679168701172, + "step": 4397 + }, + { + "chosen_geometric_mean": -0.9248152375221252, + "epoch": 1.09, + "grad_norm": 10.25, + "learning_rate": 2.156480773098669e-06, + "log_odds": 7.98102331161499, + "log_odds_ratio": -0.09022507816553116, + "loss": 0.2803, + "rejected_geometric_mean": -8.471375465393066, + "step": 4398 + }, + { + "chosen_geometric_mean": -0.9436107873916626, + "epoch": 1.09, + "grad_norm": 2.484375, + "learning_rate": 2.155516559788989e-06, + "log_odds": 5.2729034423828125, + "log_odds_ratio": -0.07316995412111282, + "loss": 0.2457, + "rejected_geometric_mean": -5.7737627029418945, + "step": 4399 + }, + { + "chosen_geometric_mean": -1.03666353225708, + "epoch": 1.09, + "grad_norm": 2.65625, + "learning_rate": 2.1545523987112675e-06, + "log_odds": 9.139348983764648, + "log_odds_ratio": -0.12675093114376068, + "loss": 0.2907, + "rejected_geometric_mean": -9.770049095153809, + "step": 4400 + }, + { + "chosen_geometric_mean": -0.8626947402954102, + "epoch": 1.09, + "grad_norm": 31.0, + "learning_rate": 2.153588290011695e-06, + "log_odds": 3.0537633895874023, + "log_odds_ratio": -0.13829168677330017, + "loss": 0.2599, + "rejected_geometric_mean": -3.45162296295166, + "step": 4401 + }, + { + "chosen_geometric_mean": -1.1331604719161987, + "epoch": 1.09, + "grad_norm": 8.25, + "learning_rate": 2.1526242338364527e-06, + "log_odds": 5.2602152824401855, + "log_odds_ratio": -0.18544241786003113, + "loss": 0.3091, + "rejected_geometric_mean": -6.080112934112549, + "step": 4402 + }, + { + "chosen_geometric_mean": -0.9788390398025513, + "epoch": 1.09, + "grad_norm": 10.1875, + "learning_rate": 2.151660230331714e-06, + "log_odds": 8.85052490234375, + "log_odds_ratio": -0.01528700441122055, + "loss": 0.2634, + "rejected_geometric_mean": -9.366235733032227, + "step": 4403 + }, + { + "chosen_geometric_mean": -0.8985164761543274, + "epoch": 1.09, + "grad_norm": 4.5625, + "learning_rate": 2.1506962796436465e-06, + "log_odds": 4.340761184692383, + "log_odds_ratio": -0.019431762397289276, + "loss": 0.2396, + "rejected_geometric_mean": -4.708183765411377, + "step": 4404 + }, + { + "chosen_geometric_mean": -1.0469669103622437, + "epoch": 1.09, + "grad_norm": 3.03125, + "learning_rate": 2.1497323819184077e-06, + "log_odds": 8.000176429748535, + "log_odds_ratio": -0.01926909200847149, + "loss": 0.2123, + "rejected_geometric_mean": -8.600689888000488, + "step": 4405 + }, + { + "chosen_geometric_mean": -1.1429624557495117, + "epoch": 1.09, + "grad_norm": 1.6796875, + "learning_rate": 2.148768537302148e-06, + "log_odds": 5.582614898681641, + "log_odds_ratio": -0.04248245805501938, + "loss": 0.2053, + "rejected_geometric_mean": -6.348174095153809, + "step": 4406 + }, + { + "chosen_geometric_mean": -0.9133132100105286, + "epoch": 1.09, + "grad_norm": 3.34375, + "learning_rate": 2.147804745941009e-06, + "log_odds": 6.374265193939209, + "log_odds_ratio": -0.18635646998882294, + "loss": 0.2615, + "rejected_geometric_mean": -6.867817401885986, + "step": 4407 + }, + { + "chosen_geometric_mean": -1.142745852470398, + "epoch": 1.09, + "grad_norm": 31.875, + "learning_rate": 2.146841007981123e-06, + "log_odds": 6.28156852722168, + "log_odds_ratio": -0.46469834446907043, + "loss": 0.3239, + "rejected_geometric_mean": -7.2173614501953125, + "step": 4408 + }, + { + "chosen_geometric_mean": -0.7874458432197571, + "epoch": 1.09, + "grad_norm": 21.25, + "learning_rate": 2.1458773235686184e-06, + "log_odds": 10.467510223388672, + "log_odds_ratio": -0.08138174563646317, + "loss": 0.2678, + "rejected_geometric_mean": -10.729032516479492, + "step": 4409 + }, + { + "chosen_geometric_mean": -1.0847358703613281, + "epoch": 1.09, + "grad_norm": 2.046875, + "learning_rate": 2.1449136928496127e-06, + "log_odds": 3.1096134185791016, + "log_odds_ratio": -0.0512261763215065, + "loss": 0.2133, + "rejected_geometric_mean": -3.8043737411499023, + "step": 4410 + }, + { + "chosen_geometric_mean": -0.990892767906189, + "epoch": 1.09, + "grad_norm": 2.21875, + "learning_rate": 2.143950115970214e-06, + "log_odds": 16.31690216064453, + "log_odds_ratio": -7.31899417587556e-05, + "loss": 0.3157, + "rejected_geometric_mean": -16.824533462524414, + "step": 4411 + }, + { + "chosen_geometric_mean": -2.0064830780029297, + "epoch": 1.09, + "grad_norm": 65.5, + "learning_rate": 2.1429865930765244e-06, + "log_odds": 4.9633073806762695, + "log_odds_ratio": -0.24013406038284302, + "loss": 0.3572, + "rejected_geometric_mean": -6.766315937042236, + "step": 4412 + }, + { + "chosen_geometric_mean": -0.9536842107772827, + "epoch": 1.09, + "grad_norm": 7.4375, + "learning_rate": 2.142023124314636e-06, + "log_odds": 16.628759384155273, + "log_odds_ratio": -1.8120388631359674e-05, + "loss": 0.2523, + "rejected_geometric_mean": -17.057973861694336, + "step": 4413 + }, + { + "chosen_geometric_mean": -0.9595434069633484, + "epoch": 1.09, + "grad_norm": 30.0, + "learning_rate": 2.141059709830636e-06, + "log_odds": 9.034778594970703, + "log_odds_ratio": -0.06432854384183884, + "loss": 0.3309, + "rejected_geometric_mean": -9.530739784240723, + "step": 4414 + }, + { + "chosen_geometric_mean": -1.0462907552719116, + "epoch": 1.09, + "grad_norm": 3.671875, + "learning_rate": 2.1400963497705995e-06, + "log_odds": 6.844318389892578, + "log_odds_ratio": -0.27438074350357056, + "loss": 0.2917, + "rejected_geometric_mean": -7.5563273429870605, + "step": 4415 + }, + { + "chosen_geometric_mean": -1.0938210487365723, + "epoch": 1.09, + "grad_norm": 4.03125, + "learning_rate": 2.1391330442805958e-06, + "log_odds": 6.041777610778809, + "log_odds_ratio": -0.1753193438053131, + "loss": 0.2795, + "rejected_geometric_mean": -6.827050685882568, + "step": 4416 + }, + { + "chosen_geometric_mean": -0.9840219020843506, + "epoch": 1.09, + "grad_norm": 4.0, + "learning_rate": 2.138169793506685e-06, + "log_odds": 8.831363677978516, + "log_odds_ratio": -0.1256682574748993, + "loss": 0.2415, + "rejected_geometric_mean": -9.368942260742188, + "step": 4417 + }, + { + "chosen_geometric_mean": -0.77483069896698, + "epoch": 1.09, + "grad_norm": 3.625, + "learning_rate": 2.1372065975949184e-06, + "log_odds": 9.463335990905762, + "log_odds_ratio": -0.01511404849588871, + "loss": 0.24, + "rejected_geometric_mean": -9.628188133239746, + "step": 4418 + }, + { + "chosen_geometric_mean": -0.9289878010749817, + "epoch": 1.09, + "grad_norm": 3.40625, + "learning_rate": 2.13624345669134e-06, + "log_odds": 15.58751106262207, + "log_odds_ratio": -0.03645964711904526, + "loss": 0.2793, + "rejected_geometric_mean": -16.025588989257812, + "step": 4419 + }, + { + "chosen_geometric_mean": -1.0506300926208496, + "epoch": 1.09, + "grad_norm": 16.875, + "learning_rate": 2.135280370941985e-06, + "log_odds": 3.7957839965820312, + "log_odds_ratio": -0.034987062215805054, + "loss": 0.2672, + "rejected_geometric_mean": -4.424725532531738, + "step": 4420 + }, + { + "chosen_geometric_mean": -1.062518835067749, + "epoch": 1.09, + "grad_norm": 1.9609375, + "learning_rate": 2.1343173404928807e-06, + "log_odds": 6.869589328765869, + "log_odds_ratio": -0.05722496286034584, + "loss": 0.2406, + "rejected_geometric_mean": -7.513031959533691, + "step": 4421 + }, + { + "chosen_geometric_mean": -0.8826966881752014, + "epoch": 1.09, + "grad_norm": 11.8125, + "learning_rate": 2.1333543654900455e-06, + "log_odds": 1.2374801635742188, + "log_odds_ratio": -0.3430074155330658, + "loss": 0.2269, + "rejected_geometric_mean": -1.8417754173278809, + "step": 4422 + }, + { + "chosen_geometric_mean": -0.8960306644439697, + "epoch": 1.1, + "grad_norm": 1.859375, + "learning_rate": 2.1323914460794896e-06, + "log_odds": 9.4619722366333, + "log_odds_ratio": -0.12449260801076889, + "loss": 0.2247, + "rejected_geometric_mean": -9.831706047058105, + "step": 4423 + }, + { + "chosen_geometric_mean": -1.026357889175415, + "epoch": 1.1, + "grad_norm": 1.859375, + "learning_rate": 2.131428582407214e-06, + "log_odds": 11.47795295715332, + "log_odds_ratio": -0.11204016953706741, + "loss": 0.2494, + "rejected_geometric_mean": -12.109600067138672, + "step": 4424 + }, + { + "chosen_geometric_mean": -1.2544336318969727, + "epoch": 1.1, + "grad_norm": 42.0, + "learning_rate": 2.1304657746192128e-06, + "log_odds": 6.165933609008789, + "log_odds_ratio": -0.11881844699382782, + "loss": 0.2839, + "rejected_geometric_mean": -7.109254837036133, + "step": 4425 + }, + { + "chosen_geometric_mean": -1.1074142456054688, + "epoch": 1.1, + "grad_norm": 2.90625, + "learning_rate": 2.129503022861471e-06, + "log_odds": 3.4928338527679443, + "log_odds_ratio": -0.05629449337720871, + "loss": 0.2307, + "rejected_geometric_mean": -4.217962741851807, + "step": 4426 + }, + { + "chosen_geometric_mean": -0.98953777551651, + "epoch": 1.1, + "grad_norm": 4.53125, + "learning_rate": 2.1285403272799633e-06, + "log_odds": 5.186025142669678, + "log_odds_ratio": -0.1740904152393341, + "loss": 0.2595, + "rejected_geometric_mean": -5.771279335021973, + "step": 4427 + }, + { + "chosen_geometric_mean": -1.058221459388733, + "epoch": 1.1, + "grad_norm": 7.03125, + "learning_rate": 2.127577688020659e-06, + "log_odds": 4.055091857910156, + "log_odds_ratio": -0.10929176211357117, + "loss": 0.2466, + "rejected_geometric_mean": -4.736754417419434, + "step": 4428 + }, + { + "chosen_geometric_mean": -0.9615159034729004, + "epoch": 1.1, + "grad_norm": 8.375, + "learning_rate": 2.126615105229517e-06, + "log_odds": 4.378780364990234, + "log_odds_ratio": -0.11040760576725006, + "loss": 0.2165, + "rejected_geometric_mean": -4.8770246505737305, + "step": 4429 + }, + { + "chosen_geometric_mean": -0.9706945419311523, + "epoch": 1.1, + "grad_norm": 29.875, + "learning_rate": 2.1256525790524867e-06, + "log_odds": 10.084297180175781, + "log_odds_ratio": -0.18507643043994904, + "loss": 0.2473, + "rejected_geometric_mean": -10.684675216674805, + "step": 4430 + }, + { + "chosen_geometric_mean": -1.1120622158050537, + "epoch": 1.1, + "grad_norm": 79.0, + "learning_rate": 2.124690109635512e-06, + "log_odds": 2.856158971786499, + "log_odds_ratio": -0.5013444423675537, + "loss": 0.4487, + "rejected_geometric_mean": -3.8170430660247803, + "step": 4431 + }, + { + "chosen_geometric_mean": -0.9818233251571655, + "epoch": 1.1, + "grad_norm": 3.5625, + "learning_rate": 2.1237276971245254e-06, + "log_odds": 3.025709629058838, + "log_odds_ratio": -0.3145681321620941, + "loss": 0.2591, + "rejected_geometric_mean": -3.717423439025879, + "step": 4432 + }, + { + "chosen_geometric_mean": -1.1922965049743652, + "epoch": 1.1, + "grad_norm": 2.265625, + "learning_rate": 2.1227653416654526e-06, + "log_odds": 8.047163009643555, + "log_odds_ratio": -0.0013698451220989227, + "loss": 0.3114, + "rejected_geometric_mean": -8.837608337402344, + "step": 4433 + }, + { + "chosen_geometric_mean": -0.7831994891166687, + "epoch": 1.1, + "grad_norm": 2.4375, + "learning_rate": 2.1218030434042087e-06, + "log_odds": 0.9686481952667236, + "log_odds_ratio": -0.3786832094192505, + "loss": 0.2347, + "rejected_geometric_mean": -1.418799638748169, + "step": 4434 + }, + { + "chosen_geometric_mean": -0.8101124167442322, + "epoch": 1.1, + "grad_norm": 18.75, + "learning_rate": 2.1208408024867e-06, + "log_odds": 11.684892654418945, + "log_odds_ratio": -0.1738462597131729, + "loss": 0.3053, + "rejected_geometric_mean": -12.043947219848633, + "step": 4435 + }, + { + "chosen_geometric_mean": -0.8325150609016418, + "epoch": 1.1, + "grad_norm": 2.109375, + "learning_rate": 2.1198786190588293e-06, + "log_odds": 3.2979164123535156, + "log_odds_ratio": -0.15941528975963593, + "loss": 0.2126, + "rejected_geometric_mean": -3.659613847732544, + "step": 4436 + }, + { + "chosen_geometric_mean": -1.0822516679763794, + "epoch": 1.1, + "grad_norm": 59.75, + "learning_rate": 2.1189164932664835e-06, + "log_odds": 0.21828362345695496, + "log_odds_ratio": -0.7093689441680908, + "loss": 0.2747, + "rejected_geometric_mean": -1.3265291452407837, + "step": 4437 + }, + { + "chosen_geometric_mean": -0.8317996859550476, + "epoch": 1.1, + "grad_norm": 3.765625, + "learning_rate": 2.117954425255545e-06, + "log_odds": 6.105576515197754, + "log_odds_ratio": -0.04004083573818207, + "loss": 0.2246, + "rejected_geometric_mean": -6.3982319831848145, + "step": 4438 + }, + { + "chosen_geometric_mean": -0.8571861386299133, + "epoch": 1.1, + "grad_norm": 12.875, + "learning_rate": 2.1169924151718858e-06, + "log_odds": 9.822142601013184, + "log_odds_ratio": -0.0431947335600853, + "loss": 0.2339, + "rejected_geometric_mean": -10.113393783569336, + "step": 4439 + }, + { + "chosen_geometric_mean": -0.9510639309883118, + "epoch": 1.1, + "grad_norm": 13.375, + "learning_rate": 2.116030463161371e-06, + "log_odds": 3.53998064994812, + "log_odds_ratio": -0.3278418779373169, + "loss": 0.2647, + "rejected_geometric_mean": -4.267336845397949, + "step": 4440 + }, + { + "chosen_geometric_mean": -1.0071613788604736, + "epoch": 1.1, + "grad_norm": 5.15625, + "learning_rate": 2.1150685693698532e-06, + "log_odds": 3.4795351028442383, + "log_odds_ratio": -0.34757792949676514, + "loss": 0.258, + "rejected_geometric_mean": -4.274966239929199, + "step": 4441 + }, + { + "chosen_geometric_mean": -0.8295732736587524, + "epoch": 1.1, + "grad_norm": 2.078125, + "learning_rate": 2.114106733943181e-06, + "log_odds": 9.951563835144043, + "log_odds_ratio": -8.749124390305951e-05, + "loss": 0.2175, + "rejected_geometric_mean": -10.201638221740723, + "step": 4442 + }, + { + "chosen_geometric_mean": -1.116520643234253, + "epoch": 1.1, + "grad_norm": 52.5, + "learning_rate": 2.1131449570271915e-06, + "log_odds": 2.649221420288086, + "log_odds_ratio": -0.4013674259185791, + "loss": 0.241, + "rejected_geometric_mean": -3.5761024951934814, + "step": 4443 + }, + { + "chosen_geometric_mean": -0.8663051128387451, + "epoch": 1.1, + "grad_norm": 2.0, + "learning_rate": 2.112183238767712e-06, + "log_odds": 10.301027297973633, + "log_odds_ratio": -0.01174178533256054, + "loss": 0.2698, + "rejected_geometric_mean": -10.613227844238281, + "step": 4444 + }, + { + "chosen_geometric_mean": -1.1290507316589355, + "epoch": 1.1, + "grad_norm": 7.9375, + "learning_rate": 2.1112215793105628e-06, + "log_odds": 3.241332530975342, + "log_odds_ratio": -0.08446653932332993, + "loss": 0.2663, + "rejected_geometric_mean": -4.006877422332764, + "step": 4445 + }, + { + "chosen_geometric_mean": -1.0565550327301025, + "epoch": 1.1, + "grad_norm": 30.0, + "learning_rate": 2.110259978801554e-06, + "log_odds": 12.371418952941895, + "log_odds_ratio": -0.008831452578306198, + "loss": 0.2687, + "rejected_geometric_mean": -12.999443054199219, + "step": 4446 + }, + { + "chosen_geometric_mean": -1.1685246229171753, + "epoch": 1.1, + "grad_norm": 12.5625, + "learning_rate": 2.1092984373864876e-06, + "log_odds": 11.938997268676758, + "log_odds_ratio": -0.045756276696920395, + "loss": 0.2356, + "rejected_geometric_mean": -12.739400863647461, + "step": 4447 + }, + { + "chosen_geometric_mean": -0.9170413613319397, + "epoch": 1.1, + "grad_norm": 8.875, + "learning_rate": 2.108336955211157e-06, + "log_odds": 8.566816329956055, + "log_odds_ratio": -0.14542560279369354, + "loss": 0.2965, + "rejected_geometric_mean": -8.9949951171875, + "step": 4448 + }, + { + "chosen_geometric_mean": -1.2029627561569214, + "epoch": 1.1, + "grad_norm": 48.75, + "learning_rate": 2.1073755324213452e-06, + "log_odds": 8.529762268066406, + "log_odds_ratio": -0.10859094560146332, + "loss": 0.2795, + "rejected_geometric_mean": -9.412662506103516, + "step": 4449 + }, + { + "chosen_geometric_mean": -0.9895482063293457, + "epoch": 1.1, + "grad_norm": 2.84375, + "learning_rate": 2.1064141691628276e-06, + "log_odds": 2.735060214996338, + "log_odds_ratio": -0.33733493089675903, + "loss": 0.2646, + "rejected_geometric_mean": -3.473097085952759, + "step": 4450 + }, + { + "chosen_geometric_mean": -1.047919511795044, + "epoch": 1.1, + "grad_norm": 14.4375, + "learning_rate": 2.1054528655813695e-06, + "log_odds": 5.627335548400879, + "log_odds_ratio": -0.12581989169120789, + "loss": 0.2749, + "rejected_geometric_mean": -6.269984245300293, + "step": 4451 + }, + { + "chosen_geometric_mean": -1.0904490947723389, + "epoch": 1.1, + "grad_norm": 8.1875, + "learning_rate": 2.1044916218227264e-06, + "log_odds": 4.271294593811035, + "log_odds_ratio": -0.2483154833316803, + "loss": 0.2894, + "rejected_geometric_mean": -5.057955741882324, + "step": 4452 + }, + { + "chosen_geometric_mean": -0.9727611541748047, + "epoch": 1.1, + "grad_norm": 2.109375, + "learning_rate": 2.1035304380326484e-06, + "log_odds": 11.802096366882324, + "log_odds_ratio": -0.00818453636020422, + "loss": 0.2659, + "rejected_geometric_mean": -12.29545783996582, + "step": 4453 + }, + { + "chosen_geometric_mean": -0.9793297052383423, + "epoch": 1.1, + "grad_norm": 3.109375, + "learning_rate": 2.1025693143568726e-06, + "log_odds": 7.708816051483154, + "log_odds_ratio": -0.11668892204761505, + "loss": 0.255, + "rejected_geometric_mean": -8.293865203857422, + "step": 4454 + }, + { + "chosen_geometric_mean": -0.8978814482688904, + "epoch": 1.1, + "grad_norm": 71.0, + "learning_rate": 2.1016082509411286e-06, + "log_odds": 7.404898643493652, + "log_odds_ratio": -0.12034621834754944, + "loss": 0.2642, + "rejected_geometric_mean": -7.833907127380371, + "step": 4455 + }, + { + "chosen_geometric_mean": -1.0887997150421143, + "epoch": 1.1, + "grad_norm": 2.15625, + "learning_rate": 2.1006472479311364e-06, + "log_odds": 16.85439682006836, + "log_odds_ratio": -0.12517490983009338, + "loss": 0.2822, + "rejected_geometric_mean": -17.551725387573242, + "step": 4456 + }, + { + "chosen_geometric_mean": -0.9264764189720154, + "epoch": 1.1, + "grad_norm": 44.25, + "learning_rate": 2.0996863054726073e-06, + "log_odds": 8.853015899658203, + "log_odds_ratio": -0.007700522895902395, + "loss": 0.2751, + "rejected_geometric_mean": -9.246513366699219, + "step": 4457 + }, + { + "chosen_geometric_mean": -1.1201012134552002, + "epoch": 1.1, + "grad_norm": 7.75, + "learning_rate": 2.0987254237112437e-06, + "log_odds": 10.45307731628418, + "log_odds_ratio": -0.0017049310263246298, + "loss": 0.3021, + "rejected_geometric_mean": -11.176450729370117, + "step": 4458 + }, + { + "chosen_geometric_mean": -1.0055687427520752, + "epoch": 1.1, + "grad_norm": 13.1875, + "learning_rate": 2.097764602792739e-06, + "log_odds": 7.510682106018066, + "log_odds_ratio": -0.013079529628157616, + "loss": 0.2919, + "rejected_geometric_mean": -8.060807228088379, + "step": 4459 + }, + { + "chosen_geometric_mean": -1.3968641757965088, + "epoch": 1.1, + "grad_norm": 41.25, + "learning_rate": 2.0968038428627748e-06, + "log_odds": 10.66628360748291, + "log_odds_ratio": -0.0006372276693582535, + "loss": 0.293, + "rejected_geometric_mean": -11.756454467773438, + "step": 4460 + }, + { + "chosen_geometric_mean": -0.740493655204773, + "epoch": 1.1, + "grad_norm": 7.03125, + "learning_rate": 2.0958431440670267e-06, + "log_odds": 16.585363388061523, + "log_odds_ratio": -4.231964794598753e-06, + "loss": 0.2853, + "rejected_geometric_mean": -16.673065185546875, + "step": 4461 + }, + { + "chosen_geometric_mean": -2.1829376220703125, + "epoch": 1.1, + "grad_norm": 43.0, + "learning_rate": 2.0948825065511587e-06, + "log_odds": 9.14406967163086, + "log_odds_ratio": -0.7083792686462402, + "loss": 0.3516, + "rejected_geometric_mean": -11.080641746520996, + "step": 4462 + }, + { + "chosen_geometric_mean": -1.1867220401763916, + "epoch": 1.1, + "grad_norm": 5.59375, + "learning_rate": 2.093921930460827e-06, + "log_odds": 8.659111022949219, + "log_odds_ratio": -0.04505123943090439, + "loss": 0.2625, + "rejected_geometric_mean": -9.470949172973633, + "step": 4463 + }, + { + "chosen_geometric_mean": -1.016696572303772, + "epoch": 1.11, + "grad_norm": 37.5, + "learning_rate": 2.0929614159416787e-06, + "log_odds": 9.34324836730957, + "log_odds_ratio": -0.12239327281713486, + "loss": 0.285, + "rejected_geometric_mean": -9.98737907409668, + "step": 4464 + }, + { + "chosen_geometric_mean": -0.9174308180809021, + "epoch": 1.11, + "grad_norm": 4.625, + "learning_rate": 2.09200096313935e-06, + "log_odds": 3.3037068843841553, + "log_odds_ratio": -0.24621672928333282, + "loss": 0.2512, + "rejected_geometric_mean": -3.8999533653259277, + "step": 4465 + }, + { + "chosen_geometric_mean": -0.8131937384605408, + "epoch": 1.11, + "grad_norm": 28.875, + "learning_rate": 2.0910405721994697e-06, + "log_odds": 0.8902406096458435, + "log_odds_ratio": -0.3838801383972168, + "loss": 0.2676, + "rejected_geometric_mean": -1.4439897537231445, + "step": 4466 + }, + { + "chosen_geometric_mean": -0.9336116313934326, + "epoch": 1.11, + "grad_norm": 3.3125, + "learning_rate": 2.0900802432676545e-06, + "log_odds": 2.2384631633758545, + "log_odds_ratio": -0.31175026297569275, + "loss": 0.258, + "rejected_geometric_mean": -2.8798441886901855, + "step": 4467 + }, + { + "chosen_geometric_mean": -1.0973057746887207, + "epoch": 1.11, + "grad_norm": 9.625, + "learning_rate": 2.089119976489513e-06, + "log_odds": 13.784669876098633, + "log_odds_ratio": -0.137897789478302, + "loss": 0.2187, + "rejected_geometric_mean": -14.511911392211914, + "step": 4468 + }, + { + "chosen_geometric_mean": -1.0485272407531738, + "epoch": 1.11, + "grad_norm": 15.4375, + "learning_rate": 2.0881597720106476e-06, + "log_odds": 9.103687286376953, + "log_odds_ratio": -0.1117183044552803, + "loss": 0.2822, + "rejected_geometric_mean": -9.738844871520996, + "step": 4469 + }, + { + "chosen_geometric_mean": -0.9908483028411865, + "epoch": 1.11, + "grad_norm": 1.875, + "learning_rate": 2.087199629976646e-06, + "log_odds": 11.88992977142334, + "log_odds_ratio": -0.02631387673318386, + "loss": 0.2398, + "rejected_geometric_mean": -12.412650108337402, + "step": 4470 + }, + { + "chosen_geometric_mean": -0.8654086589813232, + "epoch": 1.11, + "grad_norm": 3.703125, + "learning_rate": 2.0862395505330892e-06, + "log_odds": 14.372367858886719, + "log_odds_ratio": -0.1472013294696808, + "loss": 0.235, + "rejected_geometric_mean": -14.73954963684082, + "step": 4471 + }, + { + "chosen_geometric_mean": -0.9466062188148499, + "epoch": 1.11, + "grad_norm": 12.6875, + "learning_rate": 2.0852795338255483e-06, + "log_odds": 11.37495231628418, + "log_odds_ratio": -0.11129841953516006, + "loss": 0.2778, + "rejected_geometric_mean": -11.873575210571289, + "step": 4472 + }, + { + "chosen_geometric_mean": -0.8375048637390137, + "epoch": 1.11, + "grad_norm": 1.9609375, + "learning_rate": 2.0843195799995858e-06, + "log_odds": 8.519408226013184, + "log_odds_ratio": -0.14767156541347504, + "loss": 0.2253, + "rejected_geometric_mean": -8.897833824157715, + "step": 4473 + }, + { + "chosen_geometric_mean": -1.0011329650878906, + "epoch": 1.11, + "grad_norm": 4.625, + "learning_rate": 2.083359689200751e-06, + "log_odds": 7.977635860443115, + "log_odds_ratio": -0.15719828009605408, + "loss": 0.2843, + "rejected_geometric_mean": -8.554975509643555, + "step": 4474 + }, + { + "chosen_geometric_mean": -1.1332013607025146, + "epoch": 1.11, + "grad_norm": 51.75, + "learning_rate": 2.08239986157459e-06, + "log_odds": 11.332062721252441, + "log_odds_ratio": -0.09548202157020569, + "loss": 0.3211, + "rejected_geometric_mean": -12.115265846252441, + "step": 4475 + }, + { + "chosen_geometric_mean": -0.9422371983528137, + "epoch": 1.11, + "grad_norm": 8.8125, + "learning_rate": 2.081440097266634e-06, + "log_odds": 2.942272424697876, + "log_odds_ratio": -0.14929334819316864, + "loss": 0.2756, + "rejected_geometric_mean": -3.439211368560791, + "step": 4476 + }, + { + "chosen_geometric_mean": -0.88011234998703, + "epoch": 1.11, + "grad_norm": 18.875, + "learning_rate": 2.080480396422406e-06, + "log_odds": 3.2986464500427246, + "log_odds_ratio": -0.26528164744377136, + "loss": 0.2818, + "rejected_geometric_mean": -3.8711252212524414, + "step": 4477 + }, + { + "chosen_geometric_mean": -0.9976445436477661, + "epoch": 1.11, + "grad_norm": 2.390625, + "learning_rate": 2.0795207591874194e-06, + "log_odds": 7.226309299468994, + "log_odds_ratio": -0.09837168455123901, + "loss": 0.2361, + "rejected_geometric_mean": -7.830859184265137, + "step": 4478 + }, + { + "chosen_geometric_mean": -0.8172246813774109, + "epoch": 1.11, + "grad_norm": 2.59375, + "learning_rate": 2.078561185707179e-06, + "log_odds": 7.8946123123168945, + "log_odds_ratio": -0.2719435393810272, + "loss": 0.2237, + "rejected_geometric_mean": -8.395633697509766, + "step": 4479 + }, + { + "chosen_geometric_mean": -0.9423688650131226, + "epoch": 1.11, + "grad_norm": 35.25, + "learning_rate": 2.0776016761271794e-06, + "log_odds": 7.840863227844238, + "log_odds_ratio": -0.19385047256946564, + "loss": 0.2461, + "rejected_geometric_mean": -8.459310531616211, + "step": 4480 + }, + { + "chosen_geometric_mean": -1.311610460281372, + "epoch": 1.11, + "grad_norm": 3.546875, + "learning_rate": 2.076642230592905e-06, + "log_odds": 8.875332832336426, + "log_odds_ratio": -0.05127406865358353, + "loss": 0.261, + "rejected_geometric_mean": -9.889613151550293, + "step": 4481 + }, + { + "chosen_geometric_mean": -1.1987042427062988, + "epoch": 1.11, + "grad_norm": 16.25, + "learning_rate": 2.075682849249831e-06, + "log_odds": 11.663034439086914, + "log_odds_ratio": -0.13134104013442993, + "loss": 0.3069, + "rejected_geometric_mean": -12.555668830871582, + "step": 4482 + }, + { + "chosen_geometric_mean": -1.268331527709961, + "epoch": 1.11, + "grad_norm": 97.5, + "learning_rate": 2.074723532243422e-06, + "log_odds": 7.516840934753418, + "log_odds_ratio": -0.14671792089939117, + "loss": 0.4585, + "rejected_geometric_mean": -8.497917175292969, + "step": 4483 + }, + { + "chosen_geometric_mean": -0.8152508735656738, + "epoch": 1.11, + "grad_norm": 2.03125, + "learning_rate": 2.073764279719134e-06, + "log_odds": 9.61251449584961, + "log_odds_ratio": -0.008569360710680485, + "loss": 0.2573, + "rejected_geometric_mean": -9.83141040802002, + "step": 4484 + }, + { + "chosen_geometric_mean": -1.225135326385498, + "epoch": 1.11, + "grad_norm": 1.9140625, + "learning_rate": 2.072805091822412e-06, + "log_odds": 4.7562947273254395, + "log_odds_ratio": -0.1405513733625412, + "loss": 0.2516, + "rejected_geometric_mean": -5.672812461853027, + "step": 4485 + }, + { + "chosen_geometric_mean": -0.8730379343032837, + "epoch": 1.11, + "grad_norm": 4.75, + "learning_rate": 2.0718459686986926e-06, + "log_odds": 6.3206610679626465, + "log_odds_ratio": -0.005241251550614834, + "loss": 0.2106, + "rejected_geometric_mean": -6.653479099273682, + "step": 4486 + }, + { + "chosen_geometric_mean": -0.866865336894989, + "epoch": 1.11, + "grad_norm": 4.09375, + "learning_rate": 2.0708869104934015e-06, + "log_odds": 4.13627815246582, + "log_odds_ratio": -0.142228364944458, + "loss": 0.2248, + "rejected_geometric_mean": -4.535191535949707, + "step": 4487 + }, + { + "chosen_geometric_mean": -0.9318501353263855, + "epoch": 1.11, + "grad_norm": 2.0, + "learning_rate": 2.0699279173519556e-06, + "log_odds": 7.385385513305664, + "log_odds_ratio": -0.15668287873268127, + "loss": 0.2426, + "rejected_geometric_mean": -7.904320240020752, + "step": 4488 + }, + { + "chosen_geometric_mean": -1.2426570653915405, + "epoch": 1.11, + "grad_norm": 2.234375, + "learning_rate": 2.0689689894197612e-06, + "log_odds": 5.181885242462158, + "log_odds_ratio": -0.04542261362075806, + "loss": 0.2628, + "rejected_geometric_mean": -6.098874568939209, + "step": 4489 + }, + { + "chosen_geometric_mean": -1.0510722398757935, + "epoch": 1.11, + "grad_norm": 2.890625, + "learning_rate": 2.068010126842213e-06, + "log_odds": 9.11292552947998, + "log_odds_ratio": -0.1496734619140625, + "loss": 0.2685, + "rejected_geometric_mean": -9.821080207824707, + "step": 4490 + }, + { + "chosen_geometric_mean": -0.9052814245223999, + "epoch": 1.11, + "grad_norm": 16.0, + "learning_rate": 2.0670513297647e-06, + "log_odds": 9.550347328186035, + "log_odds_ratio": -0.12201151251792908, + "loss": 0.2386, + "rejected_geometric_mean": -10.003865242004395, + "step": 4491 + }, + { + "chosen_geometric_mean": -1.04216468334198, + "epoch": 1.11, + "grad_norm": 9.5, + "learning_rate": 2.0660925983325985e-06, + "log_odds": 8.856911659240723, + "log_odds_ratio": -0.09618622809648514, + "loss": 0.2839, + "rejected_geometric_mean": -9.498109817504883, + "step": 4492 + }, + { + "chosen_geometric_mean": -0.9005300402641296, + "epoch": 1.11, + "grad_norm": 2.0625, + "learning_rate": 2.065133932691274e-06, + "log_odds": 8.937864303588867, + "log_odds_ratio": -0.028697947040200233, + "loss": 0.2408, + "rejected_geometric_mean": -9.28116226196289, + "step": 4493 + }, + { + "chosen_geometric_mean": -1.1591535806655884, + "epoch": 1.11, + "grad_norm": 3.609375, + "learning_rate": 2.0641753329860835e-06, + "log_odds": 5.73925256729126, + "log_odds_ratio": -0.3042345643043518, + "loss": 0.2682, + "rejected_geometric_mean": -6.547179698944092, + "step": 4494 + }, + { + "chosen_geometric_mean": -0.8450084924697876, + "epoch": 1.11, + "grad_norm": 2.390625, + "learning_rate": 2.063216799362374e-06, + "log_odds": 6.670694351196289, + "log_odds_ratio": -0.05029488354921341, + "loss": 0.2614, + "rejected_geometric_mean": -6.982499122619629, + "step": 4495 + }, + { + "chosen_geometric_mean": -0.8578908443450928, + "epoch": 1.11, + "grad_norm": 39.75, + "learning_rate": 2.0622583319654823e-06, + "log_odds": 7.521265029907227, + "log_odds_ratio": -0.1573794186115265, + "loss": 0.2822, + "rejected_geometric_mean": -7.95504093170166, + "step": 4496 + }, + { + "chosen_geometric_mean": -1.07218337059021, + "epoch": 1.11, + "grad_norm": 3.140625, + "learning_rate": 2.061299930940735e-06, + "log_odds": 5.553206920623779, + "log_odds_ratio": -0.151422381401062, + "loss": 0.2447, + "rejected_geometric_mean": -6.297782897949219, + "step": 4497 + }, + { + "chosen_geometric_mean": -1.9567692279815674, + "epoch": 1.11, + "grad_norm": 19.375, + "learning_rate": 2.0603415964334493e-06, + "log_odds": 6.026653289794922, + "log_odds_ratio": -0.2470148503780365, + "loss": 0.2957, + "rejected_geometric_mean": -7.80893611907959, + "step": 4498 + }, + { + "chosen_geometric_mean": -0.9486677050590515, + "epoch": 1.11, + "grad_norm": 17.375, + "learning_rate": 2.0593833285889312e-06, + "log_odds": 9.518808364868164, + "log_odds_ratio": -0.2647828757762909, + "loss": 0.2261, + "rejected_geometric_mean": -10.083895683288574, + "step": 4499 + }, + { + "chosen_geometric_mean": -1.0634839534759521, + "epoch": 1.11, + "grad_norm": 3.125, + "learning_rate": 2.0584251275524762e-06, + "log_odds": 14.370160102844238, + "log_odds_ratio": -0.005792740732431412, + "loss": 0.2869, + "rejected_geometric_mean": -14.96671199798584, + "step": 4500 + }, + { + "chosen_geometric_mean": -1.029164433479309, + "epoch": 1.11, + "grad_norm": 22.875, + "learning_rate": 2.0574669934693712e-06, + "log_odds": 6.179844379425049, + "log_odds_ratio": -0.14304763078689575, + "loss": 0.254, + "rejected_geometric_mean": -6.862586498260498, + "step": 4501 + }, + { + "chosen_geometric_mean": -0.9404078722000122, + "epoch": 1.11, + "grad_norm": 3.171875, + "learning_rate": 2.056508926484893e-06, + "log_odds": 8.649483680725098, + "log_odds_ratio": -0.09649031609296799, + "loss": 0.2415, + "rejected_geometric_mean": -9.111807823181152, + "step": 4502 + }, + { + "chosen_geometric_mean": -0.9228083491325378, + "epoch": 1.11, + "grad_norm": 4.40625, + "learning_rate": 2.0555509267443073e-06, + "log_odds": 10.55364990234375, + "log_odds_ratio": -0.0486963652074337, + "loss": 0.2124, + "rejected_geometric_mean": -10.97071647644043, + "step": 4503 + }, + { + "chosen_geometric_mean": -0.824753999710083, + "epoch": 1.12, + "grad_norm": 2.1875, + "learning_rate": 2.054592994392869e-06, + "log_odds": 10.99392318725586, + "log_odds_ratio": -0.14251813292503357, + "loss": 0.2232, + "rejected_geometric_mean": -11.306836128234863, + "step": 4504 + }, + { + "chosen_geometric_mean": -1.1020334959030151, + "epoch": 1.12, + "grad_norm": 16.625, + "learning_rate": 2.053635129575825e-06, + "log_odds": 0.678176760673523, + "log_odds_ratio": -0.46206212043762207, + "loss": 0.2736, + "rejected_geometric_mean": -1.616320252418518, + "step": 4505 + }, + { + "chosen_geometric_mean": -0.8961195349693298, + "epoch": 1.12, + "grad_norm": 12.5625, + "learning_rate": 2.0526773324384088e-06, + "log_odds": 13.046751022338867, + "log_odds_ratio": -0.07404960691928864, + "loss": 0.3107, + "rejected_geometric_mean": -13.473073959350586, + "step": 4506 + }, + { + "chosen_geometric_mean": -1.0036810636520386, + "epoch": 1.12, + "grad_norm": 34.0, + "learning_rate": 2.0517196031258473e-06, + "log_odds": 11.049755096435547, + "log_odds_ratio": -0.0013619223609566689, + "loss": 0.3038, + "rejected_geometric_mean": -11.5791015625, + "step": 4507 + }, + { + "chosen_geometric_mean": -1.1439143419265747, + "epoch": 1.12, + "grad_norm": 9.8125, + "learning_rate": 2.050761941783355e-06, + "log_odds": 12.71983528137207, + "log_odds_ratio": -0.06521144509315491, + "loss": 0.2571, + "rejected_geometric_mean": -13.502799034118652, + "step": 4508 + }, + { + "chosen_geometric_mean": -0.892022967338562, + "epoch": 1.12, + "grad_norm": 23.875, + "learning_rate": 2.049804348556135e-06, + "log_odds": 4.926011085510254, + "log_odds_ratio": -0.14300844073295593, + "loss": 0.2802, + "rejected_geometric_mean": -5.376184940338135, + "step": 4509 + }, + { + "chosen_geometric_mean": -1.0127148628234863, + "epoch": 1.12, + "grad_norm": 11.875, + "learning_rate": 2.0488468235893824e-06, + "log_odds": 4.86537504196167, + "log_odds_ratio": -0.2578679919242859, + "loss": 0.2572, + "rejected_geometric_mean": -5.610185623168945, + "step": 4510 + }, + { + "chosen_geometric_mean": -1.0291658639907837, + "epoch": 1.12, + "grad_norm": 51.75, + "learning_rate": 2.0478893670282807e-06, + "log_odds": 7.482293128967285, + "log_odds_ratio": -0.10893122851848602, + "loss": 0.2972, + "rejected_geometric_mean": -8.071603775024414, + "step": 4511 + }, + { + "chosen_geometric_mean": -0.8261563181877136, + "epoch": 1.12, + "grad_norm": 45.25, + "learning_rate": 2.046931979018003e-06, + "log_odds": 17.122207641601562, + "log_odds_ratio": -0.15302841365337372, + "loss": 0.3928, + "rejected_geometric_mean": -17.4620418548584, + "step": 4512 + }, + { + "chosen_geometric_mean": -0.8942914605140686, + "epoch": 1.12, + "grad_norm": 5.21875, + "learning_rate": 2.045974659703713e-06, + "log_odds": 7.510199546813965, + "log_odds_ratio": -0.024449067190289497, + "loss": 0.2226, + "rejected_geometric_mean": -7.874918460845947, + "step": 4513 + }, + { + "chosen_geometric_mean": -1.103379726409912, + "epoch": 1.12, + "grad_norm": 8.5, + "learning_rate": 2.045017409230563e-06, + "log_odds": 8.591329574584961, + "log_odds_ratio": -0.2768474817276001, + "loss": 0.2457, + "rejected_geometric_mean": -9.451869010925293, + "step": 4514 + }, + { + "chosen_geometric_mean": -0.9977121949195862, + "epoch": 1.12, + "grad_norm": 2.59375, + "learning_rate": 2.044060227743696e-06, + "log_odds": 7.535180568695068, + "log_odds_ratio": -0.04026556760072708, + "loss": 0.2154, + "rejected_geometric_mean": -8.068201065063477, + "step": 4515 + }, + { + "chosen_geometric_mean": -1.1102761030197144, + "epoch": 1.12, + "grad_norm": 2.265625, + "learning_rate": 2.0431031153882417e-06, + "log_odds": 11.604434967041016, + "log_odds_ratio": -0.0013255830854177475, + "loss": 0.2401, + "rejected_geometric_mean": -12.309839248657227, + "step": 4516 + }, + { + "chosen_geometric_mean": -1.0710119009017944, + "epoch": 1.12, + "grad_norm": 2.71875, + "learning_rate": 2.042146072309322e-06, + "log_odds": 5.647542476654053, + "log_odds_ratio": -0.06737592816352844, + "loss": 0.2062, + "rejected_geometric_mean": -6.321995258331299, + "step": 4517 + }, + { + "chosen_geometric_mean": -1.0333995819091797, + "epoch": 1.12, + "grad_norm": 15.5, + "learning_rate": 2.0411890986520494e-06, + "log_odds": 7.848886489868164, + "log_odds_ratio": -0.21610018610954285, + "loss": 0.2884, + "rejected_geometric_mean": -8.544014930725098, + "step": 4518 + }, + { + "chosen_geometric_mean": -1.0600254535675049, + "epoch": 1.12, + "grad_norm": 3.53125, + "learning_rate": 2.0402321945615224e-06, + "log_odds": 6.533224105834961, + "log_odds_ratio": -0.1316457837820053, + "loss": 0.2718, + "rejected_geometric_mean": -7.220851898193359, + "step": 4519 + }, + { + "chosen_geometric_mean": -0.8360685110092163, + "epoch": 1.12, + "grad_norm": 3.125, + "learning_rate": 2.03927536018283e-06, + "log_odds": 3.8597211837768555, + "log_odds_ratio": -0.2584972083568573, + "loss": 0.2785, + "rejected_geometric_mean": -4.331516265869141, + "step": 4520 + }, + { + "chosen_geometric_mean": -0.9267705678939819, + "epoch": 1.12, + "grad_norm": 8.5, + "learning_rate": 2.038318595661053e-06, + "log_odds": 5.914738655090332, + "log_odds_ratio": -0.20104634761810303, + "loss": 0.2935, + "rejected_geometric_mean": -6.4679436683654785, + "step": 4521 + }, + { + "chosen_geometric_mean": -0.9205235242843628, + "epoch": 1.12, + "grad_norm": 29.75, + "learning_rate": 2.0373619011412596e-06, + "log_odds": 15.194014549255371, + "log_odds_ratio": -0.12913841009140015, + "loss": 0.2897, + "rejected_geometric_mean": -15.716642379760742, + "step": 4522 + }, + { + "chosen_geometric_mean": -0.8615904450416565, + "epoch": 1.12, + "grad_norm": 24.875, + "learning_rate": 2.036405276768505e-06, + "log_odds": 7.844045639038086, + "log_odds_ratio": -0.0038899374194443226, + "loss": 0.3532, + "rejected_geometric_mean": -8.15089225769043, + "step": 4523 + }, + { + "chosen_geometric_mean": -0.8394693732261658, + "epoch": 1.12, + "grad_norm": 2.0, + "learning_rate": 2.0354487226878403e-06, + "log_odds": 4.8095197677612305, + "log_odds_ratio": -0.08878488093614578, + "loss": 0.2214, + "rejected_geometric_mean": -5.09997034072876, + "step": 4524 + }, + { + "chosen_geometric_mean": -0.9941273927688599, + "epoch": 1.12, + "grad_norm": 3.171875, + "learning_rate": 2.0344922390443003e-06, + "log_odds": 5.344901084899902, + "log_odds_ratio": -0.04307008907198906, + "loss": 0.3452, + "rejected_geometric_mean": -5.897594451904297, + "step": 4525 + }, + { + "chosen_geometric_mean": -2.0333478450775146, + "epoch": 1.12, + "grad_norm": 26.625, + "learning_rate": 2.0335358259829105e-06, + "log_odds": 16.0339298248291, + "log_odds_ratio": -0.0001507951965322718, + "loss": 0.3039, + "rejected_geometric_mean": -17.633420944213867, + "step": 4526 + }, + { + "chosen_geometric_mean": -1.0111587047576904, + "epoch": 1.12, + "grad_norm": 4.6875, + "learning_rate": 2.0325794836486867e-06, + "log_odds": 8.558831214904785, + "log_odds_ratio": -0.005627547390758991, + "loss": 0.2087, + "rejected_geometric_mean": -9.094512939453125, + "step": 4527 + }, + { + "chosen_geometric_mean": -0.9012432098388672, + "epoch": 1.12, + "grad_norm": 6.03125, + "learning_rate": 2.0316232121866324e-06, + "log_odds": 17.412580490112305, + "log_odds_ratio": -3.576280960260192e-07, + "loss": 0.2099, + "rejected_geometric_mean": -17.76531219482422, + "step": 4528 + }, + { + "chosen_geometric_mean": -1.0186867713928223, + "epoch": 1.12, + "grad_norm": 20.75, + "learning_rate": 2.0306670117417423e-06, + "log_odds": 9.66424560546875, + "log_odds_ratio": -0.07907994836568832, + "loss": 0.2011, + "rejected_geometric_mean": -10.277080535888672, + "step": 4529 + }, + { + "chosen_geometric_mean": -1.0106723308563232, + "epoch": 1.12, + "grad_norm": 16.25, + "learning_rate": 2.029710882458999e-06, + "log_odds": 7.91774320602417, + "log_odds_ratio": -0.040737248957157135, + "loss": 0.2657, + "rejected_geometric_mean": -8.488204002380371, + "step": 4530 + }, + { + "chosen_geometric_mean": -1.0366032123565674, + "epoch": 1.12, + "grad_norm": 53.5, + "learning_rate": 2.028754824483375e-06, + "log_odds": 17.528484344482422, + "log_odds_ratio": -6.854543812551128e-07, + "loss": 0.315, + "rejected_geometric_mean": -18.116750717163086, + "step": 4531 + }, + { + "chosen_geometric_mean": -1.0039161443710327, + "epoch": 1.12, + "grad_norm": 2.515625, + "learning_rate": 2.0277988379598317e-06, + "log_odds": 5.664618492126465, + "log_odds_ratio": -0.20822986960411072, + "loss": 0.2596, + "rejected_geometric_mean": -6.320024013519287, + "step": 4532 + }, + { + "chosen_geometric_mean": -0.9425929188728333, + "epoch": 1.12, + "grad_norm": 6.4375, + "learning_rate": 2.026842923033319e-06, + "log_odds": 2.4827799797058105, + "log_odds_ratio": -0.14321357011795044, + "loss": 0.2113, + "rejected_geometric_mean": -2.943567991256714, + "step": 4533 + }, + { + "chosen_geometric_mean": -1.0796875953674316, + "epoch": 1.12, + "grad_norm": 4.75, + "learning_rate": 2.025887079848776e-06, + "log_odds": 5.60495662689209, + "log_odds_ratio": -0.21670517325401306, + "loss": 0.2505, + "rejected_geometric_mean": -6.3319597244262695, + "step": 4534 + }, + { + "chosen_geometric_mean": -1.7483799457550049, + "epoch": 1.12, + "grad_norm": 24.25, + "learning_rate": 2.0249313085511325e-06, + "log_odds": 16.23052215576172, + "log_odds_ratio": -4.887602699454874e-06, + "loss": 0.2658, + "rejected_geometric_mean": -17.661556243896484, + "step": 4535 + }, + { + "chosen_geometric_mean": -1.6946805715560913, + "epoch": 1.12, + "grad_norm": 61.0, + "learning_rate": 2.023975609285307e-06, + "log_odds": 8.614298820495605, + "log_odds_ratio": -0.004203808028250933, + "loss": 0.3811, + "rejected_geometric_mean": -9.934661865234375, + "step": 4536 + }, + { + "chosen_geometric_mean": -1.1671407222747803, + "epoch": 1.12, + "grad_norm": 18.125, + "learning_rate": 2.023019982196205e-06, + "log_odds": 4.814352035522461, + "log_odds_ratio": -0.1383237987756729, + "loss": 0.2776, + "rejected_geometric_mean": -5.645949840545654, + "step": 4537 + }, + { + "chosen_geometric_mean": -1.0132761001586914, + "epoch": 1.12, + "grad_norm": 61.0, + "learning_rate": 2.022064427428723e-06, + "log_odds": 9.938695907592773, + "log_odds_ratio": -0.024418730288743973, + "loss": 0.2926, + "rejected_geometric_mean": -10.466541290283203, + "step": 4538 + }, + { + "chosen_geometric_mean": -0.9680794477462769, + "epoch": 1.12, + "grad_norm": 51.0, + "learning_rate": 2.021108945127746e-06, + "log_odds": 6.806591033935547, + "log_odds_ratio": -0.267329603433609, + "loss": 0.275, + "rejected_geometric_mean": -7.425177097320557, + "step": 4539 + }, + { + "chosen_geometric_mean": -1.1728146076202393, + "epoch": 1.12, + "grad_norm": 2.15625, + "learning_rate": 2.0201535354381484e-06, + "log_odds": 4.428624153137207, + "log_odds_ratio": -0.1830177754163742, + "loss": 0.2698, + "rejected_geometric_mean": -5.338339328765869, + "step": 4540 + }, + { + "chosen_geometric_mean": -0.7202922701835632, + "epoch": 1.12, + "grad_norm": 12.9375, + "learning_rate": 2.0191981985047936e-06, + "log_odds": 3.3279314041137695, + "log_odds_ratio": -0.2088032364845276, + "loss": 0.2353, + "rejected_geometric_mean": -3.4598259925842285, + "step": 4541 + }, + { + "chosen_geometric_mean": -0.971032977104187, + "epoch": 1.12, + "grad_norm": 1.9921875, + "learning_rate": 2.0182429344725323e-06, + "log_odds": 6.785313129425049, + "log_odds_ratio": -0.15302260220050812, + "loss": 0.2358, + "rejected_geometric_mean": -7.368856906890869, + "step": 4542 + }, + { + "chosen_geometric_mean": -1.1670504808425903, + "epoch": 1.12, + "grad_norm": 16.75, + "learning_rate": 2.0172877434862065e-06, + "log_odds": 8.329181671142578, + "log_odds_ratio": -0.004636372905224562, + "loss": 0.2812, + "rejected_geometric_mean": -9.124107360839844, + "step": 4543 + }, + { + "chosen_geometric_mean": -0.8239352107048035, + "epoch": 1.13, + "grad_norm": 1.9375, + "learning_rate": 2.016332625690646e-06, + "log_odds": 3.1549134254455566, + "log_odds_ratio": -0.12111368030309677, + "loss": 0.219, + "rejected_geometric_mean": -3.4454190731048584, + "step": 4544 + }, + { + "chosen_geometric_mean": -1.025600790977478, + "epoch": 1.13, + "grad_norm": 16.875, + "learning_rate": 2.015377581230668e-06, + "log_odds": 1.9168767929077148, + "log_odds_ratio": -0.30947911739349365, + "loss": 0.2271, + "rejected_geometric_mean": -2.727827548980713, + "step": 4545 + }, + { + "chosen_geometric_mean": -0.8230164647102356, + "epoch": 1.13, + "grad_norm": 2.125, + "learning_rate": 2.014422610251083e-06, + "log_odds": 7.061590671539307, + "log_odds_ratio": -0.10477589815855026, + "loss": 0.2943, + "rejected_geometric_mean": -7.365789413452148, + "step": 4546 + }, + { + "chosen_geometric_mean": -1.1503890752792358, + "epoch": 1.13, + "grad_norm": 21.75, + "learning_rate": 2.0134677128966852e-06, + "log_odds": 8.767822265625, + "log_odds_ratio": -0.03077029436826706, + "loss": 0.2279, + "rejected_geometric_mean": -9.502742767333984, + "step": 4547 + }, + { + "chosen_geometric_mean": -1.076114535331726, + "epoch": 1.13, + "grad_norm": 3.40625, + "learning_rate": 2.0125128893122615e-06, + "log_odds": 8.955260276794434, + "log_odds_ratio": -0.011544456705451012, + "loss": 0.2846, + "rejected_geometric_mean": -9.578144073486328, + "step": 4548 + }, + { + "chosen_geometric_mean": -1.0237839221954346, + "epoch": 1.13, + "grad_norm": 1.84375, + "learning_rate": 2.0115581396425853e-06, + "log_odds": 11.79283618927002, + "log_odds_ratio": -0.007245390210300684, + "loss": 0.2415, + "rejected_geometric_mean": -12.353042602539062, + "step": 4549 + }, + { + "chosen_geometric_mean": -1.0635638236999512, + "epoch": 1.13, + "grad_norm": 3.953125, + "learning_rate": 2.0106034640324186e-06, + "log_odds": 5.64652681350708, + "log_odds_ratio": -0.10361846536397934, + "loss": 0.2477, + "rejected_geometric_mean": -6.3300371170043945, + "step": 4550 + }, + { + "chosen_geometric_mean": -0.9472208023071289, + "epoch": 1.13, + "grad_norm": 2.21875, + "learning_rate": 2.0096488626265153e-06, + "log_odds": 1.861382246017456, + "log_odds_ratio": -0.24700786173343658, + "loss": 0.2404, + "rejected_geometric_mean": -2.481532573699951, + "step": 4551 + }, + { + "chosen_geometric_mean": -1.0071065425872803, + "epoch": 1.13, + "grad_norm": 2.3125, + "learning_rate": 2.0086943355696146e-06, + "log_odds": 8.737164497375488, + "log_odds_ratio": -0.04258683696389198, + "loss": 0.2994, + "rejected_geometric_mean": -9.299161911010742, + "step": 4552 + }, + { + "chosen_geometric_mean": -1.1942267417907715, + "epoch": 1.13, + "grad_norm": 2.703125, + "learning_rate": 2.0077398830064457e-06, + "log_odds": 6.012451648712158, + "log_odds_ratio": -0.007857087068259716, + "loss": 0.2805, + "rejected_geometric_mean": -6.817226886749268, + "step": 4553 + }, + { + "chosen_geometric_mean": -0.7852922081947327, + "epoch": 1.13, + "grad_norm": 2.6875, + "learning_rate": 2.006785505081727e-06, + "log_odds": 6.874538898468018, + "log_odds_ratio": -0.17187076807022095, + "loss": 0.2925, + "rejected_geometric_mean": -7.199921607971191, + "step": 4554 + }, + { + "chosen_geometric_mean": -0.8466321229934692, + "epoch": 1.13, + "grad_norm": 8.875, + "learning_rate": 2.005831201940165e-06, + "log_odds": 0.5944759249687195, + "log_odds_ratio": -0.5311971306800842, + "loss": 0.3165, + "rejected_geometric_mean": -1.3311378955841064, + "step": 4555 + }, + { + "chosen_geometric_mean": -0.9408952593803406, + "epoch": 1.13, + "grad_norm": 29.625, + "learning_rate": 2.0048769737264534e-06, + "log_odds": 7.8655686378479, + "log_odds_ratio": -0.10792452841997147, + "loss": 0.3669, + "rejected_geometric_mean": -8.391999244689941, + "step": 4556 + }, + { + "chosen_geometric_mean": -1.0971884727478027, + "epoch": 1.13, + "grad_norm": 4.75, + "learning_rate": 2.0039228205852784e-06, + "log_odds": 2.43563175201416, + "log_odds_ratio": -0.307427316904068, + "loss": 0.2542, + "rejected_geometric_mean": -3.289738655090332, + "step": 4557 + }, + { + "chosen_geometric_mean": -1.089791178703308, + "epoch": 1.13, + "grad_norm": 3.296875, + "learning_rate": 2.002968742661312e-06, + "log_odds": 3.0895495414733887, + "log_odds_ratio": -0.1521228700876236, + "loss": 0.2604, + "rejected_geometric_mean": -3.864020347595215, + "step": 4558 + }, + { + "chosen_geometric_mean": -1.195732831954956, + "epoch": 1.13, + "grad_norm": 12.1875, + "learning_rate": 2.0020147400992144e-06, + "log_odds": 5.00614070892334, + "log_odds_ratio": -0.11346765607595444, + "loss": 0.2376, + "rejected_geometric_mean": -5.898896217346191, + "step": 4559 + }, + { + "chosen_geometric_mean": -0.9257256984710693, + "epoch": 1.13, + "grad_norm": 1.8359375, + "learning_rate": 2.0010608130436364e-06, + "log_odds": 8.444694519042969, + "log_odds_ratio": -0.013337665237486362, + "loss": 0.2629, + "rejected_geometric_mean": -8.796158790588379, + "step": 4560 + }, + { + "chosen_geometric_mean": -1.0804353952407837, + "epoch": 1.13, + "grad_norm": 9.0, + "learning_rate": 2.0001069616392145e-06, + "log_odds": 8.063701629638672, + "log_odds_ratio": -0.20664110779762268, + "loss": 0.2196, + "rejected_geometric_mean": -8.786211013793945, + "step": 4561 + }, + { + "chosen_geometric_mean": -0.8353282809257507, + "epoch": 1.13, + "grad_norm": 5.6875, + "learning_rate": 1.9991531860305775e-06, + "log_odds": 4.670568943023682, + "log_odds_ratio": -0.11478780955076218, + "loss": 0.2653, + "rejected_geometric_mean": -5.0096435546875, + "step": 4562 + }, + { + "chosen_geometric_mean": -1.0950571298599243, + "epoch": 1.13, + "grad_norm": 2.671875, + "learning_rate": 1.9981994863623393e-06, + "log_odds": 5.233711242675781, + "log_odds_ratio": -0.33221086859703064, + "loss": 0.2247, + "rejected_geometric_mean": -6.09094762802124, + "step": 4563 + }, + { + "chosen_geometric_mean": -1.03555428981781, + "epoch": 1.13, + "grad_norm": 8.875, + "learning_rate": 1.9972458627791045e-06, + "log_odds": 3.21879243850708, + "log_odds_ratio": -0.21060901880264282, + "loss": 0.2764, + "rejected_geometric_mean": -3.9231865406036377, + "step": 4564 + }, + { + "chosen_geometric_mean": -1.0483896732330322, + "epoch": 1.13, + "grad_norm": 10.375, + "learning_rate": 1.9962923154254655e-06, + "log_odds": 3.329960584640503, + "log_odds_ratio": -0.2374396175146103, + "loss": 0.2298, + "rejected_geometric_mean": -4.085674285888672, + "step": 4565 + }, + { + "chosen_geometric_mean": -0.8922159671783447, + "epoch": 1.13, + "grad_norm": 5.8125, + "learning_rate": 1.995338844446002e-06, + "log_odds": 2.8737289905548096, + "log_odds_ratio": -0.4317995011806488, + "loss": 0.3019, + "rejected_geometric_mean": -3.5787363052368164, + "step": 4566 + }, + { + "chosen_geometric_mean": -0.9097409844398499, + "epoch": 1.13, + "grad_norm": 7.9375, + "learning_rate": 1.9943854499852827e-06, + "log_odds": 13.122686386108398, + "log_odds_ratio": -0.011301811784505844, + "loss": 0.2784, + "rejected_geometric_mean": -13.493305206298828, + "step": 4567 + }, + { + "chosen_geometric_mean": -1.0614031553268433, + "epoch": 1.13, + "grad_norm": 26.625, + "learning_rate": 1.9934321321878663e-06, + "log_odds": 3.9774439334869385, + "log_odds_ratio": -0.1164335235953331, + "loss": 0.2763, + "rejected_geometric_mean": -4.610559940338135, + "step": 4568 + }, + { + "chosen_geometric_mean": -1.075921654701233, + "epoch": 1.13, + "grad_norm": 5.3125, + "learning_rate": 1.9924788911982986e-06, + "log_odds": 9.353776931762695, + "log_odds_ratio": -0.029945354908704758, + "loss": 0.2521, + "rejected_geometric_mean": -10.006172180175781, + "step": 4569 + }, + { + "chosen_geometric_mean": -0.9605057835578918, + "epoch": 1.13, + "grad_norm": 4.3125, + "learning_rate": 1.9915257271611135e-06, + "log_odds": 5.790668964385986, + "log_odds_ratio": -0.09251510351896286, + "loss": 0.2672, + "rejected_geometric_mean": -6.31085205078125, + "step": 4570 + }, + { + "chosen_geometric_mean": -0.8807046413421631, + "epoch": 1.13, + "grad_norm": 10.0, + "learning_rate": 1.990572640220834e-06, + "log_odds": 9.48234748840332, + "log_odds_ratio": -0.0015153756830841303, + "loss": 0.2291, + "rejected_geometric_mean": -9.778629302978516, + "step": 4571 + }, + { + "chosen_geometric_mean": -0.9269506335258484, + "epoch": 1.13, + "grad_norm": 2.078125, + "learning_rate": 1.989619630521969e-06, + "log_odds": 12.481656074523926, + "log_odds_ratio": -0.11682555824518204, + "loss": 0.2695, + "rejected_geometric_mean": -12.985702514648438, + "step": 4572 + }, + { + "chosen_geometric_mean": -1.1450010538101196, + "epoch": 1.13, + "grad_norm": 31.125, + "learning_rate": 1.98866669820902e-06, + "log_odds": 8.68121337890625, + "log_odds_ratio": -0.020363088697195053, + "loss": 0.2841, + "rejected_geometric_mean": -9.447027206420898, + "step": 4573 + }, + { + "chosen_geometric_mean": -1.0110204219818115, + "epoch": 1.13, + "grad_norm": 14.75, + "learning_rate": 1.9877138434264743e-06, + "log_odds": 6.275272369384766, + "log_odds_ratio": -0.23829656839370728, + "loss": 0.232, + "rejected_geometric_mean": -6.986464977264404, + "step": 4574 + }, + { + "chosen_geometric_mean": -1.4892537593841553, + "epoch": 1.13, + "grad_norm": 22.375, + "learning_rate": 1.986761066318806e-06, + "log_odds": 6.320344924926758, + "log_odds_ratio": -0.13486361503601074, + "loss": 0.3502, + "rejected_geometric_mean": -7.607112884521484, + "step": 4575 + }, + { + "chosen_geometric_mean": -0.9030366539955139, + "epoch": 1.13, + "grad_norm": 5.4375, + "learning_rate": 1.98580836703048e-06, + "log_odds": 4.157419681549072, + "log_odds_ratio": -0.10596869885921478, + "loss": 0.2401, + "rejected_geometric_mean": -4.609534740447998, + "step": 4576 + }, + { + "chosen_geometric_mean": -0.8742228746414185, + "epoch": 1.13, + "grad_norm": 1.984375, + "learning_rate": 1.984855745705948e-06, + "log_odds": 10.686235427856445, + "log_odds_ratio": -0.010188516229391098, + "loss": 0.2479, + "rejected_geometric_mean": -11.016006469726562, + "step": 4577 + }, + { + "chosen_geometric_mean": -0.975657045841217, + "epoch": 1.13, + "grad_norm": 6.375, + "learning_rate": 1.9839032024896504e-06, + "log_odds": 6.220972061157227, + "log_odds_ratio": -0.005121773574501276, + "loss": 0.2639, + "rejected_geometric_mean": -6.713597774505615, + "step": 4578 + }, + { + "chosen_geometric_mean": -1.1105412244796753, + "epoch": 1.13, + "grad_norm": 8.4375, + "learning_rate": 1.9829507375260158e-06, + "log_odds": 5.455793380737305, + "log_odds_ratio": -0.1418020874261856, + "loss": 0.2715, + "rejected_geometric_mean": -6.2215986251831055, + "step": 4579 + }, + { + "chosen_geometric_mean": -1.031001091003418, + "epoch": 1.13, + "grad_norm": 29.125, + "learning_rate": 1.9819983509594605e-06, + "log_odds": 3.757075548171997, + "log_odds_ratio": -0.2924076020717621, + "loss": 0.2968, + "rejected_geometric_mean": -4.525932312011719, + "step": 4580 + }, + { + "chosen_geometric_mean": -1.23013174533844, + "epoch": 1.13, + "grad_norm": 14.4375, + "learning_rate": 1.98104604293439e-06, + "log_odds": 10.629119873046875, + "log_odds_ratio": -0.018430037423968315, + "loss": 0.3928, + "rejected_geometric_mean": -11.456293106079102, + "step": 4581 + }, + { + "chosen_geometric_mean": -1.040736198425293, + "epoch": 1.13, + "grad_norm": 11.9375, + "learning_rate": 1.980093813595196e-06, + "log_odds": 11.432854652404785, + "log_odds_ratio": -0.0813048928976059, + "loss": 0.2567, + "rejected_geometric_mean": -12.080432891845703, + "step": 4582 + }, + { + "chosen_geometric_mean": -1.1263225078582764, + "epoch": 1.13, + "grad_norm": 37.0, + "learning_rate": 1.9791416630862583e-06, + "log_odds": 4.466831207275391, + "log_odds_ratio": -0.12166834622621536, + "loss": 0.3409, + "rejected_geometric_mean": -5.274621963500977, + "step": 4583 + }, + { + "chosen_geometric_mean": -1.4673889875411987, + "epoch": 1.13, + "grad_norm": 1.9921875, + "learning_rate": 1.9781895915519483e-06, + "log_odds": 3.688925266265869, + "log_odds_ratio": -0.2170167714357376, + "loss": 0.2504, + "rejected_geometric_mean": -4.945113182067871, + "step": 4584 + }, + { + "chosen_geometric_mean": -0.9308462738990784, + "epoch": 1.14, + "grad_norm": 20.5, + "learning_rate": 1.9772375991366218e-06, + "log_odds": 8.191108703613281, + "log_odds_ratio": -0.18136397004127502, + "loss": 0.2454, + "rejected_geometric_mean": -8.727066040039062, + "step": 4585 + }, + { + "chosen_geometric_mean": -0.9809530973434448, + "epoch": 1.14, + "grad_norm": 2.5625, + "learning_rate": 1.9762856859846234e-06, + "log_odds": 7.634472846984863, + "log_odds_ratio": -0.07159017771482468, + "loss": 0.2428, + "rejected_geometric_mean": -8.11390209197998, + "step": 4586 + }, + { + "chosen_geometric_mean": -1.0468591451644897, + "epoch": 1.14, + "grad_norm": 3.21875, + "learning_rate": 1.975333852240286e-06, + "log_odds": 8.129509925842285, + "log_odds_ratio": -0.13221777975559235, + "loss": 0.2989, + "rejected_geometric_mean": -8.783970832824707, + "step": 4587 + }, + { + "chosen_geometric_mean": -1.0324057340621948, + "epoch": 1.14, + "grad_norm": 3.90625, + "learning_rate": 1.9743820980479304e-06, + "log_odds": 6.662601947784424, + "log_odds_ratio": -0.2344132363796234, + "loss": 0.2559, + "rejected_geometric_mean": -7.411464214324951, + "step": 4588 + }, + { + "chosen_geometric_mean": -1.086771011352539, + "epoch": 1.14, + "grad_norm": 5.25, + "learning_rate": 1.973430423551866e-06, + "log_odds": 8.252543449401855, + "log_odds_ratio": -0.166229709982872, + "loss": 0.2823, + "rejected_geometric_mean": -9.020164489746094, + "step": 4589 + }, + { + "chosen_geometric_mean": -0.9129269123077393, + "epoch": 1.14, + "grad_norm": 2.46875, + "learning_rate": 1.9724788288963896e-06, + "log_odds": 4.284481048583984, + "log_odds_ratio": -0.28660064935684204, + "loss": 0.2566, + "rejected_geometric_mean": -4.828866958618164, + "step": 4590 + }, + { + "chosen_geometric_mean": -0.9361859560012817, + "epoch": 1.14, + "grad_norm": 8.9375, + "learning_rate": 1.971527314225785e-06, + "log_odds": 4.112067222595215, + "log_odds_ratio": -0.10433255136013031, + "loss": 0.2821, + "rejected_geometric_mean": -4.637382507324219, + "step": 4591 + }, + { + "chosen_geometric_mean": -0.9909355640411377, + "epoch": 1.14, + "grad_norm": 3.78125, + "learning_rate": 1.9705758796843248e-06, + "log_odds": 5.569862365722656, + "log_odds_ratio": -0.16201309859752655, + "loss": 0.2536, + "rejected_geometric_mean": -6.205539226531982, + "step": 4592 + }, + { + "chosen_geometric_mean": -1.192451000213623, + "epoch": 1.14, + "grad_norm": 38.25, + "learning_rate": 1.9696245254162698e-06, + "log_odds": 13.633077621459961, + "log_odds_ratio": -0.1029672920703888, + "loss": 0.2913, + "rejected_geometric_mean": -14.491394996643066, + "step": 4593 + }, + { + "chosen_geometric_mean": -1.1280460357666016, + "epoch": 1.14, + "grad_norm": 2.3125, + "learning_rate": 1.9686732515658667e-06, + "log_odds": 10.298277854919434, + "log_odds_ratio": -0.029774511232972145, + "loss": 0.247, + "rejected_geometric_mean": -11.042489051818848, + "step": 4594 + }, + { + "chosen_geometric_mean": -0.8991637229919434, + "epoch": 1.14, + "grad_norm": 3.96875, + "learning_rate": 1.9677220582773537e-06, + "log_odds": 13.469720840454102, + "log_odds_ratio": -5.777840124210343e-05, + "loss": 0.2425, + "rejected_geometric_mean": -13.816158294677734, + "step": 4595 + }, + { + "chosen_geometric_mean": -2.330599784851074, + "epoch": 1.14, + "grad_norm": 42.0, + "learning_rate": 1.966770945694953e-06, + "log_odds": -0.02298014611005783, + "log_odds_ratio": -1.436582088470459, + "loss": 0.4023, + "rejected_geometric_mean": -2.3318021297454834, + "step": 4596 + }, + { + "chosen_geometric_mean": -0.9071975350379944, + "epoch": 1.14, + "grad_norm": 64.5, + "learning_rate": 1.9658199139628774e-06, + "log_odds": 3.8723602294921875, + "log_odds_ratio": -0.16643516719341278, + "loss": 0.2757, + "rejected_geometric_mean": -4.310057640075684, + "step": 4597 + }, + { + "chosen_geometric_mean": -0.9882286190986633, + "epoch": 1.14, + "grad_norm": 2.625, + "learning_rate": 1.9648689632253245e-06, + "log_odds": 6.474168300628662, + "log_odds_ratio": -0.003133901162073016, + "loss": 0.2754, + "rejected_geometric_mean": -6.970939636230469, + "step": 4598 + }, + { + "chosen_geometric_mean": -1.0533188581466675, + "epoch": 1.14, + "grad_norm": 2.296875, + "learning_rate": 1.963918093626481e-06, + "log_odds": 10.82888412475586, + "log_odds_ratio": -0.007267934735864401, + "loss": 0.234, + "rejected_geometric_mean": -11.454877853393555, + "step": 4599 + }, + { + "chosen_geometric_mean": -0.804944634437561, + "epoch": 1.14, + "grad_norm": 63.0, + "learning_rate": 1.9629673053105244e-06, + "log_odds": 8.70538330078125, + "log_odds_ratio": -0.0011315852170810103, + "loss": 0.26, + "rejected_geometric_mean": -8.912076950073242, + "step": 4600 + }, + { + "chosen_geometric_mean": -1.0402438640594482, + "epoch": 1.14, + "grad_norm": 5.90625, + "learning_rate": 1.9620165984216143e-06, + "log_odds": 3.7628583908081055, + "log_odds_ratio": -0.3675375282764435, + "loss": 0.2415, + "rejected_geometric_mean": -4.535094261169434, + "step": 4601 + }, + { + "chosen_geometric_mean": -1.0270991325378418, + "epoch": 1.14, + "grad_norm": 8.6875, + "learning_rate": 1.961065973103902e-06, + "log_odds": 10.594888687133789, + "log_odds_ratio": -0.0004246331809554249, + "loss": 0.2309, + "rejected_geometric_mean": -11.165428161621094, + "step": 4602 + }, + { + "chosen_geometric_mean": -1.1218807697296143, + "epoch": 1.14, + "grad_norm": 6.8125, + "learning_rate": 1.9601154295015244e-06, + "log_odds": 4.236952304840088, + "log_odds_ratio": -0.1519821733236313, + "loss": 0.2418, + "rejected_geometric_mean": -5.033726692199707, + "step": 4603 + }, + { + "chosen_geometric_mean": -1.206933617591858, + "epoch": 1.14, + "grad_norm": 32.0, + "learning_rate": 1.9591649677586077e-06, + "log_odds": 7.197777271270752, + "log_odds_ratio": -0.2269567847251892, + "loss": 0.3028, + "rejected_geometric_mean": -8.144362449645996, + "step": 4604 + }, + { + "chosen_geometric_mean": -1.0388225317001343, + "epoch": 1.14, + "grad_norm": 10.625, + "learning_rate": 1.9582145880192625e-06, + "log_odds": 2.2139549255371094, + "log_odds_ratio": -0.23748759925365448, + "loss": 0.3158, + "rejected_geometric_mean": -2.9600396156311035, + "step": 4605 + }, + { + "chosen_geometric_mean": -1.0324819087982178, + "epoch": 1.14, + "grad_norm": 3.046875, + "learning_rate": 1.9572642904275917e-06, + "log_odds": 4.269315719604492, + "log_odds_ratio": -0.1649242788553238, + "loss": 0.255, + "rejected_geometric_mean": -4.955108642578125, + "step": 4606 + }, + { + "chosen_geometric_mean": -1.2282509803771973, + "epoch": 1.14, + "grad_norm": 2.25, + "learning_rate": 1.9563140751276827e-06, + "log_odds": 3.8854689598083496, + "log_odds_ratio": -0.2581445276737213, + "loss": 0.2635, + "rejected_geometric_mean": -4.890209197998047, + "step": 4607 + }, + { + "chosen_geometric_mean": -0.8890729546546936, + "epoch": 1.14, + "grad_norm": 5.46875, + "learning_rate": 1.95536394226361e-06, + "log_odds": 5.935370922088623, + "log_odds_ratio": -0.05807347223162651, + "loss": 0.2199, + "rejected_geometric_mean": -6.329584121704102, + "step": 4608 + }, + { + "chosen_geometric_mean": -1.1432034969329834, + "epoch": 1.14, + "grad_norm": 6.59375, + "learning_rate": 1.954413891979437e-06, + "log_odds": 5.609759330749512, + "log_odds_ratio": -0.008737153373658657, + "loss": 0.2501, + "rejected_geometric_mean": -6.36000394821167, + "step": 4609 + }, + { + "chosen_geometric_mean": -1.1056441068649292, + "epoch": 1.14, + "grad_norm": 24.125, + "learning_rate": 1.953463924419214e-06, + "log_odds": 4.105680465698242, + "log_odds_ratio": -0.019007382914423943, + "loss": 0.2177, + "rejected_geometric_mean": -4.817812442779541, + "step": 4610 + }, + { + "chosen_geometric_mean": -0.9382043480873108, + "epoch": 1.14, + "grad_norm": 5.71875, + "learning_rate": 1.952514039726979e-06, + "log_odds": 6.9642839431762695, + "log_odds_ratio": -0.08233582973480225, + "loss": 0.2418, + "rejected_geometric_mean": -7.436203956604004, + "step": 4611 + }, + { + "chosen_geometric_mean": -0.8827773332595825, + "epoch": 1.14, + "grad_norm": 8.4375, + "learning_rate": 1.9515642380467577e-06, + "log_odds": 7.9561991691589355, + "log_odds_ratio": -0.05601746216416359, + "loss": 0.2814, + "rejected_geometric_mean": -8.338493347167969, + "step": 4612 + }, + { + "chosen_geometric_mean": -1.1720502376556396, + "epoch": 1.14, + "grad_norm": 18.25, + "learning_rate": 1.9506145195225624e-06, + "log_odds": 4.447371006011963, + "log_odds_ratio": -0.29427698254585266, + "loss": 0.3244, + "rejected_geometric_mean": -5.399265289306641, + "step": 4613 + }, + { + "chosen_geometric_mean": -0.943988025188446, + "epoch": 1.14, + "grad_norm": 29.625, + "learning_rate": 1.9496648842983943e-06, + "log_odds": 11.003403663635254, + "log_odds_ratio": -0.0009390197228640318, + "loss": 0.2408, + "rejected_geometric_mean": -11.450729370117188, + "step": 4614 + }, + { + "chosen_geometric_mean": -1.1705255508422852, + "epoch": 1.14, + "grad_norm": 4.59375, + "learning_rate": 1.948715332518239e-06, + "log_odds": 7.108109951019287, + "log_odds_ratio": -0.05610251799225807, + "loss": 0.2918, + "rejected_geometric_mean": -7.91641902923584, + "step": 4615 + }, + { + "chosen_geometric_mean": -1.2352960109710693, + "epoch": 1.14, + "grad_norm": 34.5, + "learning_rate": 1.9477658643260717e-06, + "log_odds": 13.769962310791016, + "log_odds_ratio": -0.009621950797736645, + "loss": 0.309, + "rejected_geometric_mean": -14.536375999450684, + "step": 4616 + }, + { + "chosen_geometric_mean": -0.7609441876411438, + "epoch": 1.14, + "grad_norm": 2.90625, + "learning_rate": 1.946816479865856e-06, + "log_odds": 8.543188095092773, + "log_odds_ratio": -0.16599400341510773, + "loss": 0.245, + "rejected_geometric_mean": -8.858421325683594, + "step": 4617 + }, + { + "chosen_geometric_mean": -0.9599930644035339, + "epoch": 1.14, + "grad_norm": 8.125, + "learning_rate": 1.9458671792815405e-06, + "log_odds": 1.9578111171722412, + "log_odds_ratio": -0.17714276909828186, + "loss": 0.2543, + "rejected_geometric_mean": -2.5053629875183105, + "step": 4618 + }, + { + "chosen_geometric_mean": -0.7454536557197571, + "epoch": 1.14, + "grad_norm": 3.0, + "learning_rate": 1.944917962717062e-06, + "log_odds": 8.367260932922363, + "log_odds_ratio": -0.046652652323246, + "loss": 0.2436, + "rejected_geometric_mean": -8.495781898498535, + "step": 4619 + }, + { + "chosen_geometric_mean": -0.9376166462898254, + "epoch": 1.14, + "grad_norm": 2.46875, + "learning_rate": 1.943968830316344e-06, + "log_odds": 6.235607147216797, + "log_odds_ratio": -0.23173315823078156, + "loss": 0.2406, + "rejected_geometric_mean": -6.81691312789917, + "step": 4620 + }, + { + "chosen_geometric_mean": -0.8577861785888672, + "epoch": 1.14, + "grad_norm": 9.0625, + "learning_rate": 1.943019782223298e-06, + "log_odds": 12.01209831237793, + "log_odds_ratio": -0.06288515031337738, + "loss": 0.283, + "rejected_geometric_mean": -12.368630409240723, + "step": 4621 + }, + { + "chosen_geometric_mean": -0.9650214910507202, + "epoch": 1.14, + "grad_norm": 30.5, + "learning_rate": 1.9420708185818238e-06, + "log_odds": 0.9668537974357605, + "log_odds_ratio": -0.353049099445343, + "loss": 0.2253, + "rejected_geometric_mean": -1.6881362199783325, + "step": 4622 + }, + { + "chosen_geometric_mean": -0.9207000732421875, + "epoch": 1.14, + "grad_norm": 1.8828125, + "learning_rate": 1.941121939535806e-06, + "log_odds": 9.997724533081055, + "log_odds_ratio": -0.0017671759705990553, + "loss": 0.2402, + "rejected_geometric_mean": -10.39903736114502, + "step": 4623 + }, + { + "chosen_geometric_mean": -0.9870723485946655, + "epoch": 1.14, + "grad_norm": 32.25, + "learning_rate": 1.940173145229117e-06, + "log_odds": 7.257089614868164, + "log_odds_ratio": -0.25011780858039856, + "loss": 0.3119, + "rejected_geometric_mean": -7.935075283050537, + "step": 4624 + }, + { + "chosen_geometric_mean": -0.8465064764022827, + "epoch": 1.15, + "grad_norm": 17.375, + "learning_rate": 1.939224435805617e-06, + "log_odds": 8.541462898254395, + "log_odds_ratio": -0.04202284663915634, + "loss": 0.2431, + "rejected_geometric_mean": -8.815168380737305, + "step": 4625 + }, + { + "chosen_geometric_mean": -1.2374999523162842, + "epoch": 1.15, + "grad_norm": 2.609375, + "learning_rate": 1.9382758114091535e-06, + "log_odds": 2.2344777584075928, + "log_odds_ratio": -0.20224478840827942, + "loss": 0.2674, + "rejected_geometric_mean": -3.2282779216766357, + "step": 4626 + }, + { + "chosen_geometric_mean": -1.3005542755126953, + "epoch": 1.15, + "grad_norm": 86.0, + "learning_rate": 1.93732727218356e-06, + "log_odds": 10.421646118164062, + "log_odds_ratio": -0.20230312645435333, + "loss": 0.3304, + "rejected_geometric_mean": -11.495662689208984, + "step": 4627 + }, + { + "chosen_geometric_mean": -1.0418579578399658, + "epoch": 1.15, + "grad_norm": 31.25, + "learning_rate": 1.936378818272659e-06, + "log_odds": 5.353089332580566, + "log_odds_ratio": -0.13977797329425812, + "loss": 0.2866, + "rejected_geometric_mean": -6.027299880981445, + "step": 4628 + }, + { + "chosen_geometric_mean": -0.9185062646865845, + "epoch": 1.15, + "grad_norm": 5.6875, + "learning_rate": 1.935430449820258e-06, + "log_odds": 5.122557640075684, + "log_odds_ratio": -0.15655934810638428, + "loss": 0.2528, + "rejected_geometric_mean": -5.623050689697266, + "step": 4629 + }, + { + "chosen_geometric_mean": -0.9858012199401855, + "epoch": 1.15, + "grad_norm": 3.890625, + "learning_rate": 1.934482166970154e-06, + "log_odds": 8.432453155517578, + "log_odds_ratio": -0.0009747951989993453, + "loss": 0.2489, + "rejected_geometric_mean": -8.914264678955078, + "step": 4630 + }, + { + "chosen_geometric_mean": -0.8117741942405701, + "epoch": 1.15, + "grad_norm": 2.890625, + "learning_rate": 1.933533969866127e-06, + "log_odds": 12.812616348266602, + "log_odds_ratio": -0.13148154318332672, + "loss": 0.2579, + "rejected_geometric_mean": -13.152898788452148, + "step": 4631 + }, + { + "chosen_geometric_mean": -0.932466983795166, + "epoch": 1.15, + "grad_norm": 2.265625, + "learning_rate": 1.9325858586519465e-06, + "log_odds": 11.96074104309082, + "log_odds_ratio": -0.0001019906485453248, + "loss": 0.2153, + "rejected_geometric_mean": -12.367379188537598, + "step": 4632 + }, + { + "chosen_geometric_mean": -0.8972036242485046, + "epoch": 1.15, + "grad_norm": 7.78125, + "learning_rate": 1.9316378334713715e-06, + "log_odds": 5.937428951263428, + "log_odds_ratio": -0.005556827876716852, + "loss": 0.2263, + "rejected_geometric_mean": -6.308155536651611, + "step": 4633 + }, + { + "chosen_geometric_mean": -0.8975173830986023, + "epoch": 1.15, + "grad_norm": 65.0, + "learning_rate": 1.9306898944681436e-06, + "log_odds": 5.7300920486450195, + "log_odds_ratio": -0.049652207642793655, + "loss": 0.3187, + "rejected_geometric_mean": -6.1224822998046875, + "step": 4634 + }, + { + "chosen_geometric_mean": -1.0395466089248657, + "epoch": 1.15, + "grad_norm": 3.21875, + "learning_rate": 1.929742041785993e-06, + "log_odds": 9.488569259643555, + "log_odds_ratio": -0.0010840786853805184, + "loss": 0.238, + "rejected_geometric_mean": -10.083662986755371, + "step": 4635 + }, + { + "chosen_geometric_mean": -1.093425989151001, + "epoch": 1.15, + "grad_norm": 48.25, + "learning_rate": 1.9287942755686375e-06, + "log_odds": 12.964211463928223, + "log_odds_ratio": -0.1960800737142563, + "loss": 0.3053, + "rejected_geometric_mean": -13.75280475616455, + "step": 4636 + }, + { + "chosen_geometric_mean": -0.8284684419631958, + "epoch": 1.15, + "grad_norm": 6.6875, + "learning_rate": 1.9278465959597816e-06, + "log_odds": 15.976715087890625, + "log_odds_ratio": -0.0012835660018026829, + "loss": 0.2281, + "rejected_geometric_mean": -16.22332191467285, + "step": 4637 + }, + { + "chosen_geometric_mean": -1.0809773206710815, + "epoch": 1.15, + "grad_norm": 6.59375, + "learning_rate": 1.9268990031031144e-06, + "log_odds": 5.5008087158203125, + "log_odds_ratio": -0.06387226283550262, + "loss": 0.273, + "rejected_geometric_mean": -6.175198078155518, + "step": 4638 + }, + { + "chosen_geometric_mean": -1.044414758682251, + "epoch": 1.15, + "grad_norm": 56.5, + "learning_rate": 1.9259514971423155e-06, + "log_odds": 10.479663848876953, + "log_odds_ratio": -0.0070391204208135605, + "loss": 0.22, + "rejected_geometric_mean": -11.091949462890625, + "step": 4639 + }, + { + "chosen_geometric_mean": -0.8764310479164124, + "epoch": 1.15, + "grad_norm": 2.015625, + "learning_rate": 1.92500407822105e-06, + "log_odds": 8.40492057800293, + "log_odds_ratio": -0.012125914916396141, + "loss": 0.229, + "rejected_geometric_mean": -8.74561595916748, + "step": 4640 + }, + { + "chosen_geometric_mean": -1.0046038627624512, + "epoch": 1.15, + "grad_norm": 2.671875, + "learning_rate": 1.9240567464829675e-06, + "log_odds": 5.6008806228637695, + "log_odds_ratio": -0.02729744277894497, + "loss": 0.212, + "rejected_geometric_mean": -6.139110088348389, + "step": 4641 + }, + { + "chosen_geometric_mean": -0.9791046977043152, + "epoch": 1.15, + "grad_norm": 62.5, + "learning_rate": 1.923109502071707e-06, + "log_odds": 3.574070930480957, + "log_odds_ratio": -0.22285959124565125, + "loss": 0.2502, + "rejected_geometric_mean": -4.272391319274902, + "step": 4642 + }, + { + "chosen_geometric_mean": -1.1549346446990967, + "epoch": 1.15, + "grad_norm": 6.96875, + "learning_rate": 1.9221623451308934e-06, + "log_odds": 2.34204363822937, + "log_odds_ratio": -0.2707681655883789, + "loss": 0.2495, + "rejected_geometric_mean": -3.2282700538635254, + "step": 4643 + }, + { + "chosen_geometric_mean": -0.960397481918335, + "epoch": 1.15, + "grad_norm": 7.4375, + "learning_rate": 1.9212152758041396e-06, + "log_odds": 4.2566094398498535, + "log_odds_ratio": -0.16885462403297424, + "loss": 0.236, + "rejected_geometric_mean": -4.781383991241455, + "step": 4644 + }, + { + "chosen_geometric_mean": -0.84401935338974, + "epoch": 1.15, + "grad_norm": 4.34375, + "learning_rate": 1.9202682942350428e-06, + "log_odds": 7.611573219299316, + "log_odds_ratio": -0.2572849988937378, + "loss": 0.2255, + "rejected_geometric_mean": -8.074762344360352, + "step": 4645 + }, + { + "chosen_geometric_mean": -1.1930561065673828, + "epoch": 1.15, + "grad_norm": 43.0, + "learning_rate": 1.9193214005671878e-06, + "log_odds": 9.541906356811523, + "log_odds_ratio": -0.16921308636665344, + "loss": 0.2779, + "rejected_geometric_mean": -10.4541015625, + "step": 4646 + }, + { + "chosen_geometric_mean": -0.8644882440567017, + "epoch": 1.15, + "grad_norm": 3.90625, + "learning_rate": 1.9183745949441483e-06, + "log_odds": 9.60651969909668, + "log_odds_ratio": -0.20201334357261658, + "loss": 0.226, + "rejected_geometric_mean": -10.072240829467773, + "step": 4647 + }, + { + "chosen_geometric_mean": -1.004990816116333, + "epoch": 1.15, + "grad_norm": 21.25, + "learning_rate": 1.917427877509481e-06, + "log_odds": 9.006237030029297, + "log_odds_ratio": -0.09952905774116516, + "loss": 0.2875, + "rejected_geometric_mean": -9.600918769836426, + "step": 4648 + }, + { + "chosen_geometric_mean": -0.8658626675605774, + "epoch": 1.15, + "grad_norm": 6.3125, + "learning_rate": 1.9164812484067303e-06, + "log_odds": 8.600159645080566, + "log_odds_ratio": -0.02344522252678871, + "loss": 0.2369, + "rejected_geometric_mean": -8.905543327331543, + "step": 4649 + }, + { + "chosen_geometric_mean": -1.0190621614456177, + "epoch": 1.15, + "grad_norm": 6.65625, + "learning_rate": 1.91553470777943e-06, + "log_odds": 3.651270866394043, + "log_odds_ratio": -0.4186049699783325, + "loss": 0.2562, + "rejected_geometric_mean": -4.452094554901123, + "step": 4650 + }, + { + "chosen_geometric_mean": -1.1856977939605713, + "epoch": 1.15, + "grad_norm": 5.0625, + "learning_rate": 1.914588255771097e-06, + "log_odds": 5.375668525695801, + "log_odds_ratio": -0.13543939590454102, + "loss": 0.2589, + "rejected_geometric_mean": -6.210714340209961, + "step": 4651 + }, + { + "chosen_geometric_mean": -0.7941388487815857, + "epoch": 1.15, + "grad_norm": 32.75, + "learning_rate": 1.913641892525237e-06, + "log_odds": 6.592303276062012, + "log_odds_ratio": -0.1541612595319748, + "loss": 0.2637, + "rejected_geometric_mean": -6.912599563598633, + "step": 4652 + }, + { + "chosen_geometric_mean": -0.8680015802383423, + "epoch": 1.15, + "grad_norm": 15.1875, + "learning_rate": 1.9126956181853417e-06, + "log_odds": 5.7256927490234375, + "log_odds_ratio": -0.050895240157842636, + "loss": 0.2676, + "rejected_geometric_mean": -6.068724155426025, + "step": 4653 + }, + { + "chosen_geometric_mean": -0.9444606304168701, + "epoch": 1.15, + "grad_norm": 14.625, + "learning_rate": 1.9117494328948865e-06, + "log_odds": 4.948508262634277, + "log_odds_ratio": -0.14493361115455627, + "loss": 0.2809, + "rejected_geometric_mean": -5.505288600921631, + "step": 4654 + }, + { + "chosen_geometric_mean": -0.8371545672416687, + "epoch": 1.15, + "grad_norm": 17.25, + "learning_rate": 1.910803336797339e-06, + "log_odds": 7.480942249298096, + "log_odds_ratio": -0.21379715204238892, + "loss": 0.3066, + "rejected_geometric_mean": -7.91518497467041, + "step": 4655 + }, + { + "chosen_geometric_mean": -1.0480611324310303, + "epoch": 1.15, + "grad_norm": 2.1875, + "learning_rate": 1.9098573300361485e-06, + "log_odds": 5.011662483215332, + "log_odds_ratio": -0.21461434662342072, + "loss": 0.2464, + "rejected_geometric_mean": -5.733015060424805, + "step": 4656 + }, + { + "chosen_geometric_mean": -0.9720233082771301, + "epoch": 1.15, + "grad_norm": 5.4375, + "learning_rate": 1.9089114127547525e-06, + "log_odds": 2.2352547645568848, + "log_odds_ratio": -0.18958374857902527, + "loss": 0.2346, + "rejected_geometric_mean": -2.8642423152923584, + "step": 4657 + }, + { + "chosen_geometric_mean": -1.0076122283935547, + "epoch": 1.15, + "grad_norm": 24.75, + "learning_rate": 1.9079655850965747e-06, + "log_odds": 10.296638488769531, + "log_odds_ratio": -0.0008382200030609965, + "loss": 0.3116, + "rejected_geometric_mean": -10.832411766052246, + "step": 4658 + }, + { + "chosen_geometric_mean": -0.8313882350921631, + "epoch": 1.15, + "grad_norm": 2.0, + "learning_rate": 1.9070198472050248e-06, + "log_odds": 8.54212760925293, + "log_odds_ratio": -0.005553915165364742, + "loss": 0.24, + "rejected_geometric_mean": -8.79419231414795, + "step": 4659 + }, + { + "chosen_geometric_mean": -0.8864680528640747, + "epoch": 1.15, + "grad_norm": 33.5, + "learning_rate": 1.9060741992235002e-06, + "log_odds": 2.794847249984741, + "log_odds_ratio": -0.2620448172092438, + "loss": 0.2574, + "rejected_geometric_mean": -3.359127998352051, + "step": 4660 + }, + { + "chosen_geometric_mean": -1.038459062576294, + "epoch": 1.15, + "grad_norm": 12.3125, + "learning_rate": 1.9051286412953837e-06, + "log_odds": 7.889688491821289, + "log_odds_ratio": -0.026132876053452492, + "loss": 0.313, + "rejected_geometric_mean": -8.478494644165039, + "step": 4661 + }, + { + "chosen_geometric_mean": -1.0820660591125488, + "epoch": 1.15, + "grad_norm": 55.5, + "learning_rate": 1.9041831735640445e-06, + "log_odds": 5.645076751708984, + "log_odds_ratio": -0.023060131818056107, + "loss": 0.2405, + "rejected_geometric_mean": -6.312010765075684, + "step": 4662 + }, + { + "chosen_geometric_mean": -0.8156903982162476, + "epoch": 1.15, + "grad_norm": 2.390625, + "learning_rate": 1.9032377961728387e-06, + "log_odds": 5.324483871459961, + "log_odds_ratio": -0.3571329414844513, + "loss": 0.2562, + "rejected_geometric_mean": -5.811802864074707, + "step": 4663 + }, + { + "chosen_geometric_mean": -1.0761363506317139, + "epoch": 1.15, + "grad_norm": 6.8125, + "learning_rate": 1.9022925092651067e-06, + "log_odds": 17.201488494873047, + "log_odds_ratio": -0.0015799450920894742, + "loss": 0.2685, + "rejected_geometric_mean": -17.85188102722168, + "step": 4664 + }, + { + "chosen_geometric_mean": -0.9807437062263489, + "epoch": 1.15, + "grad_norm": 2.734375, + "learning_rate": 1.9013473129841775e-06, + "log_odds": 8.659228324890137, + "log_odds_ratio": -0.0034361323341727257, + "loss": 0.2355, + "rejected_geometric_mean": -9.169658660888672, + "step": 4665 + }, + { + "chosen_geometric_mean": -1.0447771549224854, + "epoch": 1.16, + "grad_norm": 4.71875, + "learning_rate": 1.9004022074733662e-06, + "log_odds": 5.858959674835205, + "log_odds_ratio": -0.23384778201580048, + "loss": 0.279, + "rejected_geometric_mean": -6.569822788238525, + "step": 4666 + }, + { + "chosen_geometric_mean": -0.769492506980896, + "epoch": 1.16, + "grad_norm": 3.734375, + "learning_rate": 1.8994571928759728e-06, + "log_odds": 10.655263900756836, + "log_odds_ratio": -0.12058867514133453, + "loss": 0.246, + "rejected_geometric_mean": -10.860756874084473, + "step": 4667 + }, + { + "chosen_geometric_mean": -0.9840315580368042, + "epoch": 1.16, + "grad_norm": 3.265625, + "learning_rate": 1.8985122693352843e-06, + "log_odds": 1.3132433891296387, + "log_odds_ratio": -0.395668089389801, + "loss": 0.2463, + "rejected_geometric_mean": -2.100841999053955, + "step": 4668 + }, + { + "chosen_geometric_mean": -0.8862342834472656, + "epoch": 1.16, + "grad_norm": 13.1875, + "learning_rate": 1.897567436994574e-06, + "log_odds": 9.944321632385254, + "log_odds_ratio": -0.0008060744730755687, + "loss": 0.3001, + "rejected_geometric_mean": -10.286105155944824, + "step": 4669 + }, + { + "chosen_geometric_mean": -0.7778680324554443, + "epoch": 1.16, + "grad_norm": 5.0, + "learning_rate": 1.8966226959971001e-06, + "log_odds": 8.938739776611328, + "log_odds_ratio": -0.13431064784526825, + "loss": 0.2469, + "rejected_geometric_mean": -9.147364616394043, + "step": 4670 + }, + { + "chosen_geometric_mean": -1.0230215787887573, + "epoch": 1.16, + "grad_norm": 4.0625, + "learning_rate": 1.89567804648611e-06, + "log_odds": 11.090442657470703, + "log_odds_ratio": -0.0018017899710685015, + "loss": 0.2723, + "rejected_geometric_mean": -11.630640983581543, + "step": 4671 + }, + { + "chosen_geometric_mean": -1.0813121795654297, + "epoch": 1.16, + "grad_norm": 30.25, + "learning_rate": 1.894733488604834e-06, + "log_odds": 14.43241024017334, + "log_odds_ratio": -0.0011061126133427024, + "loss": 0.2777, + "rejected_geometric_mean": -15.090436935424805, + "step": 4672 + }, + { + "chosen_geometric_mean": -1.017158031463623, + "epoch": 1.16, + "grad_norm": 10.75, + "learning_rate": 1.89378902249649e-06, + "log_odds": 5.194549560546875, + "log_odds_ratio": -0.03663874790072441, + "loss": 0.2851, + "rejected_geometric_mean": -5.7636847496032715, + "step": 4673 + }, + { + "chosen_geometric_mean": -0.8464904427528381, + "epoch": 1.16, + "grad_norm": 2.15625, + "learning_rate": 1.8928446483042812e-06, + "log_odds": 8.060371398925781, + "log_odds_ratio": -0.26878511905670166, + "loss": 0.2582, + "rejected_geometric_mean": -8.522148132324219, + "step": 4674 + }, + { + "chosen_geometric_mean": -1.0290114879608154, + "epoch": 1.16, + "grad_norm": 2.15625, + "learning_rate": 1.891900366171398e-06, + "log_odds": 4.6467976570129395, + "log_odds_ratio": -0.1361088752746582, + "loss": 0.2673, + "rejected_geometric_mean": -5.2910356521606445, + "step": 4675 + }, + { + "chosen_geometric_mean": -1.135472059249878, + "epoch": 1.16, + "grad_norm": 4.25, + "learning_rate": 1.8909561762410156e-06, + "log_odds": 3.3685479164123535, + "log_odds_ratio": -0.2374962419271469, + "loss": 0.2412, + "rejected_geometric_mean": -4.253151893615723, + "step": 4676 + }, + { + "chosen_geometric_mean": -1.0408430099487305, + "epoch": 1.16, + "grad_norm": 18.75, + "learning_rate": 1.8900120786562967e-06, + "log_odds": 2.2022206783294678, + "log_odds_ratio": -0.2651994228363037, + "loss": 0.2542, + "rejected_geometric_mean": -2.991788387298584, + "step": 4677 + }, + { + "chosen_geometric_mean": -0.9060373902320862, + "epoch": 1.16, + "grad_norm": 33.75, + "learning_rate": 1.889068073560389e-06, + "log_odds": 6.5744781494140625, + "log_odds_ratio": -0.08504243195056915, + "loss": 0.2547, + "rejected_geometric_mean": -7.033600330352783, + "step": 4678 + }, + { + "chosen_geometric_mean": -0.9781537055969238, + "epoch": 1.16, + "grad_norm": 13.0625, + "learning_rate": 1.8881241610964263e-06, + "log_odds": 2.2273054122924805, + "log_odds_ratio": -0.33304890990257263, + "loss": 0.2614, + "rejected_geometric_mean": -2.910173177719116, + "step": 4679 + }, + { + "chosen_geometric_mean": -0.794525146484375, + "epoch": 1.16, + "grad_norm": 2.90625, + "learning_rate": 1.887180341407528e-06, + "log_odds": 1.8357312679290771, + "log_odds_ratio": -0.22733166813850403, + "loss": 0.2391, + "rejected_geometric_mean": -2.218517780303955, + "step": 4680 + }, + { + "chosen_geometric_mean": -1.1630985736846924, + "epoch": 1.16, + "grad_norm": 14.625, + "learning_rate": 1.8862366146367988e-06, + "log_odds": 10.543137550354004, + "log_odds_ratio": -0.13164065778255463, + "loss": 0.2733, + "rejected_geometric_mean": -11.368253707885742, + "step": 4681 + }, + { + "chosen_geometric_mean": -0.9886850118637085, + "epoch": 1.16, + "grad_norm": 15.9375, + "learning_rate": 1.8852929809273329e-06, + "log_odds": 13.697978973388672, + "log_odds_ratio": -0.12297243624925613, + "loss": 0.2624, + "rejected_geometric_mean": -14.220331192016602, + "step": 4682 + }, + { + "chosen_geometric_mean": -1.161937952041626, + "epoch": 1.16, + "grad_norm": 20.25, + "learning_rate": 1.8843494404222062e-06, + "log_odds": 4.500054359436035, + "log_odds_ratio": -0.23730897903442383, + "loss": 0.253, + "rejected_geometric_mean": -5.345623970031738, + "step": 4683 + }, + { + "chosen_geometric_mean": -1.0488369464874268, + "epoch": 1.16, + "grad_norm": 8.5, + "learning_rate": 1.8834059932644825e-06, + "log_odds": 8.84547233581543, + "log_odds_ratio": -0.13736090064048767, + "loss": 0.2312, + "rejected_geometric_mean": -9.546422958374023, + "step": 4684 + }, + { + "chosen_geometric_mean": -1.0246760845184326, + "epoch": 1.16, + "grad_norm": 11.125, + "learning_rate": 1.8824626395972106e-06, + "log_odds": 7.848893165588379, + "log_odds_ratio": -0.16222575306892395, + "loss": 0.2444, + "rejected_geometric_mean": -8.521247863769531, + "step": 4685 + }, + { + "chosen_geometric_mean": -0.8846985697746277, + "epoch": 1.16, + "grad_norm": 2.671875, + "learning_rate": 1.8815193795634264e-06, + "log_odds": 5.787990093231201, + "log_odds_ratio": -0.0248826052993536, + "loss": 0.2753, + "rejected_geometric_mean": -6.133511543273926, + "step": 4686 + }, + { + "chosen_geometric_mean": -1.214721441268921, + "epoch": 1.16, + "grad_norm": 3.25, + "learning_rate": 1.8805762133061489e-06, + "log_odds": 0.42637941241264343, + "log_odds_ratio": -0.5353365540504456, + "loss": 0.2513, + "rejected_geometric_mean": -1.5567798614501953, + "step": 4687 + }, + { + "chosen_geometric_mean": -1.0661201477050781, + "epoch": 1.16, + "grad_norm": 2.265625, + "learning_rate": 1.879633140968387e-06, + "log_odds": 6.871594429016113, + "log_odds_ratio": -0.09825753420591354, + "loss": 0.252, + "rejected_geometric_mean": -7.548397064208984, + "step": 4688 + }, + { + "chosen_geometric_mean": -0.9236910343170166, + "epoch": 1.16, + "grad_norm": 2.234375, + "learning_rate": 1.8786901626931326e-06, + "log_odds": 5.022852897644043, + "log_odds_ratio": -0.24200847744941711, + "loss": 0.2533, + "rejected_geometric_mean": -5.6162495613098145, + "step": 4689 + }, + { + "chosen_geometric_mean": -1.1175227165222168, + "epoch": 1.16, + "grad_norm": 6.28125, + "learning_rate": 1.8777472786233631e-06, + "log_odds": 7.7239885330200195, + "log_odds_ratio": -0.09323986619710922, + "loss": 0.2362, + "rejected_geometric_mean": -8.450172424316406, + "step": 4690 + }, + { + "chosen_geometric_mean": -0.8334226608276367, + "epoch": 1.16, + "grad_norm": 1.84375, + "learning_rate": 1.8768044889020424e-06, + "log_odds": 11.486300468444824, + "log_odds_ratio": -0.057132840156555176, + "loss": 0.237, + "rejected_geometric_mean": -11.734867095947266, + "step": 4691 + }, + { + "chosen_geometric_mean": -0.8967626094818115, + "epoch": 1.16, + "grad_norm": 31.375, + "learning_rate": 1.8758617936721197e-06, + "log_odds": 6.769385814666748, + "log_odds_ratio": -0.16278967261314392, + "loss": 0.2324, + "rejected_geometric_mean": -7.265271186828613, + "step": 4692 + }, + { + "chosen_geometric_mean": -0.9541453123092651, + "epoch": 1.16, + "grad_norm": 11.5625, + "learning_rate": 1.8749191930765314e-06, + "log_odds": 12.49151611328125, + "log_odds_ratio": -0.0006465213373303413, + "loss": 0.2749, + "rejected_geometric_mean": -12.908634185791016, + "step": 4693 + }, + { + "chosen_geometric_mean": -1.0158828496932983, + "epoch": 1.16, + "grad_norm": 3.59375, + "learning_rate": 1.8739766872581983e-06, + "log_odds": 7.049120903015137, + "log_odds_ratio": -0.11785035580396652, + "loss": 0.2341, + "rejected_geometric_mean": -7.6976518630981445, + "step": 4694 + }, + { + "chosen_geometric_mean": -0.8664221167564392, + "epoch": 1.16, + "grad_norm": 15.25, + "learning_rate": 1.873034276360026e-06, + "log_odds": 5.651269435882568, + "log_odds_ratio": -0.26361989974975586, + "loss": 0.2731, + "rejected_geometric_mean": -6.138221740722656, + "step": 4695 + }, + { + "chosen_geometric_mean": -1.1004586219787598, + "epoch": 1.16, + "grad_norm": 3.296875, + "learning_rate": 1.8720919605249077e-06, + "log_odds": 9.771916389465332, + "log_odds_ratio": -0.0037894942797720432, + "loss": 0.2675, + "rejected_geometric_mean": -10.462745666503906, + "step": 4696 + }, + { + "chosen_geometric_mean": -0.8754583597183228, + "epoch": 1.16, + "grad_norm": 55.0, + "learning_rate": 1.8711497398957198e-06, + "log_odds": 3.504085063934326, + "log_odds_ratio": -0.1018693819642067, + "loss": 0.2816, + "rejected_geometric_mean": -3.9091720581054688, + "step": 4697 + }, + { + "chosen_geometric_mean": -1.0392353534698486, + "epoch": 1.16, + "grad_norm": 4.625, + "learning_rate": 1.8702076146153258e-06, + "log_odds": 7.605713844299316, + "log_odds_ratio": -0.10257188975811005, + "loss": 0.2541, + "rejected_geometric_mean": -8.281330108642578, + "step": 4698 + }, + { + "chosen_geometric_mean": -1.0304245948791504, + "epoch": 1.16, + "grad_norm": 2.765625, + "learning_rate": 1.8692655848265753e-06, + "log_odds": 7.981122016906738, + "log_odds_ratio": -0.1877165138721466, + "loss": 0.2678, + "rejected_geometric_mean": -8.67757797241211, + "step": 4699 + }, + { + "chosen_geometric_mean": -0.9538074731826782, + "epoch": 1.16, + "grad_norm": 18.25, + "learning_rate": 1.8683236506723023e-06, + "log_odds": 9.485512733459473, + "log_odds_ratio": -0.1048358604311943, + "loss": 0.2744, + "rejected_geometric_mean": -10.029645919799805, + "step": 4700 + }, + { + "chosen_geometric_mean": -1.0742628574371338, + "epoch": 1.16, + "grad_norm": 14.0, + "learning_rate": 1.8673818122953267e-06, + "log_odds": 9.919256210327148, + "log_odds_ratio": -0.030956603586673737, + "loss": 0.2993, + "rejected_geometric_mean": -10.559480667114258, + "step": 4701 + }, + { + "chosen_geometric_mean": -0.9000558257102966, + "epoch": 1.16, + "grad_norm": 3.984375, + "learning_rate": 1.8664400698384534e-06, + "log_odds": 5.788891315460205, + "log_odds_ratio": -0.14467523992061615, + "loss": 0.2308, + "rejected_geometric_mean": -6.232012748718262, + "step": 4702 + }, + { + "chosen_geometric_mean": -0.8174519538879395, + "epoch": 1.16, + "grad_norm": 16.0, + "learning_rate": 1.8654984234444732e-06, + "log_odds": 6.904130935668945, + "log_odds_ratio": -0.1946949064731598, + "loss": 0.2559, + "rejected_geometric_mean": -7.299467086791992, + "step": 4703 + }, + { + "chosen_geometric_mean": -1.0911312103271484, + "epoch": 1.16, + "grad_norm": 3.53125, + "learning_rate": 1.8645568732561627e-06, + "log_odds": 4.472657680511475, + "log_odds_ratio": -0.050054680556058884, + "loss": 0.2592, + "rejected_geometric_mean": -5.166525840759277, + "step": 4704 + }, + { + "chosen_geometric_mean": -1.0300215482711792, + "epoch": 1.16, + "grad_norm": 2.0, + "learning_rate": 1.8636154194162843e-06, + "log_odds": 2.288299083709717, + "log_odds_ratio": -0.3833073377609253, + "loss": 0.2483, + "rejected_geometric_mean": -3.102205514907837, + "step": 4705 + }, + { + "chosen_geometric_mean": -0.8342941999435425, + "epoch": 1.17, + "grad_norm": 3.296875, + "learning_rate": 1.8626740620675832e-06, + "log_odds": 6.028098106384277, + "log_odds_ratio": -0.03157380595803261, + "loss": 0.2339, + "rejected_geometric_mean": -6.29268741607666, + "step": 4706 + }, + { + "chosen_geometric_mean": -1.2324814796447754, + "epoch": 1.17, + "grad_norm": 33.5, + "learning_rate": 1.8617328013527924e-06, + "log_odds": 1.9273126125335693, + "log_odds_ratio": -0.2868954539299011, + "loss": 0.3053, + "rejected_geometric_mean": -2.9874603748321533, + "step": 4707 + }, + { + "chosen_geometric_mean": -1.0840160846710205, + "epoch": 1.17, + "grad_norm": 3.828125, + "learning_rate": 1.8607916374146303e-06, + "log_odds": 1.3399147987365723, + "log_odds_ratio": -0.27429676055908203, + "loss": 0.2346, + "rejected_geometric_mean": -2.1356306076049805, + "step": 4708 + }, + { + "chosen_geometric_mean": -0.9513411521911621, + "epoch": 1.17, + "grad_norm": 3.203125, + "learning_rate": 1.8598505703957986e-06, + "log_odds": 9.413671493530273, + "log_odds_ratio": -0.3248673975467682, + "loss": 0.2839, + "rejected_geometric_mean": -10.111859321594238, + "step": 4709 + }, + { + "chosen_geometric_mean": -1.2803726196289062, + "epoch": 1.17, + "grad_norm": 29.0, + "learning_rate": 1.8589096004389873e-06, + "log_odds": 10.334024429321289, + "log_odds_ratio": -0.0010774864349514246, + "loss": 0.295, + "rejected_geometric_mean": -11.195064544677734, + "step": 4710 + }, + { + "chosen_geometric_mean": -1.068915843963623, + "epoch": 1.17, + "grad_norm": 2.203125, + "learning_rate": 1.8579687276868694e-06, + "log_odds": 8.602158546447754, + "log_odds_ratio": -0.09018899500370026, + "loss": 0.2543, + "rejected_geometric_mean": -9.28632926940918, + "step": 4711 + }, + { + "chosen_geometric_mean": -0.9884447455406189, + "epoch": 1.17, + "grad_norm": 6.875, + "learning_rate": 1.8570279522821038e-06, + "log_odds": 5.677864074707031, + "log_odds_ratio": -0.18735966086387634, + "loss": 0.2566, + "rejected_geometric_mean": -6.3112592697143555, + "step": 4712 + }, + { + "chosen_geometric_mean": -0.8335347175598145, + "epoch": 1.17, + "grad_norm": 5.9375, + "learning_rate": 1.8560872743673342e-06, + "log_odds": 6.138456344604492, + "log_odds_ratio": -0.14287428557872772, + "loss": 0.2621, + "rejected_geometric_mean": -6.487484931945801, + "step": 4713 + }, + { + "chosen_geometric_mean": -1.268264889717102, + "epoch": 1.17, + "grad_norm": 69.0, + "learning_rate": 1.8551466940851893e-06, + "log_odds": 5.6939167976379395, + "log_odds_ratio": -0.16904135048389435, + "loss": 0.2943, + "rejected_geometric_mean": -6.660149097442627, + "step": 4714 + }, + { + "chosen_geometric_mean": -0.9781579971313477, + "epoch": 1.17, + "grad_norm": 7.0625, + "learning_rate": 1.8542062115782854e-06, + "log_odds": 12.2904634475708, + "log_odds_ratio": -0.03106716461479664, + "loss": 0.2365, + "rejected_geometric_mean": -12.810002326965332, + "step": 4715 + }, + { + "chosen_geometric_mean": -1.8080025911331177, + "epoch": 1.17, + "grad_norm": 116.5, + "learning_rate": 1.8532658269892219e-06, + "log_odds": 8.860219955444336, + "log_odds_ratio": -1.0612083673477173, + "loss": 0.4594, + "rejected_geometric_mean": -10.222878456115723, + "step": 4716 + }, + { + "chosen_geometric_mean": -1.0745877027511597, + "epoch": 1.17, + "grad_norm": 41.75, + "learning_rate": 1.8523255404605826e-06, + "log_odds": 7.150790214538574, + "log_odds_ratio": -0.04693441838026047, + "loss": 0.2985, + "rejected_geometric_mean": -7.832614421844482, + "step": 4717 + }, + { + "chosen_geometric_mean": -0.9474501609802246, + "epoch": 1.17, + "grad_norm": 2.75, + "learning_rate": 1.851385352134938e-06, + "log_odds": 8.533271789550781, + "log_odds_ratio": -0.1400948017835617, + "loss": 0.2383, + "rejected_geometric_mean": -9.094090461730957, + "step": 4718 + }, + { + "chosen_geometric_mean": -1.0004676580429077, + "epoch": 1.17, + "grad_norm": 2.296875, + "learning_rate": 1.8504452621548442e-06, + "log_odds": 8.69791316986084, + "log_odds_ratio": -0.17511306703090668, + "loss": 0.2484, + "rejected_geometric_mean": -9.277955055236816, + "step": 4719 + }, + { + "chosen_geometric_mean": -0.9482371211051941, + "epoch": 1.17, + "grad_norm": 16.75, + "learning_rate": 1.8495052706628385e-06, + "log_odds": 4.359989166259766, + "log_odds_ratio": -0.23646369576454163, + "loss": 0.2738, + "rejected_geometric_mean": -5.007887840270996, + "step": 4720 + }, + { + "chosen_geometric_mean": -1.0275452136993408, + "epoch": 1.17, + "grad_norm": 2.265625, + "learning_rate": 1.8485653778014495e-06, + "log_odds": 5.536027908325195, + "log_odds_ratio": -0.16326715052127838, + "loss": 0.2531, + "rejected_geometric_mean": -6.219970703125, + "step": 4721 + }, + { + "chosen_geometric_mean": -1.0558743476867676, + "epoch": 1.17, + "grad_norm": 14.1875, + "learning_rate": 1.8476255837131868e-06, + "log_odds": 3.9283447265625, + "log_odds_ratio": -0.44582995772361755, + "loss": 0.2552, + "rejected_geometric_mean": -4.817173957824707, + "step": 4722 + }, + { + "chosen_geometric_mean": -0.9292396306991577, + "epoch": 1.17, + "grad_norm": 26.375, + "learning_rate": 1.8466858885405448e-06, + "log_odds": 11.104702949523926, + "log_odds_ratio": -0.14036130905151367, + "loss": 0.2739, + "rejected_geometric_mean": -11.64613151550293, + "step": 4723 + }, + { + "chosen_geometric_mean": -1.2227051258087158, + "epoch": 1.17, + "grad_norm": 3.109375, + "learning_rate": 1.845746292426004e-06, + "log_odds": 4.723053932189941, + "log_odds_ratio": -0.033613141626119614, + "loss": 0.2808, + "rejected_geometric_mean": -5.599421501159668, + "step": 4724 + }, + { + "chosen_geometric_mean": -0.9890627861022949, + "epoch": 1.17, + "grad_norm": 6.625, + "learning_rate": 1.8448067955120292e-06, + "log_odds": 9.59408187866211, + "log_odds_ratio": -0.06780149042606354, + "loss": 0.2556, + "rejected_geometric_mean": -10.134529113769531, + "step": 4725 + }, + { + "chosen_geometric_mean": -1.2065982818603516, + "epoch": 1.17, + "grad_norm": 2.9375, + "learning_rate": 1.843867397941072e-06, + "log_odds": 3.2830090522766113, + "log_odds_ratio": -0.20688097178936005, + "loss": 0.2852, + "rejected_geometric_mean": -4.210300445556641, + "step": 4726 + }, + { + "chosen_geometric_mean": -1.071531057357788, + "epoch": 1.17, + "grad_norm": 2.25, + "learning_rate": 1.8429280998555672e-06, + "log_odds": 13.584874153137207, + "log_odds_ratio": -0.010866447351872921, + "loss": 0.2154, + "rejected_geometric_mean": -14.206067085266113, + "step": 4727 + }, + { + "chosen_geometric_mean": -1.093682050704956, + "epoch": 1.17, + "grad_norm": 11.0625, + "learning_rate": 1.8419889013979348e-06, + "log_odds": 5.712518215179443, + "log_odds_ratio": -0.18491441011428833, + "loss": 0.2484, + "rejected_geometric_mean": -6.5091552734375, + "step": 4728 + }, + { + "chosen_geometric_mean": -0.7596551775932312, + "epoch": 1.17, + "grad_norm": 7.53125, + "learning_rate": 1.84104980271058e-06, + "log_odds": 5.046341896057129, + "log_odds_ratio": -0.17997826635837555, + "loss": 0.2545, + "rejected_geometric_mean": -5.302937984466553, + "step": 4729 + }, + { + "chosen_geometric_mean": -1.084916591644287, + "epoch": 1.17, + "grad_norm": 8.375, + "learning_rate": 1.8401108039358912e-06, + "log_odds": 6.911381721496582, + "log_odds_ratio": -0.2528025507926941, + "loss": 0.2639, + "rejected_geometric_mean": -7.667909145355225, + "step": 4730 + }, + { + "chosen_geometric_mean": -1.1764070987701416, + "epoch": 1.17, + "grad_norm": 15.1875, + "learning_rate": 1.839171905216246e-06, + "log_odds": 6.870141506195068, + "log_odds_ratio": -0.023068977519869804, + "loss": 0.2343, + "rejected_geometric_mean": -7.670045852661133, + "step": 4731 + }, + { + "chosen_geometric_mean": -1.1815625429153442, + "epoch": 1.17, + "grad_norm": 31.625, + "learning_rate": 1.838233106694002e-06, + "log_odds": 7.06659460067749, + "log_odds_ratio": -0.10214892029762268, + "loss": 0.2727, + "rejected_geometric_mean": -7.930900573730469, + "step": 4732 + }, + { + "chosen_geometric_mean": -1.15292489528656, + "epoch": 1.17, + "grad_norm": 1.9609375, + "learning_rate": 1.8372944085115035e-06, + "log_odds": 2.3307974338531494, + "log_odds_ratio": -0.28591084480285645, + "loss": 0.2546, + "rejected_geometric_mean": -3.2104651927948, + "step": 4733 + }, + { + "chosen_geometric_mean": -0.8285555839538574, + "epoch": 1.17, + "grad_norm": 20.5, + "learning_rate": 1.8363558108110807e-06, + "log_odds": 5.429527282714844, + "log_odds_ratio": -0.05647021904587746, + "loss": 0.218, + "rejected_geometric_mean": -5.723085880279541, + "step": 4734 + }, + { + "chosen_geometric_mean": -1.2065396308898926, + "epoch": 1.17, + "grad_norm": 3.421875, + "learning_rate": 1.8354173137350476e-06, + "log_odds": 4.802238464355469, + "log_odds_ratio": -0.22330714762210846, + "loss": 0.2646, + "rejected_geometric_mean": -5.739688873291016, + "step": 4735 + }, + { + "chosen_geometric_mean": -0.9675995707511902, + "epoch": 1.17, + "grad_norm": 31.5, + "learning_rate": 1.8344789174257006e-06, + "log_odds": 6.728741645812988, + "log_odds_ratio": -0.2738908529281616, + "loss": 0.2621, + "rejected_geometric_mean": -7.335876941680908, + "step": 4736 + }, + { + "chosen_geometric_mean": -1.0799226760864258, + "epoch": 1.17, + "grad_norm": 4.3125, + "learning_rate": 1.8335406220253257e-06, + "log_odds": 7.111065864562988, + "log_odds_ratio": -0.07767397165298462, + "loss": 0.1884, + "rejected_geometric_mean": -7.751511096954346, + "step": 4737 + }, + { + "chosen_geometric_mean": -0.8800069689750671, + "epoch": 1.17, + "grad_norm": 2.8125, + "learning_rate": 1.832602427676191e-06, + "log_odds": 1.7807656526565552, + "log_odds_ratio": -0.2681337296962738, + "loss": 0.2474, + "rejected_geometric_mean": -2.28444504737854, + "step": 4738 + }, + { + "chosen_geometric_mean": -0.9929946064949036, + "epoch": 1.17, + "grad_norm": 3.015625, + "learning_rate": 1.8316643345205477e-06, + "log_odds": 0.934350848197937, + "log_odds_ratio": -0.41362759470939636, + "loss": 0.2801, + "rejected_geometric_mean": -1.7720094919204712, + "step": 4739 + }, + { + "chosen_geometric_mean": -0.8941065073013306, + "epoch": 1.17, + "grad_norm": 2.8125, + "learning_rate": 1.8307263427006338e-06, + "log_odds": 6.020190238952637, + "log_odds_ratio": -0.030848676338791847, + "loss": 0.2545, + "rejected_geometric_mean": -6.38206148147583, + "step": 4740 + }, + { + "chosen_geometric_mean": -1.1532673835754395, + "epoch": 1.17, + "grad_norm": 7.71875, + "learning_rate": 1.829788452358671e-06, + "log_odds": 7.626576900482178, + "log_odds_ratio": -0.30249133706092834, + "loss": 0.2899, + "rejected_geometric_mean": -8.607282638549805, + "step": 4741 + }, + { + "chosen_geometric_mean": -1.0924732685089111, + "epoch": 1.17, + "grad_norm": 2.15625, + "learning_rate": 1.8288506636368674e-06, + "log_odds": 2.2437353134155273, + "log_odds_ratio": -0.24573597311973572, + "loss": 0.2437, + "rejected_geometric_mean": -3.0655813217163086, + "step": 4742 + }, + { + "chosen_geometric_mean": -0.5815112590789795, + "epoch": 1.17, + "grad_norm": 2.375, + "learning_rate": 1.8279129766774135e-06, + "log_odds": 7.615201473236084, + "log_odds_ratio": -0.37889084219932556, + "loss": 0.2521, + "rejected_geometric_mean": -7.876663684844971, + "step": 4743 + }, + { + "chosen_geometric_mean": -0.9637008905410767, + "epoch": 1.17, + "grad_norm": 36.75, + "learning_rate": 1.8269753916224848e-06, + "log_odds": 4.572346210479736, + "log_odds_ratio": -0.1438426524400711, + "loss": 0.2308, + "rejected_geometric_mean": -5.126352310180664, + "step": 4744 + }, + { + "chosen_geometric_mean": -0.8566316366195679, + "epoch": 1.17, + "grad_norm": 9.0625, + "learning_rate": 1.8260379086142425e-06, + "log_odds": 6.8149542808532715, + "log_odds_ratio": -0.021884631365537643, + "loss": 0.2874, + "rejected_geometric_mean": -7.0996479988098145, + "step": 4745 + }, + { + "chosen_geometric_mean": -0.9246861934661865, + "epoch": 1.18, + "grad_norm": 13.875, + "learning_rate": 1.8251005277948303e-06, + "log_odds": 6.665754795074463, + "log_odds_ratio": -0.2892882823944092, + "loss": 0.2576, + "rejected_geometric_mean": -7.229589939117432, + "step": 4746 + }, + { + "chosen_geometric_mean": -1.0050392150878906, + "epoch": 1.18, + "grad_norm": 4.78125, + "learning_rate": 1.824163249306378e-06, + "log_odds": 11.75085735321045, + "log_odds_ratio": -0.0005825548432767391, + "loss": 0.2333, + "rejected_geometric_mean": -12.284192085266113, + "step": 4747 + }, + { + "chosen_geometric_mean": -1.0263419151306152, + "epoch": 1.18, + "grad_norm": 15.25, + "learning_rate": 1.823226073291e-06, + "log_odds": 10.023420333862305, + "log_odds_ratio": -0.043483514338731766, + "loss": 0.2691, + "rejected_geometric_mean": -10.609129905700684, + "step": 4748 + }, + { + "chosen_geometric_mean": -1.1699962615966797, + "epoch": 1.18, + "grad_norm": 13.4375, + "learning_rate": 1.822288999890795e-06, + "log_odds": 8.467077255249023, + "log_odds_ratio": -0.12315794080495834, + "loss": 0.2306, + "rejected_geometric_mean": -9.346366882324219, + "step": 4749 + }, + { + "chosen_geometric_mean": -0.9499980211257935, + "epoch": 1.18, + "grad_norm": 4.125, + "learning_rate": 1.8213520292478454e-06, + "log_odds": 3.7081894874572754, + "log_odds_ratio": -0.2975969910621643, + "loss": 0.2592, + "rejected_geometric_mean": -4.335880279541016, + "step": 4750 + }, + { + "chosen_geometric_mean": -0.8984246253967285, + "epoch": 1.18, + "grad_norm": 1.7578125, + "learning_rate": 1.8204151615042182e-06, + "log_odds": 12.785463333129883, + "log_odds_ratio": -0.11568571627140045, + "loss": 0.193, + "rejected_geometric_mean": -13.224254608154297, + "step": 4751 + }, + { + "chosen_geometric_mean": -1.098501205444336, + "epoch": 1.18, + "grad_norm": 2.4375, + "learning_rate": 1.8194783968019643e-06, + "log_odds": 3.997821807861328, + "log_odds_ratio": -0.3668835759162903, + "loss": 0.2977, + "rejected_geometric_mean": -4.932938098907471, + "step": 4752 + }, + { + "chosen_geometric_mean": -1.139059066772461, + "epoch": 1.18, + "grad_norm": 41.25, + "learning_rate": 1.8185417352831213e-06, + "log_odds": 6.168135643005371, + "log_odds_ratio": -0.06021168828010559, + "loss": 0.286, + "rejected_geometric_mean": -6.874640464782715, + "step": 4753 + }, + { + "chosen_geometric_mean": -0.9449237585067749, + "epoch": 1.18, + "grad_norm": 19.25, + "learning_rate": 1.81760517708971e-06, + "log_odds": 10.023929595947266, + "log_odds_ratio": -0.011587874963879585, + "loss": 0.2471, + "rejected_geometric_mean": -10.462867736816406, + "step": 4754 + }, + { + "chosen_geometric_mean": -1.1726049184799194, + "epoch": 1.18, + "grad_norm": 5.5625, + "learning_rate": 1.816668722363733e-06, + "log_odds": 4.1342926025390625, + "log_odds_ratio": -0.15242472290992737, + "loss": 0.244, + "rejected_geometric_mean": -4.951374053955078, + "step": 4755 + }, + { + "chosen_geometric_mean": -1.144232988357544, + "epoch": 1.18, + "grad_norm": 5.28125, + "learning_rate": 1.8157323712471804e-06, + "log_odds": 6.7448577880859375, + "log_odds_ratio": -0.13281820714473724, + "loss": 0.257, + "rejected_geometric_mean": -7.556487560272217, + "step": 4756 + }, + { + "chosen_geometric_mean": -1.0545532703399658, + "epoch": 1.18, + "grad_norm": 7.5625, + "learning_rate": 1.8147961238820256e-06, + "log_odds": 6.987044334411621, + "log_odds_ratio": -0.12133748829364777, + "loss": 0.2335, + "rejected_geometric_mean": -7.673760414123535, + "step": 4757 + }, + { + "chosen_geometric_mean": -0.988283634185791, + "epoch": 1.18, + "grad_norm": 5.78125, + "learning_rate": 1.8138599804102253e-06, + "log_odds": 3.504185199737549, + "log_odds_ratio": -0.11858364939689636, + "loss": 0.2751, + "rejected_geometric_mean": -4.090982913970947, + "step": 4758 + }, + { + "chosen_geometric_mean": -1.047934651374817, + "epoch": 1.18, + "grad_norm": 2.15625, + "learning_rate": 1.8129239409737222e-06, + "log_odds": 5.0332465171813965, + "log_odds_ratio": -0.013185718096792698, + "loss": 0.2543, + "rejected_geometric_mean": -5.637151718139648, + "step": 4759 + }, + { + "chosen_geometric_mean": -1.0059049129486084, + "epoch": 1.18, + "grad_norm": 20.5, + "learning_rate": 1.8119880057144426e-06, + "log_odds": 14.849367141723633, + "log_odds_ratio": -0.00043328379979357123, + "loss": 0.2963, + "rejected_geometric_mean": -15.398662567138672, + "step": 4760 + }, + { + "chosen_geometric_mean": -1.305740475654602, + "epoch": 1.18, + "grad_norm": 10.1875, + "learning_rate": 1.8110521747742968e-06, + "log_odds": 2.2668087482452393, + "log_odds_ratio": -0.3676140308380127, + "loss": 0.281, + "rejected_geometric_mean": -3.4596798419952393, + "step": 4761 + }, + { + "chosen_geometric_mean": -1.024672031402588, + "epoch": 1.18, + "grad_norm": 3.890625, + "learning_rate": 1.8101164482951783e-06, + "log_odds": 5.618467330932617, + "log_odds_ratio": -0.2853916883468628, + "loss": 0.2617, + "rejected_geometric_mean": -6.321542263031006, + "step": 4762 + }, + { + "chosen_geometric_mean": -1.131298303604126, + "epoch": 1.18, + "grad_norm": 6.3125, + "learning_rate": 1.8091808264189648e-06, + "log_odds": 3.471404790878296, + "log_odds_ratio": -0.11438708007335663, + "loss": 0.3242, + "rejected_geometric_mean": -4.261948585510254, + "step": 4763 + }, + { + "chosen_geometric_mean": -0.9220937490463257, + "epoch": 1.18, + "grad_norm": 2.859375, + "learning_rate": 1.8082453092875222e-06, + "log_odds": 5.531073570251465, + "log_odds_ratio": -0.3309253752231598, + "loss": 0.2565, + "rejected_geometric_mean": -6.20673942565918, + "step": 4764 + }, + { + "chosen_geometric_mean": -0.883080244064331, + "epoch": 1.18, + "grad_norm": 3.921875, + "learning_rate": 1.807309897042695e-06, + "log_odds": 6.02579927444458, + "log_odds_ratio": -0.17853403091430664, + "loss": 0.2598, + "rejected_geometric_mean": -6.478357791900635, + "step": 4765 + }, + { + "chosen_geometric_mean": -1.0653241872787476, + "epoch": 1.18, + "grad_norm": 4.75, + "learning_rate": 1.8063745898263143e-06, + "log_odds": 5.284533977508545, + "log_odds_ratio": -0.1651116907596588, + "loss": 0.2367, + "rejected_geometric_mean": -6.005153179168701, + "step": 4766 + }, + { + "chosen_geometric_mean": -0.9474917650222778, + "epoch": 1.18, + "grad_norm": 3.90625, + "learning_rate": 1.8054393877801959e-06, + "log_odds": 12.941964149475098, + "log_odds_ratio": -4.0743012505117804e-05, + "loss": 0.2484, + "rejected_geometric_mean": -13.372539520263672, + "step": 4767 + }, + { + "chosen_geometric_mean": -1.2349071502685547, + "epoch": 1.18, + "grad_norm": 22.0, + "learning_rate": 1.8045042910461392e-06, + "log_odds": 9.887755393981934, + "log_odds_ratio": -0.14040793478488922, + "loss": 0.2589, + "rejected_geometric_mean": -10.800271987915039, + "step": 4768 + }, + { + "chosen_geometric_mean": -1.051938772201538, + "epoch": 1.18, + "grad_norm": 3.1875, + "learning_rate": 1.8035692997659243e-06, + "log_odds": 2.1819119453430176, + "log_odds_ratio": -0.28264498710632324, + "loss": 0.2736, + "rejected_geometric_mean": -2.9813623428344727, + "step": 4769 + }, + { + "chosen_geometric_mean": -0.8995137214660645, + "epoch": 1.18, + "grad_norm": 5.25, + "learning_rate": 1.8026344140813224e-06, + "log_odds": 6.442892074584961, + "log_odds_ratio": -0.1651090383529663, + "loss": 0.2503, + "rejected_geometric_mean": -6.929258823394775, + "step": 4770 + }, + { + "chosen_geometric_mean": -1.1151888370513916, + "epoch": 1.18, + "grad_norm": 36.75, + "learning_rate": 1.8016996341340832e-06, + "log_odds": 6.494563102722168, + "log_odds_ratio": -0.02921740524470806, + "loss": 0.3208, + "rejected_geometric_mean": -7.186316967010498, + "step": 4771 + }, + { + "chosen_geometric_mean": -1.0827476978302002, + "epoch": 1.18, + "grad_norm": 5.4375, + "learning_rate": 1.8007649600659404e-06, + "log_odds": 10.80410385131836, + "log_odds_ratio": -0.08162830770015717, + "loss": 0.2354, + "rejected_geometric_mean": -11.508949279785156, + "step": 4772 + }, + { + "chosen_geometric_mean": -1.2913657426834106, + "epoch": 1.18, + "grad_norm": 12.125, + "learning_rate": 1.7998303920186144e-06, + "log_odds": 4.976480484008789, + "log_odds_ratio": -0.3783267140388489, + "loss": 0.251, + "rejected_geometric_mean": -5.9326171875, + "step": 4773 + }, + { + "chosen_geometric_mean": -1.1609033346176147, + "epoch": 1.18, + "grad_norm": 8.75, + "learning_rate": 1.7988959301338074e-06, + "log_odds": 5.164299964904785, + "log_odds_ratio": -0.3877599835395813, + "loss": 0.3075, + "rejected_geometric_mean": -6.100008964538574, + "step": 4774 + }, + { + "chosen_geometric_mean": -0.9175915718078613, + "epoch": 1.18, + "grad_norm": 16.0, + "learning_rate": 1.7979615745532069e-06, + "log_odds": 10.83476448059082, + "log_odds_ratio": -0.12479609251022339, + "loss": 0.2402, + "rejected_geometric_mean": -11.335517883300781, + "step": 4775 + }, + { + "chosen_geometric_mean": -0.9107174873352051, + "epoch": 1.18, + "grad_norm": 20.25, + "learning_rate": 1.7970273254184836e-06, + "log_odds": 8.165499687194824, + "log_odds_ratio": -0.0038307509385049343, + "loss": 0.2575, + "rejected_geometric_mean": -8.561933517456055, + "step": 4776 + }, + { + "chosen_geometric_mean": -1.0024234056472778, + "epoch": 1.18, + "grad_norm": 23.875, + "learning_rate": 1.7960931828712919e-06, + "log_odds": 3.2103629112243652, + "log_odds_ratio": -0.18077895045280457, + "loss": 0.2684, + "rejected_geometric_mean": -3.8319311141967773, + "step": 4777 + }, + { + "chosen_geometric_mean": -0.8894699811935425, + "epoch": 1.18, + "grad_norm": 6.875, + "learning_rate": 1.7951591470532707e-06, + "log_odds": 5.816893577575684, + "log_odds_ratio": -0.01214152853935957, + "loss": 0.2334, + "rejected_geometric_mean": -6.18100643157959, + "step": 4778 + }, + { + "chosen_geometric_mean": -1.0341503620147705, + "epoch": 1.18, + "grad_norm": 66.5, + "learning_rate": 1.794225218106041e-06, + "log_odds": 8.7671480178833, + "log_odds_ratio": -0.16383884847164154, + "loss": 0.2777, + "rejected_geometric_mean": -9.371628761291504, + "step": 4779 + }, + { + "chosen_geometric_mean": -1.2955204248428345, + "epoch": 1.18, + "grad_norm": 3.40625, + "learning_rate": 1.793291396171209e-06, + "log_odds": 5.094694137573242, + "log_odds_ratio": -0.30728861689567566, + "loss": 0.2672, + "rejected_geometric_mean": -6.095487594604492, + "step": 4780 + }, + { + "chosen_geometric_mean": -0.9179327487945557, + "epoch": 1.18, + "grad_norm": 3.65625, + "learning_rate": 1.7923576813903658e-06, + "log_odds": 4.828092575073242, + "log_odds_ratio": -0.1277804970741272, + "loss": 0.2546, + "rejected_geometric_mean": -5.278908729553223, + "step": 4781 + }, + { + "chosen_geometric_mean": -0.9769293069839478, + "epoch": 1.18, + "grad_norm": 2.375, + "learning_rate": 1.7914240739050844e-06, + "log_odds": 4.745545864105225, + "log_odds_ratio": -0.3241303861141205, + "loss": 0.2639, + "rejected_geometric_mean": -5.430512428283691, + "step": 4782 + }, + { + "chosen_geometric_mean": -0.7343469858169556, + "epoch": 1.18, + "grad_norm": 4.875, + "learning_rate": 1.790490573856922e-06, + "log_odds": 7.690652370452881, + "log_odds_ratio": -0.007417398039251566, + "loss": 0.2567, + "rejected_geometric_mean": -7.769637107849121, + "step": 4783 + }, + { + "chosen_geometric_mean": -0.9803941249847412, + "epoch": 1.18, + "grad_norm": 2.828125, + "learning_rate": 1.7895571813874197e-06, + "log_odds": 18.21384048461914, + "log_odds_ratio": -2.056368202829617e-06, + "loss": 0.2438, + "rejected_geometric_mean": -18.698766708374023, + "step": 4784 + }, + { + "chosen_geometric_mean": -0.9487555623054504, + "epoch": 1.18, + "grad_norm": 5.15625, + "learning_rate": 1.7886238966381012e-06, + "log_odds": 7.381293773651123, + "log_odds_ratio": -0.1980721652507782, + "loss": 0.2577, + "rejected_geometric_mean": -7.9556732177734375, + "step": 4785 + }, + { + "chosen_geometric_mean": -1.070223331451416, + "epoch": 1.18, + "grad_norm": 5.5625, + "learning_rate": 1.7876907197504764e-06, + "log_odds": 5.136604309082031, + "log_odds_ratio": -0.12524937093257904, + "loss": 0.2549, + "rejected_geometric_mean": -5.837182998657227, + "step": 4786 + }, + { + "chosen_geometric_mean": -1.1230533123016357, + "epoch": 1.19, + "grad_norm": 17.25, + "learning_rate": 1.7867576508660373e-06, + "log_odds": 6.250237464904785, + "log_odds_ratio": -0.23902598023414612, + "loss": 0.271, + "rejected_geometric_mean": -7.095586776733398, + "step": 4787 + }, + { + "chosen_geometric_mean": -0.8774983286857605, + "epoch": 1.19, + "grad_norm": 36.5, + "learning_rate": 1.7858246901262586e-06, + "log_odds": 7.7338714599609375, + "log_odds_ratio": -0.07307365536689758, + "loss": 0.2653, + "rejected_geometric_mean": -8.117449760437012, + "step": 4788 + }, + { + "chosen_geometric_mean": -1.0721335411071777, + "epoch": 1.19, + "grad_norm": 3.859375, + "learning_rate": 1.7848918376725996e-06, + "log_odds": 10.296525001525879, + "log_odds_ratio": -0.037640348076820374, + "loss": 0.2467, + "rejected_geometric_mean": -10.966428756713867, + "step": 4789 + }, + { + "chosen_geometric_mean": -0.9761890172958374, + "epoch": 1.19, + "grad_norm": 4.96875, + "learning_rate": 1.7839590936465033e-06, + "log_odds": 2.8264877796173096, + "log_odds_ratio": -0.22126874327659607, + "loss": 0.2945, + "rejected_geometric_mean": -3.453131675720215, + "step": 4790 + }, + { + "chosen_geometric_mean": -0.9453206658363342, + "epoch": 1.19, + "grad_norm": 12.75, + "learning_rate": 1.7830264581893963e-06, + "log_odds": 4.2191057205200195, + "log_odds_ratio": -0.18107280135154724, + "loss": 0.2776, + "rejected_geometric_mean": -4.7587361335754395, + "step": 4791 + }, + { + "chosen_geometric_mean": -1.1945492029190063, + "epoch": 1.19, + "grad_norm": 3.515625, + "learning_rate": 1.7820939314426882e-06, + "log_odds": 9.197378158569336, + "log_odds_ratio": -0.08695603907108307, + "loss": 0.2574, + "rejected_geometric_mean": -10.079763412475586, + "step": 4792 + }, + { + "chosen_geometric_mean": -0.8543986082077026, + "epoch": 1.19, + "grad_norm": 5.53125, + "learning_rate": 1.7811615135477733e-06, + "log_odds": 14.1998929977417, + "log_odds_ratio": -0.05977308750152588, + "loss": 0.2453, + "rejected_geometric_mean": -14.527249336242676, + "step": 4793 + }, + { + "chosen_geometric_mean": -0.8573566675186157, + "epoch": 1.19, + "grad_norm": 10.4375, + "learning_rate": 1.780229204646028e-06, + "log_odds": 5.879944324493408, + "log_odds_ratio": -0.3851052522659302, + "loss": 0.3128, + "rejected_geometric_mean": -6.478046417236328, + "step": 4794 + }, + { + "chosen_geometric_mean": -0.9292452335357666, + "epoch": 1.19, + "grad_norm": 9.6875, + "learning_rate": 1.7792970048788121e-06, + "log_odds": 5.166711330413818, + "log_odds_ratio": -0.027504870668053627, + "loss": 0.2611, + "rejected_geometric_mean": -5.589901924133301, + "step": 4795 + }, + { + "chosen_geometric_mean": -1.041348934173584, + "epoch": 1.19, + "grad_norm": 18.0, + "learning_rate": 1.778364914387469e-06, + "log_odds": 6.315704345703125, + "log_odds_ratio": -0.25736966729164124, + "loss": 0.2989, + "rejected_geometric_mean": -7.0803327560424805, + "step": 4796 + }, + { + "chosen_geometric_mean": -1.0541303157806396, + "epoch": 1.19, + "grad_norm": 3.828125, + "learning_rate": 1.7774329333133283e-06, + "log_odds": 14.479494094848633, + "log_odds_ratio": -0.04125401750206947, + "loss": 0.2528, + "rejected_geometric_mean": -15.120397567749023, + "step": 4797 + }, + { + "chosen_geometric_mean": -1.0127224922180176, + "epoch": 1.19, + "grad_norm": 2.25, + "learning_rate": 1.7765010617976992e-06, + "log_odds": 7.520962238311768, + "log_odds_ratio": -0.19895993173122406, + "loss": 0.286, + "rejected_geometric_mean": -8.113332748413086, + "step": 4798 + }, + { + "chosen_geometric_mean": -0.8102558255195618, + "epoch": 1.19, + "grad_norm": 17.0, + "learning_rate": 1.7755692999818758e-06, + "log_odds": 6.200488090515137, + "log_odds_ratio": -0.06908392161130905, + "loss": 0.2369, + "rejected_geometric_mean": -6.483200550079346, + "step": 4799 + }, + { + "chosen_geometric_mean": -1.0608155727386475, + "epoch": 1.19, + "grad_norm": 11.0625, + "learning_rate": 1.7746376480071355e-06, + "log_odds": 7.564365386962891, + "log_odds_ratio": -0.02319406345486641, + "loss": 0.2714, + "rejected_geometric_mean": -8.17375373840332, + "step": 4800 + }, + { + "chosen_geometric_mean": -0.899735689163208, + "epoch": 1.19, + "grad_norm": 18.625, + "learning_rate": 1.77370610601474e-06, + "log_odds": 6.889592170715332, + "log_odds_ratio": -0.15955092012882233, + "loss": 0.2827, + "rejected_geometric_mean": -7.380454063415527, + "step": 4801 + }, + { + "chosen_geometric_mean": -1.070309042930603, + "epoch": 1.19, + "grad_norm": 8.25, + "learning_rate": 1.772774674145931e-06, + "log_odds": 13.978278160095215, + "log_odds_ratio": -0.0006639169878326356, + "loss": 0.2542, + "rejected_geometric_mean": -14.596445083618164, + "step": 4802 + }, + { + "chosen_geometric_mean": -0.940315842628479, + "epoch": 1.19, + "grad_norm": 1.984375, + "learning_rate": 1.7718433525419394e-06, + "log_odds": 5.741487503051758, + "log_odds_ratio": -0.23912003636360168, + "loss": 0.2531, + "rejected_geometric_mean": -6.315248966217041, + "step": 4803 + }, + { + "chosen_geometric_mean": -1.7080378532409668, + "epoch": 1.19, + "grad_norm": 5.78125, + "learning_rate": 1.7709121413439739e-06, + "log_odds": 11.125649452209473, + "log_odds_ratio": -0.08535495400428772, + "loss": 0.3209, + "rejected_geometric_mean": -12.53567123413086, + "step": 4804 + }, + { + "chosen_geometric_mean": -0.9429973363876343, + "epoch": 1.19, + "grad_norm": 74.0, + "learning_rate": 1.7699810406932283e-06, + "log_odds": 7.690788745880127, + "log_odds_ratio": -0.009324337355792522, + "loss": 0.2957, + "rejected_geometric_mean": -8.137626647949219, + "step": 4805 + }, + { + "chosen_geometric_mean": -0.9323113560676575, + "epoch": 1.19, + "grad_norm": 17.5, + "learning_rate": 1.7690500507308806e-06, + "log_odds": 11.46704387664795, + "log_odds_ratio": -0.052742719650268555, + "loss": 0.2172, + "rejected_geometric_mean": -11.865017890930176, + "step": 4806 + }, + { + "chosen_geometric_mean": -0.973088264465332, + "epoch": 1.19, + "grad_norm": 3.609375, + "learning_rate": 1.76811917159809e-06, + "log_odds": 2.5722689628601074, + "log_odds_ratio": -0.16798187792301178, + "loss": 0.2685, + "rejected_geometric_mean": -3.1939144134521484, + "step": 4807 + }, + { + "chosen_geometric_mean": -0.8251848816871643, + "epoch": 1.19, + "grad_norm": 2.96875, + "learning_rate": 1.7671884034360015e-06, + "log_odds": 1.1320092678070068, + "log_odds_ratio": -0.3687117397785187, + "loss": 0.2428, + "rejected_geometric_mean": -1.6705411672592163, + "step": 4808 + }, + { + "chosen_geometric_mean": -1.1551305055618286, + "epoch": 1.19, + "grad_norm": 20.625, + "learning_rate": 1.7662577463857417e-06, + "log_odds": 4.75752067565918, + "log_odds_ratio": -0.1573365330696106, + "loss": 0.3321, + "rejected_geometric_mean": -5.591288089752197, + "step": 4809 + }, + { + "chosen_geometric_mean": -1.0375179052352905, + "epoch": 1.19, + "grad_norm": 13.0, + "learning_rate": 1.7653272005884207e-06, + "log_odds": 4.139054775238037, + "log_odds_ratio": -0.10144317150115967, + "loss": 0.2384, + "rejected_geometric_mean": -4.787705898284912, + "step": 4810 + }, + { + "chosen_geometric_mean": -1.028445839881897, + "epoch": 1.19, + "grad_norm": 15.125, + "learning_rate": 1.7643967661851302e-06, + "log_odds": 9.437789916992188, + "log_odds_ratio": -0.027466483414173126, + "loss": 0.2518, + "rejected_geometric_mean": -9.977343559265137, + "step": 4811 + }, + { + "chosen_geometric_mean": -0.7741138935089111, + "epoch": 1.19, + "grad_norm": 17.375, + "learning_rate": 1.7634664433169469e-06, + "log_odds": 14.8717041015625, + "log_odds_ratio": -0.14545251429080963, + "loss": 0.312, + "rejected_geometric_mean": -15.172341346740723, + "step": 4812 + }, + { + "chosen_geometric_mean": -1.0890522003173828, + "epoch": 1.19, + "grad_norm": 7.5625, + "learning_rate": 1.7625362321249318e-06, + "log_odds": 0.8806033134460449, + "log_odds_ratio": -0.600751519203186, + "loss": 0.288, + "rejected_geometric_mean": -1.8319909572601318, + "step": 4813 + }, + { + "chosen_geometric_mean": -0.9519809484481812, + "epoch": 1.19, + "grad_norm": 20.0, + "learning_rate": 1.761606132750125e-06, + "log_odds": 10.758018493652344, + "log_odds_ratio": -0.026852592825889587, + "loss": 0.3326, + "rejected_geometric_mean": -11.220274925231934, + "step": 4814 + }, + { + "chosen_geometric_mean": -0.8722642660140991, + "epoch": 1.19, + "grad_norm": 2.15625, + "learning_rate": 1.760676145333553e-06, + "log_odds": 7.540033340454102, + "log_odds_ratio": -0.004453589674085379, + "loss": 0.2282, + "rejected_geometric_mean": -7.860764980316162, + "step": 4815 + }, + { + "chosen_geometric_mean": -1.0205001831054688, + "epoch": 1.19, + "grad_norm": 18.0, + "learning_rate": 1.7597462700162243e-06, + "log_odds": 6.481976509094238, + "log_odds_ratio": -0.02865580841898918, + "loss": 0.2921, + "rejected_geometric_mean": -7.045166015625, + "step": 4816 + }, + { + "chosen_geometric_mean": -1.1360529661178589, + "epoch": 1.19, + "grad_norm": 8.75, + "learning_rate": 1.7588165069391306e-06, + "log_odds": 6.856751918792725, + "log_odds_ratio": -0.112571120262146, + "loss": 0.2726, + "rejected_geometric_mean": -7.6496429443359375, + "step": 4817 + }, + { + "chosen_geometric_mean": -0.987146258354187, + "epoch": 1.19, + "grad_norm": 41.25, + "learning_rate": 1.7578868562432438e-06, + "log_odds": 8.560209274291992, + "log_odds_ratio": -0.05576205626130104, + "loss": 0.2514, + "rejected_geometric_mean": -9.101362228393555, + "step": 4818 + }, + { + "chosen_geometric_mean": -0.814560055732727, + "epoch": 1.19, + "grad_norm": 2.15625, + "learning_rate": 1.756957318069525e-06, + "log_odds": 7.506913661956787, + "log_odds_ratio": -0.056748226284980774, + "loss": 0.2168, + "rejected_geometric_mean": -7.777732849121094, + "step": 4819 + }, + { + "chosen_geometric_mean": -1.0231045484542847, + "epoch": 1.19, + "grad_norm": 1.8203125, + "learning_rate": 1.7560278925589125e-06, + "log_odds": 8.289278030395508, + "log_odds_ratio": -0.1113855168223381, + "loss": 0.2301, + "rejected_geometric_mean": -8.914546966552734, + "step": 4820 + }, + { + "chosen_geometric_mean": -1.0963817834854126, + "epoch": 1.19, + "grad_norm": 17.0, + "learning_rate": 1.7550985798523296e-06, + "log_odds": 7.59368896484375, + "log_odds_ratio": -0.18961572647094727, + "loss": 0.3105, + "rejected_geometric_mean": -8.411865234375, + "step": 4821 + }, + { + "chosen_geometric_mean": -1.0088213682174683, + "epoch": 1.19, + "grad_norm": 2.390625, + "learning_rate": 1.7541693800906823e-06, + "log_odds": 6.498603820800781, + "log_odds_ratio": -0.23527979850769043, + "loss": 0.2665, + "rejected_geometric_mean": -7.181220054626465, + "step": 4822 + }, + { + "chosen_geometric_mean": -0.9122300148010254, + "epoch": 1.19, + "grad_norm": 2.0, + "learning_rate": 1.7532402934148593e-06, + "log_odds": 6.036581993103027, + "log_odds_ratio": -0.24390719830989838, + "loss": 0.2339, + "rejected_geometric_mean": -6.6060404777526855, + "step": 4823 + }, + { + "chosen_geometric_mean": -1.138535976409912, + "epoch": 1.19, + "grad_norm": 2.828125, + "learning_rate": 1.7523113199657334e-06, + "log_odds": 6.176999568939209, + "log_odds_ratio": -0.14171354472637177, + "loss": 0.2795, + "rejected_geometric_mean": -6.963088035583496, + "step": 4824 + }, + { + "chosen_geometric_mean": -0.8384897708892822, + "epoch": 1.19, + "grad_norm": 3.21875, + "learning_rate": 1.7513824598841591e-06, + "log_odds": 7.511293411254883, + "log_odds_ratio": -0.08808433264493942, + "loss": 0.2067, + "rejected_geometric_mean": -7.835043430328369, + "step": 4825 + }, + { + "chosen_geometric_mean": -0.9062527418136597, + "epoch": 1.19, + "grad_norm": 2.125, + "learning_rate": 1.7504537133109739e-06, + "log_odds": 9.349289894104004, + "log_odds_ratio": -0.10571601241827011, + "loss": 0.2691, + "rejected_geometric_mean": -9.825024604797363, + "step": 4826 + }, + { + "chosen_geometric_mean": -0.8459952473640442, + "epoch": 1.2, + "grad_norm": 2.109375, + "learning_rate": 1.7495250803869978e-06, + "log_odds": 5.51020622253418, + "log_odds_ratio": -0.13801109790802002, + "loss": 0.2965, + "rejected_geometric_mean": -5.863101005554199, + "step": 4827 + }, + { + "chosen_geometric_mean": -1.08747398853302, + "epoch": 1.2, + "grad_norm": 4.625, + "learning_rate": 1.7485965612530337e-06, + "log_odds": 5.3271331787109375, + "log_odds_ratio": -0.13004133105278015, + "loss": 0.2664, + "rejected_geometric_mean": -6.031652450561523, + "step": 4828 + }, + { + "chosen_geometric_mean": -0.9192436933517456, + "epoch": 1.2, + "grad_norm": 4.75, + "learning_rate": 1.7476681560498669e-06, + "log_odds": 5.247888565063477, + "log_odds_ratio": -0.04829048365354538, + "loss": 0.2849, + "rejected_geometric_mean": -5.660017490386963, + "step": 4829 + }, + { + "chosen_geometric_mean": -1.081396460533142, + "epoch": 1.2, + "grad_norm": 4.625, + "learning_rate": 1.746739864918267e-06, + "log_odds": 7.537354946136475, + "log_odds_ratio": -0.006902384106069803, + "loss": 0.2468, + "rejected_geometric_mean": -8.175626754760742, + "step": 4830 + }, + { + "chosen_geometric_mean": -1.0035384893417358, + "epoch": 1.2, + "grad_norm": 30.5, + "learning_rate": 1.7458116879989847e-06, + "log_odds": 10.795256614685059, + "log_odds_ratio": -0.0017823383677750826, + "loss": 0.2783, + "rejected_geometric_mean": -11.336650848388672, + "step": 4831 + }, + { + "chosen_geometric_mean": -0.8536098003387451, + "epoch": 1.2, + "grad_norm": 1.859375, + "learning_rate": 1.7448836254327542e-06, + "log_odds": 6.480113983154297, + "log_odds_ratio": -0.17403613030910492, + "loss": 0.2328, + "rejected_geometric_mean": -6.882503032684326, + "step": 4832 + }, + { + "chosen_geometric_mean": -0.8597709536552429, + "epoch": 1.2, + "grad_norm": 7.03125, + "learning_rate": 1.7439556773602916e-06, + "log_odds": 12.200638771057129, + "log_odds_ratio": -0.09424148499965668, + "loss": 0.2442, + "rejected_geometric_mean": -12.580178260803223, + "step": 4833 + }, + { + "chosen_geometric_mean": -0.7991254925727844, + "epoch": 1.2, + "grad_norm": 2.8125, + "learning_rate": 1.7430278439222958e-06, + "log_odds": 11.99269962310791, + "log_odds_ratio": -0.10836243629455566, + "loss": 0.2394, + "rejected_geometric_mean": -12.286659240722656, + "step": 4834 + }, + { + "chosen_geometric_mean": -1.3582749366760254, + "epoch": 1.2, + "grad_norm": 11.6875, + "learning_rate": 1.74210012525945e-06, + "log_odds": 10.762411117553711, + "log_odds_ratio": -0.05986495688557625, + "loss": 0.3047, + "rejected_geometric_mean": -11.795863151550293, + "step": 4835 + }, + { + "chosen_geometric_mean": -1.0397558212280273, + "epoch": 1.2, + "grad_norm": 14.8125, + "learning_rate": 1.7411725215124182e-06, + "log_odds": 8.125862121582031, + "log_odds_ratio": -0.00947203952819109, + "loss": 0.3047, + "rejected_geometric_mean": -8.712735176086426, + "step": 4836 + }, + { + "chosen_geometric_mean": -0.8492317199707031, + "epoch": 1.2, + "grad_norm": 18.875, + "learning_rate": 1.7402450328218462e-06, + "log_odds": 4.685007095336914, + "log_odds_ratio": -0.4076576232910156, + "loss": 0.3194, + "rejected_geometric_mean": -5.144428253173828, + "step": 4837 + }, + { + "chosen_geometric_mean": -0.8766701221466064, + "epoch": 1.2, + "grad_norm": 2.109375, + "learning_rate": 1.7393176593283644e-06, + "log_odds": 10.795269012451172, + "log_odds_ratio": -0.002935323864221573, + "loss": 0.2278, + "rejected_geometric_mean": -11.112447738647461, + "step": 4838 + }, + { + "chosen_geometric_mean": -0.9376243352890015, + "epoch": 1.2, + "grad_norm": 5.84375, + "learning_rate": 1.738390401172585e-06, + "log_odds": 12.033119201660156, + "log_odds_ratio": -0.00011906684085261077, + "loss": 0.2343, + "rejected_geometric_mean": -12.466854095458984, + "step": 4839 + }, + { + "chosen_geometric_mean": -0.972482442855835, + "epoch": 1.2, + "grad_norm": 2.703125, + "learning_rate": 1.7374632584951018e-06, + "log_odds": 3.7057228088378906, + "log_odds_ratio": -0.19182026386260986, + "loss": 0.2676, + "rejected_geometric_mean": -4.359749794006348, + "step": 4840 + }, + { + "chosen_geometric_mean": -1.5619460344314575, + "epoch": 1.2, + "grad_norm": 11.5625, + "learning_rate": 1.736536231436493e-06, + "log_odds": 8.006237030029297, + "log_odds_ratio": -0.019651401787996292, + "loss": 0.3304, + "rejected_geometric_mean": -9.262916564941406, + "step": 4841 + }, + { + "chosen_geometric_mean": -1.056434154510498, + "epoch": 1.2, + "grad_norm": 5.875, + "learning_rate": 1.7356093201373175e-06, + "log_odds": 4.999940395355225, + "log_odds_ratio": -0.09852365404367447, + "loss": 0.2358, + "rejected_geometric_mean": -5.584649085998535, + "step": 4842 + }, + { + "chosen_geometric_mean": -0.939308226108551, + "epoch": 1.2, + "grad_norm": 17.25, + "learning_rate": 1.7346825247381183e-06, + "log_odds": 0.7480348944664001, + "log_odds_ratio": -0.41359370946884155, + "loss": 0.2764, + "rejected_geometric_mean": -1.4796078205108643, + "step": 4843 + }, + { + "chosen_geometric_mean": -0.9292937517166138, + "epoch": 1.2, + "grad_norm": 27.875, + "learning_rate": 1.733755845379418e-06, + "log_odds": 5.688783168792725, + "log_odds_ratio": -0.027092397212982178, + "loss": 0.2625, + "rejected_geometric_mean": -6.075087070465088, + "step": 4844 + }, + { + "chosen_geometric_mean": -1.2208901643753052, + "epoch": 1.2, + "grad_norm": 2.28125, + "learning_rate": 1.7328292822017237e-06, + "log_odds": 6.752192497253418, + "log_odds_ratio": -0.1461292803287506, + "loss": 0.2972, + "rejected_geometric_mean": -7.700247764587402, + "step": 4845 + }, + { + "chosen_geometric_mean": -0.9775927066802979, + "epoch": 1.2, + "grad_norm": 5.59375, + "learning_rate": 1.7319028353455269e-06, + "log_odds": 5.701360702514648, + "log_odds_ratio": -0.2880445718765259, + "loss": 0.2609, + "rejected_geometric_mean": -6.395057201385498, + "step": 4846 + }, + { + "chosen_geometric_mean": -0.8306620717048645, + "epoch": 1.2, + "grad_norm": 3.25, + "learning_rate": 1.7309765049512967e-06, + "log_odds": 4.911169528961182, + "log_odds_ratio": -0.12644615769386292, + "loss": 0.2507, + "rejected_geometric_mean": -5.26682186126709, + "step": 4847 + }, + { + "chosen_geometric_mean": -1.19557785987854, + "epoch": 1.2, + "grad_norm": 18.125, + "learning_rate": 1.7300502911594882e-06, + "log_odds": 2.664273500442505, + "log_odds_ratio": -0.32945626974105835, + "loss": 0.268, + "rejected_geometric_mean": -3.691009044647217, + "step": 4848 + }, + { + "chosen_geometric_mean": -0.9033003449440002, + "epoch": 1.2, + "grad_norm": 1.9765625, + "learning_rate": 1.7291241941105374e-06, + "log_odds": 4.901383399963379, + "log_odds_ratio": -0.14016766846179962, + "loss": 0.2665, + "rejected_geometric_mean": -5.378293037414551, + "step": 4849 + }, + { + "chosen_geometric_mean": -1.0651395320892334, + "epoch": 1.2, + "grad_norm": 2.34375, + "learning_rate": 1.7281982139448634e-06, + "log_odds": 5.874663829803467, + "log_odds_ratio": -0.1464165300130844, + "loss": 0.252, + "rejected_geometric_mean": -6.5893120765686035, + "step": 4850 + }, + { + "chosen_geometric_mean": -1.0767736434936523, + "epoch": 1.2, + "grad_norm": 10.25, + "learning_rate": 1.7272723508028647e-06, + "log_odds": 8.618550300598145, + "log_odds_ratio": -0.10265710949897766, + "loss": 0.2597, + "rejected_geometric_mean": -9.332488059997559, + "step": 4851 + }, + { + "chosen_geometric_mean": -1.0360544919967651, + "epoch": 1.2, + "grad_norm": 4.25, + "learning_rate": 1.7263466048249273e-06, + "log_odds": 5.04262113571167, + "log_odds_ratio": -0.1957632303237915, + "loss": 0.2406, + "rejected_geometric_mean": -5.72689151763916, + "step": 4852 + }, + { + "chosen_geometric_mean": -0.780475914478302, + "epoch": 1.2, + "grad_norm": 4.15625, + "learning_rate": 1.7254209761514157e-06, + "log_odds": 6.517705917358398, + "log_odds_ratio": -0.11285021901130676, + "loss": 0.267, + "rejected_geometric_mean": -6.755652904510498, + "step": 4853 + }, + { + "chosen_geometric_mean": -1.1454212665557861, + "epoch": 1.2, + "grad_norm": 6.46875, + "learning_rate": 1.7244954649226764e-06, + "log_odds": 3.832818031311035, + "log_odds_ratio": -0.07251597195863724, + "loss": 0.2255, + "rejected_geometric_mean": -4.6119914054870605, + "step": 4854 + }, + { + "chosen_geometric_mean": -0.8748252391815186, + "epoch": 1.2, + "grad_norm": 25.0, + "learning_rate": 1.7235700712790398e-06, + "log_odds": 3.644839286804199, + "log_odds_ratio": -0.042698994278907776, + "loss": 0.3488, + "rejected_geometric_mean": -3.981564521789551, + "step": 4855 + }, + { + "chosen_geometric_mean": -0.9214130640029907, + "epoch": 1.2, + "grad_norm": 2.09375, + "learning_rate": 1.7226447953608174e-06, + "log_odds": 2.7953715324401855, + "log_odds_ratio": -0.24637874960899353, + "loss": 0.2804, + "rejected_geometric_mean": -3.3948452472686768, + "step": 4856 + }, + { + "chosen_geometric_mean": -1.0155974626541138, + "epoch": 1.2, + "grad_norm": 5.3125, + "learning_rate": 1.7217196373083038e-06, + "log_odds": 10.802401542663574, + "log_odds_ratio": -0.17463743686676025, + "loss": 0.2478, + "rejected_geometric_mean": -11.429509162902832, + "step": 4857 + }, + { + "chosen_geometric_mean": -0.8181401491165161, + "epoch": 1.2, + "grad_norm": 49.75, + "learning_rate": 1.7207945972617756e-06, + "log_odds": 2.0532121658325195, + "log_odds_ratio": -0.18757236003875732, + "loss": 0.2498, + "rejected_geometric_mean": -2.40952205657959, + "step": 4858 + }, + { + "chosen_geometric_mean": -0.9173948764801025, + "epoch": 1.2, + "grad_norm": 18.75, + "learning_rate": 1.7198696753614902e-06, + "log_odds": 10.099754333496094, + "log_odds_ratio": -0.17317768931388855, + "loss": 0.2841, + "rejected_geometric_mean": -10.612505912780762, + "step": 4859 + }, + { + "chosen_geometric_mean": -0.9921160936355591, + "epoch": 1.2, + "grad_norm": 9.125, + "learning_rate": 1.7189448717476885e-06, + "log_odds": 7.450741291046143, + "log_odds_ratio": -0.11141467094421387, + "loss": 0.2357, + "rejected_geometric_mean": -8.031113624572754, + "step": 4860 + }, + { + "chosen_geometric_mean": -0.7852880358695984, + "epoch": 1.2, + "grad_norm": 7.96875, + "learning_rate": 1.7180201865605923e-06, + "log_odds": 4.3976335525512695, + "log_odds_ratio": -0.19283461570739746, + "loss": 0.2214, + "rejected_geometric_mean": -4.759518623352051, + "step": 4861 + }, + { + "chosen_geometric_mean": -1.20718252658844, + "epoch": 1.2, + "grad_norm": 2.125, + "learning_rate": 1.7170956199404053e-06, + "log_odds": 7.702459812164307, + "log_odds_ratio": -0.028815746307373047, + "loss": 0.2383, + "rejected_geometric_mean": -8.523350715637207, + "step": 4862 + }, + { + "chosen_geometric_mean": -1.3118476867675781, + "epoch": 1.2, + "grad_norm": 35.0, + "learning_rate": 1.7161711720273164e-06, + "log_odds": 5.618616580963135, + "log_odds_ratio": -0.2491530478000641, + "loss": 0.3012, + "rejected_geometric_mean": -6.700255870819092, + "step": 4863 + }, + { + "chosen_geometric_mean": -0.938243567943573, + "epoch": 1.2, + "grad_norm": 3.1875, + "learning_rate": 1.7152468429614925e-06, + "log_odds": 8.831938743591309, + "log_odds_ratio": -0.029060831293463707, + "loss": 0.2302, + "rejected_geometric_mean": -9.261800765991211, + "step": 4864 + }, + { + "chosen_geometric_mean": -1.0369960069656372, + "epoch": 1.2, + "grad_norm": 3.53125, + "learning_rate": 1.7143226328830841e-06, + "log_odds": 7.59773063659668, + "log_odds_ratio": -0.07335200905799866, + "loss": 0.2315, + "rejected_geometric_mean": -8.232538223266602, + "step": 4865 + }, + { + "chosen_geometric_mean": -1.448355793952942, + "epoch": 1.2, + "grad_norm": 31.75, + "learning_rate": 1.713398541932224e-06, + "log_odds": 11.129467010498047, + "log_odds_ratio": -0.011702990159392357, + "loss": 0.2906, + "rejected_geometric_mean": -12.229314804077148, + "step": 4866 + }, + { + "chosen_geometric_mean": -0.846998929977417, + "epoch": 1.21, + "grad_norm": 3.53125, + "learning_rate": 1.7124745702490264e-06, + "log_odds": 10.526631355285645, + "log_odds_ratio": -0.08464701473712921, + "loss": 0.2753, + "rejected_geometric_mean": -10.860527038574219, + "step": 4867 + }, + { + "chosen_geometric_mean": -0.9694207906723022, + "epoch": 1.21, + "grad_norm": 2.4375, + "learning_rate": 1.7115507179735876e-06, + "log_odds": 12.602432250976562, + "log_odds_ratio": -0.06726928055286407, + "loss": 0.2329, + "rejected_geometric_mean": -13.128647804260254, + "step": 4868 + }, + { + "chosen_geometric_mean": -0.9467562437057495, + "epoch": 1.21, + "grad_norm": 3.234375, + "learning_rate": 1.7106269852459868e-06, + "log_odds": 10.182697296142578, + "log_odds_ratio": -0.01967841386795044, + "loss": 0.2956, + "rejected_geometric_mean": -10.646398544311523, + "step": 4869 + }, + { + "chosen_geometric_mean": -0.9278416633605957, + "epoch": 1.21, + "grad_norm": 4.53125, + "learning_rate": 1.7097033722062822e-06, + "log_odds": 11.185308456420898, + "log_odds_ratio": -0.0023919399827718735, + "loss": 0.2576, + "rejected_geometric_mean": -11.60940933227539, + "step": 4870 + }, + { + "chosen_geometric_mean": -1.142195701599121, + "epoch": 1.21, + "grad_norm": 5.6875, + "learning_rate": 1.7087798789945167e-06, + "log_odds": 3.632828712463379, + "log_odds_ratio": -0.19044587016105652, + "loss": 0.2804, + "rejected_geometric_mean": -4.497435092926025, + "step": 4871 + }, + { + "chosen_geometric_mean": -1.2232866287231445, + "epoch": 1.21, + "grad_norm": 2.109375, + "learning_rate": 1.707856505750714e-06, + "log_odds": 7.77278470993042, + "log_odds_ratio": -0.1726381480693817, + "loss": 0.2537, + "rejected_geometric_mean": -8.736224174499512, + "step": 4872 + }, + { + "chosen_geometric_mean": -1.0194768905639648, + "epoch": 1.21, + "grad_norm": 5.21875, + "learning_rate": 1.706933252614879e-06, + "log_odds": 6.113887786865234, + "log_odds_ratio": -0.15000644326210022, + "loss": 0.25, + "rejected_geometric_mean": -6.690817832946777, + "step": 4873 + }, + { + "chosen_geometric_mean": -1.1380308866500854, + "epoch": 1.21, + "grad_norm": 3.15625, + "learning_rate": 1.7060101197270001e-06, + "log_odds": 5.622871398925781, + "log_odds_ratio": -0.17435412108898163, + "loss": 0.252, + "rejected_geometric_mean": -6.446312427520752, + "step": 4874 + }, + { + "chosen_geometric_mean": -0.9577043056488037, + "epoch": 1.21, + "grad_norm": 1.984375, + "learning_rate": 1.7050871072270458e-06, + "log_odds": 16.13121795654297, + "log_odds_ratio": -0.0023123237770050764, + "loss": 0.2381, + "rejected_geometric_mean": -16.602447509765625, + "step": 4875 + }, + { + "chosen_geometric_mean": -0.9820342063903809, + "epoch": 1.21, + "grad_norm": 2.0625, + "learning_rate": 1.7041642152549677e-06, + "log_odds": 5.256155967712402, + "log_odds_ratio": -0.05442624166607857, + "loss": 0.2746, + "rejected_geometric_mean": -5.802247524261475, + "step": 4876 + }, + { + "chosen_geometric_mean": -0.96736741065979, + "epoch": 1.21, + "grad_norm": 2.171875, + "learning_rate": 1.7032414439506972e-06, + "log_odds": 5.051297187805176, + "log_odds_ratio": -0.1378098428249359, + "loss": 0.2407, + "rejected_geometric_mean": -5.630065441131592, + "step": 4877 + }, + { + "chosen_geometric_mean": -0.826941967010498, + "epoch": 1.21, + "grad_norm": 2.140625, + "learning_rate": 1.7023187934541484e-06, + "log_odds": 4.551927089691162, + "log_odds_ratio": -0.20612916350364685, + "loss": 0.2569, + "rejected_geometric_mean": -4.956351280212402, + "step": 4878 + }, + { + "chosen_geometric_mean": -1.1191869974136353, + "epoch": 1.21, + "grad_norm": 40.0, + "learning_rate": 1.7013962639052193e-06, + "log_odds": 5.809558868408203, + "log_odds_ratio": -0.21363067626953125, + "loss": 0.3092, + "rejected_geometric_mean": -6.6241254806518555, + "step": 4879 + }, + { + "chosen_geometric_mean": -1.147830843925476, + "epoch": 1.21, + "grad_norm": 4.21875, + "learning_rate": 1.7004738554437855e-06, + "log_odds": 6.839961528778076, + "log_odds_ratio": -0.00411957548931241, + "loss": 0.2316, + "rejected_geometric_mean": -7.606125831604004, + "step": 4880 + }, + { + "chosen_geometric_mean": -0.8359028100967407, + "epoch": 1.21, + "grad_norm": 5.625, + "learning_rate": 1.6995515682097074e-06, + "log_odds": 4.453822135925293, + "log_odds_ratio": -0.23032626509666443, + "loss": 0.2621, + "rejected_geometric_mean": -4.853404521942139, + "step": 4881 + }, + { + "chosen_geometric_mean": -0.8420387506484985, + "epoch": 1.21, + "grad_norm": 33.0, + "learning_rate": 1.6986294023428252e-06, + "log_odds": 8.77005386352539, + "log_odds_ratio": -0.23660731315612793, + "loss": 0.3477, + "rejected_geometric_mean": -9.236916542053223, + "step": 4882 + }, + { + "chosen_geometric_mean": -0.9239436984062195, + "epoch": 1.21, + "grad_norm": 3.953125, + "learning_rate": 1.697707357982962e-06, + "log_odds": 7.578174591064453, + "log_odds_ratio": -0.08825001865625381, + "loss": 0.2438, + "rejected_geometric_mean": -8.017960548400879, + "step": 4883 + }, + { + "chosen_geometric_mean": -0.9679553508758545, + "epoch": 1.21, + "grad_norm": 36.0, + "learning_rate": 1.69678543526992e-06, + "log_odds": 8.165478706359863, + "log_odds_ratio": -0.01658605970442295, + "loss": 0.287, + "rejected_geometric_mean": -8.648931503295898, + "step": 4884 + }, + { + "chosen_geometric_mean": -0.833858072757721, + "epoch": 1.21, + "grad_norm": 10.1875, + "learning_rate": 1.6958636343434875e-06, + "log_odds": 7.273467540740967, + "log_odds_ratio": -0.3078708052635193, + "loss": 0.2677, + "rejected_geometric_mean": -7.820941925048828, + "step": 4885 + }, + { + "chosen_geometric_mean": -0.7297037839889526, + "epoch": 1.21, + "grad_norm": 120.0, + "learning_rate": 1.6949419553434309e-06, + "log_odds": 9.346761703491211, + "log_odds_ratio": -0.04653134569525719, + "loss": 0.2602, + "rejected_geometric_mean": -9.417797088623047, + "step": 4886 + }, + { + "chosen_geometric_mean": -1.0433763265609741, + "epoch": 1.21, + "grad_norm": 40.0, + "learning_rate": 1.694020398409498e-06, + "log_odds": 4.422905445098877, + "log_odds_ratio": -0.2059502899646759, + "loss": 0.2909, + "rejected_geometric_mean": -5.152207374572754, + "step": 4887 + }, + { + "chosen_geometric_mean": -1.0718369483947754, + "epoch": 1.21, + "grad_norm": 29.0, + "learning_rate": 1.6930989636814192e-06, + "log_odds": 4.9307756423950195, + "log_odds_ratio": -0.29304251074790955, + "loss": 0.3189, + "rejected_geometric_mean": -5.74721622467041, + "step": 4888 + }, + { + "chosen_geometric_mean": -1.0156253576278687, + "epoch": 1.21, + "grad_norm": 38.0, + "learning_rate": 1.692177651298906e-06, + "log_odds": 10.248173713684082, + "log_odds_ratio": -0.11886939406394958, + "loss": 0.2767, + "rejected_geometric_mean": -10.853364944458008, + "step": 4889 + }, + { + "chosen_geometric_mean": -0.8526290655136108, + "epoch": 1.21, + "grad_norm": 2.0625, + "learning_rate": 1.6912564614016522e-06, + "log_odds": 14.502793312072754, + "log_odds_ratio": -0.003958769608289003, + "loss": 0.2394, + "rejected_geometric_mean": -14.78515911102295, + "step": 4890 + }, + { + "chosen_geometric_mean": -1.0823488235473633, + "epoch": 1.21, + "grad_norm": 2.203125, + "learning_rate": 1.6903353941293316e-06, + "log_odds": 3.554841995239258, + "log_odds_ratio": -0.2473980039358139, + "loss": 0.2781, + "rejected_geometric_mean": -4.373300552368164, + "step": 4891 + }, + { + "chosen_geometric_mean": -0.9316360354423523, + "epoch": 1.21, + "grad_norm": 8.4375, + "learning_rate": 1.6894144496216014e-06, + "log_odds": 2.2452921867370605, + "log_odds_ratio": -0.21866697072982788, + "loss": 0.2451, + "rejected_geometric_mean": -2.843364715576172, + "step": 4892 + }, + { + "chosen_geometric_mean": -0.9348472356796265, + "epoch": 1.21, + "grad_norm": 40.0, + "learning_rate": 1.6884936280180968e-06, + "log_odds": 10.017965316772461, + "log_odds_ratio": -0.003692604135721922, + "loss": 0.2367, + "rejected_geometric_mean": -10.442242622375488, + "step": 4893 + }, + { + "chosen_geometric_mean": -0.9797062873840332, + "epoch": 1.21, + "grad_norm": 26.375, + "learning_rate": 1.687572929458437e-06, + "log_odds": 6.589075088500977, + "log_odds_ratio": -0.14165301620960236, + "loss": 0.2512, + "rejected_geometric_mean": -7.185182094573975, + "step": 4894 + }, + { + "chosen_geometric_mean": -1.0675294399261475, + "epoch": 1.21, + "grad_norm": 2.875, + "learning_rate": 1.6866523540822239e-06, + "log_odds": 6.76493501663208, + "log_odds_ratio": -0.04280850291252136, + "loss": 0.2657, + "rejected_geometric_mean": -7.431266784667969, + "step": 4895 + }, + { + "chosen_geometric_mean": -0.939366340637207, + "epoch": 1.21, + "grad_norm": 8.75, + "learning_rate": 1.6857319020290366e-06, + "log_odds": 9.360024452209473, + "log_odds_ratio": -0.17062154412269592, + "loss": 0.2351, + "rejected_geometric_mean": -9.935914993286133, + "step": 4896 + }, + { + "chosen_geometric_mean": -1.071614384651184, + "epoch": 1.21, + "grad_norm": 2.390625, + "learning_rate": 1.684811573438439e-06, + "log_odds": 11.396238327026367, + "log_odds_ratio": -0.008487722836434841, + "loss": 0.2676, + "rejected_geometric_mean": -12.038753509521484, + "step": 4897 + }, + { + "chosen_geometric_mean": -1.0016793012619019, + "epoch": 1.21, + "grad_norm": 1.9296875, + "learning_rate": 1.683891368449974e-06, + "log_odds": 8.289042472839355, + "log_odds_ratio": -0.08517690002918243, + "loss": 0.2328, + "rejected_geometric_mean": -8.851028442382812, + "step": 4898 + }, + { + "chosen_geometric_mean": -1.0525375604629517, + "epoch": 1.21, + "grad_norm": 58.0, + "learning_rate": 1.6829712872031684e-06, + "log_odds": 10.980430603027344, + "log_odds_ratio": -0.1904461830854416, + "loss": 0.2452, + "rejected_geometric_mean": -11.713606834411621, + "step": 4899 + }, + { + "chosen_geometric_mean": -1.1210145950317383, + "epoch": 1.21, + "grad_norm": 6.65625, + "learning_rate": 1.682051329837526e-06, + "log_odds": 3.361443042755127, + "log_odds_ratio": -0.09161461889743805, + "loss": 0.246, + "rejected_geometric_mean": -4.130867958068848, + "step": 4900 + }, + { + "chosen_geometric_mean": -0.7766502499580383, + "epoch": 1.21, + "grad_norm": 2.109375, + "learning_rate": 1.6811314964925367e-06, + "log_odds": 3.837134838104248, + "log_odds_ratio": -0.12951809167861938, + "loss": 0.226, + "rejected_geometric_mean": -4.081467151641846, + "step": 4901 + }, + { + "chosen_geometric_mean": -0.8672838807106018, + "epoch": 1.21, + "grad_norm": 1.6796875, + "learning_rate": 1.6802117873076696e-06, + "log_odds": 12.252670288085938, + "log_odds_ratio": -0.020966101437807083, + "loss": 0.2106, + "rejected_geometric_mean": -12.57647705078125, + "step": 4902 + }, + { + "chosen_geometric_mean": -0.9858371615409851, + "epoch": 1.21, + "grad_norm": 2.109375, + "learning_rate": 1.6792922024223728e-06, + "log_odds": 4.5095977783203125, + "log_odds_ratio": -0.11659058183431625, + "loss": 0.2359, + "rejected_geometric_mean": -5.0700907707214355, + "step": 4903 + }, + { + "chosen_geometric_mean": -1.0269360542297363, + "epoch": 1.21, + "grad_norm": 3.34375, + "learning_rate": 1.6783727419760782e-06, + "log_odds": 7.285090923309326, + "log_odds_ratio": -0.16354212164878845, + "loss": 0.2579, + "rejected_geometric_mean": -7.923618316650391, + "step": 4904 + }, + { + "chosen_geometric_mean": -1.04683518409729, + "epoch": 1.21, + "grad_norm": 15.9375, + "learning_rate": 1.6774534061081979e-06, + "log_odds": 5.973781585693359, + "log_odds_ratio": -0.13230183720588684, + "loss": 0.2962, + "rejected_geometric_mean": -6.68638801574707, + "step": 4905 + }, + { + "chosen_geometric_mean": -0.8143897652626038, + "epoch": 1.21, + "grad_norm": 2.328125, + "learning_rate": 1.6765341949581259e-06, + "log_odds": 2.981741189956665, + "log_odds_ratio": -0.3130192160606384, + "loss": 0.2761, + "rejected_geometric_mean": -3.4430501461029053, + "step": 4906 + }, + { + "chosen_geometric_mean": -0.9178417921066284, + "epoch": 1.21, + "grad_norm": 8.125, + "learning_rate": 1.6756151086652367e-06, + "log_odds": 11.683197975158691, + "log_odds_ratio": -0.0991944894194603, + "loss": 0.2187, + "rejected_geometric_mean": -12.180424690246582, + "step": 4907 + }, + { + "chosen_geometric_mean": -1.1117851734161377, + "epoch": 1.22, + "grad_norm": 2.03125, + "learning_rate": 1.6746961473688849e-06, + "log_odds": 9.0809326171875, + "log_odds_ratio": -0.22094841301441193, + "loss": 0.2283, + "rejected_geometric_mean": -9.90988540649414, + "step": 4908 + }, + { + "chosen_geometric_mean": -1.1252976655960083, + "epoch": 1.22, + "grad_norm": 8.3125, + "learning_rate": 1.6737773112084088e-06, + "log_odds": 9.589757919311523, + "log_odds_ratio": -0.04038211330771446, + "loss": 0.3079, + "rejected_geometric_mean": -10.29440689086914, + "step": 4909 + }, + { + "chosen_geometric_mean": -0.910376787185669, + "epoch": 1.22, + "grad_norm": 6.25, + "learning_rate": 1.6728586003231243e-06, + "log_odds": 8.986577033996582, + "log_odds_ratio": -0.12137828022241592, + "loss": 0.2811, + "rejected_geometric_mean": -9.454140663146973, + "step": 4910 + }, + { + "chosen_geometric_mean": -0.9604846239089966, + "epoch": 1.22, + "grad_norm": 3.25, + "learning_rate": 1.6719400148523298e-06, + "log_odds": 6.692976474761963, + "log_odds_ratio": -0.0874033272266388, + "loss": 0.2167, + "rejected_geometric_mean": -7.2312188148498535, + "step": 4911 + }, + { + "chosen_geometric_mean": -1.0256297588348389, + "epoch": 1.22, + "grad_norm": 7.46875, + "learning_rate": 1.6710215549353065e-06, + "log_odds": 8.947671890258789, + "log_odds_ratio": -0.16129173338413239, + "loss": 0.306, + "rejected_geometric_mean": -9.56646728515625, + "step": 4912 + }, + { + "chosen_geometric_mean": -0.9955600500106812, + "epoch": 1.22, + "grad_norm": 16.875, + "learning_rate": 1.670103220711314e-06, + "log_odds": 5.700231075286865, + "log_odds_ratio": -0.45241647958755493, + "loss": 0.241, + "rejected_geometric_mean": -6.552872180938721, + "step": 4913 + }, + { + "chosen_geometric_mean": -1.1506658792495728, + "epoch": 1.22, + "grad_norm": 2.90625, + "learning_rate": 1.669185012319594e-06, + "log_odds": 5.975561141967773, + "log_odds_ratio": -0.20188026130199432, + "loss": 0.288, + "rejected_geometric_mean": -6.8365159034729, + "step": 4914 + }, + { + "chosen_geometric_mean": -0.868840217590332, + "epoch": 1.22, + "grad_norm": 3.84375, + "learning_rate": 1.6682669298993692e-06, + "log_odds": 14.218605041503906, + "log_odds_ratio": -0.014526136219501495, + "loss": 0.2719, + "rejected_geometric_mean": -14.547828674316406, + "step": 4915 + }, + { + "chosen_geometric_mean": -1.1424078941345215, + "epoch": 1.22, + "grad_norm": 5.375, + "learning_rate": 1.6673489735898422e-06, + "log_odds": 7.703179359436035, + "log_odds_ratio": -0.11910814791917801, + "loss": 0.2533, + "rejected_geometric_mean": -8.481644630432129, + "step": 4916 + }, + { + "chosen_geometric_mean": -0.9164249897003174, + "epoch": 1.22, + "grad_norm": 7.625, + "learning_rate": 1.6664311435301978e-06, + "log_odds": 7.025263786315918, + "log_odds_ratio": -0.20137768983840942, + "loss": 0.2661, + "rejected_geometric_mean": -7.564563751220703, + "step": 4917 + }, + { + "chosen_geometric_mean": -1.0225636959075928, + "epoch": 1.22, + "grad_norm": 2.90625, + "learning_rate": 1.6655134398596018e-06, + "log_odds": 7.823009490966797, + "log_odds_ratio": -0.033551428467035294, + "loss": 0.2927, + "rejected_geometric_mean": -8.418340682983398, + "step": 4918 + }, + { + "chosen_geometric_mean": -1.0525490045547485, + "epoch": 1.22, + "grad_norm": 31.875, + "learning_rate": 1.6645958627171988e-06, + "log_odds": 7.837765693664551, + "log_odds_ratio": -0.08150704205036163, + "loss": 0.3271, + "rejected_geometric_mean": -8.509626388549805, + "step": 4919 + }, + { + "chosen_geometric_mean": -0.8991258144378662, + "epoch": 1.22, + "grad_norm": 2.203125, + "learning_rate": 1.6636784122421158e-06, + "log_odds": 14.130524635314941, + "log_odds_ratio": -0.0005190852098166943, + "loss": 0.2597, + "rejected_geometric_mean": -14.495853424072266, + "step": 4920 + }, + { + "chosen_geometric_mean": -0.7978368997573853, + "epoch": 1.22, + "grad_norm": 4.34375, + "learning_rate": 1.6627610885734604e-06, + "log_odds": 8.588092803955078, + "log_odds_ratio": -0.15708068013191223, + "loss": 0.2554, + "rejected_geometric_mean": -8.934175491333008, + "step": 4921 + }, + { + "chosen_geometric_mean": -1.120434045791626, + "epoch": 1.22, + "grad_norm": 2.671875, + "learning_rate": 1.6618438918503207e-06, + "log_odds": 3.9770236015319824, + "log_odds_ratio": -0.20405763387680054, + "loss": 0.3023, + "rejected_geometric_mean": -4.802136421203613, + "step": 4922 + }, + { + "chosen_geometric_mean": -1.0986135005950928, + "epoch": 1.22, + "grad_norm": 2.484375, + "learning_rate": 1.6609268222117664e-06, + "log_odds": 1.6706268787384033, + "log_odds_ratio": -0.3487663269042969, + "loss": 0.2705, + "rejected_geometric_mean": -2.5626845359802246, + "step": 4923 + }, + { + "chosen_geometric_mean": -0.8435693383216858, + "epoch": 1.22, + "grad_norm": 2.375, + "learning_rate": 1.660009879796847e-06, + "log_odds": 4.9822893142700195, + "log_odds_ratio": -0.19504714012145996, + "loss": 0.2443, + "rejected_geometric_mean": -5.441632270812988, + "step": 4924 + }, + { + "chosen_geometric_mean": -1.1551854610443115, + "epoch": 1.22, + "grad_norm": 2.890625, + "learning_rate": 1.6590930647445936e-06, + "log_odds": 1.8609645366668701, + "log_odds_ratio": -0.36820483207702637, + "loss": 0.2776, + "rejected_geometric_mean": -2.835174560546875, + "step": 4925 + }, + { + "chosen_geometric_mean": -1.0393753051757812, + "epoch": 1.22, + "grad_norm": 30.75, + "learning_rate": 1.6581763771940157e-06, + "log_odds": 8.113279342651367, + "log_odds_ratio": -0.18366354703903198, + "loss": 0.3176, + "rejected_geometric_mean": -8.830708503723145, + "step": 4926 + }, + { + "chosen_geometric_mean": -0.9747377634048462, + "epoch": 1.22, + "grad_norm": 3.859375, + "learning_rate": 1.657259817284105e-06, + "log_odds": 1.9501341581344604, + "log_odds_ratio": -0.2791125774383545, + "loss": 0.244, + "rejected_geometric_mean": -2.61606502532959, + "step": 4927 + }, + { + "chosen_geometric_mean": -0.894892156124115, + "epoch": 1.22, + "grad_norm": 2.125, + "learning_rate": 1.656343385153837e-06, + "log_odds": 11.216817855834961, + "log_odds_ratio": -0.1294933706521988, + "loss": 0.2216, + "rejected_geometric_mean": -11.64510726928711, + "step": 4928 + }, + { + "chosen_geometric_mean": -0.9082317352294922, + "epoch": 1.22, + "grad_norm": 2.296875, + "learning_rate": 1.6554270809421622e-06, + "log_odds": 5.93410062789917, + "log_odds_ratio": -0.16052526235580444, + "loss": 0.2787, + "rejected_geometric_mean": -6.420084476470947, + "step": 4929 + }, + { + "chosen_geometric_mean": -0.9111176133155823, + "epoch": 1.22, + "grad_norm": 9.875, + "learning_rate": 1.6545109047880148e-06, + "log_odds": 5.5036234855651855, + "log_odds_ratio": -0.4713621735572815, + "loss": 0.2392, + "rejected_geometric_mean": -6.262677192687988, + "step": 4930 + }, + { + "chosen_geometric_mean": -1.0002028942108154, + "epoch": 1.22, + "grad_norm": 8.3125, + "learning_rate": 1.6535948568303095e-06, + "log_odds": 13.17253303527832, + "log_odds_ratio": -0.03635496646165848, + "loss": 0.2422, + "rejected_geometric_mean": -13.70750617980957, + "step": 4931 + }, + { + "chosen_geometric_mean": -0.9369968175888062, + "epoch": 1.22, + "grad_norm": 2.359375, + "learning_rate": 1.6526789372079415e-06, + "log_odds": 4.03108024597168, + "log_odds_ratio": -0.2337879091501236, + "loss": 0.2797, + "rejected_geometric_mean": -4.616919040679932, + "step": 4932 + }, + { + "chosen_geometric_mean": -0.9811027646064758, + "epoch": 1.22, + "grad_norm": 50.75, + "learning_rate": 1.6517631460597842e-06, + "log_odds": 5.307877540588379, + "log_odds_ratio": -0.08238333463668823, + "loss": 0.2328, + "rejected_geometric_mean": -5.8525590896606445, + "step": 4933 + }, + { + "chosen_geometric_mean": -0.967575192451477, + "epoch": 1.22, + "grad_norm": 5.0625, + "learning_rate": 1.6508474835246957e-06, + "log_odds": 8.929045677185059, + "log_odds_ratio": -0.10413690656423569, + "loss": 0.2539, + "rejected_geometric_mean": -9.47811508178711, + "step": 4934 + }, + { + "chosen_geometric_mean": -0.9597955346107483, + "epoch": 1.22, + "grad_norm": 1.9296875, + "learning_rate": 1.6499319497415124e-06, + "log_odds": 13.223278999328613, + "log_odds_ratio": -0.0025419997982680798, + "loss": 0.2206, + "rejected_geometric_mean": -13.692625045776367, + "step": 4935 + }, + { + "chosen_geometric_mean": -1.0075256824493408, + "epoch": 1.22, + "grad_norm": 53.0, + "learning_rate": 1.6490165448490496e-06, + "log_odds": 8.331506729125977, + "log_odds_ratio": -0.00329953758046031, + "loss": 0.2613, + "rejected_geometric_mean": -8.87120246887207, + "step": 4936 + }, + { + "chosen_geometric_mean": -1.0622388124465942, + "epoch": 1.22, + "grad_norm": 2.359375, + "learning_rate": 1.648101268986106e-06, + "log_odds": 17.816547393798828, + "log_odds_ratio": -0.04731099680066109, + "loss": 0.2414, + "rejected_geometric_mean": -18.473129272460938, + "step": 4937 + }, + { + "chosen_geometric_mean": -0.994566798210144, + "epoch": 1.22, + "grad_norm": 15.9375, + "learning_rate": 1.647186122291458e-06, + "log_odds": 2.943837881088257, + "log_odds_ratio": -0.2724604606628418, + "loss": 0.3028, + "rejected_geometric_mean": -3.6165733337402344, + "step": 4938 + }, + { + "chosen_geometric_mean": -0.7892910838127136, + "epoch": 1.22, + "grad_norm": 8.125, + "learning_rate": 1.6462711049038657e-06, + "log_odds": 9.50218677520752, + "log_odds_ratio": -0.19991454482078552, + "loss": 0.2197, + "rejected_geometric_mean": -9.86231803894043, + "step": 4939 + }, + { + "chosen_geometric_mean": -0.9861485958099365, + "epoch": 1.22, + "grad_norm": 2.328125, + "learning_rate": 1.6453562169620665e-06, + "log_odds": 13.229125022888184, + "log_odds_ratio": -0.0007036618771962821, + "loss": 0.2803, + "rejected_geometric_mean": -13.735485076904297, + "step": 4940 + }, + { + "chosen_geometric_mean": -0.8053466081619263, + "epoch": 1.22, + "grad_norm": 3.734375, + "learning_rate": 1.6444414586047797e-06, + "log_odds": 3.9233596324920654, + "log_odds_ratio": -0.2773653268814087, + "loss": 0.2484, + "rejected_geometric_mean": -4.360698223114014, + "step": 4941 + }, + { + "chosen_geometric_mean": -0.737091064453125, + "epoch": 1.22, + "grad_norm": 14.3125, + "learning_rate": 1.6435268299707049e-06, + "log_odds": 5.526251792907715, + "log_odds_ratio": -0.15042796730995178, + "loss": 0.2142, + "rejected_geometric_mean": -5.761319637298584, + "step": 4942 + }, + { + "chosen_geometric_mean": -0.9542184472084045, + "epoch": 1.22, + "grad_norm": 17.5, + "learning_rate": 1.642612331198521e-06, + "log_odds": 12.068188667297363, + "log_odds_ratio": -0.0341084748506546, + "loss": 0.2513, + "rejected_geometric_mean": -12.550251960754395, + "step": 4943 + }, + { + "chosen_geometric_mean": -1.109792709350586, + "epoch": 1.22, + "grad_norm": 23.0, + "learning_rate": 1.6416979624268878e-06, + "log_odds": 13.19733715057373, + "log_odds_ratio": -0.03962109610438347, + "loss": 0.2491, + "rejected_geometric_mean": -13.894125938415527, + "step": 4944 + }, + { + "chosen_geometric_mean": -1.0677692890167236, + "epoch": 1.22, + "grad_norm": 2.359375, + "learning_rate": 1.6407837237944466e-06, + "log_odds": 13.664758682250977, + "log_odds_ratio": -0.001726834336295724, + "loss": 0.2726, + "rejected_geometric_mean": -14.31071662902832, + "step": 4945 + }, + { + "chosen_geometric_mean": -0.8649994134902954, + "epoch": 1.22, + "grad_norm": 62.75, + "learning_rate": 1.6398696154398176e-06, + "log_odds": 8.906648635864258, + "log_odds_ratio": -0.13361980020999908, + "loss": 0.25, + "rejected_geometric_mean": -9.325420379638672, + "step": 4946 + }, + { + "chosen_geometric_mean": -0.9852421879768372, + "epoch": 1.22, + "grad_norm": 4.71875, + "learning_rate": 1.638955637501601e-06, + "log_odds": 8.523690223693848, + "log_odds_ratio": -0.023703893646597862, + "loss": 0.2392, + "rejected_geometric_mean": -9.043807983398438, + "step": 4947 + }, + { + "chosen_geometric_mean": -1.2177678346633911, + "epoch": 1.23, + "grad_norm": 29.875, + "learning_rate": 1.6380417901183788e-06, + "log_odds": 8.135478019714355, + "log_odds_ratio": -0.14846254885196686, + "loss": 0.2921, + "rejected_geometric_mean": -9.031877517700195, + "step": 4948 + }, + { + "chosen_geometric_mean": -0.7745729088783264, + "epoch": 1.23, + "grad_norm": 2.4375, + "learning_rate": 1.6371280734287103e-06, + "log_odds": 5.535408020019531, + "log_odds_ratio": -0.18472352623939514, + "loss": 0.2431, + "rejected_geometric_mean": -5.871823787689209, + "step": 4949 + }, + { + "chosen_geometric_mean": -0.983290433883667, + "epoch": 1.23, + "grad_norm": 5.0625, + "learning_rate": 1.6362144875711395e-06, + "log_odds": 3.5727105140686035, + "log_odds_ratio": -0.4501027464866638, + "loss": 0.2448, + "rejected_geometric_mean": -4.41102933883667, + "step": 4950 + }, + { + "chosen_geometric_mean": -0.92387855052948, + "epoch": 1.23, + "grad_norm": 2.125, + "learning_rate": 1.6353010326841864e-06, + "log_odds": 5.924341678619385, + "log_odds_ratio": -0.13720645010471344, + "loss": 0.2694, + "rejected_geometric_mean": -6.384294033050537, + "step": 4951 + }, + { + "chosen_geometric_mean": -0.9854780435562134, + "epoch": 1.23, + "grad_norm": 4.90625, + "learning_rate": 1.6343877089063528e-06, + "log_odds": 13.393482208251953, + "log_odds_ratio": -0.04739006981253624, + "loss": 0.2327, + "rejected_geometric_mean": -13.934576034545898, + "step": 4952 + }, + { + "chosen_geometric_mean": -1.1875940561294556, + "epoch": 1.23, + "grad_norm": 37.0, + "learning_rate": 1.6334745163761207e-06, + "log_odds": 5.377641677856445, + "log_odds_ratio": -0.2804868221282959, + "loss": 0.2408, + "rejected_geometric_mean": -6.20751953125, + "step": 4953 + }, + { + "chosen_geometric_mean": -1.3421835899353027, + "epoch": 1.23, + "grad_norm": 30.375, + "learning_rate": 1.6325614552319516e-06, + "log_odds": 1.5743311643600464, + "log_odds_ratio": -0.32292693853378296, + "loss": 0.2549, + "rejected_geometric_mean": -2.742544651031494, + "step": 4954 + }, + { + "chosen_geometric_mean": -0.9647108316421509, + "epoch": 1.23, + "grad_norm": 1.8984375, + "learning_rate": 1.6316485256122874e-06, + "log_odds": 6.996342658996582, + "log_odds_ratio": -0.13006602227687836, + "loss": 0.2259, + "rejected_geometric_mean": -7.550975322723389, + "step": 4955 + }, + { + "chosen_geometric_mean": -0.741207480430603, + "epoch": 1.23, + "grad_norm": 10.5625, + "learning_rate": 1.6307357276555514e-06, + "log_odds": 11.852639198303223, + "log_odds_ratio": -0.03849098086357117, + "loss": 0.2572, + "rejected_geometric_mean": -11.982824325561523, + "step": 4956 + }, + { + "chosen_geometric_mean": -1.089914321899414, + "epoch": 1.23, + "grad_norm": 10.0, + "learning_rate": 1.6298230615001443e-06, + "log_odds": 3.5999677181243896, + "log_odds_ratio": -0.18134336173534393, + "loss": 0.2549, + "rejected_geometric_mean": -4.355885982513428, + "step": 4957 + }, + { + "chosen_geometric_mean": -0.8827210664749146, + "epoch": 1.23, + "grad_norm": 30.75, + "learning_rate": 1.6289105272844497e-06, + "log_odds": 7.388944625854492, + "log_odds_ratio": -0.12443084269762039, + "loss": 0.2801, + "rejected_geometric_mean": -7.841888904571533, + "step": 4958 + }, + { + "chosen_geometric_mean": -1.182918667793274, + "epoch": 1.23, + "grad_norm": 7.1875, + "learning_rate": 1.627998125146828e-06, + "log_odds": 5.8665690422058105, + "log_odds_ratio": -0.33450812101364136, + "loss": 0.3299, + "rejected_geometric_mean": -6.855058193206787, + "step": 4959 + }, + { + "chosen_geometric_mean": -1.2565621137619019, + "epoch": 1.23, + "grad_norm": 3.765625, + "learning_rate": 1.6270858552256209e-06, + "log_odds": 3.9284720420837402, + "log_odds_ratio": -0.12163561582565308, + "loss": 0.2887, + "rejected_geometric_mean": -4.876530170440674, + "step": 4960 + }, + { + "chosen_geometric_mean": -0.9554165601730347, + "epoch": 1.23, + "grad_norm": 1.953125, + "learning_rate": 1.626173717659153e-06, + "log_odds": 11.851924896240234, + "log_odds_ratio": -0.06302203238010406, + "loss": 0.2596, + "rejected_geometric_mean": -12.36623764038086, + "step": 4961 + }, + { + "chosen_geometric_mean": -1.1904957294464111, + "epoch": 1.23, + "grad_norm": 3.859375, + "learning_rate": 1.6252617125857245e-06, + "log_odds": 15.364477157592773, + "log_odds_ratio": -0.08095162361860275, + "loss": 0.3086, + "rejected_geometric_mean": -16.20892906188965, + "step": 4962 + }, + { + "chosen_geometric_mean": -0.8805942535400391, + "epoch": 1.23, + "grad_norm": 3.5625, + "learning_rate": 1.6243498401436174e-06, + "log_odds": 8.993182182312012, + "log_odds_ratio": -0.12615284323692322, + "loss": 0.2834, + "rejected_geometric_mean": -9.394566535949707, + "step": 4963 + }, + { + "chosen_geometric_mean": -1.2011229991912842, + "epoch": 1.23, + "grad_norm": 10.5625, + "learning_rate": 1.6234381004710936e-06, + "log_odds": 5.013904571533203, + "log_odds_ratio": -0.22786584496498108, + "loss": 0.2414, + "rejected_geometric_mean": -5.9690680503845215, + "step": 4964 + }, + { + "chosen_geometric_mean": -1.1684153079986572, + "epoch": 1.23, + "grad_norm": 6.5625, + "learning_rate": 1.622526493706395e-06, + "log_odds": 10.466224670410156, + "log_odds_ratio": -0.0006895352271385491, + "loss": 0.267, + "rejected_geometric_mean": -11.241945266723633, + "step": 4965 + }, + { + "chosen_geometric_mean": -0.835465669631958, + "epoch": 1.23, + "grad_norm": 4.21875, + "learning_rate": 1.6216150199877414e-06, + "log_odds": 6.893723011016846, + "log_odds_ratio": -0.07321929931640625, + "loss": 0.2257, + "rejected_geometric_mean": -7.196258544921875, + "step": 4966 + }, + { + "chosen_geometric_mean": -1.0435872077941895, + "epoch": 1.23, + "grad_norm": 13.5, + "learning_rate": 1.6207036794533366e-06, + "log_odds": 7.511110782623291, + "log_odds_ratio": -0.07567458599805832, + "loss": 0.2493, + "rejected_geometric_mean": -8.123164176940918, + "step": 4967 + }, + { + "chosen_geometric_mean": -1.1735610961914062, + "epoch": 1.23, + "grad_norm": 19.375, + "learning_rate": 1.6197924722413611e-06, + "log_odds": 11.478497505187988, + "log_odds_ratio": -0.006463849451392889, + "loss": 0.3101, + "rejected_geometric_mean": -12.279778480529785, + "step": 4968 + }, + { + "chosen_geometric_mean": -0.8848201036453247, + "epoch": 1.23, + "grad_norm": 8.125, + "learning_rate": 1.6188813984899742e-06, + "log_odds": 9.251646041870117, + "log_odds_ratio": -0.01562965288758278, + "loss": 0.2449, + "rejected_geometric_mean": -9.592973709106445, + "step": 4969 + }, + { + "chosen_geometric_mean": -1.1393405199050903, + "epoch": 1.23, + "grad_norm": 3.03125, + "learning_rate": 1.617970458337318e-06, + "log_odds": 6.981860160827637, + "log_odds_ratio": -0.1493723839521408, + "loss": 0.2919, + "rejected_geometric_mean": -7.765430450439453, + "step": 4970 + }, + { + "chosen_geometric_mean": -0.985871434211731, + "epoch": 1.23, + "grad_norm": 2.6875, + "learning_rate": 1.6170596519215121e-06, + "log_odds": 8.49074935913086, + "log_odds_ratio": -0.2157004475593567, + "loss": 0.2453, + "rejected_geometric_mean": -9.179655075073242, + "step": 4971 + }, + { + "chosen_geometric_mean": -0.9300616979598999, + "epoch": 1.23, + "grad_norm": 17.875, + "learning_rate": 1.6161489793806572e-06, + "log_odds": 11.932186126708984, + "log_odds_ratio": -0.27707749605178833, + "loss": 0.2988, + "rejected_geometric_mean": -12.399463653564453, + "step": 4972 + }, + { + "chosen_geometric_mean": -0.9887561798095703, + "epoch": 1.23, + "grad_norm": 18.875, + "learning_rate": 1.615238440852833e-06, + "log_odds": 8.229095458984375, + "log_odds_ratio": -0.06179329380393028, + "loss": 0.2415, + "rejected_geometric_mean": -8.780019760131836, + "step": 4973 + }, + { + "chosen_geometric_mean": -0.9039182662963867, + "epoch": 1.23, + "grad_norm": 5.40625, + "learning_rate": 1.6143280364760994e-06, + "log_odds": 7.920293807983398, + "log_odds_ratio": -0.0023841254878789186, + "loss": 0.3168, + "rejected_geometric_mean": -8.278919219970703, + "step": 4974 + }, + { + "chosen_geometric_mean": -0.9878998398780823, + "epoch": 1.23, + "grad_norm": 38.5, + "learning_rate": 1.6134177663884948e-06, + "log_odds": 16.608631134033203, + "log_odds_ratio": -0.0058432393707334995, + "loss": 0.2375, + "rejected_geometric_mean": -17.124967575073242, + "step": 4975 + }, + { + "chosen_geometric_mean": -0.9857348799705505, + "epoch": 1.23, + "grad_norm": 4.90625, + "learning_rate": 1.6125076307280368e-06, + "log_odds": 3.503648519515991, + "log_odds_ratio": -0.2696582078933716, + "loss": 0.2825, + "rejected_geometric_mean": -4.188848495483398, + "step": 4976 + }, + { + "chosen_geometric_mean": -1.0720891952514648, + "epoch": 1.23, + "grad_norm": 2.546875, + "learning_rate": 1.6115976296327272e-06, + "log_odds": 3.1529576778411865, + "log_odds_ratio": -0.19290956854820251, + "loss": 0.2892, + "rejected_geometric_mean": -3.8995184898376465, + "step": 4977 + }, + { + "chosen_geometric_mean": -0.8385037183761597, + "epoch": 1.23, + "grad_norm": 9.75, + "learning_rate": 1.6106877632405413e-06, + "log_odds": 4.791492462158203, + "log_odds_ratio": -0.03858925774693489, + "loss": 0.2291, + "rejected_geometric_mean": -5.079750061035156, + "step": 4978 + }, + { + "chosen_geometric_mean": -1.2093437910079956, + "epoch": 1.23, + "grad_norm": 36.0, + "learning_rate": 1.6097780316894377e-06, + "log_odds": 9.210777282714844, + "log_odds_ratio": -0.11692927032709122, + "loss": 0.274, + "rejected_geometric_mean": -10.124696731567383, + "step": 4979 + }, + { + "chosen_geometric_mean": -0.9401586055755615, + "epoch": 1.23, + "grad_norm": 4.0625, + "learning_rate": 1.6088684351173532e-06, + "log_odds": 9.652168273925781, + "log_odds_ratio": -0.13468480110168457, + "loss": 0.287, + "rejected_geometric_mean": -10.227550506591797, + "step": 4980 + }, + { + "chosen_geometric_mean": -1.1134107112884521, + "epoch": 1.23, + "grad_norm": 2.78125, + "learning_rate": 1.607958973662205e-06, + "log_odds": 3.5745913982391357, + "log_odds_ratio": -0.18429034948349, + "loss": 0.2747, + "rejected_geometric_mean": -4.370164394378662, + "step": 4981 + }, + { + "chosen_geometric_mean": -1.0010465383529663, + "epoch": 1.23, + "grad_norm": 17.25, + "learning_rate": 1.6070496474618875e-06, + "log_odds": 5.479479789733887, + "log_odds_ratio": -0.3039630949497223, + "loss": 0.2567, + "rejected_geometric_mean": -6.180156707763672, + "step": 4982 + }, + { + "chosen_geometric_mean": -0.8394415378570557, + "epoch": 1.23, + "grad_norm": 1.9375, + "learning_rate": 1.6061404566542787e-06, + "log_odds": 9.138507843017578, + "log_odds_ratio": -0.14485906064510345, + "loss": 0.2357, + "rejected_geometric_mean": -9.474472045898438, + "step": 4983 + }, + { + "chosen_geometric_mean": -1.0683588981628418, + "epoch": 1.23, + "grad_norm": 31.25, + "learning_rate": 1.6052314013772333e-06, + "log_odds": 5.895925998687744, + "log_odds_ratio": -0.4077807664871216, + "loss": 0.3276, + "rejected_geometric_mean": -6.77544641494751, + "step": 4984 + }, + { + "chosen_geometric_mean": -0.7837260365486145, + "epoch": 1.23, + "grad_norm": 2.703125, + "learning_rate": 1.604322481768585e-06, + "log_odds": 8.723210334777832, + "log_odds_ratio": -0.029431546106934547, + "loss": 0.2667, + "rejected_geometric_mean": -8.91521167755127, + "step": 4985 + }, + { + "chosen_geometric_mean": -0.7871630191802979, + "epoch": 1.23, + "grad_norm": 8.0625, + "learning_rate": 1.603413697966148e-06, + "log_odds": 9.152440071105957, + "log_odds_ratio": -0.1221722736954689, + "loss": 0.2858, + "rejected_geometric_mean": -9.389670372009277, + "step": 4986 + }, + { + "chosen_geometric_mean": -0.9169973134994507, + "epoch": 1.23, + "grad_norm": 36.5, + "learning_rate": 1.6025050501077155e-06, + "log_odds": 7.898706436157227, + "log_odds_ratio": -0.19132722914218903, + "loss": 0.2706, + "rejected_geometric_mean": -8.249068260192871, + "step": 4987 + }, + { + "chosen_geometric_mean": -1.0218900442123413, + "epoch": 1.23, + "grad_norm": 28.875, + "learning_rate": 1.6015965383310618e-06, + "log_odds": 18.6278133392334, + "log_odds_ratio": -2.980232594040899e-08, + "loss": 0.2856, + "rejected_geometric_mean": -19.193639755249023, + "step": 4988 + }, + { + "chosen_geometric_mean": -0.7061741352081299, + "epoch": 1.24, + "grad_norm": 1.859375, + "learning_rate": 1.6006881627739374e-06, + "log_odds": 6.1139373779296875, + "log_odds_ratio": -0.1154739186167717, + "loss": 0.233, + "rejected_geometric_mean": -6.276737213134766, + "step": 4989 + }, + { + "chosen_geometric_mean": -1.0936682224273682, + "epoch": 1.24, + "grad_norm": 2.953125, + "learning_rate": 1.599779923574075e-06, + "log_odds": 14.759763717651367, + "log_odds_ratio": -0.01637612283229828, + "loss": 0.2618, + "rejected_geometric_mean": -15.45138931274414, + "step": 4990 + }, + { + "chosen_geometric_mean": -1.146712064743042, + "epoch": 1.24, + "grad_norm": 2.640625, + "learning_rate": 1.5988718208691853e-06, + "log_odds": 6.302542686462402, + "log_odds_ratio": -0.05916133522987366, + "loss": 0.2562, + "rejected_geometric_mean": -7.034488677978516, + "step": 4991 + }, + { + "chosen_geometric_mean": -0.9992583990097046, + "epoch": 1.24, + "grad_norm": 2.671875, + "learning_rate": 1.5979638547969578e-06, + "log_odds": 1.8909721374511719, + "log_odds_ratio": -0.261233389377594, + "loss": 0.2791, + "rejected_geometric_mean": -2.6225030422210693, + "step": 4992 + }, + { + "chosen_geometric_mean": -1.0370354652404785, + "epoch": 1.24, + "grad_norm": 12.875, + "learning_rate": 1.5970560254950617e-06, + "log_odds": 9.626760482788086, + "log_odds_ratio": -0.15207986533641815, + "loss": 0.2448, + "rejected_geometric_mean": -10.313879013061523, + "step": 4993 + }, + { + "chosen_geometric_mean": -0.8998607397079468, + "epoch": 1.24, + "grad_norm": 1.90625, + "learning_rate": 1.596148333101147e-06, + "log_odds": 13.190315246582031, + "log_odds_ratio": -0.13021722435951233, + "loss": 0.2293, + "rejected_geometric_mean": -13.64002799987793, + "step": 4994 + }, + { + "chosen_geometric_mean": -0.9839632511138916, + "epoch": 1.24, + "grad_norm": 5.125, + "learning_rate": 1.5952407777528405e-06, + "log_odds": 8.860218048095703, + "log_odds_ratio": -0.2746724486351013, + "loss": 0.2884, + "rejected_geometric_mean": -9.532336235046387, + "step": 4995 + }, + { + "chosen_geometric_mean": -0.8605602979660034, + "epoch": 1.24, + "grad_norm": 29.25, + "learning_rate": 1.5943333595877504e-06, + "log_odds": 11.819652557373047, + "log_odds_ratio": -0.09712674468755722, + "loss": 0.3104, + "rejected_geometric_mean": -12.163156509399414, + "step": 4996 + }, + { + "chosen_geometric_mean": -0.9354231357574463, + "epoch": 1.24, + "grad_norm": 2.453125, + "learning_rate": 1.5934260787434622e-06, + "log_odds": 6.660907745361328, + "log_odds_ratio": -0.14419527351856232, + "loss": 0.2366, + "rejected_geometric_mean": -7.179490089416504, + "step": 4997 + }, + { + "chosen_geometric_mean": -0.976729691028595, + "epoch": 1.24, + "grad_norm": 22.125, + "learning_rate": 1.5925189353575415e-06, + "log_odds": 7.616041660308838, + "log_odds_ratio": -0.017853623256087303, + "loss": 0.1973, + "rejected_geometric_mean": -8.107547760009766, + "step": 4998 + }, + { + "chosen_geometric_mean": -0.8363742828369141, + "epoch": 1.24, + "grad_norm": 23.875, + "learning_rate": 1.5916119295675336e-06, + "log_odds": 14.222078323364258, + "log_odds_ratio": -7.155034836614504e-05, + "loss": 0.2766, + "rejected_geometric_mean": -14.487215995788574, + "step": 4999 + }, + { + "chosen_geometric_mean": -1.1406193971633911, + "epoch": 1.24, + "grad_norm": 15.5625, + "learning_rate": 1.5907050615109626e-06, + "log_odds": 11.983386039733887, + "log_odds_ratio": -0.03844013437628746, + "loss": 0.2946, + "rejected_geometric_mean": -12.673187255859375, + "step": 5000 + }, + { + "chosen_geometric_mean": -0.9556093215942383, + "epoch": 1.24, + "grad_norm": 2.171875, + "learning_rate": 1.5897983313253307e-06, + "log_odds": 5.61113977432251, + "log_odds_ratio": -0.14821593463420868, + "loss": 0.2383, + "rejected_geometric_mean": -6.186192989349365, + "step": 5001 + }, + { + "chosen_geometric_mean": -1.0437097549438477, + "epoch": 1.24, + "grad_norm": 2.25, + "learning_rate": 1.5888917391481195e-06, + "log_odds": 10.101014137268066, + "log_odds_ratio": -0.05141925439238548, + "loss": 0.2292, + "rejected_geometric_mean": -10.713326454162598, + "step": 5002 + }, + { + "chosen_geometric_mean": -0.8562819957733154, + "epoch": 1.24, + "grad_norm": 9.375, + "learning_rate": 1.5879852851167914e-06, + "log_odds": 8.026256561279297, + "log_odds_ratio": -0.013827996328473091, + "loss": 0.2393, + "rejected_geometric_mean": -8.317163467407227, + "step": 5003 + }, + { + "chosen_geometric_mean": -0.7991875410079956, + "epoch": 1.24, + "grad_norm": 11.25, + "learning_rate": 1.5870789693687855e-06, + "log_odds": 6.214406967163086, + "log_odds_ratio": -0.25455471873283386, + "loss": 0.3094, + "rejected_geometric_mean": -6.618322372436523, + "step": 5004 + }, + { + "chosen_geometric_mean": -1.0449159145355225, + "epoch": 1.24, + "grad_norm": 3.671875, + "learning_rate": 1.5861727920415217e-06, + "log_odds": 6.000326156616211, + "log_odds_ratio": -0.2395838350057602, + "loss": 0.2458, + "rejected_geometric_mean": -6.701094150543213, + "step": 5005 + }, + { + "chosen_geometric_mean": -0.9254275560379028, + "epoch": 1.24, + "grad_norm": 10.25, + "learning_rate": 1.5852667532723985e-06, + "log_odds": 0.5891189575195312, + "log_odds_ratio": -0.472560852766037, + "loss": 0.2433, + "rejected_geometric_mean": -1.351097583770752, + "step": 5006 + }, + { + "chosen_geometric_mean": -0.9868345260620117, + "epoch": 1.24, + "grad_norm": 4.71875, + "learning_rate": 1.5843608531987927e-06, + "log_odds": 6.3408308029174805, + "log_odds_ratio": -0.10322225838899612, + "loss": 0.2518, + "rejected_geometric_mean": -6.905054569244385, + "step": 5007 + }, + { + "chosen_geometric_mean": -0.8483489751815796, + "epoch": 1.24, + "grad_norm": 3.40625, + "learning_rate": 1.5834550919580605e-06, + "log_odds": 12.908360481262207, + "log_odds_ratio": -0.1043279841542244, + "loss": 0.2366, + "rejected_geometric_mean": -13.254972457885742, + "step": 5008 + }, + { + "chosen_geometric_mean": -0.9856399893760681, + "epoch": 1.24, + "grad_norm": 8.5625, + "learning_rate": 1.5825494696875355e-06, + "log_odds": 2.917510986328125, + "log_odds_ratio": -0.16749677062034607, + "loss": 0.2927, + "rejected_geometric_mean": -3.5065228939056396, + "step": 5009 + }, + { + "chosen_geometric_mean": -1.0098938941955566, + "epoch": 1.24, + "grad_norm": 2.484375, + "learning_rate": 1.5816439865245352e-06, + "log_odds": 7.922606468200684, + "log_odds_ratio": -0.1305462121963501, + "loss": 0.2866, + "rejected_geometric_mean": -8.515668869018555, + "step": 5010 + }, + { + "chosen_geometric_mean": -0.8482488393783569, + "epoch": 1.24, + "grad_norm": 20.0, + "learning_rate": 1.5807386426063498e-06, + "log_odds": 7.621668815612793, + "log_odds_ratio": -0.19603681564331055, + "loss": 0.2478, + "rejected_geometric_mean": -8.038484573364258, + "step": 5011 + }, + { + "chosen_geometric_mean": -1.0234663486480713, + "epoch": 1.24, + "grad_norm": 5.09375, + "learning_rate": 1.5798334380702523e-06, + "log_odds": 13.63081169128418, + "log_odds_ratio": -0.023477301001548767, + "loss": 0.2149, + "rejected_geometric_mean": -14.143360137939453, + "step": 5012 + }, + { + "chosen_geometric_mean": -0.981238842010498, + "epoch": 1.24, + "grad_norm": 37.75, + "learning_rate": 1.5789283730534932e-06, + "log_odds": 5.982048988342285, + "log_odds_ratio": -0.17769049108028412, + "loss": 0.3399, + "rejected_geometric_mean": -6.536557197570801, + "step": 5013 + }, + { + "chosen_geometric_mean": -0.959227442741394, + "epoch": 1.24, + "grad_norm": 18.5, + "learning_rate": 1.5780234476933026e-06, + "log_odds": 13.793639183044434, + "log_odds_ratio": -0.05570095404982567, + "loss": 0.2811, + "rejected_geometric_mean": -14.269381523132324, + "step": 5014 + }, + { + "chosen_geometric_mean": -1.0130738019943237, + "epoch": 1.24, + "grad_norm": 2.34375, + "learning_rate": 1.5771186621268869e-06, + "log_odds": 9.364678382873535, + "log_odds_ratio": -0.1317472606897354, + "loss": 0.2439, + "rejected_geometric_mean": -9.98511791229248, + "step": 5015 + }, + { + "chosen_geometric_mean": -1.0682355165481567, + "epoch": 1.24, + "grad_norm": 3.75, + "learning_rate": 1.5762140164914359e-06, + "log_odds": 14.124618530273438, + "log_odds_ratio": -0.01543750986456871, + "loss": 0.2124, + "rejected_geometric_mean": -14.717061996459961, + "step": 5016 + }, + { + "chosen_geometric_mean": -0.8696529865264893, + "epoch": 1.24, + "grad_norm": 3.15625, + "learning_rate": 1.5753095109241146e-06, + "log_odds": 10.032042503356934, + "log_odds_ratio": -0.013480901718139648, + "loss": 0.2636, + "rejected_geometric_mean": -10.323234558105469, + "step": 5017 + }, + { + "chosen_geometric_mean": -1.18166983127594, + "epoch": 1.24, + "grad_norm": 3.21875, + "learning_rate": 1.5744051455620674e-06, + "log_odds": 2.0644052028656006, + "log_odds_ratio": -0.22363053262233734, + "loss": 0.2337, + "rejected_geometric_mean": -3.0087437629699707, + "step": 5018 + }, + { + "chosen_geometric_mean": -1.0569803714752197, + "epoch": 1.24, + "grad_norm": 59.25, + "learning_rate": 1.5735009205424179e-06, + "log_odds": 6.00948429107666, + "log_odds_ratio": -0.02885732799768448, + "loss": 0.3671, + "rejected_geometric_mean": -6.649796485900879, + "step": 5019 + }, + { + "chosen_geometric_mean": -1.3660755157470703, + "epoch": 1.24, + "grad_norm": 22.125, + "learning_rate": 1.572596836002268e-06, + "log_odds": 7.05377197265625, + "log_odds_ratio": -0.13946327567100525, + "loss": 0.2702, + "rejected_geometric_mean": -8.078502655029297, + "step": 5020 + }, + { + "chosen_geometric_mean": -0.8050683736801147, + "epoch": 1.24, + "grad_norm": 11.4375, + "learning_rate": 1.5716928920786995e-06, + "log_odds": 2.524418830871582, + "log_odds_ratio": -0.19050373136997223, + "loss": 0.2717, + "rejected_geometric_mean": -2.8235230445861816, + "step": 5021 + }, + { + "chosen_geometric_mean": -1.2056785821914673, + "epoch": 1.24, + "grad_norm": 26.25, + "learning_rate": 1.5707890889087718e-06, + "log_odds": 14.198722839355469, + "log_odds_ratio": -0.0031120600178837776, + "loss": 0.3154, + "rejected_geometric_mean": -15.021240234375, + "step": 5022 + }, + { + "chosen_geometric_mean": -0.9625939726829529, + "epoch": 1.24, + "grad_norm": 5.4375, + "learning_rate": 1.5698854266295228e-06, + "log_odds": 14.07996940612793, + "log_odds_ratio": -0.0034192237071692944, + "loss": 0.2434, + "rejected_geometric_mean": -14.552034378051758, + "step": 5023 + }, + { + "chosen_geometric_mean": -1.0773530006408691, + "epoch": 1.24, + "grad_norm": 2.109375, + "learning_rate": 1.56898190537797e-06, + "log_odds": 9.553905487060547, + "log_odds_ratio": -0.14873655140399933, + "loss": 0.2708, + "rejected_geometric_mean": -10.298367500305176, + "step": 5024 + }, + { + "chosen_geometric_mean": -0.9540884494781494, + "epoch": 1.24, + "grad_norm": 25.125, + "learning_rate": 1.5680785252911079e-06, + "log_odds": 8.409436225891113, + "log_odds_ratio": -0.18079206347465515, + "loss": 0.2375, + "rejected_geometric_mean": -8.960836410522461, + "step": 5025 + }, + { + "chosen_geometric_mean": -0.9750162959098816, + "epoch": 1.24, + "grad_norm": 4.125, + "learning_rate": 1.567175286505911e-06, + "log_odds": 8.547880172729492, + "log_odds_ratio": -0.056619755923748016, + "loss": 0.2396, + "rejected_geometric_mean": -9.036131858825684, + "step": 5026 + }, + { + "chosen_geometric_mean": -0.8040571212768555, + "epoch": 1.24, + "grad_norm": 2.703125, + "learning_rate": 1.566272189159332e-06, + "log_odds": 10.81942367553711, + "log_odds_ratio": -0.0005035475478507578, + "loss": 0.222, + "rejected_geometric_mean": -10.989542007446289, + "step": 5027 + }, + { + "chosen_geometric_mean": -1.0195156335830688, + "epoch": 1.24, + "grad_norm": 3.921875, + "learning_rate": 1.5653692333883025e-06, + "log_odds": 13.671607971191406, + "log_odds_ratio": -0.20865750312805176, + "loss": 0.3273, + "rejected_geometric_mean": -14.379436492919922, + "step": 5028 + }, + { + "chosen_geometric_mean": -1.046004056930542, + "epoch": 1.25, + "grad_norm": 2.140625, + "learning_rate": 1.564466419329732e-06, + "log_odds": 12.320514678955078, + "log_odds_ratio": -0.031404364854097366, + "loss": 0.2448, + "rejected_geometric_mean": -12.946599960327148, + "step": 5029 + }, + { + "chosen_geometric_mean": -1.4031206369400024, + "epoch": 1.25, + "grad_norm": 34.25, + "learning_rate": 1.563563747120509e-06, + "log_odds": 15.198158264160156, + "log_odds_ratio": -0.2743009626865387, + "loss": 0.2856, + "rejected_geometric_mean": -16.297426223754883, + "step": 5030 + }, + { + "chosen_geometric_mean": -1.0611555576324463, + "epoch": 1.25, + "grad_norm": 8.4375, + "learning_rate": 1.5626612168974992e-06, + "log_odds": 7.4441914558410645, + "log_odds_ratio": -0.12683887779712677, + "loss": 0.2376, + "rejected_geometric_mean": -8.146245956420898, + "step": 5031 + }, + { + "chosen_geometric_mean": -1.0102229118347168, + "epoch": 1.25, + "grad_norm": 4.875, + "learning_rate": 1.5617588287975495e-06, + "log_odds": 8.412100791931152, + "log_odds_ratio": -0.12793608009815216, + "loss": 0.2335, + "rejected_geometric_mean": -9.03660774230957, + "step": 5032 + }, + { + "chosen_geometric_mean": -1.0576168298721313, + "epoch": 1.25, + "grad_norm": 3.453125, + "learning_rate": 1.5608565829574835e-06, + "log_odds": 13.60395336151123, + "log_odds_ratio": -0.03797237202525139, + "loss": 0.2656, + "rejected_geometric_mean": -14.217049598693848, + "step": 5033 + }, + { + "chosen_geometric_mean": -1.14863920211792, + "epoch": 1.25, + "grad_norm": 8.875, + "learning_rate": 1.5599544795141016e-06, + "log_odds": 7.183563709259033, + "log_odds_ratio": -0.06589293479919434, + "loss": 0.2433, + "rejected_geometric_mean": -7.928359031677246, + "step": 5034 + }, + { + "chosen_geometric_mean": -1.079215407371521, + "epoch": 1.25, + "grad_norm": 5.15625, + "learning_rate": 1.5590525186041858e-06, + "log_odds": 8.48810863494873, + "log_odds_ratio": -0.01171082817018032, + "loss": 0.2325, + "rejected_geometric_mean": -9.139966011047363, + "step": 5035 + }, + { + "chosen_geometric_mean": -1.060740351676941, + "epoch": 1.25, + "grad_norm": 7.5625, + "learning_rate": 1.5581507003644946e-06, + "log_odds": 4.052881240844727, + "log_odds_ratio": -0.259258896112442, + "loss": 0.2761, + "rejected_geometric_mean": -4.834506988525391, + "step": 5036 + }, + { + "chosen_geometric_mean": -0.9534995555877686, + "epoch": 1.25, + "grad_norm": 23.25, + "learning_rate": 1.5572490249317645e-06, + "log_odds": 9.294364929199219, + "log_odds_ratio": -0.150777205824852, + "loss": 0.2333, + "rejected_geometric_mean": -9.881590843200684, + "step": 5037 + }, + { + "chosen_geometric_mean": -1.0947047472000122, + "epoch": 1.25, + "grad_norm": 4.625, + "learning_rate": 1.5563474924427127e-06, + "log_odds": 8.387534141540527, + "log_odds_ratio": -0.141996368765831, + "loss": 0.2618, + "rejected_geometric_mean": -9.147697448730469, + "step": 5038 + }, + { + "chosen_geometric_mean": -1.176541805267334, + "epoch": 1.25, + "grad_norm": 15.4375, + "learning_rate": 1.5554461030340323e-06, + "log_odds": 4.280649662017822, + "log_odds_ratio": -0.14282003045082092, + "loss": 0.2791, + "rejected_geometric_mean": -5.150564670562744, + "step": 5039 + }, + { + "chosen_geometric_mean": -1.0066314935684204, + "epoch": 1.25, + "grad_norm": 3.21875, + "learning_rate": 1.5545448568423963e-06, + "log_odds": 6.333895683288574, + "log_odds_ratio": -0.11422378569841385, + "loss": 0.2707, + "rejected_geometric_mean": -6.927424430847168, + "step": 5040 + }, + { + "chosen_geometric_mean": -1.003269910812378, + "epoch": 1.25, + "grad_norm": 14.5, + "learning_rate": 1.553643754004454e-06, + "log_odds": 4.552647590637207, + "log_odds_ratio": -0.12287852168083191, + "loss": 0.2527, + "rejected_geometric_mean": -5.171074867248535, + "step": 5041 + }, + { + "chosen_geometric_mean": -1.0170035362243652, + "epoch": 1.25, + "grad_norm": 2.4375, + "learning_rate": 1.5527427946568338e-06, + "log_odds": 2.160212516784668, + "log_odds_ratio": -0.18588000535964966, + "loss": 0.2616, + "rejected_geometric_mean": -2.844758987426758, + "step": 5042 + }, + { + "chosen_geometric_mean": -1.0563092231750488, + "epoch": 1.25, + "grad_norm": 8.25, + "learning_rate": 1.5518419789361456e-06, + "log_odds": 5.812400817871094, + "log_odds_ratio": -0.1383664309978485, + "loss": 0.2554, + "rejected_geometric_mean": -6.481243133544922, + "step": 5043 + }, + { + "chosen_geometric_mean": -1.0517220497131348, + "epoch": 1.25, + "grad_norm": 4.5, + "learning_rate": 1.5509413069789718e-06, + "log_odds": 7.266408443450928, + "log_odds_ratio": -0.17713844776153564, + "loss": 0.2874, + "rejected_geometric_mean": -7.996798038482666, + "step": 5044 + }, + { + "chosen_geometric_mean": -1.0366485118865967, + "epoch": 1.25, + "grad_norm": 9.0, + "learning_rate": 1.5500407789218775e-06, + "log_odds": 5.388972282409668, + "log_odds_ratio": -0.1415608823299408, + "loss": 0.2264, + "rejected_geometric_mean": -5.996683597564697, + "step": 5045 + }, + { + "chosen_geometric_mean": -1.0265027284622192, + "epoch": 1.25, + "grad_norm": 6.625, + "learning_rate": 1.549140394901404e-06, + "log_odds": 12.063553810119629, + "log_odds_ratio": -0.006677999161183834, + "loss": 0.2513, + "rejected_geometric_mean": -12.640466690063477, + "step": 5046 + }, + { + "chosen_geometric_mean": -1.2408506870269775, + "epoch": 1.25, + "grad_norm": 1.9375, + "learning_rate": 1.548240155054071e-06, + "log_odds": 11.657654762268066, + "log_odds_ratio": -0.003639973234385252, + "loss": 0.2542, + "rejected_geometric_mean": -12.540700912475586, + "step": 5047 + }, + { + "chosen_geometric_mean": -1.0966869592666626, + "epoch": 1.25, + "grad_norm": 25.25, + "learning_rate": 1.5473400595163751e-06, + "log_odds": 9.208056449890137, + "log_odds_ratio": -0.08059308677911758, + "loss": 0.225, + "rejected_geometric_mean": -9.940677642822266, + "step": 5048 + }, + { + "chosen_geometric_mean": -0.9927868843078613, + "epoch": 1.25, + "grad_norm": 4.5625, + "learning_rate": 1.5464401084247948e-06, + "log_odds": 2.108839750289917, + "log_odds_ratio": -0.24394536018371582, + "loss": 0.2414, + "rejected_geometric_mean": -2.792133331298828, + "step": 5049 + }, + { + "chosen_geometric_mean": -0.9527648687362671, + "epoch": 1.25, + "grad_norm": 22.5, + "learning_rate": 1.545540301915784e-06, + "log_odds": 14.143338203430176, + "log_odds_ratio": -0.10095898807048798, + "loss": 0.2194, + "rejected_geometric_mean": -14.629332542419434, + "step": 5050 + }, + { + "chosen_geometric_mean": -0.8786978721618652, + "epoch": 1.25, + "grad_norm": 6.75, + "learning_rate": 1.5446406401257736e-06, + "log_odds": 15.212545394897461, + "log_odds_ratio": -0.001908089965581894, + "loss": 0.2942, + "rejected_geometric_mean": -15.455841064453125, + "step": 5051 + }, + { + "chosen_geometric_mean": -0.9637885093688965, + "epoch": 1.25, + "grad_norm": 51.0, + "learning_rate": 1.5437411231911742e-06, + "log_odds": 3.9491305351257324, + "log_odds_ratio": -0.17175783216953278, + "loss": 0.2423, + "rejected_geometric_mean": -4.499278545379639, + "step": 5052 + }, + { + "chosen_geometric_mean": -1.0546233654022217, + "epoch": 1.25, + "grad_norm": 1.9140625, + "learning_rate": 1.5428417512483742e-06, + "log_odds": 3.7382895946502686, + "log_odds_ratio": -0.25361526012420654, + "loss": 0.2128, + "rejected_geometric_mean": -4.486685276031494, + "step": 5053 + }, + { + "chosen_geometric_mean": -0.906233549118042, + "epoch": 1.25, + "grad_norm": 52.0, + "learning_rate": 1.5419425244337407e-06, + "log_odds": 17.717437744140625, + "log_odds_ratio": -0.000802297901827842, + "loss": 0.2665, + "rejected_geometric_mean": -18.103239059448242, + "step": 5054 + }, + { + "chosen_geometric_mean": -1.0519238710403442, + "epoch": 1.25, + "grad_norm": 1.984375, + "learning_rate": 1.5410434428836178e-06, + "log_odds": 9.547245025634766, + "log_odds_ratio": -0.13927435874938965, + "loss": 0.2563, + "rejected_geometric_mean": -10.268899917602539, + "step": 5055 + }, + { + "chosen_geometric_mean": -1.0092490911483765, + "epoch": 1.25, + "grad_norm": 25.625, + "learning_rate": 1.540144506734328e-06, + "log_odds": 6.741384506225586, + "log_odds_ratio": -0.14494860172271729, + "loss": 0.2531, + "rejected_geometric_mean": -7.385569095611572, + "step": 5056 + }, + { + "chosen_geometric_mean": -0.9653531312942505, + "epoch": 1.25, + "grad_norm": 19.5, + "learning_rate": 1.5392457161221708e-06, + "log_odds": 9.695072174072266, + "log_odds_ratio": -0.120860755443573, + "loss": 0.2844, + "rejected_geometric_mean": -10.231266021728516, + "step": 5057 + }, + { + "chosen_geometric_mean": -0.8658975958824158, + "epoch": 1.25, + "grad_norm": 3.25, + "learning_rate": 1.5383470711834238e-06, + "log_odds": 11.654918670654297, + "log_odds_ratio": -0.08347952365875244, + "loss": 0.2439, + "rejected_geometric_mean": -12.003808975219727, + "step": 5058 + }, + { + "chosen_geometric_mean": -0.8507101535797119, + "epoch": 1.25, + "grad_norm": 2.078125, + "learning_rate": 1.5374485720543457e-06, + "log_odds": 13.466142654418945, + "log_odds_ratio": -0.09172740578651428, + "loss": 0.2905, + "rejected_geometric_mean": -13.823192596435547, + "step": 5059 + }, + { + "chosen_geometric_mean": -0.8948249220848083, + "epoch": 1.25, + "grad_norm": 6.21875, + "learning_rate": 1.536550218871169e-06, + "log_odds": 6.414854526519775, + "log_odds_ratio": -0.11145813763141632, + "loss": 0.2679, + "rejected_geometric_mean": -6.814620018005371, + "step": 5060 + }, + { + "chosen_geometric_mean": -1.016862392425537, + "epoch": 1.25, + "grad_norm": 25.875, + "learning_rate": 1.5356520117701055e-06, + "log_odds": 4.323718070983887, + "log_odds_ratio": -0.07632289081811905, + "loss": 0.2752, + "rejected_geometric_mean": -4.93845272064209, + "step": 5061 + }, + { + "chosen_geometric_mean": -1.2890517711639404, + "epoch": 1.25, + "grad_norm": 13.1875, + "learning_rate": 1.5347539508873449e-06, + "log_odds": 8.469291687011719, + "log_odds_ratio": -0.03448301553726196, + "loss": 0.2457, + "rejected_geometric_mean": -9.446770668029785, + "step": 5062 + }, + { + "chosen_geometric_mean": -0.9024736285209656, + "epoch": 1.25, + "grad_norm": 21.125, + "learning_rate": 1.533856036359056e-06, + "log_odds": 12.690837860107422, + "log_odds_ratio": -0.13426150381565094, + "loss": 0.3075, + "rejected_geometric_mean": -13.09671688079834, + "step": 5063 + }, + { + "chosen_geometric_mean": -1.0324366092681885, + "epoch": 1.25, + "grad_norm": 21.0, + "learning_rate": 1.5329582683213812e-06, + "log_odds": 5.872317314147949, + "log_odds_ratio": -0.2600165605545044, + "loss": 0.269, + "rejected_geometric_mean": -6.534731864929199, + "step": 5064 + }, + { + "chosen_geometric_mean": -1.1659681797027588, + "epoch": 1.25, + "grad_norm": 3.40625, + "learning_rate": 1.532060646910447e-06, + "log_odds": 4.393255710601807, + "log_odds_ratio": -0.05910862982273102, + "loss": 0.2519, + "rejected_geometric_mean": -5.196425437927246, + "step": 5065 + }, + { + "chosen_geometric_mean": -1.3085609674453735, + "epoch": 1.25, + "grad_norm": 17.625, + "learning_rate": 1.5311631722623535e-06, + "log_odds": 2.5563626289367676, + "log_odds_ratio": -0.326787531375885, + "loss": 0.3354, + "rejected_geometric_mean": -3.662196159362793, + "step": 5066 + }, + { + "chosen_geometric_mean": -1.3655829429626465, + "epoch": 1.25, + "grad_norm": 40.75, + "learning_rate": 1.5302658445131785e-06, + "log_odds": 5.259031772613525, + "log_odds_ratio": -0.17017368972301483, + "loss": 0.2844, + "rejected_geometric_mean": -6.388845920562744, + "step": 5067 + }, + { + "chosen_geometric_mean": -0.969102144241333, + "epoch": 1.25, + "grad_norm": 12.9375, + "learning_rate": 1.5293686637989791e-06, + "log_odds": 6.046197891235352, + "log_odds_ratio": -0.08135265856981277, + "loss": 0.2491, + "rejected_geometric_mean": -6.523261547088623, + "step": 5068 + }, + { + "chosen_geometric_mean": -1.1757051944732666, + "epoch": 1.26, + "grad_norm": 7.84375, + "learning_rate": 1.5284716302557883e-06, + "log_odds": 8.151458740234375, + "log_odds_ratio": -0.1833883672952652, + "loss": 0.2692, + "rejected_geometric_mean": -8.980691909790039, + "step": 5069 + }, + { + "chosen_geometric_mean": -1.0386114120483398, + "epoch": 1.26, + "grad_norm": 36.25, + "learning_rate": 1.5275747440196195e-06, + "log_odds": 8.615106582641602, + "log_odds_ratio": -0.21194475889205933, + "loss": 0.2909, + "rejected_geometric_mean": -9.325480461120605, + "step": 5070 + }, + { + "chosen_geometric_mean": -0.9085831642150879, + "epoch": 1.26, + "grad_norm": 2.953125, + "learning_rate": 1.5266780052264615e-06, + "log_odds": 8.997095108032227, + "log_odds_ratio": -0.004705042578279972, + "loss": 0.2739, + "rejected_geometric_mean": -9.380391120910645, + "step": 5071 + }, + { + "chosen_geometric_mean": -1.0178437232971191, + "epoch": 1.26, + "grad_norm": 2.21875, + "learning_rate": 1.5257814140122818e-06, + "log_odds": 5.78921365737915, + "log_odds_ratio": -0.23505601286888123, + "loss": 0.2621, + "rejected_geometric_mean": -6.449524402618408, + "step": 5072 + }, + { + "chosen_geometric_mean": -0.9158880710601807, + "epoch": 1.26, + "grad_norm": 2.921875, + "learning_rate": 1.5248849705130247e-06, + "log_odds": 10.993254661560059, + "log_odds_ratio": -0.17609314620494843, + "loss": 0.2433, + "rejected_geometric_mean": -11.58098030090332, + "step": 5073 + }, + { + "chosen_geometric_mean": -0.9896278381347656, + "epoch": 1.26, + "grad_norm": 47.5, + "learning_rate": 1.5239886748646126e-06, + "log_odds": 5.76359748840332, + "log_odds_ratio": -0.3171408772468567, + "loss": 0.2723, + "rejected_geometric_mean": -6.436315536499023, + "step": 5074 + }, + { + "chosen_geometric_mean": -1.1111764907836914, + "epoch": 1.26, + "grad_norm": 4.84375, + "learning_rate": 1.5230925272029448e-06, + "log_odds": 11.31298542022705, + "log_odds_ratio": -0.06953991949558258, + "loss": 0.2889, + "rejected_geometric_mean": -12.012887954711914, + "step": 5075 + }, + { + "chosen_geometric_mean": -1.0643588304519653, + "epoch": 1.26, + "grad_norm": 10.75, + "learning_rate": 1.5221965276639e-06, + "log_odds": 8.330357551574707, + "log_odds_ratio": -0.26055142283439636, + "loss": 0.2678, + "rejected_geometric_mean": -9.14201545715332, + "step": 5076 + }, + { + "chosen_geometric_mean": -0.9588575959205627, + "epoch": 1.26, + "grad_norm": 2.015625, + "learning_rate": 1.5213006763833327e-06, + "log_odds": 10.705248832702637, + "log_odds_ratio": -0.15503326058387756, + "loss": 0.2427, + "rejected_geometric_mean": -11.273463249206543, + "step": 5077 + }, + { + "chosen_geometric_mean": -1.0207866430282593, + "epoch": 1.26, + "grad_norm": 15.1875, + "learning_rate": 1.5204049734970756e-06, + "log_odds": 6.544388771057129, + "log_odds_ratio": -0.1405206024646759, + "loss": 0.2606, + "rejected_geometric_mean": -7.22048282623291, + "step": 5078 + }, + { + "chosen_geometric_mean": -0.8988592624664307, + "epoch": 1.26, + "grad_norm": 1.890625, + "learning_rate": 1.5195094191409386e-06, + "log_odds": 7.685761451721191, + "log_odds_ratio": -0.16157786548137665, + "loss": 0.2173, + "rejected_geometric_mean": -8.16423511505127, + "step": 5079 + }, + { + "chosen_geometric_mean": -0.9009437561035156, + "epoch": 1.26, + "grad_norm": 7.40625, + "learning_rate": 1.5186140134507088e-06, + "log_odds": 7.234602451324463, + "log_odds_ratio": -0.05033223330974579, + "loss": 0.2303, + "rejected_geometric_mean": -7.643101692199707, + "step": 5080 + }, + { + "chosen_geometric_mean": -0.8866632580757141, + "epoch": 1.26, + "grad_norm": 3.578125, + "learning_rate": 1.517718756562152e-06, + "log_odds": 10.55337905883789, + "log_odds_ratio": -0.05711577460169792, + "loss": 0.2336, + "rejected_geometric_mean": -10.923095703125, + "step": 5081 + }, + { + "chosen_geometric_mean": -1.1023337841033936, + "epoch": 1.26, + "grad_norm": 21.25, + "learning_rate": 1.516823648611011e-06, + "log_odds": 6.11210298538208, + "log_odds_ratio": -0.2622336447238922, + "loss": 0.2842, + "rejected_geometric_mean": -6.919129371643066, + "step": 5082 + }, + { + "chosen_geometric_mean": -0.9114471673965454, + "epoch": 1.26, + "grad_norm": 1.828125, + "learning_rate": 1.5159286897330045e-06, + "log_odds": 13.924342155456543, + "log_odds_ratio": -0.005481683649122715, + "loss": 0.231, + "rejected_geometric_mean": -14.313031196594238, + "step": 5083 + }, + { + "chosen_geometric_mean": -1.0751827955245972, + "epoch": 1.26, + "grad_norm": 7.9375, + "learning_rate": 1.5150338800638298e-06, + "log_odds": 11.60421085357666, + "log_odds_ratio": -0.062237758189439774, + "loss": 0.3132, + "rejected_geometric_mean": -12.267454147338867, + "step": 5084 + }, + { + "chosen_geometric_mean": -1.0330870151519775, + "epoch": 1.26, + "grad_norm": 3.859375, + "learning_rate": 1.5141392197391621e-06, + "log_odds": 4.3998847007751465, + "log_odds_ratio": -0.12551608681678772, + "loss": 0.2206, + "rejected_geometric_mean": -4.998668670654297, + "step": 5085 + }, + { + "chosen_geometric_mean": -0.8761821985244751, + "epoch": 1.26, + "grad_norm": 2.625, + "learning_rate": 1.5132447088946529e-06, + "log_odds": 9.230537414550781, + "log_odds_ratio": -0.005827511660754681, + "loss": 0.2505, + "rejected_geometric_mean": -9.536179542541504, + "step": 5086 + }, + { + "chosen_geometric_mean": -0.8678299188613892, + "epoch": 1.26, + "grad_norm": 3.765625, + "learning_rate": 1.5123503476659323e-06, + "log_odds": 6.072420597076416, + "log_odds_ratio": -0.27196669578552246, + "loss": 0.2208, + "rejected_geometric_mean": -6.6096415519714355, + "step": 5087 + }, + { + "chosen_geometric_mean": -1.1524553298950195, + "epoch": 1.26, + "grad_norm": 4.375, + "learning_rate": 1.5114561361886063e-06, + "log_odds": 9.427891731262207, + "log_odds_ratio": -0.010258915834128857, + "loss": 0.2369, + "rejected_geometric_mean": -10.196558952331543, + "step": 5088 + }, + { + "chosen_geometric_mean": -1.129796028137207, + "epoch": 1.26, + "grad_norm": 4.34375, + "learning_rate": 1.5105620745982596e-06, + "log_odds": 3.6482484340667725, + "log_odds_ratio": -0.24181120097637177, + "loss": 0.2621, + "rejected_geometric_mean": -4.522594451904297, + "step": 5089 + }, + { + "chosen_geometric_mean": -0.8733484148979187, + "epoch": 1.26, + "grad_norm": 5.21875, + "learning_rate": 1.5096681630304523e-06, + "log_odds": 4.556182861328125, + "log_odds_ratio": -0.07120893895626068, + "loss": 0.2367, + "rejected_geometric_mean": -4.936032295227051, + "step": 5090 + }, + { + "chosen_geometric_mean": -0.9662311673164368, + "epoch": 1.26, + "grad_norm": 3.171875, + "learning_rate": 1.5087744016207223e-06, + "log_odds": 5.952513217926025, + "log_odds_ratio": -0.2513096034526825, + "loss": 0.2853, + "rejected_geometric_mean": -6.584939002990723, + "step": 5091 + }, + { + "chosen_geometric_mean": -0.7757604718208313, + "epoch": 1.26, + "grad_norm": 6.4375, + "learning_rate": 1.5078807905045879e-06, + "log_odds": 7.186680793762207, + "log_odds_ratio": -0.2176554799079895, + "loss": 0.2866, + "rejected_geometric_mean": -7.55832052230835, + "step": 5092 + }, + { + "chosen_geometric_mean": -1.1444801092147827, + "epoch": 1.26, + "grad_norm": 4.65625, + "learning_rate": 1.5069873298175397e-06, + "log_odds": 5.295181751251221, + "log_odds_ratio": -0.08721625059843063, + "loss": 0.2951, + "rejected_geometric_mean": -6.079774856567383, + "step": 5093 + }, + { + "chosen_geometric_mean": -1.0742557048797607, + "epoch": 1.26, + "grad_norm": 6.5, + "learning_rate": 1.5060940196950486e-06, + "log_odds": 3.8187813758850098, + "log_odds_ratio": -0.16211700439453125, + "loss": 0.2561, + "rejected_geometric_mean": -4.58649206161499, + "step": 5094 + }, + { + "chosen_geometric_mean": -1.103223443031311, + "epoch": 1.26, + "grad_norm": 26.125, + "learning_rate": 1.5052008602725616e-06, + "log_odds": 9.099211692810059, + "log_odds_ratio": -0.06019824370741844, + "loss": 0.3212, + "rejected_geometric_mean": -9.79958438873291, + "step": 5095 + }, + { + "chosen_geometric_mean": -0.9435569047927856, + "epoch": 1.26, + "grad_norm": 5.96875, + "learning_rate": 1.5043078516855037e-06, + "log_odds": 2.361938714981079, + "log_odds_ratio": -0.3420082628726959, + "loss": 0.3119, + "rejected_geometric_mean": -3.042269229888916, + "step": 5096 + }, + { + "chosen_geometric_mean": -0.953152060508728, + "epoch": 1.26, + "grad_norm": 3.734375, + "learning_rate": 1.5034149940692749e-06, + "log_odds": 6.373698711395264, + "log_odds_ratio": -0.103347547352314, + "loss": 0.2542, + "rejected_geometric_mean": -6.9068474769592285, + "step": 5097 + }, + { + "chosen_geometric_mean": -1.0440808534622192, + "epoch": 1.26, + "grad_norm": 2.28125, + "learning_rate": 1.5025222875592555e-06, + "log_odds": 8.038064002990723, + "log_odds_ratio": -0.14328353106975555, + "loss": 0.2177, + "rejected_geometric_mean": -8.639436721801758, + "step": 5098 + }, + { + "chosen_geometric_mean": -0.8854131102561951, + "epoch": 1.26, + "grad_norm": 18.25, + "learning_rate": 1.5016297322908012e-06, + "log_odds": 11.698678016662598, + "log_odds_ratio": -0.0005632166867144406, + "loss": 0.244, + "rejected_geometric_mean": -12.029648780822754, + "step": 5099 + }, + { + "chosen_geometric_mean": -1.0874501466751099, + "epoch": 1.26, + "grad_norm": 12.5, + "learning_rate": 1.5007373283992443e-06, + "log_odds": 12.14200496673584, + "log_odds_ratio": -0.18975457549095154, + "loss": 0.2501, + "rejected_geometric_mean": -12.765893936157227, + "step": 5100 + }, + { + "chosen_geometric_mean": -1.2362602949142456, + "epoch": 1.26, + "grad_norm": 16.625, + "learning_rate": 1.4998450760198943e-06, + "log_odds": 15.259156227111816, + "log_odds_ratio": -0.049966998398303986, + "loss": 0.2613, + "rejected_geometric_mean": -16.167722702026367, + "step": 5101 + }, + { + "chosen_geometric_mean": -0.9928755760192871, + "epoch": 1.26, + "grad_norm": 3.390625, + "learning_rate": 1.4989529752880377e-06, + "log_odds": 3.620151996612549, + "log_odds_ratio": -0.0829310193657875, + "loss": 0.2685, + "rejected_geometric_mean": -4.171758651733398, + "step": 5102 + }, + { + "chosen_geometric_mean": -1.0576040744781494, + "epoch": 1.26, + "grad_norm": 12.1875, + "learning_rate": 1.49806102633894e-06, + "log_odds": 11.433707237243652, + "log_odds_ratio": -0.04767498001456261, + "loss": 0.2669, + "rejected_geometric_mean": -12.075039863586426, + "step": 5103 + }, + { + "chosen_geometric_mean": -1.077707290649414, + "epoch": 1.26, + "grad_norm": 13.3125, + "learning_rate": 1.4971692293078411e-06, + "log_odds": 7.745089530944824, + "log_odds_ratio": -0.00493673887103796, + "loss": 0.2666, + "rejected_geometric_mean": -8.372663497924805, + "step": 5104 + }, + { + "chosen_geometric_mean": -0.8410641551017761, + "epoch": 1.26, + "grad_norm": 8.375, + "learning_rate": 1.4962775843299587e-06, + "log_odds": 11.918055534362793, + "log_odds_ratio": -0.015295345336198807, + "loss": 0.2639, + "rejected_geometric_mean": -12.178963661193848, + "step": 5105 + }, + { + "chosen_geometric_mean": -0.8398533463478088, + "epoch": 1.26, + "grad_norm": 16.25, + "learning_rate": 1.4953860915404883e-06, + "log_odds": 16.398183822631836, + "log_odds_ratio": -1.1324905244691763e-06, + "loss": 0.2961, + "rejected_geometric_mean": -16.652284622192383, + "step": 5106 + }, + { + "chosen_geometric_mean": -1.0803601741790771, + "epoch": 1.26, + "grad_norm": 11.9375, + "learning_rate": 1.4944947510746007e-06, + "log_odds": 4.31580924987793, + "log_odds_ratio": -0.24944712221622467, + "loss": 0.3262, + "rejected_geometric_mean": -5.076434135437012, + "step": 5107 + }, + { + "chosen_geometric_mean": -0.9759179949760437, + "epoch": 1.26, + "grad_norm": 34.25, + "learning_rate": 1.4936035630674445e-06, + "log_odds": 8.664056777954102, + "log_odds_ratio": -0.09979838877916336, + "loss": 0.302, + "rejected_geometric_mean": -9.208257675170898, + "step": 5108 + }, + { + "chosen_geometric_mean": -0.9643071293830872, + "epoch": 1.26, + "grad_norm": 57.25, + "learning_rate": 1.492712527654146e-06, + "log_odds": 8.218917846679688, + "log_odds_ratio": -0.012804619036614895, + "loss": 0.343, + "rejected_geometric_mean": -8.687036514282227, + "step": 5109 + }, + { + "chosen_geometric_mean": -0.8528753519058228, + "epoch": 1.27, + "grad_norm": 4.40625, + "learning_rate": 1.491821644969807e-06, + "log_odds": 8.228113174438477, + "log_odds_ratio": -0.1110977828502655, + "loss": 0.2346, + "rejected_geometric_mean": -8.595551490783691, + "step": 5110 + }, + { + "chosen_geometric_mean": -1.2360910177230835, + "epoch": 1.27, + "grad_norm": 2.015625, + "learning_rate": 1.4909309151495073e-06, + "log_odds": 3.896334648132324, + "log_odds_ratio": -0.18425264954566956, + "loss": 0.261, + "rejected_geometric_mean": -4.818280220031738, + "step": 5111 + }, + { + "chosen_geometric_mean": -0.8787314295768738, + "epoch": 1.27, + "grad_norm": 2.0, + "learning_rate": 1.4900403383283018e-06, + "log_odds": 12.873674392700195, + "log_odds_ratio": -0.1319095939397812, + "loss": 0.2581, + "rejected_geometric_mean": -13.320908546447754, + "step": 5112 + }, + { + "chosen_geometric_mean": -1.066331386566162, + "epoch": 1.27, + "grad_norm": 20.375, + "learning_rate": 1.4891499146412241e-06, + "log_odds": 4.445549488067627, + "log_odds_ratio": -0.08037090301513672, + "loss": 0.2552, + "rejected_geometric_mean": -5.133213996887207, + "step": 5113 + }, + { + "chosen_geometric_mean": -1.294625997543335, + "epoch": 1.27, + "grad_norm": 15.625, + "learning_rate": 1.4882596442232838e-06, + "log_odds": 8.053705215454102, + "log_odds_ratio": -0.027639687061309814, + "loss": 0.2594, + "rejected_geometric_mean": -9.026872634887695, + "step": 5114 + }, + { + "chosen_geometric_mean": -1.117274284362793, + "epoch": 1.27, + "grad_norm": 2.953125, + "learning_rate": 1.487369527209468e-06, + "log_odds": 10.593015670776367, + "log_odds_ratio": -0.00631773890927434, + "loss": 0.2989, + "rejected_geometric_mean": -11.312125205993652, + "step": 5115 + }, + { + "chosen_geometric_mean": -1.0016560554504395, + "epoch": 1.27, + "grad_norm": 22.625, + "learning_rate": 1.4864795637347381e-06, + "log_odds": 8.71842098236084, + "log_odds_ratio": -0.006153532769531012, + "loss": 0.2797, + "rejected_geometric_mean": -9.25663948059082, + "step": 5116 + }, + { + "chosen_geometric_mean": -0.9747797250747681, + "epoch": 1.27, + "grad_norm": 5.25, + "learning_rate": 1.4855897539340353e-06, + "log_odds": 11.931793212890625, + "log_odds_ratio": -0.04492635279893875, + "loss": 0.2963, + "rejected_geometric_mean": -12.43692398071289, + "step": 5117 + }, + { + "chosen_geometric_mean": -1.0387475490570068, + "epoch": 1.27, + "grad_norm": 23.0, + "learning_rate": 1.4847000979422752e-06, + "log_odds": 6.171431541442871, + "log_odds_ratio": -0.12173639982938766, + "loss": 0.3636, + "rejected_geometric_mean": -6.819034576416016, + "step": 5118 + }, + { + "chosen_geometric_mean": -1.3940098285675049, + "epoch": 1.27, + "grad_norm": 20.0, + "learning_rate": 1.483810595894351e-06, + "log_odds": 6.554758071899414, + "log_odds_ratio": -0.17877286672592163, + "loss": 0.2733, + "rejected_geometric_mean": -7.741032123565674, + "step": 5119 + }, + { + "chosen_geometric_mean": -0.9214503765106201, + "epoch": 1.27, + "grad_norm": 47.75, + "learning_rate": 1.4829212479251337e-06, + "log_odds": 4.346855163574219, + "log_odds_ratio": -0.20839618146419525, + "loss": 0.3048, + "rejected_geometric_mean": -4.844061374664307, + "step": 5120 + }, + { + "chosen_geometric_mean": -0.9330310821533203, + "epoch": 1.27, + "grad_norm": 2.78125, + "learning_rate": 1.4820320541694688e-06, + "log_odds": 9.999985694885254, + "log_odds_ratio": -0.004308219533413649, + "loss": 0.2184, + "rejected_geometric_mean": -10.392178535461426, + "step": 5121 + }, + { + "chosen_geometric_mean": -0.9567491412162781, + "epoch": 1.27, + "grad_norm": 39.0, + "learning_rate": 1.4811430147621802e-06, + "log_odds": 4.45109748840332, + "log_odds_ratio": -0.2186611145734787, + "loss": 0.3406, + "rejected_geometric_mean": -5.006242752075195, + "step": 5122 + }, + { + "chosen_geometric_mean": -1.0376176834106445, + "epoch": 1.27, + "grad_norm": 18.5, + "learning_rate": 1.4802541298380669e-06, + "log_odds": 11.364547729492188, + "log_odds_ratio": -0.11916477233171463, + "loss": 0.2852, + "rejected_geometric_mean": -12.0333251953125, + "step": 5123 + }, + { + "chosen_geometric_mean": -1.099663496017456, + "epoch": 1.27, + "grad_norm": 34.5, + "learning_rate": 1.479365399531904e-06, + "log_odds": 5.8862199783325195, + "log_odds_ratio": -0.1452719271183014, + "loss": 0.2549, + "rejected_geometric_mean": -6.634139060974121, + "step": 5124 + }, + { + "chosen_geometric_mean": -0.975055456161499, + "epoch": 1.27, + "grad_norm": 2.15625, + "learning_rate": 1.4784768239784478e-06, + "log_odds": 5.230935096740723, + "log_odds_ratio": -0.07328681647777557, + "loss": 0.2585, + "rejected_geometric_mean": -5.754274368286133, + "step": 5125 + }, + { + "chosen_geometric_mean": -1.0344610214233398, + "epoch": 1.27, + "grad_norm": 7.96875, + "learning_rate": 1.477588403312425e-06, + "log_odds": 8.57798957824707, + "log_odds_ratio": -0.10960282385349274, + "loss": 0.2491, + "rejected_geometric_mean": -9.253031730651855, + "step": 5126 + }, + { + "chosen_geometric_mean": -1.253369927406311, + "epoch": 1.27, + "grad_norm": 6.3125, + "learning_rate": 1.4767001376685419e-06, + "log_odds": 8.946374893188477, + "log_odds_ratio": -0.1970198154449463, + "loss": 0.3876, + "rejected_geometric_mean": -9.929605484008789, + "step": 5127 + }, + { + "chosen_geometric_mean": -1.1162091493606567, + "epoch": 1.27, + "grad_norm": 2.5, + "learning_rate": 1.4758120271814813e-06, + "log_odds": 4.098971843719482, + "log_odds_ratio": -0.37495413422584534, + "loss": 0.2339, + "rejected_geometric_mean": -5.019430160522461, + "step": 5128 + }, + { + "chosen_geometric_mean": -0.9919555187225342, + "epoch": 1.27, + "grad_norm": 37.0, + "learning_rate": 1.4749240719859021e-06, + "log_odds": 8.554407119750977, + "log_odds_ratio": -0.2219466269016266, + "loss": 0.3104, + "rejected_geometric_mean": -9.236204147338867, + "step": 5129 + }, + { + "chosen_geometric_mean": -1.1436119079589844, + "epoch": 1.27, + "grad_norm": 13.125, + "learning_rate": 1.4740362722164382e-06, + "log_odds": 11.626662254333496, + "log_odds_ratio": -0.0023028880823403597, + "loss": 0.3225, + "rejected_geometric_mean": -12.36105728149414, + "step": 5130 + }, + { + "chosen_geometric_mean": -1.0034695863723755, + "epoch": 1.27, + "grad_norm": 1.9921875, + "learning_rate": 1.4731486280077035e-06, + "log_odds": 8.446187973022461, + "log_odds_ratio": -0.10795363038778305, + "loss": 0.2455, + "rejected_geometric_mean": -9.043496131896973, + "step": 5131 + }, + { + "chosen_geometric_mean": -0.9713853597640991, + "epoch": 1.27, + "grad_norm": 3.40625, + "learning_rate": 1.472261139494286e-06, + "log_odds": 4.336805820465088, + "log_odds_ratio": -0.09447488933801651, + "loss": 0.2428, + "rejected_geometric_mean": -4.8777313232421875, + "step": 5132 + }, + { + "chosen_geometric_mean": -0.9061440229415894, + "epoch": 1.27, + "grad_norm": 3.5, + "learning_rate": 1.4713738068107487e-06, + "log_odds": 5.102183818817139, + "log_odds_ratio": -0.2926119565963745, + "loss": 0.2781, + "rejected_geometric_mean": -5.713157653808594, + "step": 5133 + }, + { + "chosen_geometric_mean": -0.8035174012184143, + "epoch": 1.27, + "grad_norm": 2.828125, + "learning_rate": 1.4704866300916334e-06, + "log_odds": 5.035759925842285, + "log_odds_ratio": -0.20414431393146515, + "loss": 0.2629, + "rejected_geometric_mean": -5.3662543296813965, + "step": 5134 + }, + { + "chosen_geometric_mean": -1.0879676342010498, + "epoch": 1.27, + "grad_norm": 7.75, + "learning_rate": 1.469599609471457e-06, + "log_odds": 9.016027450561523, + "log_odds_ratio": -0.1126324012875557, + "loss": 0.2251, + "rejected_geometric_mean": -9.75715446472168, + "step": 5135 + }, + { + "chosen_geometric_mean": -1.0673984289169312, + "epoch": 1.27, + "grad_norm": 3.84375, + "learning_rate": 1.4687127450847139e-06, + "log_odds": 6.45834493637085, + "log_odds_ratio": -0.39482006430625916, + "loss": 0.2758, + "rejected_geometric_mean": -7.384424209594727, + "step": 5136 + }, + { + "chosen_geometric_mean": -0.8529039621353149, + "epoch": 1.27, + "grad_norm": 24.375, + "learning_rate": 1.4678260370658734e-06, + "log_odds": 15.528828620910645, + "log_odds_ratio": -0.06384754180908203, + "loss": 0.2287, + "rejected_geometric_mean": -15.808463096618652, + "step": 5137 + }, + { + "chosen_geometric_mean": -0.891421377658844, + "epoch": 1.27, + "grad_norm": 13.75, + "learning_rate": 1.4669394855493827e-06, + "log_odds": 8.754404067993164, + "log_odds_ratio": -0.06217297166585922, + "loss": 0.2466, + "rejected_geometric_mean": -9.140848159790039, + "step": 5138 + }, + { + "chosen_geometric_mean": -0.9199277758598328, + "epoch": 1.27, + "grad_norm": 2.328125, + "learning_rate": 1.4660530906696627e-06, + "log_odds": 4.337741851806641, + "log_odds_ratio": -0.10139396786689758, + "loss": 0.2439, + "rejected_geometric_mean": -4.790954113006592, + "step": 5139 + }, + { + "chosen_geometric_mean": -0.9787785410881042, + "epoch": 1.27, + "grad_norm": 12.4375, + "learning_rate": 1.465166852561112e-06, + "log_odds": 10.413268089294434, + "log_odds_ratio": -0.0729013979434967, + "loss": 0.27, + "rejected_geometric_mean": -10.953248023986816, + "step": 5140 + }, + { + "chosen_geometric_mean": -0.980505645275116, + "epoch": 1.27, + "grad_norm": 5.71875, + "learning_rate": 1.464280771358108e-06, + "log_odds": 6.580655097961426, + "log_odds_ratio": -0.1080576479434967, + "loss": 0.2747, + "rejected_geometric_mean": -7.108793258666992, + "step": 5141 + }, + { + "chosen_geometric_mean": -1.1818733215332031, + "epoch": 1.27, + "grad_norm": 22.0, + "learning_rate": 1.463394847195e-06, + "log_odds": 6.765700340270996, + "log_odds_ratio": -0.10470162332057953, + "loss": 0.2642, + "rejected_geometric_mean": -7.539222717285156, + "step": 5142 + }, + { + "chosen_geometric_mean": -1.041468620300293, + "epoch": 1.27, + "grad_norm": 16.25, + "learning_rate": 1.4625090802061156e-06, + "log_odds": 10.954327583312988, + "log_odds_ratio": -0.16702231764793396, + "loss": 0.2761, + "rejected_geometric_mean": -11.63774585723877, + "step": 5143 + }, + { + "chosen_geometric_mean": -0.8974266648292542, + "epoch": 1.27, + "grad_norm": 6.03125, + "learning_rate": 1.4616234705257588e-06, + "log_odds": 13.655216217041016, + "log_odds_ratio": -0.21610207855701447, + "loss": 0.217, + "rejected_geometric_mean": -14.154247283935547, + "step": 5144 + }, + { + "chosen_geometric_mean": -0.970652163028717, + "epoch": 1.27, + "grad_norm": 11.4375, + "learning_rate": 1.4607380182882092e-06, + "log_odds": 9.83733081817627, + "log_odds_ratio": -0.09554292261600494, + "loss": 0.256, + "rejected_geometric_mean": -10.385431289672852, + "step": 5145 + }, + { + "chosen_geometric_mean": -0.9939133524894714, + "epoch": 1.27, + "grad_norm": 7.21875, + "learning_rate": 1.459852723627721e-06, + "log_odds": 7.026181697845459, + "log_odds_ratio": -0.016165295615792274, + "loss": 0.2833, + "rejected_geometric_mean": -7.55949592590332, + "step": 5146 + }, + { + "chosen_geometric_mean": -1.0195229053497314, + "epoch": 1.27, + "grad_norm": 8.6875, + "learning_rate": 1.4589675866785285e-06, + "log_odds": 8.036888122558594, + "log_odds_ratio": -0.015418143942952156, + "loss": 0.3218, + "rejected_geometric_mean": -8.605305671691895, + "step": 5147 + }, + { + "chosen_geometric_mean": -0.9227940440177917, + "epoch": 1.27, + "grad_norm": 8.125, + "learning_rate": 1.4580826075748382e-06, + "log_odds": 7.495697021484375, + "log_odds_ratio": -0.13323287665843964, + "loss": 0.3083, + "rejected_geometric_mean": -8.000239372253418, + "step": 5148 + }, + { + "chosen_geometric_mean": -0.9757641553878784, + "epoch": 1.27, + "grad_norm": 2.203125, + "learning_rate": 1.4571977864508358e-06, + "log_odds": 6.368471145629883, + "log_odds_ratio": -0.01882658340036869, + "loss": 0.2346, + "rejected_geometric_mean": -6.862606525421143, + "step": 5149 + }, + { + "chosen_geometric_mean": -1.087742567062378, + "epoch": 1.28, + "grad_norm": 1.75, + "learning_rate": 1.4563131234406801e-06, + "log_odds": 7.967977046966553, + "log_odds_ratio": -0.0821913629770279, + "loss": 0.1938, + "rejected_geometric_mean": -8.633710861206055, + "step": 5150 + }, + { + "chosen_geometric_mean": -1.1184324026107788, + "epoch": 1.28, + "grad_norm": 2.96875, + "learning_rate": 1.4554286186785055e-06, + "log_odds": 3.0208849906921387, + "log_odds_ratio": -0.42900803685188293, + "loss": 0.2589, + "rejected_geometric_mean": -3.964817762374878, + "step": 5151 + }, + { + "chosen_geometric_mean": -0.9849481582641602, + "epoch": 1.28, + "grad_norm": 2.953125, + "learning_rate": 1.4545442722984282e-06, + "log_odds": 1.3904768228530884, + "log_odds_ratio": -0.33132001757621765, + "loss": 0.2506, + "rejected_geometric_mean": -2.141871690750122, + "step": 5152 + }, + { + "chosen_geometric_mean": -0.8115172386169434, + "epoch": 1.28, + "grad_norm": 10.1875, + "learning_rate": 1.4536600844345342e-06, + "log_odds": 9.571269035339355, + "log_odds_ratio": -0.08938561379909515, + "loss": 0.2742, + "rejected_geometric_mean": -9.871793746948242, + "step": 5153 + }, + { + "chosen_geometric_mean": -0.8571381568908691, + "epoch": 1.28, + "grad_norm": 2.453125, + "learning_rate": 1.4527760552208867e-06, + "log_odds": 5.672183036804199, + "log_odds_ratio": -0.12078829109668732, + "loss": 0.2468, + "rejected_geometric_mean": -6.0386810302734375, + "step": 5154 + }, + { + "chosen_geometric_mean": -0.9464633464813232, + "epoch": 1.28, + "grad_norm": 2.40625, + "learning_rate": 1.4518921847915273e-06, + "log_odds": 4.751857757568359, + "log_odds_ratio": -0.2566007375717163, + "loss": 0.2287, + "rejected_geometric_mean": -5.3751397132873535, + "step": 5155 + }, + { + "chosen_geometric_mean": -0.9909077286720276, + "epoch": 1.28, + "grad_norm": 9.3125, + "learning_rate": 1.4510084732804711e-06, + "log_odds": 4.897761821746826, + "log_odds_ratio": -0.17370204627513885, + "loss": 0.2803, + "rejected_geometric_mean": -5.531906604766846, + "step": 5156 + }, + { + "chosen_geometric_mean": -0.8330958485603333, + "epoch": 1.28, + "grad_norm": 2.1875, + "learning_rate": 1.45012492082171e-06, + "log_odds": 15.290618896484375, + "log_odds_ratio": -3.3080757475545397e-06, + "loss": 0.2313, + "rejected_geometric_mean": -15.546490669250488, + "step": 5157 + }, + { + "chosen_geometric_mean": -0.7751819491386414, + "epoch": 1.28, + "grad_norm": 16.625, + "learning_rate": 1.4492415275492128e-06, + "log_odds": 12.675552368164062, + "log_odds_ratio": -0.06532878428697586, + "loss": 0.2979, + "rejected_geometric_mean": -12.843656539916992, + "step": 5158 + }, + { + "chosen_geometric_mean": -1.0192394256591797, + "epoch": 1.28, + "grad_norm": 6.9375, + "learning_rate": 1.4483582935969215e-06, + "log_odds": 4.405187606811523, + "log_odds_ratio": -0.31010502576828003, + "loss": 0.2415, + "rejected_geometric_mean": -5.187154293060303, + "step": 5159 + }, + { + "chosen_geometric_mean": -0.950122594833374, + "epoch": 1.28, + "grad_norm": 2.34375, + "learning_rate": 1.4474752190987574e-06, + "log_odds": 8.146782875061035, + "log_odds_ratio": -0.18311476707458496, + "loss": 0.2797, + "rejected_geometric_mean": -8.736427307128906, + "step": 5160 + }, + { + "chosen_geometric_mean": -1.0774974822998047, + "epoch": 1.28, + "grad_norm": 54.75, + "learning_rate": 1.4465923041886146e-06, + "log_odds": 16.37646484375, + "log_odds_ratio": -0.10757092386484146, + "loss": 0.2671, + "rejected_geometric_mean": -17.10187339782715, + "step": 5161 + }, + { + "chosen_geometric_mean": -0.894223690032959, + "epoch": 1.28, + "grad_norm": 1.796875, + "learning_rate": 1.4457095490003636e-06, + "log_odds": 8.564706802368164, + "log_odds_ratio": -0.20055457949638367, + "loss": 0.2117, + "rejected_geometric_mean": -9.082626342773438, + "step": 5162 + }, + { + "chosen_geometric_mean": -0.9431461095809937, + "epoch": 1.28, + "grad_norm": 5.75, + "learning_rate": 1.444826953667852e-06, + "log_odds": 1.1915514469146729, + "log_odds_ratio": -0.31917262077331543, + "loss": 0.2559, + "rejected_geometric_mean": -1.8649852275848389, + "step": 5163 + }, + { + "chosen_geometric_mean": -0.8535016775131226, + "epoch": 1.28, + "grad_norm": 9.375, + "learning_rate": 1.4439445183249034e-06, + "log_odds": 13.749125480651855, + "log_odds_ratio": -0.0001168658709502779, + "loss": 0.2639, + "rejected_geometric_mean": -14.027338981628418, + "step": 5164 + }, + { + "chosen_geometric_mean": -1.1168831586837769, + "epoch": 1.28, + "grad_norm": 2.640625, + "learning_rate": 1.4430622431053143e-06, + "log_odds": 5.2775468826293945, + "log_odds_ratio": -0.2236122041940689, + "loss": 0.2482, + "rejected_geometric_mean": -6.064120769500732, + "step": 5165 + }, + { + "chosen_geometric_mean": -0.9054524898529053, + "epoch": 1.28, + "grad_norm": 2.265625, + "learning_rate": 1.442180128142861e-06, + "log_odds": 9.001416206359863, + "log_odds_ratio": -0.07938161492347717, + "loss": 0.2617, + "rejected_geometric_mean": -9.44163990020752, + "step": 5166 + }, + { + "chosen_geometric_mean": -1.2082545757293701, + "epoch": 1.28, + "grad_norm": 2.25, + "learning_rate": 1.4412981735712923e-06, + "log_odds": 7.182443141937256, + "log_odds_ratio": -0.06263165175914764, + "loss": 0.2707, + "rejected_geometric_mean": -8.049274444580078, + "step": 5167 + }, + { + "chosen_geometric_mean": -0.6813540458679199, + "epoch": 1.28, + "grad_norm": 2.203125, + "learning_rate": 1.4404163795243308e-06, + "log_odds": 8.556097984313965, + "log_odds_ratio": -0.02611595019698143, + "loss": 0.2672, + "rejected_geometric_mean": -8.549967765808105, + "step": 5168 + }, + { + "chosen_geometric_mean": -1.127846121788025, + "epoch": 1.28, + "grad_norm": 2.734375, + "learning_rate": 1.4395347461356828e-06, + "log_odds": 15.877235412597656, + "log_odds_ratio": -0.13296201825141907, + "loss": 0.278, + "rejected_geometric_mean": -16.707956314086914, + "step": 5169 + }, + { + "chosen_geometric_mean": -1.1060614585876465, + "epoch": 1.28, + "grad_norm": 45.5, + "learning_rate": 1.4386532735390224e-06, + "log_odds": 4.734560489654541, + "log_odds_ratio": -0.18384207785129547, + "loss": 0.2529, + "rejected_geometric_mean": -5.502228736877441, + "step": 5170 + }, + { + "chosen_geometric_mean": -0.8461428880691528, + "epoch": 1.28, + "grad_norm": 27.0, + "learning_rate": 1.4377719618680013e-06, + "log_odds": 7.199221611022949, + "log_odds_ratio": -0.07845867425203323, + "loss": 0.2178, + "rejected_geometric_mean": -7.502035617828369, + "step": 5171 + }, + { + "chosen_geometric_mean": -1.098017930984497, + "epoch": 1.28, + "grad_norm": 2.609375, + "learning_rate": 1.4368908112562497e-06, + "log_odds": 4.847018718719482, + "log_odds_ratio": -0.0994943380355835, + "loss": 0.2945, + "rejected_geometric_mean": -5.597179889678955, + "step": 5172 + }, + { + "chosen_geometric_mean": -1.0186655521392822, + "epoch": 1.28, + "grad_norm": 15.75, + "learning_rate": 1.4360098218373686e-06, + "log_odds": 11.320229530334473, + "log_odds_ratio": -0.15373587608337402, + "loss": 0.3, + "rejected_geometric_mean": -11.974235534667969, + "step": 5173 + }, + { + "chosen_geometric_mean": -0.9773218631744385, + "epoch": 1.28, + "grad_norm": 11.4375, + "learning_rate": 1.4351289937449387e-06, + "log_odds": 13.12208080291748, + "log_odds_ratio": -0.0010594046907499433, + "loss": 0.2711, + "rejected_geometric_mean": -13.607015609741211, + "step": 5174 + }, + { + "chosen_geometric_mean": -0.9035020470619202, + "epoch": 1.28, + "grad_norm": 24.125, + "learning_rate": 1.4342483271125152e-06, + "log_odds": 5.957650184631348, + "log_odds_ratio": -0.08115693181753159, + "loss": 0.2303, + "rejected_geometric_mean": -6.39634895324707, + "step": 5175 + }, + { + "chosen_geometric_mean": -0.9882875680923462, + "epoch": 1.28, + "grad_norm": 30.875, + "learning_rate": 1.4333678220736276e-06, + "log_odds": 7.21058988571167, + "log_odds_ratio": -0.12099076062440872, + "loss": 0.2939, + "rejected_geometric_mean": -7.769957542419434, + "step": 5176 + }, + { + "chosen_geometric_mean": -0.9569228291511536, + "epoch": 1.28, + "grad_norm": 3.734375, + "learning_rate": 1.43248747876178e-06, + "log_odds": 5.343160629272461, + "log_odds_ratio": -0.3256581127643585, + "loss": 0.3197, + "rejected_geometric_mean": -5.980024814605713, + "step": 5177 + }, + { + "chosen_geometric_mean": -1.0795820951461792, + "epoch": 1.28, + "grad_norm": 5.375, + "learning_rate": 1.4316072973104567e-06, + "log_odds": 13.49941635131836, + "log_odds_ratio": -0.11454391479492188, + "loss": 0.2458, + "rejected_geometric_mean": -14.240026473999023, + "step": 5178 + }, + { + "chosen_geometric_mean": -1.0169024467468262, + "epoch": 1.28, + "grad_norm": 3.640625, + "learning_rate": 1.4307272778531118e-06, + "log_odds": 12.049520492553711, + "log_odds_ratio": -0.0013856318546459079, + "loss": 0.2148, + "rejected_geometric_mean": -12.61269474029541, + "step": 5179 + }, + { + "chosen_geometric_mean": -0.9350043535232544, + "epoch": 1.28, + "grad_norm": 14.125, + "learning_rate": 1.429847420523178e-06, + "log_odds": 6.847099304199219, + "log_odds_ratio": -0.1859998106956482, + "loss": 0.333, + "rejected_geometric_mean": -7.407360076904297, + "step": 5180 + }, + { + "chosen_geometric_mean": -0.892072319984436, + "epoch": 1.28, + "grad_norm": 4.71875, + "learning_rate": 1.428967725454064e-06, + "log_odds": 4.679309844970703, + "log_odds_ratio": -0.24353951215744019, + "loss": 0.258, + "rejected_geometric_mean": -5.204641819000244, + "step": 5181 + }, + { + "chosen_geometric_mean": -1.223768711090088, + "epoch": 1.28, + "grad_norm": 3.4375, + "learning_rate": 1.4280881927791512e-06, + "log_odds": 7.9330644607543945, + "log_odds_ratio": -0.005887910723686218, + "loss": 0.2323, + "rejected_geometric_mean": -8.808996200561523, + "step": 5182 + }, + { + "chosen_geometric_mean": -0.9610340595245361, + "epoch": 1.28, + "grad_norm": 4.96875, + "learning_rate": 1.4272088226317992e-06, + "log_odds": 2.173300266265869, + "log_odds_ratio": -0.32026466727256775, + "loss": 0.2678, + "rejected_geometric_mean": -2.8845224380493164, + "step": 5183 + }, + { + "chosen_geometric_mean": -1.2838760614395142, + "epoch": 1.28, + "grad_norm": 8.5625, + "learning_rate": 1.4263296151453398e-06, + "log_odds": 2.325234889984131, + "log_odds_ratio": -0.17260441184043884, + "loss": 0.2634, + "rejected_geometric_mean": -3.3403735160827637, + "step": 5184 + }, + { + "chosen_geometric_mean": -0.9559897184371948, + "epoch": 1.28, + "grad_norm": 9.125, + "learning_rate": 1.4254505704530845e-06, + "log_odds": 7.977889060974121, + "log_odds_ratio": -0.26262012124061584, + "loss": 0.2582, + "rejected_geometric_mean": -8.646034240722656, + "step": 5185 + }, + { + "chosen_geometric_mean": -1.5474908351898193, + "epoch": 1.28, + "grad_norm": 39.75, + "learning_rate": 1.4245716886883145e-06, + "log_odds": 5.378482818603516, + "log_odds_ratio": -0.09889781475067139, + "loss": 0.2913, + "rejected_geometric_mean": -6.500382423400879, + "step": 5186 + }, + { + "chosen_geometric_mean": -0.9332749247550964, + "epoch": 1.28, + "grad_norm": 3.859375, + "learning_rate": 1.4236929699842926e-06, + "log_odds": 3.5896363258361816, + "log_odds_ratio": -0.401376336812973, + "loss": 0.2796, + "rejected_geometric_mean": -4.321540355682373, + "step": 5187 + }, + { + "chosen_geometric_mean": -0.833986759185791, + "epoch": 1.28, + "grad_norm": 46.0, + "learning_rate": 1.4228144144742507e-06, + "log_odds": 10.574155807495117, + "log_odds_ratio": -0.12618352472782135, + "loss": 0.2392, + "rejected_geometric_mean": -10.913187026977539, + "step": 5188 + }, + { + "chosen_geometric_mean": -0.9616852402687073, + "epoch": 1.28, + "grad_norm": 2.359375, + "learning_rate": 1.421936022291401e-06, + "log_odds": 13.419374465942383, + "log_odds_ratio": -0.009700598195195198, + "loss": 0.2744, + "rejected_geometric_mean": -13.846368789672852, + "step": 5189 + }, + { + "chosen_geometric_mean": -1.0731654167175293, + "epoch": 1.28, + "grad_norm": 2.140625, + "learning_rate": 1.4210577935689274e-06, + "log_odds": 7.303254127502441, + "log_odds_ratio": -0.11305725574493408, + "loss": 0.2471, + "rejected_geometric_mean": -8.007257461547852, + "step": 5190 + }, + { + "chosen_geometric_mean": -0.9593824148178101, + "epoch": 1.29, + "grad_norm": 1.8984375, + "learning_rate": 1.420179728439991e-06, + "log_odds": 12.565784454345703, + "log_odds_ratio": -0.03424770385026932, + "loss": 0.2599, + "rejected_geometric_mean": -13.042670249938965, + "step": 5191 + }, + { + "chosen_geometric_mean": -1.2988171577453613, + "epoch": 1.29, + "grad_norm": 2.859375, + "learning_rate": 1.419301827037729e-06, + "log_odds": 11.595561027526855, + "log_odds_ratio": -0.009701814502477646, + "loss": 0.2809, + "rejected_geometric_mean": -12.525283813476562, + "step": 5192 + }, + { + "chosen_geometric_mean": -1.0528714656829834, + "epoch": 1.29, + "grad_norm": 2.03125, + "learning_rate": 1.4184240894952505e-06, + "log_odds": 7.829056262969971, + "log_odds_ratio": -0.1469251662492752, + "loss": 0.2665, + "rejected_geometric_mean": -8.527276992797852, + "step": 5193 + }, + { + "chosen_geometric_mean": -1.1389235258102417, + "epoch": 1.29, + "grad_norm": 2.953125, + "learning_rate": 1.4175465159456414e-06, + "log_odds": 3.681971549987793, + "log_odds_ratio": -0.2372514307498932, + "loss": 0.288, + "rejected_geometric_mean": -4.548958778381348, + "step": 5194 + }, + { + "chosen_geometric_mean": -1.1697051525115967, + "epoch": 1.29, + "grad_norm": 4.3125, + "learning_rate": 1.4166691065219638e-06, + "log_odds": 5.020849227905273, + "log_odds_ratio": -0.04864974319934845, + "loss": 0.3235, + "rejected_geometric_mean": -5.837635517120361, + "step": 5195 + }, + { + "chosen_geometric_mean": -1.2607886791229248, + "epoch": 1.29, + "grad_norm": 8.0625, + "learning_rate": 1.4157918613572546e-06, + "log_odds": 3.287346363067627, + "log_odds_ratio": -0.45242390036582947, + "loss": 0.2765, + "rejected_geometric_mean": -4.341607093811035, + "step": 5196 + }, + { + "chosen_geometric_mean": -0.8568505644798279, + "epoch": 1.29, + "grad_norm": 4.0625, + "learning_rate": 1.414914780584523e-06, + "log_odds": 7.914855480194092, + "log_odds_ratio": -0.40328800678253174, + "loss": 0.2894, + "rejected_geometric_mean": -8.469112396240234, + "step": 5197 + }, + { + "chosen_geometric_mean": -1.091223120689392, + "epoch": 1.29, + "grad_norm": 40.0, + "learning_rate": 1.4140378643367586e-06, + "log_odds": 3.6741347312927246, + "log_odds_ratio": -0.06614229083061218, + "loss": 0.2576, + "rejected_geometric_mean": -4.387161731719971, + "step": 5198 + }, + { + "chosen_geometric_mean": -1.1505054235458374, + "epoch": 1.29, + "grad_norm": 48.0, + "learning_rate": 1.4131611127469213e-06, + "log_odds": 8.347311973571777, + "log_odds_ratio": -0.09965036809444427, + "loss": 0.3615, + "rejected_geometric_mean": -9.156344413757324, + "step": 5199 + }, + { + "chosen_geometric_mean": -1.14701509475708, + "epoch": 1.29, + "grad_norm": 4.875, + "learning_rate": 1.4122845259479466e-06, + "log_odds": 2.5644314289093018, + "log_odds_ratio": -0.33590924739837646, + "loss": 0.3037, + "rejected_geometric_mean": -3.506549835205078, + "step": 5200 + }, + { + "chosen_geometric_mean": -0.9141066074371338, + "epoch": 1.29, + "grad_norm": 23.25, + "learning_rate": 1.4114081040727471e-06, + "log_odds": 0.8365395069122314, + "log_odds_ratio": -0.3627924621105194, + "loss": 0.2743, + "rejected_geometric_mean": -1.493959903717041, + "step": 5201 + }, + { + "chosen_geometric_mean": -0.9953212738037109, + "epoch": 1.29, + "grad_norm": 9.4375, + "learning_rate": 1.4105318472542106e-06, + "log_odds": 8.359129905700684, + "log_odds_ratio": -0.04071209952235222, + "loss": 0.2608, + "rejected_geometric_mean": -8.90274429321289, + "step": 5202 + }, + { + "chosen_geometric_mean": -1.0491291284561157, + "epoch": 1.29, + "grad_norm": 15.9375, + "learning_rate": 1.4096557556251964e-06, + "log_odds": 9.010848999023438, + "log_odds_ratio": -0.21590760350227356, + "loss": 0.2196, + "rejected_geometric_mean": -9.720717430114746, + "step": 5203 + }, + { + "chosen_geometric_mean": -1.1553971767425537, + "epoch": 1.29, + "grad_norm": 1.96875, + "learning_rate": 1.4087798293185434e-06, + "log_odds": 13.340802192687988, + "log_odds_ratio": -0.11678481101989746, + "loss": 0.2678, + "rejected_geometric_mean": -14.176767349243164, + "step": 5204 + }, + { + "chosen_geometric_mean": -0.7517720460891724, + "epoch": 1.29, + "grad_norm": 21.375, + "learning_rate": 1.4079040684670606e-06, + "log_odds": 14.064030647277832, + "log_odds_ratio": -3.874556568916887e-05, + "loss": 0.2339, + "rejected_geometric_mean": -14.170940399169922, + "step": 5205 + }, + { + "chosen_geometric_mean": -0.8964884281158447, + "epoch": 1.29, + "grad_norm": 22.75, + "learning_rate": 1.4070284732035355e-06, + "log_odds": 13.530098915100098, + "log_odds_ratio": -0.03333643078804016, + "loss": 0.2747, + "rejected_geometric_mean": -13.908493041992188, + "step": 5206 + }, + { + "chosen_geometric_mean": -0.9543573260307312, + "epoch": 1.29, + "grad_norm": 6.1875, + "learning_rate": 1.4061530436607306e-06, + "log_odds": 4.893047332763672, + "log_odds_ratio": -0.33372384309768677, + "loss": 0.2718, + "rejected_geometric_mean": -5.593125343322754, + "step": 5207 + }, + { + "chosen_geometric_mean": -1.3789117336273193, + "epoch": 1.29, + "grad_norm": 14.4375, + "learning_rate": 1.4052777799713802e-06, + "log_odds": 4.877983093261719, + "log_odds_ratio": -0.024034807458519936, + "loss": 0.3288, + "rejected_geometric_mean": -5.970982074737549, + "step": 5208 + }, + { + "chosen_geometric_mean": -1.024977684020996, + "epoch": 1.29, + "grad_norm": 3.625, + "learning_rate": 1.4044026822681967e-06, + "log_odds": 3.671272039413452, + "log_odds_ratio": -0.12763701379299164, + "loss": 0.2831, + "rejected_geometric_mean": -4.316895484924316, + "step": 5209 + }, + { + "chosen_geometric_mean": -1.0178158283233643, + "epoch": 1.29, + "grad_norm": 9.3125, + "learning_rate": 1.4035277506838652e-06, + "log_odds": 4.724637031555176, + "log_odds_ratio": -0.10762202739715576, + "loss": 0.2521, + "rejected_geometric_mean": -5.36838436126709, + "step": 5210 + }, + { + "chosen_geometric_mean": -1.2229810953140259, + "epoch": 1.29, + "grad_norm": 31.125, + "learning_rate": 1.4026529853510453e-06, + "log_odds": 11.457919120788574, + "log_odds_ratio": -0.05675377696752548, + "loss": 0.2625, + "rejected_geometric_mean": -12.328682899475098, + "step": 5211 + }, + { + "chosen_geometric_mean": -0.9643677473068237, + "epoch": 1.29, + "grad_norm": 2.625, + "learning_rate": 1.4017783864023737e-06, + "log_odds": 7.2371015548706055, + "log_odds_ratio": -0.2238699048757553, + "loss": 0.2164, + "rejected_geometric_mean": -7.867583274841309, + "step": 5212 + }, + { + "chosen_geometric_mean": -0.8204896450042725, + "epoch": 1.29, + "grad_norm": 3.3125, + "learning_rate": 1.4009039539704612e-06, + "log_odds": 8.0054292678833, + "log_odds_ratio": -0.2818913757801056, + "loss": 0.249, + "rejected_geometric_mean": -8.477027893066406, + "step": 5213 + }, + { + "chosen_geometric_mean": -0.839787483215332, + "epoch": 1.29, + "grad_norm": 2.65625, + "learning_rate": 1.4000296881878906e-06, + "log_odds": 6.369910717010498, + "log_odds_ratio": -0.11628496646881104, + "loss": 0.2405, + "rejected_geometric_mean": -6.739992141723633, + "step": 5214 + }, + { + "chosen_geometric_mean": -1.038098692893982, + "epoch": 1.29, + "grad_norm": 3.1875, + "learning_rate": 1.3991555891872244e-06, + "log_odds": 5.283071041107178, + "log_odds_ratio": -0.22544153034687042, + "loss": 0.2659, + "rejected_geometric_mean": -6.007637023925781, + "step": 5215 + }, + { + "chosen_geometric_mean": -1.1729239225387573, + "epoch": 1.29, + "grad_norm": 42.0, + "learning_rate": 1.398281657100995e-06, + "log_odds": 0.7841984033584595, + "log_odds_ratio": -0.41990238428115845, + "loss": 0.3339, + "rejected_geometric_mean": -1.8045928478240967, + "step": 5216 + }, + { + "chosen_geometric_mean": -1.0765271186828613, + "epoch": 1.29, + "grad_norm": 11.4375, + "learning_rate": 1.3974078920617102e-06, + "log_odds": 13.428725242614746, + "log_odds_ratio": -0.0011153121013194323, + "loss": 0.2465, + "rejected_geometric_mean": -14.066793441772461, + "step": 5217 + }, + { + "chosen_geometric_mean": -1.0129088163375854, + "epoch": 1.29, + "grad_norm": 7.75, + "learning_rate": 1.3965342942018573e-06, + "log_odds": 7.548373222351074, + "log_odds_ratio": -0.2585071921348572, + "loss": 0.2617, + "rejected_geometric_mean": -8.08810043334961, + "step": 5218 + }, + { + "chosen_geometric_mean": -1.1499258279800415, + "epoch": 1.29, + "grad_norm": 4.84375, + "learning_rate": 1.395660863653893e-06, + "log_odds": 4.603860378265381, + "log_odds_ratio": -0.35324978828430176, + "loss": 0.2722, + "rejected_geometric_mean": -5.549216270446777, + "step": 5219 + }, + { + "chosen_geometric_mean": -0.7716786861419678, + "epoch": 1.29, + "grad_norm": 3.328125, + "learning_rate": 1.3947876005502492e-06, + "log_odds": 8.147595405578613, + "log_odds_ratio": -0.1632899045944214, + "loss": 0.2513, + "rejected_geometric_mean": -8.433428764343262, + "step": 5220 + }, + { + "chosen_geometric_mean": -1.2048134803771973, + "epoch": 1.29, + "grad_norm": 6.0, + "learning_rate": 1.3939145050233354e-06, + "log_odds": 10.219880104064941, + "log_odds_ratio": -0.07557639479637146, + "loss": 0.2232, + "rejected_geometric_mean": -11.028335571289062, + "step": 5221 + }, + { + "chosen_geometric_mean": -1.064314603805542, + "epoch": 1.29, + "grad_norm": 8.8125, + "learning_rate": 1.3930415772055325e-06, + "log_odds": 10.876934051513672, + "log_odds_ratio": -0.15668104588985443, + "loss": 0.2834, + "rejected_geometric_mean": -11.563384056091309, + "step": 5222 + }, + { + "chosen_geometric_mean": -0.7617123126983643, + "epoch": 1.29, + "grad_norm": 1.8359375, + "learning_rate": 1.3921688172291974e-06, + "log_odds": 8.095793724060059, + "log_odds_ratio": -0.23398809134960175, + "loss": 0.2434, + "rejected_geometric_mean": -8.439711570739746, + "step": 5223 + }, + { + "chosen_geometric_mean": -0.8622814416885376, + "epoch": 1.29, + "grad_norm": 2.953125, + "learning_rate": 1.3912962252266631e-06, + "log_odds": 2.860380172729492, + "log_odds_ratio": -0.22728005051612854, + "loss": 0.2382, + "rejected_geometric_mean": -3.332784652709961, + "step": 5224 + }, + { + "chosen_geometric_mean": -0.9220983386039734, + "epoch": 1.29, + "grad_norm": 2.828125, + "learning_rate": 1.3904238013302343e-06, + "log_odds": 8.858820915222168, + "log_odds_ratio": -0.06884904950857162, + "loss": 0.2752, + "rejected_geometric_mean": -9.317258834838867, + "step": 5225 + }, + { + "chosen_geometric_mean": -1.140263557434082, + "epoch": 1.29, + "grad_norm": 2.6875, + "learning_rate": 1.3895515456721904e-06, + "log_odds": 5.328268051147461, + "log_odds_ratio": -0.22040992975234985, + "loss": 0.361, + "rejected_geometric_mean": -6.121588230133057, + "step": 5226 + }, + { + "chosen_geometric_mean": -0.8242460489273071, + "epoch": 1.29, + "grad_norm": 3.609375, + "learning_rate": 1.3886794583847884e-06, + "log_odds": 6.027073860168457, + "log_odds_ratio": -0.1683182269334793, + "loss": 0.2601, + "rejected_geometric_mean": -6.362383842468262, + "step": 5227 + }, + { + "chosen_geometric_mean": -1.0584927797317505, + "epoch": 1.29, + "grad_norm": 2.1875, + "learning_rate": 1.3878075396002552e-06, + "log_odds": 4.72613000869751, + "log_odds_ratio": -0.01935688406229019, + "loss": 0.278, + "rejected_geometric_mean": -5.359884262084961, + "step": 5228 + }, + { + "chosen_geometric_mean": -1.0674488544464111, + "epoch": 1.29, + "grad_norm": 2.578125, + "learning_rate": 1.3869357894507962e-06, + "log_odds": 12.076679229736328, + "log_odds_ratio": -0.006850349251180887, + "loss": 0.2307, + "rejected_geometric_mean": -12.699542999267578, + "step": 5229 + }, + { + "chosen_geometric_mean": -1.02029550075531, + "epoch": 1.29, + "grad_norm": 3.328125, + "learning_rate": 1.3860642080685905e-06, + "log_odds": 7.504617691040039, + "log_odds_ratio": -0.02728695422410965, + "loss": 0.2485, + "rejected_geometric_mean": -8.073981285095215, + "step": 5230 + }, + { + "chosen_geometric_mean": -1.2585458755493164, + "epoch": 1.3, + "grad_norm": 9.4375, + "learning_rate": 1.3851927955857886e-06, + "log_odds": 6.135540008544922, + "log_odds_ratio": -0.042530231177806854, + "loss": 0.2728, + "rejected_geometric_mean": -7.04296875, + "step": 5231 + }, + { + "chosen_geometric_mean": -1.4167749881744385, + "epoch": 1.3, + "grad_norm": 24.625, + "learning_rate": 1.3843215521345193e-06, + "log_odds": 5.728838920593262, + "log_odds_ratio": -0.22865036129951477, + "loss": 0.2879, + "rejected_geometric_mean": -6.958642482757568, + "step": 5232 + }, + { + "chosen_geometric_mean": -0.9916955828666687, + "epoch": 1.3, + "grad_norm": 2.421875, + "learning_rate": 1.3834504778468826e-06, + "log_odds": 9.940839767456055, + "log_odds_ratio": -0.15999352931976318, + "loss": 0.2445, + "rejected_geometric_mean": -10.563692092895508, + "step": 5233 + }, + { + "chosen_geometric_mean": -1.9964537620544434, + "epoch": 1.3, + "grad_norm": 13.8125, + "learning_rate": 1.3825795728549552e-06, + "log_odds": 6.844610691070557, + "log_odds_ratio": -0.14644013345241547, + "loss": 0.2929, + "rejected_geometric_mean": -8.348468780517578, + "step": 5234 + }, + { + "chosen_geometric_mean": -0.8370996713638306, + "epoch": 1.3, + "grad_norm": 19.875, + "learning_rate": 1.3817088372907881e-06, + "log_odds": 6.906651020050049, + "log_odds_ratio": -0.11519160866737366, + "loss": 0.3402, + "rejected_geometric_mean": -7.231800556182861, + "step": 5235 + }, + { + "chosen_geometric_mean": -1.136561632156372, + "epoch": 1.3, + "grad_norm": 2.703125, + "learning_rate": 1.3808382712864043e-06, + "log_odds": 7.185792922973633, + "log_odds_ratio": -0.05661585181951523, + "loss": 0.2081, + "rejected_geometric_mean": -7.960239887237549, + "step": 5236 + }, + { + "chosen_geometric_mean": -0.9790704250335693, + "epoch": 1.3, + "grad_norm": 14.75, + "learning_rate": 1.379967874973802e-06, + "log_odds": 4.9882588386535645, + "log_odds_ratio": -0.2525675296783447, + "loss": 0.2657, + "rejected_geometric_mean": -5.670087814331055, + "step": 5237 + }, + { + "chosen_geometric_mean": -0.9274409413337708, + "epoch": 1.3, + "grad_norm": 2.515625, + "learning_rate": 1.379097648484956e-06, + "log_odds": 9.099637031555176, + "log_odds_ratio": -0.1084543913602829, + "loss": 0.2699, + "rejected_geometric_mean": -9.593781471252441, + "step": 5238 + }, + { + "chosen_geometric_mean": -0.8942440748214722, + "epoch": 1.3, + "grad_norm": 2.75, + "learning_rate": 1.3782275919518116e-06, + "log_odds": 8.585325241088867, + "log_odds_ratio": -0.019177617505192757, + "loss": 0.23, + "rejected_geometric_mean": -8.945013046264648, + "step": 5239 + }, + { + "chosen_geometric_mean": -1.1372474431991577, + "epoch": 1.3, + "grad_norm": 10.875, + "learning_rate": 1.3773577055062908e-06, + "log_odds": 5.681398391723633, + "log_odds_ratio": -0.24317944049835205, + "loss": 0.2544, + "rejected_geometric_mean": -6.538680553436279, + "step": 5240 + }, + { + "chosen_geometric_mean": -0.926089882850647, + "epoch": 1.3, + "grad_norm": 3.3125, + "learning_rate": 1.3764879892802913e-06, + "log_odds": 6.228503227233887, + "log_odds_ratio": -0.06754186004400253, + "loss": 0.275, + "rejected_geometric_mean": -6.655762672424316, + "step": 5241 + }, + { + "chosen_geometric_mean": -0.8455789685249329, + "epoch": 1.3, + "grad_norm": 3.1875, + "learning_rate": 1.3756184434056808e-06, + "log_odds": 8.153942108154297, + "log_odds_ratio": -0.013187306933104992, + "loss": 0.2531, + "rejected_geometric_mean": -8.425155639648438, + "step": 5242 + }, + { + "chosen_geometric_mean": -0.9609505534172058, + "epoch": 1.3, + "grad_norm": 116.5, + "learning_rate": 1.3747490680143033e-06, + "log_odds": 4.129519939422607, + "log_odds_ratio": -0.3583626449108124, + "loss": 0.2562, + "rejected_geometric_mean": -4.852717876434326, + "step": 5243 + }, + { + "chosen_geometric_mean": -0.8336217999458313, + "epoch": 1.3, + "grad_norm": 4.875, + "learning_rate": 1.373879863237977e-06, + "log_odds": 11.736936569213867, + "log_odds_ratio": -0.17194822430610657, + "loss": 0.2345, + "rejected_geometric_mean": -12.06397819519043, + "step": 5244 + }, + { + "chosen_geometric_mean": -1.0132553577423096, + "epoch": 1.3, + "grad_norm": 6.3125, + "learning_rate": 1.373010829208496e-06, + "log_odds": 5.515261173248291, + "log_odds_ratio": -0.038079023361206055, + "loss": 0.2444, + "rejected_geometric_mean": -6.093072414398193, + "step": 5245 + }, + { + "chosen_geometric_mean": -0.9366610050201416, + "epoch": 1.3, + "grad_norm": 13.75, + "learning_rate": 1.3721419660576246e-06, + "log_odds": 1.051830530166626, + "log_odds_ratio": -0.43002232909202576, + "loss": 0.2657, + "rejected_geometric_mean": -1.841391682624817, + "step": 5246 + }, + { + "chosen_geometric_mean": -1.1369984149932861, + "epoch": 1.3, + "grad_norm": 3.515625, + "learning_rate": 1.3712732739171048e-06, + "log_odds": 5.596907138824463, + "log_odds_ratio": -0.11280637979507446, + "loss": 0.2602, + "rejected_geometric_mean": -6.416701316833496, + "step": 5247 + }, + { + "chosen_geometric_mean": -0.8635257482528687, + "epoch": 1.3, + "grad_norm": 2.09375, + "learning_rate": 1.37040475291865e-06, + "log_odds": 3.8695263862609863, + "log_odds_ratio": -0.1422848403453827, + "loss": 0.2655, + "rejected_geometric_mean": -4.31261682510376, + "step": 5248 + }, + { + "chosen_geometric_mean": -1.1816043853759766, + "epoch": 1.3, + "grad_norm": 180.0, + "learning_rate": 1.3695364031939498e-06, + "log_odds": 13.395709991455078, + "log_odds_ratio": -0.19981113076210022, + "loss": 0.3288, + "rejected_geometric_mean": -14.27684211730957, + "step": 5249 + }, + { + "chosen_geometric_mean": -1.975220799446106, + "epoch": 1.3, + "grad_norm": 30.25, + "learning_rate": 1.3686682248746658e-06, + "log_odds": 12.218926429748535, + "log_odds_ratio": -0.0055508301593363285, + "loss": 0.2568, + "rejected_geometric_mean": -13.851430892944336, + "step": 5250 + }, + { + "chosen_geometric_mean": -0.8892708420753479, + "epoch": 1.3, + "grad_norm": 3.5625, + "learning_rate": 1.367800218092436e-06, + "log_odds": 15.057799339294434, + "log_odds_ratio": -0.07061552256345749, + "loss": 0.2479, + "rejected_geometric_mean": -15.419387817382812, + "step": 5251 + }, + { + "chosen_geometric_mean": -1.1667088270187378, + "epoch": 1.3, + "grad_norm": 29.25, + "learning_rate": 1.36693238297887e-06, + "log_odds": 3.536217212677002, + "log_odds_ratio": -0.23064279556274414, + "loss": 0.2941, + "rejected_geometric_mean": -4.418420314788818, + "step": 5252 + }, + { + "chosen_geometric_mean": -0.989761233329773, + "epoch": 1.3, + "grad_norm": 38.25, + "learning_rate": 1.366064719665553e-06, + "log_odds": 5.298108100891113, + "log_odds_ratio": -0.2571313977241516, + "loss": 0.2674, + "rejected_geometric_mean": -5.965870380401611, + "step": 5253 + }, + { + "chosen_geometric_mean": -1.4744750261306763, + "epoch": 1.3, + "grad_norm": 54.75, + "learning_rate": 1.3651972282840432e-06, + "log_odds": 7.187117099761963, + "log_odds_ratio": -0.08949171751737595, + "loss": 0.3181, + "rejected_geometric_mean": -8.346464157104492, + "step": 5254 + }, + { + "chosen_geometric_mean": -1.0520926713943481, + "epoch": 1.3, + "grad_norm": 11.8125, + "learning_rate": 1.3643299089658726e-06, + "log_odds": 3.823838710784912, + "log_odds_ratio": -0.13029621541500092, + "loss": 0.2462, + "rejected_geometric_mean": -4.495021343231201, + "step": 5255 + }, + { + "chosen_geometric_mean": -0.8794914484024048, + "epoch": 1.3, + "grad_norm": 18.75, + "learning_rate": 1.3634627618425495e-06, + "log_odds": 6.464596748352051, + "log_odds_ratio": -0.22760702669620514, + "loss": 0.2592, + "rejected_geometric_mean": -6.949864387512207, + "step": 5256 + }, + { + "chosen_geometric_mean": -1.0663682222366333, + "epoch": 1.3, + "grad_norm": 2.625, + "learning_rate": 1.3625957870455525e-06, + "log_odds": 10.693436622619629, + "log_odds_ratio": -0.11068867146968842, + "loss": 0.2767, + "rejected_geometric_mean": -11.410158157348633, + "step": 5257 + }, + { + "chosen_geometric_mean": -0.8917797803878784, + "epoch": 1.3, + "grad_norm": 11.1875, + "learning_rate": 1.3617289847063369e-06, + "log_odds": 16.053516387939453, + "log_odds_ratio": -0.14383037388324738, + "loss": 0.2375, + "rejected_geometric_mean": -16.513553619384766, + "step": 5258 + }, + { + "chosen_geometric_mean": -1.131302833557129, + "epoch": 1.3, + "grad_norm": 21.875, + "learning_rate": 1.3608623549563303e-06, + "log_odds": 8.919454574584961, + "log_odds_ratio": -0.09351810812950134, + "loss": 0.2719, + "rejected_geometric_mean": -9.685213088989258, + "step": 5259 + }, + { + "chosen_geometric_mean": -1.1085519790649414, + "epoch": 1.3, + "grad_norm": 4.90625, + "learning_rate": 1.3599958979269335e-06, + "log_odds": 6.9144158363342285, + "log_odds_ratio": -0.36201462149620056, + "loss": 0.3217, + "rejected_geometric_mean": -7.782519340515137, + "step": 5260 + }, + { + "chosen_geometric_mean": -0.8783911466598511, + "epoch": 1.3, + "grad_norm": 6.9375, + "learning_rate": 1.359129613749523e-06, + "log_odds": 12.319807052612305, + "log_odds_ratio": -0.07393039762973785, + "loss": 0.2585, + "rejected_geometric_mean": -12.690034866333008, + "step": 5261 + }, + { + "chosen_geometric_mean": -1.1892998218536377, + "epoch": 1.3, + "grad_norm": 7.6875, + "learning_rate": 1.3582635025554494e-06, + "log_odds": 8.318694114685059, + "log_odds_ratio": -0.038724977523088455, + "loss": 0.2434, + "rejected_geometric_mean": -9.111078262329102, + "step": 5262 + }, + { + "chosen_geometric_mean": -1.2820184230804443, + "epoch": 1.3, + "grad_norm": 3.84375, + "learning_rate": 1.357397564476034e-06, + "log_odds": 10.963340759277344, + "log_odds_ratio": -0.009063497185707092, + "loss": 0.3107, + "rejected_geometric_mean": -11.916447639465332, + "step": 5263 + }, + { + "chosen_geometric_mean": -0.8452396988868713, + "epoch": 1.3, + "grad_norm": 40.75, + "learning_rate": 1.3565317996425758e-06, + "log_odds": 9.032354354858398, + "log_odds_ratio": -0.03158218041062355, + "loss": 0.258, + "rejected_geometric_mean": -9.340788841247559, + "step": 5264 + }, + { + "chosen_geometric_mean": -0.8119906187057495, + "epoch": 1.3, + "grad_norm": 22.125, + "learning_rate": 1.3556662081863447e-06, + "log_odds": 7.834280967712402, + "log_odds_ratio": -0.08240486681461334, + "loss": 0.2275, + "rejected_geometric_mean": -8.088571548461914, + "step": 5265 + }, + { + "chosen_geometric_mean": -1.0184004306793213, + "epoch": 1.3, + "grad_norm": 2.59375, + "learning_rate": 1.3548007902385825e-06, + "log_odds": 10.208277702331543, + "log_odds_ratio": -0.14318011701107025, + "loss": 0.257, + "rejected_geometric_mean": -10.813204765319824, + "step": 5266 + }, + { + "chosen_geometric_mean": -0.9632017612457275, + "epoch": 1.3, + "grad_norm": 21.375, + "learning_rate": 1.353935545930512e-06, + "log_odds": 8.016364097595215, + "log_odds_ratio": -0.21986114978790283, + "loss": 0.3029, + "rejected_geometric_mean": -8.666611671447754, + "step": 5267 + }, + { + "chosen_geometric_mean": -0.9353927373886108, + "epoch": 1.3, + "grad_norm": 5.75, + "learning_rate": 1.3530704753933229e-06, + "log_odds": 8.092676162719727, + "log_odds_ratio": -0.07888823747634888, + "loss": 0.2737, + "rejected_geometric_mean": -8.58881950378418, + "step": 5268 + }, + { + "chosen_geometric_mean": -1.060024380683899, + "epoch": 1.3, + "grad_norm": 24.375, + "learning_rate": 1.3522055787581795e-06, + "log_odds": 3.1468636989593506, + "log_odds_ratio": -0.2116013914346695, + "loss": 0.3091, + "rejected_geometric_mean": -3.823273181915283, + "step": 5269 + }, + { + "chosen_geometric_mean": -0.9153019785881042, + "epoch": 1.3, + "grad_norm": 11.8125, + "learning_rate": 1.3513408561562228e-06, + "log_odds": 4.308934688568115, + "log_odds_ratio": -0.24369989335536957, + "loss": 0.2793, + "rejected_geometric_mean": -4.887196063995361, + "step": 5270 + }, + { + "chosen_geometric_mean": -0.7919629812240601, + "epoch": 1.31, + "grad_norm": 1.859375, + "learning_rate": 1.3504763077185636e-06, + "log_odds": 6.506556987762451, + "log_odds_ratio": -0.045535676181316376, + "loss": 0.2095, + "rejected_geometric_mean": -6.73164701461792, + "step": 5271 + }, + { + "chosen_geometric_mean": -0.888944149017334, + "epoch": 1.31, + "grad_norm": 15.0625, + "learning_rate": 1.3496119335762892e-06, + "log_odds": 1.9653834104537964, + "log_odds_ratio": -0.26871490478515625, + "loss": 0.278, + "rejected_geometric_mean": -2.541926383972168, + "step": 5272 + }, + { + "chosen_geometric_mean": -0.924523651599884, + "epoch": 1.31, + "grad_norm": 17.25, + "learning_rate": 1.3487477338604607e-06, + "log_odds": 10.359125137329102, + "log_odds_ratio": -0.1322929561138153, + "loss": 0.2669, + "rejected_geometric_mean": -10.849374771118164, + "step": 5273 + }, + { + "chosen_geometric_mean": -1.3675637245178223, + "epoch": 1.31, + "grad_norm": 3.46875, + "learning_rate": 1.34788370870211e-06, + "log_odds": 1.2004817724227905, + "log_odds_ratio": -0.5501244068145752, + "loss": 0.2873, + "rejected_geometric_mean": -2.3960366249084473, + "step": 5274 + }, + { + "chosen_geometric_mean": -0.9055162668228149, + "epoch": 1.31, + "grad_norm": 3.109375, + "learning_rate": 1.3470198582322435e-06, + "log_odds": 8.919718742370605, + "log_odds_ratio": -0.018910184502601624, + "loss": 0.2456, + "rejected_geometric_mean": -9.281936645507812, + "step": 5275 + }, + { + "chosen_geometric_mean": -0.8936887979507446, + "epoch": 1.31, + "grad_norm": 46.25, + "learning_rate": 1.3461561825818437e-06, + "log_odds": 8.754057884216309, + "log_odds_ratio": -0.22290563583374023, + "loss": 0.2886, + "rejected_geometric_mean": -9.25644588470459, + "step": 5276 + }, + { + "chosen_geometric_mean": -0.6666915416717529, + "epoch": 1.31, + "grad_norm": 62.5, + "learning_rate": 1.3452926818818624e-06, + "log_odds": 15.419496536254883, + "log_odds_ratio": -2.327659422007855e-05, + "loss": 0.3274, + "rejected_geometric_mean": -15.357425689697266, + "step": 5277 + }, + { + "chosen_geometric_mean": -1.1057472229003906, + "epoch": 1.31, + "grad_norm": 10.125, + "learning_rate": 1.344429356263228e-06, + "log_odds": 4.525928497314453, + "log_odds_ratio": -0.21475598216056824, + "loss": 0.4148, + "rejected_geometric_mean": -5.333470821380615, + "step": 5278 + }, + { + "chosen_geometric_mean": -0.969649612903595, + "epoch": 1.31, + "grad_norm": 5.28125, + "learning_rate": 1.343566205856842e-06, + "log_odds": 12.055816650390625, + "log_odds_ratio": -0.126238152384758, + "loss": 0.235, + "rejected_geometric_mean": -12.622751235961914, + "step": 5279 + }, + { + "chosen_geometric_mean": -0.8941445350646973, + "epoch": 1.31, + "grad_norm": 4.3125, + "learning_rate": 1.3427032307935777e-06, + "log_odds": 8.655542373657227, + "log_odds_ratio": -0.11496379971504211, + "loss": 0.4025, + "rejected_geometric_mean": -9.058749198913574, + "step": 5280 + }, + { + "chosen_geometric_mean": -1.2786264419555664, + "epoch": 1.31, + "grad_norm": 6.625, + "learning_rate": 1.341840431204284e-06, + "log_odds": 4.953866958618164, + "log_odds_ratio": -0.04743342101573944, + "loss": 0.2218, + "rejected_geometric_mean": -5.887614727020264, + "step": 5281 + }, + { + "chosen_geometric_mean": -0.9310171604156494, + "epoch": 1.31, + "grad_norm": 4.65625, + "learning_rate": 1.3409778072197814e-06, + "log_odds": 6.370243549346924, + "log_odds_ratio": -0.19426970183849335, + "loss": 0.2671, + "rejected_geometric_mean": -6.926481246948242, + "step": 5282 + }, + { + "chosen_geometric_mean": -1.1028462648391724, + "epoch": 1.31, + "grad_norm": 2.25, + "learning_rate": 1.340115358970862e-06, + "log_odds": 1.7183979749679565, + "log_odds_ratio": -0.3157310485839844, + "loss": 0.2856, + "rejected_geometric_mean": -2.593672752380371, + "step": 5283 + }, + { + "chosen_geometric_mean": -0.8715165853500366, + "epoch": 1.31, + "grad_norm": 3.21875, + "learning_rate": 1.3392530865882984e-06, + "log_odds": 5.158888816833496, + "log_odds_ratio": -0.1732056736946106, + "loss": 0.2495, + "rejected_geometric_mean": -5.6121110916137695, + "step": 5284 + }, + { + "chosen_geometric_mean": -1.0176990032196045, + "epoch": 1.31, + "grad_norm": 4.5, + "learning_rate": 1.3383909902028291e-06, + "log_odds": 9.01642894744873, + "log_odds_ratio": -0.07276400923728943, + "loss": 0.2313, + "rejected_geometric_mean": -9.586106300354004, + "step": 5285 + }, + { + "chosen_geometric_mean": -0.8997328877449036, + "epoch": 1.31, + "grad_norm": 43.25, + "learning_rate": 1.3375290699451676e-06, + "log_odds": 6.923488616943359, + "log_odds_ratio": -0.13504359126091003, + "loss": 0.2917, + "rejected_geometric_mean": -7.418391227722168, + "step": 5286 + }, + { + "chosen_geometric_mean": -0.986742377281189, + "epoch": 1.31, + "grad_norm": 9.5, + "learning_rate": 1.3366673259460045e-06, + "log_odds": 2.63077449798584, + "log_odds_ratio": -0.2871934771537781, + "loss": 0.2449, + "rejected_geometric_mean": -3.288357734680176, + "step": 5287 + }, + { + "chosen_geometric_mean": -0.8939235806465149, + "epoch": 1.31, + "grad_norm": 6.3125, + "learning_rate": 1.335805758335998e-06, + "log_odds": 6.113137245178223, + "log_odds_ratio": -0.2777201235294342, + "loss": 0.2393, + "rejected_geometric_mean": -6.585053443908691, + "step": 5288 + }, + { + "chosen_geometric_mean": -0.9382007718086243, + "epoch": 1.31, + "grad_norm": 5.8125, + "learning_rate": 1.3349443672457838e-06, + "log_odds": 3.91430926322937, + "log_odds_ratio": -0.555014967918396, + "loss": 0.2891, + "rejected_geometric_mean": -4.78636360168457, + "step": 5289 + }, + { + "chosen_geometric_mean": -1.1068224906921387, + "epoch": 1.31, + "grad_norm": 7.15625, + "learning_rate": 1.3340831528059706e-06, + "log_odds": 7.044919967651367, + "log_odds_ratio": -0.13555142283439636, + "loss": 0.2439, + "rejected_geometric_mean": -7.7806806564331055, + "step": 5290 + }, + { + "chosen_geometric_mean": -0.9003108739852905, + "epoch": 1.31, + "grad_norm": 8.9375, + "learning_rate": 1.3332221151471375e-06, + "log_odds": 5.077056407928467, + "log_odds_ratio": -0.048631615936756134, + "loss": 0.2559, + "rejected_geometric_mean": -5.445839881896973, + "step": 5291 + }, + { + "chosen_geometric_mean": -1.0271167755126953, + "epoch": 1.31, + "grad_norm": 2.0625, + "learning_rate": 1.3323612543998385e-06, + "log_odds": 9.620136260986328, + "log_odds_ratio": -0.030730336904525757, + "loss": 0.2338, + "rejected_geometric_mean": -10.216681480407715, + "step": 5292 + }, + { + "chosen_geometric_mean": -0.9616336822509766, + "epoch": 1.31, + "grad_norm": 27.0, + "learning_rate": 1.3315005706946022e-06, + "log_odds": 5.937394618988037, + "log_odds_ratio": -0.07604716718196869, + "loss": 0.271, + "rejected_geometric_mean": -6.455042839050293, + "step": 5293 + }, + { + "chosen_geometric_mean": -1.126536250114441, + "epoch": 1.31, + "grad_norm": 18.75, + "learning_rate": 1.3306400641619265e-06, + "log_odds": 2.8292016983032227, + "log_odds_ratio": -0.254829466342926, + "loss": 0.3077, + "rejected_geometric_mean": -3.6787490844726562, + "step": 5294 + }, + { + "chosen_geometric_mean": -0.8971874713897705, + "epoch": 1.31, + "grad_norm": 3.453125, + "learning_rate": 1.3297797349322865e-06, + "log_odds": 5.448435306549072, + "log_odds_ratio": -0.17233727872371674, + "loss": 0.2769, + "rejected_geometric_mean": -5.967624187469482, + "step": 5295 + }, + { + "chosen_geometric_mean": -0.8896605968475342, + "epoch": 1.31, + "grad_norm": 14.0625, + "learning_rate": 1.32891958313613e-06, + "log_odds": 14.250741958618164, + "log_odds_ratio": -0.10029031336307526, + "loss": 0.2563, + "rejected_geometric_mean": -14.673583984375, + "step": 5296 + }, + { + "chosen_geometric_mean": -0.9098871946334839, + "epoch": 1.31, + "grad_norm": 2.25, + "learning_rate": 1.3280596089038738e-06, + "log_odds": 6.369268894195557, + "log_odds_ratio": -0.13891053199768066, + "loss": 0.2487, + "rejected_geometric_mean": -6.833406448364258, + "step": 5297 + }, + { + "chosen_geometric_mean": -0.9070172905921936, + "epoch": 1.31, + "grad_norm": 31.125, + "learning_rate": 1.327199812365913e-06, + "log_odds": 11.471056938171387, + "log_odds_ratio": -0.17658737301826477, + "loss": 0.2775, + "rejected_geometric_mean": -11.986922264099121, + "step": 5298 + }, + { + "chosen_geometric_mean": -1.100150465965271, + "epoch": 1.31, + "grad_norm": 26.5, + "learning_rate": 1.3263401936526115e-06, + "log_odds": 6.935917854309082, + "log_odds_ratio": -0.21890896558761597, + "loss": 0.3051, + "rejected_geometric_mean": -7.722954750061035, + "step": 5299 + }, + { + "chosen_geometric_mean": -1.022512674331665, + "epoch": 1.31, + "grad_norm": 20.75, + "learning_rate": 1.3254807528943104e-06, + "log_odds": 5.631248474121094, + "log_odds_ratio": -0.027176445350050926, + "loss": 0.3183, + "rejected_geometric_mean": -6.205426216125488, + "step": 5300 + }, + { + "chosen_geometric_mean": -0.8195250034332275, + "epoch": 1.31, + "grad_norm": 5.96875, + "learning_rate": 1.324621490221319e-06, + "log_odds": 7.737618923187256, + "log_odds_ratio": -0.28754723072052, + "loss": 0.2604, + "rejected_geometric_mean": -8.187478065490723, + "step": 5301 + }, + { + "chosen_geometric_mean": -1.2014946937561035, + "epoch": 1.31, + "grad_norm": 29.875, + "learning_rate": 1.3237624057639242e-06, + "log_odds": 0.6731253862380981, + "log_odds_ratio": -0.5605846643447876, + "loss": 0.3229, + "rejected_geometric_mean": -1.8179280757904053, + "step": 5302 + }, + { + "chosen_geometric_mean": -0.943415105342865, + "epoch": 1.31, + "grad_norm": 11.25, + "learning_rate": 1.322903499652382e-06, + "log_odds": 6.508234977722168, + "log_odds_ratio": -0.11115820705890656, + "loss": 0.25, + "rejected_geometric_mean": -7.027467250823975, + "step": 5303 + }, + { + "chosen_geometric_mean": -1.0008407831192017, + "epoch": 1.31, + "grad_norm": 8.3125, + "learning_rate": 1.3220447720169244e-06, + "log_odds": 1.6403534412384033, + "log_odds_ratio": -0.2920796871185303, + "loss": 0.2581, + "rejected_geometric_mean": -2.3912525177001953, + "step": 5304 + }, + { + "chosen_geometric_mean": -1.119537353515625, + "epoch": 1.31, + "grad_norm": 25.875, + "learning_rate": 1.321186222987756e-06, + "log_odds": 5.3809814453125, + "log_odds_ratio": -0.1360185444355011, + "loss": 0.2922, + "rejected_geometric_mean": -6.160360336303711, + "step": 5305 + }, + { + "chosen_geometric_mean": -1.0097583532333374, + "epoch": 1.31, + "grad_norm": 2.8125, + "learning_rate": 1.3203278526950508e-06, + "log_odds": 1.6681907176971436, + "log_odds_ratio": -0.42198678851127625, + "loss": 0.2655, + "rejected_geometric_mean": -2.5091702938079834, + "step": 5306 + }, + { + "chosen_geometric_mean": -0.8998705148696899, + "epoch": 1.31, + "grad_norm": 3.84375, + "learning_rate": 1.3194696612689613e-06, + "log_odds": 8.828439712524414, + "log_odds_ratio": -0.006528831087052822, + "loss": 0.2518, + "rejected_geometric_mean": -9.18617057800293, + "step": 5307 + }, + { + "chosen_geometric_mean": -0.9499194025993347, + "epoch": 1.31, + "grad_norm": 26.875, + "learning_rate": 1.3186116488396086e-06, + "log_odds": 5.335073947906494, + "log_odds_ratio": -0.004899915307760239, + "loss": 0.2335, + "rejected_geometric_mean": -5.787027359008789, + "step": 5308 + }, + { + "chosen_geometric_mean": -1.1966074705123901, + "epoch": 1.31, + "grad_norm": 11.375, + "learning_rate": 1.3177538155370866e-06, + "log_odds": 8.370240211486816, + "log_odds_ratio": -0.04584995284676552, + "loss": 0.2506, + "rejected_geometric_mean": -9.226280212402344, + "step": 5309 + }, + { + "chosen_geometric_mean": -1.060024619102478, + "epoch": 1.31, + "grad_norm": 4.53125, + "learning_rate": 1.316896161491465e-06, + "log_odds": 8.278759956359863, + "log_odds_ratio": -0.25121283531188965, + "loss": 0.206, + "rejected_geometric_mean": -9.02832317352295, + "step": 5310 + }, + { + "chosen_geometric_mean": -1.004565715789795, + "epoch": 1.31, + "grad_norm": 4.46875, + "learning_rate": 1.3160386868327851e-06, + "log_odds": 8.777862548828125, + "log_odds_ratio": -0.02519831247627735, + "loss": 0.2296, + "rejected_geometric_mean": -9.34112548828125, + "step": 5311 + }, + { + "chosen_geometric_mean": -0.9579641819000244, + "epoch": 1.32, + "grad_norm": 14.375, + "learning_rate": 1.3151813916910593e-06, + "log_odds": 7.658304214477539, + "log_odds_ratio": -0.010790559463202953, + "loss": 0.2447, + "rejected_geometric_mean": -8.117347717285156, + "step": 5312 + }, + { + "chosen_geometric_mean": -0.8739795684814453, + "epoch": 1.32, + "grad_norm": 24.625, + "learning_rate": 1.3143242761962758e-06, + "log_odds": 13.468159675598145, + "log_odds_ratio": -0.010892032645642757, + "loss": 0.2631, + "rejected_geometric_mean": -13.772015571594238, + "step": 5313 + }, + { + "chosen_geometric_mean": -0.9498406648635864, + "epoch": 1.32, + "grad_norm": 37.25, + "learning_rate": 1.3134673404783927e-06, + "log_odds": 2.8789193630218506, + "log_odds_ratio": -0.13409483432769775, + "loss": 0.2966, + "rejected_geometric_mean": -3.347917079925537, + "step": 5314 + }, + { + "chosen_geometric_mean": -0.9903116226196289, + "epoch": 1.32, + "grad_norm": 13.0625, + "learning_rate": 1.3126105846673404e-06, + "log_odds": 7.148595333099365, + "log_odds_ratio": -0.06214148551225662, + "loss": 0.2484, + "rejected_geometric_mean": -7.708768367767334, + "step": 5315 + }, + { + "chosen_geometric_mean": -0.977820634841919, + "epoch": 1.32, + "grad_norm": 15.875, + "learning_rate": 1.3117540088930272e-06, + "log_odds": 8.52326488494873, + "log_odds_ratio": -0.027840182185173035, + "loss": 0.2429, + "rejected_geometric_mean": -8.982857704162598, + "step": 5316 + }, + { + "chosen_geometric_mean": -0.8782610893249512, + "epoch": 1.32, + "grad_norm": 9.5, + "learning_rate": 1.3108976132853291e-06, + "log_odds": 5.714158058166504, + "log_odds_ratio": -0.1468769758939743, + "loss": 0.2208, + "rejected_geometric_mean": -6.145676612854004, + "step": 5317 + }, + { + "chosen_geometric_mean": -1.1527881622314453, + "epoch": 1.32, + "grad_norm": 2.265625, + "learning_rate": 1.3100413979740945e-06, + "log_odds": 3.149712085723877, + "log_odds_ratio": -0.14490120112895966, + "loss": 0.2494, + "rejected_geometric_mean": -4.006125450134277, + "step": 5318 + }, + { + "chosen_geometric_mean": -1.218576431274414, + "epoch": 1.32, + "grad_norm": 13.9375, + "learning_rate": 1.309185363089149e-06, + "log_odds": 5.20939302444458, + "log_odds_ratio": -0.14060567319393158, + "loss": 0.2555, + "rejected_geometric_mean": -6.137567043304443, + "step": 5319 + }, + { + "chosen_geometric_mean": -0.8694121837615967, + "epoch": 1.32, + "grad_norm": 5.90625, + "learning_rate": 1.3083295087602857e-06, + "log_odds": 13.750455856323242, + "log_odds_ratio": -0.029489746317267418, + "loss": 0.1948, + "rejected_geometric_mean": -14.081130981445312, + "step": 5320 + }, + { + "chosen_geometric_mean": -0.8680148720741272, + "epoch": 1.32, + "grad_norm": 6.78125, + "learning_rate": 1.3074738351172732e-06, + "log_odds": 8.569206237792969, + "log_odds_ratio": -0.12018489092588425, + "loss": 0.243, + "rejected_geometric_mean": -8.903913497924805, + "step": 5321 + }, + { + "chosen_geometric_mean": -1.237442135810852, + "epoch": 1.32, + "grad_norm": 5.0625, + "learning_rate": 1.3066183422898542e-06, + "log_odds": 6.990405559539795, + "log_odds_ratio": -0.11661739647388458, + "loss": 0.2912, + "rejected_geometric_mean": -7.8923821449279785, + "step": 5322 + }, + { + "chosen_geometric_mean": -1.1090342998504639, + "epoch": 1.32, + "grad_norm": 30.875, + "learning_rate": 1.305763030407739e-06, + "log_odds": 7.57985782623291, + "log_odds_ratio": -0.2070217728614807, + "loss": 0.3215, + "rejected_geometric_mean": -8.39237117767334, + "step": 5323 + }, + { + "chosen_geometric_mean": -0.862050473690033, + "epoch": 1.32, + "grad_norm": 2.640625, + "learning_rate": 1.304907899600616e-06, + "log_odds": 5.196111679077148, + "log_odds_ratio": -0.16261589527130127, + "loss": 0.2275, + "rejected_geometric_mean": -5.633108139038086, + "step": 5324 + }, + { + "chosen_geometric_mean": -1.0170681476593018, + "epoch": 1.32, + "grad_norm": 2.0, + "learning_rate": 1.304052949998142e-06, + "log_odds": 4.178717136383057, + "log_odds_ratio": -0.2781077027320862, + "loss": 0.258, + "rejected_geometric_mean": -4.872443199157715, + "step": 5325 + }, + { + "chosen_geometric_mean": -1.1197493076324463, + "epoch": 1.32, + "grad_norm": 14.0, + "learning_rate": 1.3031981817299475e-06, + "log_odds": 5.729322910308838, + "log_odds_ratio": -0.13104304671287537, + "loss": 0.2939, + "rejected_geometric_mean": -6.507808208465576, + "step": 5326 + }, + { + "chosen_geometric_mean": -1.0023610591888428, + "epoch": 1.32, + "grad_norm": 2.171875, + "learning_rate": 1.3023435949256363e-06, + "log_odds": 1.9380948543548584, + "log_odds_ratio": -0.19478479027748108, + "loss": 0.2736, + "rejected_geometric_mean": -2.5150039196014404, + "step": 5327 + }, + { + "chosen_geometric_mean": -1.0232363939285278, + "epoch": 1.32, + "grad_norm": 64.5, + "learning_rate": 1.3014891897147857e-06, + "log_odds": 4.6674089431762695, + "log_odds_ratio": -0.1349981278181076, + "loss": 0.2511, + "rejected_geometric_mean": -5.339663505554199, + "step": 5328 + }, + { + "chosen_geometric_mean": -0.9897055625915527, + "epoch": 1.32, + "grad_norm": 12.75, + "learning_rate": 1.3006349662269418e-06, + "log_odds": 5.836772441864014, + "log_odds_ratio": -0.2401893585920334, + "loss": 0.2592, + "rejected_geometric_mean": -6.528980731964111, + "step": 5329 + }, + { + "chosen_geometric_mean": -0.8934619426727295, + "epoch": 1.32, + "grad_norm": 8.0625, + "learning_rate": 1.2997809245916276e-06, + "log_odds": 8.505866050720215, + "log_odds_ratio": -0.10453439503908157, + "loss": 0.2589, + "rejected_geometric_mean": -8.955928802490234, + "step": 5330 + }, + { + "chosen_geometric_mean": -0.8090389370918274, + "epoch": 1.32, + "grad_norm": 1.8984375, + "learning_rate": 1.2989270649383351e-06, + "log_odds": 5.892014503479004, + "log_odds_ratio": -0.07950639724731445, + "loss": 0.2685, + "rejected_geometric_mean": -6.191356182098389, + "step": 5331 + }, + { + "chosen_geometric_mean": -1.0266622304916382, + "epoch": 1.32, + "grad_norm": 3.71875, + "learning_rate": 1.2980733873965281e-06, + "log_odds": 10.797037124633789, + "log_odds_ratio": -0.001818799297325313, + "loss": 0.3019, + "rejected_geometric_mean": -11.378442764282227, + "step": 5332 + }, + { + "chosen_geometric_mean": -0.7879327535629272, + "epoch": 1.32, + "grad_norm": 1.875, + "learning_rate": 1.2972198920956487e-06, + "log_odds": 4.245726585388184, + "log_odds_ratio": -0.14433720707893372, + "loss": 0.1735, + "rejected_geometric_mean": -4.508886337280273, + "step": 5333 + }, + { + "chosen_geometric_mean": -0.934594988822937, + "epoch": 1.32, + "grad_norm": 3.171875, + "learning_rate": 1.2963665791651047e-06, + "log_odds": 7.210865020751953, + "log_odds_ratio": -0.170720174908638, + "loss": 0.2675, + "rejected_geometric_mean": -7.739624500274658, + "step": 5334 + }, + { + "chosen_geometric_mean": -1.0803757905960083, + "epoch": 1.32, + "grad_norm": 11.6875, + "learning_rate": 1.2955134487342785e-06, + "log_odds": 6.936704158782959, + "log_odds_ratio": -0.12562307715415955, + "loss": 0.2589, + "rejected_geometric_mean": -7.659132480621338, + "step": 5335 + }, + { + "chosen_geometric_mean": -1.191881537437439, + "epoch": 1.32, + "grad_norm": 2.25, + "learning_rate": 1.2946605009325267e-06, + "log_odds": 7.079964637756348, + "log_odds_ratio": -0.03059561550617218, + "loss": 0.3174, + "rejected_geometric_mean": -7.91785192489624, + "step": 5336 + }, + { + "chosen_geometric_mean": -0.9618646502494812, + "epoch": 1.32, + "grad_norm": 6.46875, + "learning_rate": 1.293807735889175e-06, + "log_odds": 7.336924076080322, + "log_odds_ratio": -0.18188294768333435, + "loss": 0.2515, + "rejected_geometric_mean": -7.871091365814209, + "step": 5337 + }, + { + "chosen_geometric_mean": -1.1747044324874878, + "epoch": 1.32, + "grad_norm": 7.375, + "learning_rate": 1.2929551537335235e-06, + "log_odds": 5.018599987030029, + "log_odds_ratio": -0.2446235865354538, + "loss": 0.2799, + "rejected_geometric_mean": -5.926580429077148, + "step": 5338 + }, + { + "chosen_geometric_mean": -0.8590237498283386, + "epoch": 1.32, + "grad_norm": 13.375, + "learning_rate": 1.292102754594846e-06, + "log_odds": 5.107649803161621, + "log_odds_ratio": -0.17568686604499817, + "loss": 0.2562, + "rejected_geometric_mean": -5.529361724853516, + "step": 5339 + }, + { + "chosen_geometric_mean": -0.9418718814849854, + "epoch": 1.32, + "grad_norm": 3.5, + "learning_rate": 1.2912505386023846e-06, + "log_odds": 1.9283447265625, + "log_odds_ratio": -0.28762033581733704, + "loss": 0.2902, + "rejected_geometric_mean": -2.517313003540039, + "step": 5340 + }, + { + "chosen_geometric_mean": -0.8978232145309448, + "epoch": 1.32, + "grad_norm": 13.5, + "learning_rate": 1.2903985058853558e-06, + "log_odds": 4.1822028160095215, + "log_odds_ratio": -0.24086570739746094, + "loss": 0.2837, + "rejected_geometric_mean": -4.660464286804199, + "step": 5341 + }, + { + "chosen_geometric_mean": -0.8291987180709839, + "epoch": 1.32, + "grad_norm": 11.0, + "learning_rate": 1.2895466565729491e-06, + "log_odds": 5.479753494262695, + "log_odds_ratio": -0.22313380241394043, + "loss": 0.2505, + "rejected_geometric_mean": -5.93946647644043, + "step": 5342 + }, + { + "chosen_geometric_mean": -0.9703309535980225, + "epoch": 1.32, + "grad_norm": 8.625, + "learning_rate": 1.2886949907943247e-06, + "log_odds": 3.802076816558838, + "log_odds_ratio": -0.18843400478363037, + "loss": 0.2612, + "rejected_geometric_mean": -4.413903713226318, + "step": 5343 + }, + { + "chosen_geometric_mean": -2.079220771789551, + "epoch": 1.32, + "grad_norm": 37.25, + "learning_rate": 1.287843508678615e-06, + "log_odds": 11.343063354492188, + "log_odds_ratio": -0.07492989301681519, + "loss": 0.2906, + "rejected_geometric_mean": -13.126694679260254, + "step": 5344 + }, + { + "chosen_geometric_mean": -1.2081955671310425, + "epoch": 1.32, + "grad_norm": 3.34375, + "learning_rate": 1.2869922103549274e-06, + "log_odds": 1.7293834686279297, + "log_odds_ratio": -0.3480615019798279, + "loss": 0.2563, + "rejected_geometric_mean": -2.7792139053344727, + "step": 5345 + }, + { + "chosen_geometric_mean": -1.2259794473648071, + "epoch": 1.32, + "grad_norm": 37.0, + "learning_rate": 1.2861410959523362e-06, + "log_odds": 4.736387252807617, + "log_odds_ratio": -0.36140549182891846, + "loss": 0.3355, + "rejected_geometric_mean": -5.629604339599609, + "step": 5346 + }, + { + "chosen_geometric_mean": -1.1275345087051392, + "epoch": 1.32, + "grad_norm": 27.75, + "learning_rate": 1.2852901655998937e-06, + "log_odds": 11.121454238891602, + "log_odds_ratio": -0.021971875801682472, + "loss": 0.257, + "rejected_geometric_mean": -11.845880508422852, + "step": 5347 + }, + { + "chosen_geometric_mean": -0.9536559581756592, + "epoch": 1.32, + "grad_norm": 6.40625, + "learning_rate": 1.2844394194266186e-06, + "log_odds": 10.775007247924805, + "log_odds_ratio": -0.10851326584815979, + "loss": 0.2497, + "rejected_geometric_mean": -11.281099319458008, + "step": 5348 + }, + { + "chosen_geometric_mean": -1.1475768089294434, + "epoch": 1.32, + "grad_norm": 4.5, + "learning_rate": 1.2835888575615068e-06, + "log_odds": 4.674772262573242, + "log_odds_ratio": -0.4330955147743225, + "loss": 0.2818, + "rejected_geometric_mean": -5.62179708480835, + "step": 5349 + }, + { + "chosen_geometric_mean": -0.9542433023452759, + "epoch": 1.32, + "grad_norm": 4.90625, + "learning_rate": 1.2827384801335219e-06, + "log_odds": 10.09581470489502, + "log_odds_ratio": -0.002175204688683152, + "loss": 0.2471, + "rejected_geometric_mean": -10.535486221313477, + "step": 5350 + }, + { + "chosen_geometric_mean": -1.0069857835769653, + "epoch": 1.32, + "grad_norm": 1.9375, + "learning_rate": 1.2818882872716034e-06, + "log_odds": 11.101865768432617, + "log_odds_ratio": -0.0015438836999237537, + "loss": 0.2378, + "rejected_geometric_mean": -11.615538597106934, + "step": 5351 + }, + { + "chosen_geometric_mean": -0.9951539039611816, + "epoch": 1.33, + "grad_norm": 18.625, + "learning_rate": 1.281038279104659e-06, + "log_odds": 3.191864252090454, + "log_odds_ratio": -0.23926696181297302, + "loss": 0.3008, + "rejected_geometric_mean": -3.826533317565918, + "step": 5352 + }, + { + "chosen_geometric_mean": -1.094334363937378, + "epoch": 1.33, + "grad_norm": 2.96875, + "learning_rate": 1.2801884557615724e-06, + "log_odds": 10.20484447479248, + "log_odds_ratio": -0.16059380769729614, + "loss": 0.2538, + "rejected_geometric_mean": -10.953624725341797, + "step": 5353 + }, + { + "chosen_geometric_mean": -0.980919599533081, + "epoch": 1.33, + "grad_norm": 3.890625, + "learning_rate": 1.2793388173711951e-06, + "log_odds": 5.365880012512207, + "log_odds_ratio": -0.25409388542175293, + "loss": 0.3246, + "rejected_geometric_mean": -6.04107666015625, + "step": 5354 + }, + { + "chosen_geometric_mean": -1.1949145793914795, + "epoch": 1.33, + "grad_norm": 10.25, + "learning_rate": 1.2784893640623536e-06, + "log_odds": 10.392322540283203, + "log_odds_ratio": -0.03292619064450264, + "loss": 0.2578, + "rejected_geometric_mean": -11.232421875, + "step": 5355 + }, + { + "chosen_geometric_mean": -1.0475479364395142, + "epoch": 1.33, + "grad_norm": 2.90625, + "learning_rate": 1.2776400959638468e-06, + "log_odds": 6.7096428871154785, + "log_odds_ratio": -0.10331835597753525, + "loss": 0.2644, + "rejected_geometric_mean": -7.353810787200928, + "step": 5356 + }, + { + "chosen_geometric_mean": -0.824195146560669, + "epoch": 1.33, + "grad_norm": 39.75, + "learning_rate": 1.2767910132044425e-06, + "log_odds": 5.05179500579834, + "log_odds_ratio": -0.16305217146873474, + "loss": 0.2726, + "rejected_geometric_mean": -5.401942729949951, + "step": 5357 + }, + { + "chosen_geometric_mean": -0.8381358981132507, + "epoch": 1.33, + "grad_norm": 5.6875, + "learning_rate": 1.275942115912882e-06, + "log_odds": 2.876431703567505, + "log_odds_ratio": -0.3055429756641388, + "loss": 0.2124, + "rejected_geometric_mean": -3.3909735679626465, + "step": 5358 + }, + { + "chosen_geometric_mean": -0.7934576272964478, + "epoch": 1.33, + "grad_norm": 15.3125, + "learning_rate": 1.2750934042178786e-06, + "log_odds": 4.715677261352539, + "log_odds_ratio": -0.1181526929140091, + "loss": 0.2263, + "rejected_geometric_mean": -4.934731483459473, + "step": 5359 + }, + { + "chosen_geometric_mean": -0.794710099697113, + "epoch": 1.33, + "grad_norm": 6.53125, + "learning_rate": 1.2742448782481188e-06, + "log_odds": 8.566125869750977, + "log_odds_ratio": -0.001266739796847105, + "loss": 0.2486, + "rejected_geometric_mean": -8.734214782714844, + "step": 5360 + }, + { + "chosen_geometric_mean": -1.108642816543579, + "epoch": 1.33, + "grad_norm": 1.7734375, + "learning_rate": 1.273396538132257e-06, + "log_odds": 4.033405303955078, + "log_odds_ratio": -0.031843800097703934, + "loss": 0.2117, + "rejected_geometric_mean": -4.75445032119751, + "step": 5361 + }, + { + "chosen_geometric_mean": -1.4289498329162598, + "epoch": 1.33, + "grad_norm": 105.5, + "learning_rate": 1.2725483839989245e-06, + "log_odds": 2.9992620944976807, + "log_odds_ratio": -0.27310627698898315, + "loss": 0.3267, + "rejected_geometric_mean": -4.2337446212768555, + "step": 5362 + }, + { + "chosen_geometric_mean": -0.9985307455062866, + "epoch": 1.33, + "grad_norm": 2.234375, + "learning_rate": 1.2717004159767206e-06, + "log_odds": 6.625472545623779, + "log_odds_ratio": -0.08750929683446884, + "loss": 0.2619, + "rejected_geometric_mean": -7.181424140930176, + "step": 5363 + }, + { + "chosen_geometric_mean": -1.261003851890564, + "epoch": 1.33, + "grad_norm": 8.875, + "learning_rate": 1.2708526341942167e-06, + "log_odds": 5.960021495819092, + "log_odds_ratio": -0.32862141728401184, + "loss": 0.3181, + "rejected_geometric_mean": -6.992912292480469, + "step": 5364 + }, + { + "chosen_geometric_mean": -1.0445215702056885, + "epoch": 1.33, + "grad_norm": 27.625, + "learning_rate": 1.2700050387799579e-06, + "log_odds": 6.817270755767822, + "log_odds_ratio": -0.23404482007026672, + "loss": 0.3024, + "rejected_geometric_mean": -7.604246616363525, + "step": 5365 + }, + { + "chosen_geometric_mean": -0.7640578746795654, + "epoch": 1.33, + "grad_norm": 40.75, + "learning_rate": 1.2691576298624608e-06, + "log_odds": 4.127433776855469, + "log_odds_ratio": -0.052144601941108704, + "loss": 0.2917, + "rejected_geometric_mean": -4.310627460479736, + "step": 5366 + }, + { + "chosen_geometric_mean": -0.8383797407150269, + "epoch": 1.33, + "grad_norm": 4.46875, + "learning_rate": 1.2683104075702113e-06, + "log_odds": 3.8550970554351807, + "log_odds_ratio": -0.20137105882167816, + "loss": 0.2731, + "rejected_geometric_mean": -4.257681846618652, + "step": 5367 + }, + { + "chosen_geometric_mean": -1.193123459815979, + "epoch": 1.33, + "grad_norm": 8.3125, + "learning_rate": 1.26746337203167e-06, + "log_odds": 3.7095143795013428, + "log_odds_ratio": -0.12001422792673111, + "loss": 0.2428, + "rejected_geometric_mean": -4.597966194152832, + "step": 5368 + }, + { + "chosen_geometric_mean": -0.9503461122512817, + "epoch": 1.33, + "grad_norm": 2.515625, + "learning_rate": 1.2666165233752665e-06, + "log_odds": 11.923177719116211, + "log_odds_ratio": -0.06950367242097855, + "loss": 0.2495, + "rejected_geometric_mean": -12.419160842895508, + "step": 5369 + }, + { + "chosen_geometric_mean": -0.8747802972793579, + "epoch": 1.33, + "grad_norm": 2.453125, + "learning_rate": 1.265769861729404e-06, + "log_odds": 10.114571571350098, + "log_odds_ratio": -0.031864166259765625, + "loss": 0.2741, + "rejected_geometric_mean": -10.465456008911133, + "step": 5370 + }, + { + "chosen_geometric_mean": -1.0531185865402222, + "epoch": 1.33, + "grad_norm": 4.90625, + "learning_rate": 1.264923387222458e-06, + "log_odds": 9.891721725463867, + "log_odds_ratio": -0.0038205203600227833, + "loss": 0.2593, + "rejected_geometric_mean": -10.502840042114258, + "step": 5371 + }, + { + "chosen_geometric_mean": -1.3327466249465942, + "epoch": 1.33, + "grad_norm": 13.875, + "learning_rate": 1.2640770999827718e-06, + "log_odds": 9.1109037399292, + "log_odds_ratio": -0.21341481804847717, + "loss": 0.2709, + "rejected_geometric_mean": -10.182387351989746, + "step": 5372 + }, + { + "chosen_geometric_mean": -1.0662147998809814, + "epoch": 1.33, + "grad_norm": 2.0, + "learning_rate": 1.2632310001386655e-06, + "log_odds": 8.692605972290039, + "log_odds_ratio": -0.11381091177463531, + "loss": 0.262, + "rejected_geometric_mean": -9.393235206604004, + "step": 5373 + }, + { + "chosen_geometric_mean": -0.9712717533111572, + "epoch": 1.33, + "grad_norm": 3.765625, + "learning_rate": 1.262385087818427e-06, + "log_odds": 8.366251945495605, + "log_odds_ratio": -0.06404373794794083, + "loss": 0.2452, + "rejected_geometric_mean": -8.906112670898438, + "step": 5374 + }, + { + "chosen_geometric_mean": -0.9611997604370117, + "epoch": 1.33, + "grad_norm": 6.21875, + "learning_rate": 1.2615393631503154e-06, + "log_odds": 7.726783752441406, + "log_odds_ratio": -0.02889859862625599, + "loss": 0.3014, + "rejected_geometric_mean": -8.214433670043945, + "step": 5375 + }, + { + "chosen_geometric_mean": -0.8895060420036316, + "epoch": 1.33, + "grad_norm": 6.65625, + "learning_rate": 1.2606938262625644e-06, + "log_odds": 10.621533393859863, + "log_odds_ratio": -0.1166592463850975, + "loss": 0.2738, + "rejected_geometric_mean": -11.073060035705566, + "step": 5376 + }, + { + "chosen_geometric_mean": -0.8693481683731079, + "epoch": 1.33, + "grad_norm": 2.625, + "learning_rate": 1.2598484772833787e-06, + "log_odds": 8.936517715454102, + "log_odds_ratio": -0.003512440249323845, + "loss": 0.2936, + "rejected_geometric_mean": -9.241012573242188, + "step": 5377 + }, + { + "chosen_geometric_mean": -1.0066800117492676, + "epoch": 1.33, + "grad_norm": 3.359375, + "learning_rate": 1.259003316340931e-06, + "log_odds": 5.975949764251709, + "log_odds_ratio": -0.04501808434724808, + "loss": 0.281, + "rejected_geometric_mean": -6.551461219787598, + "step": 5378 + }, + { + "chosen_geometric_mean": -0.9688190221786499, + "epoch": 1.33, + "grad_norm": 6.5625, + "learning_rate": 1.2581583435633704e-06, + "log_odds": 5.0744500160217285, + "log_odds_ratio": -0.16744676232337952, + "loss": 0.2953, + "rejected_geometric_mean": -5.631946563720703, + "step": 5379 + }, + { + "chosen_geometric_mean": -1.153419017791748, + "epoch": 1.33, + "grad_norm": 30.375, + "learning_rate": 1.2573135590788143e-06, + "log_odds": 13.422955513000488, + "log_odds_ratio": -0.34506291151046753, + "loss": 0.2915, + "rejected_geometric_mean": -14.264376640319824, + "step": 5380 + }, + { + "chosen_geometric_mean": -1.2662838697433472, + "epoch": 1.33, + "grad_norm": 56.5, + "learning_rate": 1.2564689630153498e-06, + "log_odds": 4.170644760131836, + "log_odds_ratio": -0.06620809435844421, + "loss": 0.2985, + "rejected_geometric_mean": -5.087366104125977, + "step": 5381 + }, + { + "chosen_geometric_mean": -0.8165011405944824, + "epoch": 1.33, + "grad_norm": 4.03125, + "learning_rate": 1.2556245555010421e-06, + "log_odds": 6.608489513397217, + "log_odds_ratio": -0.18776454031467438, + "loss": 0.2418, + "rejected_geometric_mean": -6.975905895233154, + "step": 5382 + }, + { + "chosen_geometric_mean": -0.9345622658729553, + "epoch": 1.33, + "grad_norm": 22.25, + "learning_rate": 1.2547803366639216e-06, + "log_odds": 9.685148239135742, + "log_odds_ratio": -0.04059218242764473, + "loss": 0.2558, + "rejected_geometric_mean": -10.127645492553711, + "step": 5383 + }, + { + "chosen_geometric_mean": -1.076879858970642, + "epoch": 1.33, + "grad_norm": 4.84375, + "learning_rate": 1.2539363066319916e-06, + "log_odds": 2.2281532287597656, + "log_odds_ratio": -0.18067580461502075, + "loss": 0.2549, + "rejected_geometric_mean": -2.941511869430542, + "step": 5384 + }, + { + "chosen_geometric_mean": -1.1782914400100708, + "epoch": 1.33, + "grad_norm": 4.0625, + "learning_rate": 1.2530924655332286e-06, + "log_odds": 5.126673698425293, + "log_odds_ratio": -0.12715384364128113, + "loss": 0.2779, + "rejected_geometric_mean": -5.9511847496032715, + "step": 5385 + }, + { + "chosen_geometric_mean": -1.2605061531066895, + "epoch": 1.33, + "grad_norm": 65.0, + "learning_rate": 1.2522488134955774e-06, + "log_odds": 3.1730618476867676, + "log_odds_ratio": -0.19277682900428772, + "loss": 0.451, + "rejected_geometric_mean": -4.185832500457764, + "step": 5386 + }, + { + "chosen_geometric_mean": -0.9368165135383606, + "epoch": 1.33, + "grad_norm": 7.0, + "learning_rate": 1.2514053506469573e-06, + "log_odds": 2.712116003036499, + "log_odds_ratio": -0.24166549742221832, + "loss": 0.3118, + "rejected_geometric_mean": -3.257079601287842, + "step": 5387 + }, + { + "chosen_geometric_mean": -1.14026939868927, + "epoch": 1.33, + "grad_norm": 2.0625, + "learning_rate": 1.250562077115258e-06, + "log_odds": 3.791098117828369, + "log_odds_ratio": -0.1515839844942093, + "loss": 0.27, + "rejected_geometric_mean": -4.623615264892578, + "step": 5388 + }, + { + "chosen_geometric_mean": -1.166189432144165, + "epoch": 1.33, + "grad_norm": 2.359375, + "learning_rate": 1.249718993028339e-06, + "log_odds": 5.560994625091553, + "log_odds_ratio": -0.012783633545041084, + "loss": 0.3328, + "rejected_geometric_mean": -6.3333845138549805, + "step": 5389 + }, + { + "chosen_geometric_mean": -0.9968434572219849, + "epoch": 1.33, + "grad_norm": 2.71875, + "learning_rate": 1.2488760985140316e-06, + "log_odds": 11.162912368774414, + "log_odds_ratio": -0.017826572060585022, + "loss": 0.2941, + "rejected_geometric_mean": -11.67907428741455, + "step": 5390 + }, + { + "chosen_geometric_mean": -0.9655848741531372, + "epoch": 1.33, + "grad_norm": 56.25, + "learning_rate": 1.2480333937001408e-06, + "log_odds": 9.226696014404297, + "log_odds_ratio": -0.0481819249689579, + "loss": 0.3137, + "rejected_geometric_mean": -9.733908653259277, + "step": 5391 + }, + { + "chosen_geometric_mean": -0.9834450483322144, + "epoch": 1.33, + "grad_norm": 26.75, + "learning_rate": 1.2471908787144382e-06, + "log_odds": 8.602933883666992, + "log_odds_ratio": -0.007492543663829565, + "loss": 0.2097, + "rejected_geometric_mean": -9.116390228271484, + "step": 5392 + }, + { + "chosen_geometric_mean": -0.8676198124885559, + "epoch": 1.34, + "grad_norm": 17.125, + "learning_rate": 1.2463485536846708e-06, + "log_odds": 0.720307469367981, + "log_odds_ratio": -0.4435124397277832, + "loss": 0.3054, + "rejected_geometric_mean": -1.354752779006958, + "step": 5393 + }, + { + "chosen_geometric_mean": -0.8358446359634399, + "epoch": 1.34, + "grad_norm": 3.65625, + "learning_rate": 1.2455064187385564e-06, + "log_odds": 3.3805932998657227, + "log_odds_ratio": -0.23455864191055298, + "loss": 0.2771, + "rejected_geometric_mean": -3.8069186210632324, + "step": 5394 + }, + { + "chosen_geometric_mean": -1.03205144405365, + "epoch": 1.34, + "grad_norm": 7.53125, + "learning_rate": 1.2446644740037803e-06, + "log_odds": 1.9421684741973877, + "log_odds_ratio": -0.23516862094402313, + "loss": 0.2514, + "rejected_geometric_mean": -2.6770238876342773, + "step": 5395 + }, + { + "chosen_geometric_mean": -1.0220710039138794, + "epoch": 1.34, + "grad_norm": 15.375, + "learning_rate": 1.243822719608004e-06, + "log_odds": 5.4897613525390625, + "log_odds_ratio": -0.2935695946216583, + "loss": 0.2208, + "rejected_geometric_mean": -6.213119983673096, + "step": 5396 + }, + { + "chosen_geometric_mean": -1.0955843925476074, + "epoch": 1.34, + "grad_norm": 3.71875, + "learning_rate": 1.2429811556788558e-06, + "log_odds": 5.462998867034912, + "log_odds_ratio": -0.17204684019088745, + "loss": 0.3022, + "rejected_geometric_mean": -6.242756366729736, + "step": 5397 + }, + { + "chosen_geometric_mean": -1.497017741203308, + "epoch": 1.34, + "grad_norm": 25.625, + "learning_rate": 1.2421397823439378e-06, + "log_odds": 5.909702301025391, + "log_odds_ratio": -0.08692623674869537, + "loss": 0.3644, + "rejected_geometric_mean": -7.044437408447266, + "step": 5398 + }, + { + "chosen_geometric_mean": -1.0426490306854248, + "epoch": 1.34, + "grad_norm": 18.5, + "learning_rate": 1.2412985997308232e-06, + "log_odds": 4.34896183013916, + "log_odds_ratio": -0.09464579820632935, + "loss": 0.276, + "rejected_geometric_mean": -5.0082502365112305, + "step": 5399 + }, + { + "chosen_geometric_mean": -0.990806519985199, + "epoch": 1.34, + "grad_norm": 33.5, + "learning_rate": 1.2404576079670546e-06, + "log_odds": 7.753767013549805, + "log_odds_ratio": -0.11800036579370499, + "loss": 0.1994, + "rejected_geometric_mean": -8.318563461303711, + "step": 5400 + }, + { + "chosen_geometric_mean": -1.0281472206115723, + "epoch": 1.34, + "grad_norm": 18.25, + "learning_rate": 1.2396168071801457e-06, + "log_odds": 9.140482902526855, + "log_odds_ratio": -0.010941035114228725, + "loss": 0.2806, + "rejected_geometric_mean": -9.695089340209961, + "step": 5401 + }, + { + "chosen_geometric_mean": -1.1189769506454468, + "epoch": 1.34, + "grad_norm": 6.5, + "learning_rate": 1.238776197497584e-06, + "log_odds": 2.0226922035217285, + "log_odds_ratio": -0.14811207354068756, + "loss": 0.2795, + "rejected_geometric_mean": -2.8200528621673584, + "step": 5402 + }, + { + "chosen_geometric_mean": -0.9717808365821838, + "epoch": 1.34, + "grad_norm": 54.75, + "learning_rate": 1.2379357790468241e-06, + "log_odds": 9.854890823364258, + "log_odds_ratio": -0.12680351734161377, + "loss": 0.2592, + "rejected_geometric_mean": -10.417144775390625, + "step": 5403 + }, + { + "chosen_geometric_mean": -0.9513037800788879, + "epoch": 1.34, + "grad_norm": 34.5, + "learning_rate": 1.2370955519552944e-06, + "log_odds": 3.3673667907714844, + "log_odds_ratio": -0.2711328864097595, + "loss": 0.2891, + "rejected_geometric_mean": -3.990494966506958, + "step": 5404 + }, + { + "chosen_geometric_mean": -1.239811897277832, + "epoch": 1.34, + "grad_norm": 28.25, + "learning_rate": 1.2362555163503948e-06, + "log_odds": 4.236049652099609, + "log_odds_ratio": -0.2431158423423767, + "loss": 0.3007, + "rejected_geometric_mean": -5.182504653930664, + "step": 5405 + }, + { + "chosen_geometric_mean": -0.8253483772277832, + "epoch": 1.34, + "grad_norm": 18.125, + "learning_rate": 1.2354156723594937e-06, + "log_odds": 7.102006912231445, + "log_odds_ratio": -0.20990118384361267, + "loss": 0.2614, + "rejected_geometric_mean": -7.487443923950195, + "step": 5406 + }, + { + "chosen_geometric_mean": -0.8773558139801025, + "epoch": 1.34, + "grad_norm": 21.625, + "learning_rate": 1.2345760201099305e-06, + "log_odds": 5.531132221221924, + "log_odds_ratio": -0.22344988584518433, + "loss": 0.2779, + "rejected_geometric_mean": -6.037071228027344, + "step": 5407 + }, + { + "chosen_geometric_mean": -1.108262062072754, + "epoch": 1.34, + "grad_norm": 1.84375, + "learning_rate": 1.2337365597290179e-06, + "log_odds": 3.7790255546569824, + "log_odds_ratio": -0.11530811339616776, + "loss": 0.2357, + "rejected_geometric_mean": -4.481849670410156, + "step": 5408 + }, + { + "chosen_geometric_mean": -0.9658856391906738, + "epoch": 1.34, + "grad_norm": 18.375, + "learning_rate": 1.2328972913440393e-06, + "log_odds": 4.1219706535339355, + "log_odds_ratio": -0.25734761357307434, + "loss": 0.2704, + "rejected_geometric_mean": -4.78511381149292, + "step": 5409 + }, + { + "chosen_geometric_mean": -0.8893389701843262, + "epoch": 1.34, + "grad_norm": 1.96875, + "learning_rate": 1.2320582150822458e-06, + "log_odds": 9.29488754272461, + "log_odds_ratio": -0.11960963159799576, + "loss": 0.2793, + "rejected_geometric_mean": -9.720237731933594, + "step": 5410 + }, + { + "chosen_geometric_mean": -0.9925456047058105, + "epoch": 1.34, + "grad_norm": 2.140625, + "learning_rate": 1.231219331070863e-06, + "log_odds": 9.56235122680664, + "log_odds_ratio": -0.08473387360572815, + "loss": 0.2417, + "rejected_geometric_mean": -10.120485305786133, + "step": 5411 + }, + { + "chosen_geometric_mean": -1.1144342422485352, + "epoch": 1.34, + "grad_norm": 2.0625, + "learning_rate": 1.230380639437085e-06, + "log_odds": 3.316953182220459, + "log_odds_ratio": -0.18225839734077454, + "loss": 0.2781, + "rejected_geometric_mean": -4.121996879577637, + "step": 5412 + }, + { + "chosen_geometric_mean": -1.1288706064224243, + "epoch": 1.34, + "grad_norm": 8.25, + "learning_rate": 1.2295421403080782e-06, + "log_odds": 3.9973697662353516, + "log_odds_ratio": -0.1746128499507904, + "loss": 0.2834, + "rejected_geometric_mean": -4.820582389831543, + "step": 5413 + }, + { + "chosen_geometric_mean": -0.9507818222045898, + "epoch": 1.34, + "grad_norm": 1.96875, + "learning_rate": 1.2287038338109784e-06, + "log_odds": 5.458793640136719, + "log_odds_ratio": -0.16687870025634766, + "loss": 0.2261, + "rejected_geometric_mean": -6.001626014709473, + "step": 5414 + }, + { + "chosen_geometric_mean": -0.9553581476211548, + "epoch": 1.34, + "grad_norm": 8.0625, + "learning_rate": 1.227865720072894e-06, + "log_odds": 2.943005084991455, + "log_odds_ratio": -0.08342322707176208, + "loss": 0.2297, + "rejected_geometric_mean": -3.3792362213134766, + "step": 5415 + }, + { + "chosen_geometric_mean": -0.9454489946365356, + "epoch": 1.34, + "grad_norm": 26.625, + "learning_rate": 1.227027799220902e-06, + "log_odds": 13.99456787109375, + "log_odds_ratio": -0.05387333407998085, + "loss": 0.2399, + "rejected_geometric_mean": -14.473730087280273, + "step": 5416 + }, + { + "chosen_geometric_mean": -1.1115825176239014, + "epoch": 1.34, + "grad_norm": 1.8046875, + "learning_rate": 1.2261900713820528e-06, + "log_odds": 8.773508071899414, + "log_odds_ratio": -0.06836222857236862, + "loss": 0.255, + "rejected_geometric_mean": -9.517005920410156, + "step": 5417 + }, + { + "chosen_geometric_mean": -0.9772508144378662, + "epoch": 1.34, + "grad_norm": 7.09375, + "learning_rate": 1.2253525366833647e-06, + "log_odds": 4.575305938720703, + "log_odds_ratio": -0.4074156582355499, + "loss": 0.2515, + "rejected_geometric_mean": -5.3010149002075195, + "step": 5418 + }, + { + "chosen_geometric_mean": -1.1010255813598633, + "epoch": 1.34, + "grad_norm": 9.6875, + "learning_rate": 1.224515195251828e-06, + "log_odds": 3.950089454650879, + "log_odds_ratio": -0.23168538510799408, + "loss": 0.323, + "rejected_geometric_mean": -4.723321914672852, + "step": 5419 + }, + { + "chosen_geometric_mean": -0.9249681234359741, + "epoch": 1.34, + "grad_norm": 4.03125, + "learning_rate": 1.2236780472144056e-06, + "log_odds": 1.4490500688552856, + "log_odds_ratio": -0.49329662322998047, + "loss": 0.304, + "rejected_geometric_mean": -2.1980795860290527, + "step": 5420 + }, + { + "chosen_geometric_mean": -0.8363593816757202, + "epoch": 1.34, + "grad_norm": 2.234375, + "learning_rate": 1.2228410926980269e-06, + "log_odds": 6.015656471252441, + "log_odds_ratio": -0.06476432085037231, + "loss": 0.2773, + "rejected_geometric_mean": -6.341974258422852, + "step": 5421 + }, + { + "chosen_geometric_mean": -0.9672918319702148, + "epoch": 1.34, + "grad_norm": 1.953125, + "learning_rate": 1.2220043318295962e-06, + "log_odds": 6.745476245880127, + "log_odds_ratio": -0.13101649284362793, + "loss": 0.2215, + "rejected_geometric_mean": -7.285342216491699, + "step": 5422 + }, + { + "chosen_geometric_mean": -1.0676121711730957, + "epoch": 1.34, + "grad_norm": 28.75, + "learning_rate": 1.2211677647359859e-06, + "log_odds": 9.962576866149902, + "log_odds_ratio": -0.10004857182502747, + "loss": 0.2684, + "rejected_geometric_mean": -10.60159683227539, + "step": 5423 + }, + { + "chosen_geometric_mean": -1.0155452489852905, + "epoch": 1.34, + "grad_norm": 12.6875, + "learning_rate": 1.2203313915440385e-06, + "log_odds": 9.113157272338867, + "log_odds_ratio": -0.004599474836140871, + "loss": 0.2836, + "rejected_geometric_mean": -9.672952651977539, + "step": 5424 + }, + { + "chosen_geometric_mean": -0.9701234102249146, + "epoch": 1.34, + "grad_norm": 2.8125, + "learning_rate": 1.2194952123805689e-06, + "log_odds": 2.610746383666992, + "log_odds_ratio": -0.34227728843688965, + "loss": 0.2199, + "rejected_geometric_mean": -3.3210666179656982, + "step": 5425 + }, + { + "chosen_geometric_mean": -1.103243350982666, + "epoch": 1.34, + "grad_norm": 8.625, + "learning_rate": 1.2186592273723636e-06, + "log_odds": 9.362540245056152, + "log_odds_ratio": -0.20145730674266815, + "loss": 0.2822, + "rejected_geometric_mean": -10.145605087280273, + "step": 5426 + }, + { + "chosen_geometric_mean": -1.0199224948883057, + "epoch": 1.34, + "grad_norm": 2.34375, + "learning_rate": 1.2178234366461753e-06, + "log_odds": 2.775306224822998, + "log_odds_ratio": -0.20036029815673828, + "loss": 0.2657, + "rejected_geometric_mean": -3.4558074474334717, + "step": 5427 + }, + { + "chosen_geometric_mean": -0.9390372037887573, + "epoch": 1.34, + "grad_norm": 5.84375, + "learning_rate": 1.2169878403287325e-06, + "log_odds": 5.3525390625, + "log_odds_ratio": -0.15175360441207886, + "loss": 0.2768, + "rejected_geometric_mean": -5.898166656494141, + "step": 5428 + }, + { + "chosen_geometric_mean": -1.2608280181884766, + "epoch": 1.34, + "grad_norm": 5.5, + "learning_rate": 1.2161524385467303e-06, + "log_odds": 8.521088600158691, + "log_odds_ratio": -0.12042605876922607, + "loss": 0.2719, + "rejected_geometric_mean": -9.44408130645752, + "step": 5429 + }, + { + "chosen_geometric_mean": -0.9263646602630615, + "epoch": 1.34, + "grad_norm": 3.078125, + "learning_rate": 1.2153172314268338e-06, + "log_odds": 7.114321231842041, + "log_odds_ratio": -0.010658971965312958, + "loss": 0.2256, + "rejected_geometric_mean": -7.502216339111328, + "step": 5430 + }, + { + "chosen_geometric_mean": -1.064589023590088, + "epoch": 1.34, + "grad_norm": 7.90625, + "learning_rate": 1.2144822190956846e-06, + "log_odds": 0.8331623077392578, + "log_odds_ratio": -0.430217981338501, + "loss": 0.2839, + "rejected_geometric_mean": -1.7300024032592773, + "step": 5431 + }, + { + "chosen_geometric_mean": -1.154613733291626, + "epoch": 1.34, + "grad_norm": 20.0, + "learning_rate": 1.2136474016798882e-06, + "log_odds": 9.429153442382812, + "log_odds_ratio": -0.01362618152052164, + "loss": 0.2609, + "rejected_geometric_mean": -10.156614303588867, + "step": 5432 + }, + { + "chosen_geometric_mean": -1.129382610321045, + "epoch": 1.35, + "grad_norm": 10.25, + "learning_rate": 1.2128127793060224e-06, + "log_odds": 6.80463981628418, + "log_odds_ratio": -0.117740698158741, + "loss": 0.2427, + "rejected_geometric_mean": -7.551730155944824, + "step": 5433 + }, + { + "chosen_geometric_mean": -0.8089396357536316, + "epoch": 1.35, + "grad_norm": 5.9375, + "learning_rate": 1.2119783521006375e-06, + "log_odds": 4.361223220825195, + "log_odds_ratio": -0.12259941548109055, + "loss": 0.2187, + "rejected_geometric_mean": -4.634992599487305, + "step": 5434 + }, + { + "chosen_geometric_mean": -1.0195640325546265, + "epoch": 1.35, + "grad_norm": 2.75, + "learning_rate": 1.2111441201902513e-06, + "log_odds": 7.103055000305176, + "log_odds_ratio": -0.1953977346420288, + "loss": 0.2987, + "rejected_geometric_mean": -7.782997131347656, + "step": 5435 + }, + { + "chosen_geometric_mean": -1.0229986906051636, + "epoch": 1.35, + "grad_norm": 27.25, + "learning_rate": 1.210310083701354e-06, + "log_odds": 14.536230087280273, + "log_odds_ratio": -0.037020765244960785, + "loss": 0.2646, + "rejected_geometric_mean": -15.110164642333984, + "step": 5436 + }, + { + "chosen_geometric_mean": -0.8995641469955444, + "epoch": 1.35, + "grad_norm": 3.03125, + "learning_rate": 1.2094762427604065e-06, + "log_odds": 7.6293745040893555, + "log_odds_ratio": -0.13045360147953033, + "loss": 0.2594, + "rejected_geometric_mean": -8.08650016784668, + "step": 5437 + }, + { + "chosen_geometric_mean": -0.8480435609817505, + "epoch": 1.35, + "grad_norm": 2.84375, + "learning_rate": 1.2086425974938384e-06, + "log_odds": 5.605553150177002, + "log_odds_ratio": -0.1912280023097992, + "loss": 0.2444, + "rejected_geometric_mean": -6.053806781768799, + "step": 5438 + }, + { + "chosen_geometric_mean": -0.9611655473709106, + "epoch": 1.35, + "grad_norm": 41.5, + "learning_rate": 1.2078091480280491e-06, + "log_odds": 7.4470930099487305, + "log_odds_ratio": -0.21392785012722015, + "loss": 0.3095, + "rejected_geometric_mean": -8.076898574829102, + "step": 5439 + }, + { + "chosen_geometric_mean": -1.0253634452819824, + "epoch": 1.35, + "grad_norm": 2.015625, + "learning_rate": 1.2069758944894115e-06, + "log_odds": 10.897075653076172, + "log_odds_ratio": -0.08217353373765945, + "loss": 0.2459, + "rejected_geometric_mean": -11.515830993652344, + "step": 5440 + }, + { + "chosen_geometric_mean": -0.8880500197410583, + "epoch": 1.35, + "grad_norm": 4.28125, + "learning_rate": 1.2061428370042649e-06, + "log_odds": 6.667533874511719, + "log_odds_ratio": -0.07092069834470749, + "loss": 0.2464, + "rejected_geometric_mean": -7.0633344650268555, + "step": 5441 + }, + { + "chosen_geometric_mean": -0.9011472463607788, + "epoch": 1.35, + "grad_norm": 2.78125, + "learning_rate": 1.2053099756989222e-06, + "log_odds": 2.583913803100586, + "log_odds_ratio": -0.24556668102741241, + "loss": 0.2729, + "rejected_geometric_mean": -3.1627466678619385, + "step": 5442 + }, + { + "chosen_geometric_mean": -1.0242056846618652, + "epoch": 1.35, + "grad_norm": 15.6875, + "learning_rate": 1.2044773106996654e-06, + "log_odds": 13.363607406616211, + "log_odds_ratio": -0.00011591200018301606, + "loss": 0.2622, + "rejected_geometric_mean": -13.9129638671875, + "step": 5443 + }, + { + "chosen_geometric_mean": -0.967189610004425, + "epoch": 1.35, + "grad_norm": 17.5, + "learning_rate": 1.2036448421327451e-06, + "log_odds": 8.327669143676758, + "log_odds_ratio": -0.10654255747795105, + "loss": 0.2758, + "rejected_geometric_mean": -8.864032745361328, + "step": 5444 + }, + { + "chosen_geometric_mean": -0.8917712569236755, + "epoch": 1.35, + "grad_norm": 2.828125, + "learning_rate": 1.2028125701243853e-06, + "log_odds": 3.4828124046325684, + "log_odds_ratio": -0.1299881935119629, + "loss": 0.2739, + "rejected_geometric_mean": -3.9231762886047363, + "step": 5445 + }, + { + "chosen_geometric_mean": -1.264418125152588, + "epoch": 1.35, + "grad_norm": 26.125, + "learning_rate": 1.2019804948007776e-06, + "log_odds": 4.49165678024292, + "log_odds_ratio": -0.06640538573265076, + "loss": 0.2611, + "rejected_geometric_mean": -5.428243160247803, + "step": 5446 + }, + { + "chosen_geometric_mean": -1.0487780570983887, + "epoch": 1.35, + "grad_norm": 9.375, + "learning_rate": 1.2011486162880822e-06, + "log_odds": 7.078042984008789, + "log_odds_ratio": -0.072978675365448, + "loss": 0.2673, + "rejected_geometric_mean": -7.721006393432617, + "step": 5447 + }, + { + "chosen_geometric_mean": -0.9686291813850403, + "epoch": 1.35, + "grad_norm": 21.5, + "learning_rate": 1.2003169347124362e-06, + "log_odds": 6.134735107421875, + "log_odds_ratio": -0.20904365181922913, + "loss": 0.3279, + "rejected_geometric_mean": -6.7446136474609375, + "step": 5448 + }, + { + "chosen_geometric_mean": -0.8860925436019897, + "epoch": 1.35, + "grad_norm": 7.4375, + "learning_rate": 1.1994854501999407e-06, + "log_odds": 3.1073122024536133, + "log_odds_ratio": -0.22580981254577637, + "loss": 0.2666, + "rejected_geometric_mean": -3.5697789192199707, + "step": 5449 + }, + { + "chosen_geometric_mean": -0.9682229161262512, + "epoch": 1.35, + "grad_norm": 11.1875, + "learning_rate": 1.1986541628766668e-06, + "log_odds": 4.881965160369873, + "log_odds_ratio": -0.0806557834148407, + "loss": 0.2375, + "rejected_geometric_mean": -5.346676349639893, + "step": 5450 + }, + { + "chosen_geometric_mean": -0.9844521880149841, + "epoch": 1.35, + "grad_norm": 18.125, + "learning_rate": 1.1978230728686607e-06, + "log_odds": 4.408834457397461, + "log_odds_ratio": -0.05358004942536354, + "loss": 0.2282, + "rejected_geometric_mean": -4.899665832519531, + "step": 5451 + }, + { + "chosen_geometric_mean": -1.0569087266921997, + "epoch": 1.35, + "grad_norm": 55.75, + "learning_rate": 1.1969921803019327e-06, + "log_odds": 2.757114887237549, + "log_odds_ratio": -0.2066190242767334, + "loss": 0.2668, + "rejected_geometric_mean": -3.5049352645874023, + "step": 5452 + }, + { + "chosen_geometric_mean": -1.0811960697174072, + "epoch": 1.35, + "grad_norm": 2.421875, + "learning_rate": 1.1961614853024679e-06, + "log_odds": 4.474245071411133, + "log_odds_ratio": -0.1203484758734703, + "loss": 0.2862, + "rejected_geometric_mean": -5.154760837554932, + "step": 5453 + }, + { + "chosen_geometric_mean": -1.025450348854065, + "epoch": 1.35, + "grad_norm": 2.203125, + "learning_rate": 1.1953309879962199e-06, + "log_odds": 8.527192115783691, + "log_odds_ratio": -0.23252594470977783, + "loss": 0.2672, + "rejected_geometric_mean": -9.226235389709473, + "step": 5454 + }, + { + "chosen_geometric_mean": -1.1025112867355347, + "epoch": 1.35, + "grad_norm": 2.078125, + "learning_rate": 1.1945006885091112e-06, + "log_odds": 3.1861793994903564, + "log_odds_ratio": -0.09300297498703003, + "loss": 0.2266, + "rejected_geometric_mean": -3.9348061084747314, + "step": 5455 + }, + { + "chosen_geometric_mean": -0.97259122133255, + "epoch": 1.35, + "grad_norm": 2.6875, + "learning_rate": 1.1936705869670341e-06, + "log_odds": 3.176255941390991, + "log_odds_ratio": -0.26939666271209717, + "loss": 0.2349, + "rejected_geometric_mean": -3.8311102390289307, + "step": 5456 + }, + { + "chosen_geometric_mean": -1.1740882396697998, + "epoch": 1.35, + "grad_norm": 6.625, + "learning_rate": 1.1928406834958536e-06, + "log_odds": 6.588276386260986, + "log_odds_ratio": -0.1098586767911911, + "loss": 0.2881, + "rejected_geometric_mean": -7.444415092468262, + "step": 5457 + }, + { + "chosen_geometric_mean": -1.0694297552108765, + "epoch": 1.35, + "grad_norm": 3.234375, + "learning_rate": 1.192010978221403e-06, + "log_odds": 6.230316162109375, + "log_odds_ratio": -0.20103895664215088, + "loss": 0.2553, + "rejected_geometric_mean": -7.0116682052612305, + "step": 5458 + }, + { + "chosen_geometric_mean": -0.9886742830276489, + "epoch": 1.35, + "grad_norm": 8.6875, + "learning_rate": 1.1911814712694841e-06, + "log_odds": 10.107940673828125, + "log_odds_ratio": -0.01648665964603424, + "loss": 0.2834, + "rejected_geometric_mean": -10.636575698852539, + "step": 5459 + }, + { + "chosen_geometric_mean": -0.9650636911392212, + "epoch": 1.35, + "grad_norm": 22.375, + "learning_rate": 1.1903521627658717e-06, + "log_odds": 6.241541862487793, + "log_odds_ratio": -0.08805913478136063, + "loss": 0.3339, + "rejected_geometric_mean": -6.760403633117676, + "step": 5460 + }, + { + "chosen_geometric_mean": -1.0227019786834717, + "epoch": 1.35, + "grad_norm": 14.75, + "learning_rate": 1.1895230528363076e-06, + "log_odds": 9.041202545166016, + "log_odds_ratio": -0.0008907898445613682, + "loss": 0.2047, + "rejected_geometric_mean": -9.584796905517578, + "step": 5461 + }, + { + "chosen_geometric_mean": -1.057472825050354, + "epoch": 1.35, + "grad_norm": 16.875, + "learning_rate": 1.1886941416065056e-06, + "log_odds": 8.335600852966309, + "log_odds_ratio": -0.1146833598613739, + "loss": 0.2726, + "rejected_geometric_mean": -9.024535179138184, + "step": 5462 + }, + { + "chosen_geometric_mean": -1.0287189483642578, + "epoch": 1.35, + "grad_norm": 5.1875, + "learning_rate": 1.1878654292021477e-06, + "log_odds": 4.820544719696045, + "log_odds_ratio": -0.18139515817165375, + "loss": 0.3003, + "rejected_geometric_mean": -5.42470645904541, + "step": 5463 + }, + { + "chosen_geometric_mean": -1.151519536972046, + "epoch": 1.35, + "grad_norm": 32.25, + "learning_rate": 1.1870369157488879e-06, + "log_odds": 3.094663143157959, + "log_odds_ratio": -0.24989065527915955, + "loss": 0.2828, + "rejected_geometric_mean": -3.9972760677337646, + "step": 5464 + }, + { + "chosen_geometric_mean": -0.8865658044815063, + "epoch": 1.35, + "grad_norm": 14.375, + "learning_rate": 1.186208601372347e-06, + "log_odds": 7.110083103179932, + "log_odds_ratio": -0.09951615333557129, + "loss": 0.2592, + "rejected_geometric_mean": -7.51018762588501, + "step": 5465 + }, + { + "chosen_geometric_mean": -0.9040393829345703, + "epoch": 1.35, + "grad_norm": 4.0, + "learning_rate": 1.1853804861981194e-06, + "log_odds": 8.804000854492188, + "log_odds_ratio": -0.07460872828960419, + "loss": 0.2872, + "rejected_geometric_mean": -9.197978019714355, + "step": 5466 + }, + { + "chosen_geometric_mean": -1.139880895614624, + "epoch": 1.35, + "grad_norm": 46.5, + "learning_rate": 1.184552570351765e-06, + "log_odds": 5.173205375671387, + "log_odds_ratio": -0.3656778335571289, + "loss": 0.2742, + "rejected_geometric_mean": -6.113912582397461, + "step": 5467 + }, + { + "chosen_geometric_mean": -1.1406939029693604, + "epoch": 1.35, + "grad_norm": 13.3125, + "learning_rate": 1.1837248539588168e-06, + "log_odds": 8.723567008972168, + "log_odds_ratio": -0.14976368844509125, + "loss": 0.2577, + "rejected_geometric_mean": -9.535823822021484, + "step": 5468 + }, + { + "chosen_geometric_mean": -1.1110384464263916, + "epoch": 1.35, + "grad_norm": 28.75, + "learning_rate": 1.1828973371447775e-06, + "log_odds": 2.658515453338623, + "log_odds_ratio": -0.564753532409668, + "loss": 0.2916, + "rejected_geometric_mean": -3.5195322036743164, + "step": 5469 + }, + { + "chosen_geometric_mean": -0.8454329967498779, + "epoch": 1.35, + "grad_norm": 1.953125, + "learning_rate": 1.1820700200351165e-06, + "log_odds": 3.0340735912323, + "log_odds_ratio": -0.3553270995616913, + "loss": 0.2293, + "rejected_geometric_mean": -3.602750301361084, + "step": 5470 + }, + { + "chosen_geometric_mean": -1.179088830947876, + "epoch": 1.35, + "grad_norm": 3.453125, + "learning_rate": 1.1812429027552768e-06, + "log_odds": 1.0510025024414062, + "log_odds_ratio": -0.34362706542015076, + "loss": 0.2497, + "rejected_geometric_mean": -2.049696683883667, + "step": 5471 + }, + { + "chosen_geometric_mean": -0.9517011642456055, + "epoch": 1.35, + "grad_norm": 28.625, + "learning_rate": 1.1804159854306688e-06, + "log_odds": 0.8059666156768799, + "log_odds_ratio": -0.42033666372299194, + "loss": 0.2932, + "rejected_geometric_mean": -1.5798919200897217, + "step": 5472 + }, + { + "chosen_geometric_mean": -1.119663953781128, + "epoch": 1.36, + "grad_norm": 17.5, + "learning_rate": 1.1795892681866714e-06, + "log_odds": 3.260526180267334, + "log_odds_ratio": -0.29302525520324707, + "loss": 0.2607, + "rejected_geometric_mean": -4.133875370025635, + "step": 5473 + }, + { + "chosen_geometric_mean": -1.0787895917892456, + "epoch": 1.36, + "grad_norm": 9.8125, + "learning_rate": 1.1787627511486358e-06, + "log_odds": 1.0678412914276123, + "log_odds_ratio": -0.4538519084453583, + "loss": 0.2997, + "rejected_geometric_mean": -1.9890062808990479, + "step": 5474 + }, + { + "chosen_geometric_mean": -0.9837669730186462, + "epoch": 1.36, + "grad_norm": 31.0, + "learning_rate": 1.1779364344418831e-06, + "log_odds": 6.1055216789245605, + "log_odds_ratio": -0.25896790623664856, + "loss": 0.306, + "rejected_geometric_mean": -6.815820217132568, + "step": 5475 + }, + { + "chosen_geometric_mean": -0.8660834431648254, + "epoch": 1.36, + "grad_norm": 2.875, + "learning_rate": 1.1771103181917008e-06, + "log_odds": 4.808960914611816, + "log_odds_ratio": -0.26238495111465454, + "loss": 0.2848, + "rejected_geometric_mean": -5.249478340148926, + "step": 5476 + }, + { + "chosen_geometric_mean": -0.9163619875907898, + "epoch": 1.36, + "grad_norm": 3.40625, + "learning_rate": 1.1762844025233495e-06, + "log_odds": 3.6287903785705566, + "log_odds_ratio": -0.17063724994659424, + "loss": 0.2665, + "rejected_geometric_mean": -4.131038188934326, + "step": 5477 + }, + { + "chosen_geometric_mean": -0.9891753792762756, + "epoch": 1.36, + "grad_norm": 3.078125, + "learning_rate": 1.1754586875620571e-06, + "log_odds": 9.626385688781738, + "log_odds_ratio": -0.02685678005218506, + "loss": 0.2385, + "rejected_geometric_mean": -10.156518936157227, + "step": 5478 + }, + { + "chosen_geometric_mean": -0.9438142776489258, + "epoch": 1.36, + "grad_norm": 3.078125, + "learning_rate": 1.1746331734330203e-06, + "log_odds": 4.757570743560791, + "log_odds_ratio": -0.1128728836774826, + "loss": 0.2758, + "rejected_geometric_mean": -5.2584123611450195, + "step": 5479 + }, + { + "chosen_geometric_mean": -1.018129825592041, + "epoch": 1.36, + "grad_norm": 2.21875, + "learning_rate": 1.17380786026141e-06, + "log_odds": 8.455123901367188, + "log_odds_ratio": -0.021237516775727272, + "loss": 0.2502, + "rejected_geometric_mean": -9.033822059631348, + "step": 5480 + }, + { + "chosen_geometric_mean": -0.8754857182502747, + "epoch": 1.36, + "grad_norm": 2.796875, + "learning_rate": 1.1729827481723622e-06, + "log_odds": 3.074444055557251, + "log_odds_ratio": -0.09600979089736938, + "loss": 0.2469, + "rejected_geometric_mean": -3.475386381149292, + "step": 5481 + }, + { + "chosen_geometric_mean": -0.8441970944404602, + "epoch": 1.36, + "grad_norm": 2.8125, + "learning_rate": 1.1721578372909819e-06, + "log_odds": 8.095661163330078, + "log_odds_ratio": -0.024156766012310982, + "loss": 0.2592, + "rejected_geometric_mean": -8.382377624511719, + "step": 5482 + }, + { + "chosen_geometric_mean": -0.9923582673072815, + "epoch": 1.36, + "grad_norm": 2.125, + "learning_rate": 1.1713331277423485e-06, + "log_odds": 7.396655082702637, + "log_odds_ratio": -0.14760024845600128, + "loss": 0.2808, + "rejected_geometric_mean": -7.986120223999023, + "step": 5483 + }, + { + "chosen_geometric_mean": -1.0709491968154907, + "epoch": 1.36, + "grad_norm": 7.875, + "learning_rate": 1.1705086196515048e-06, + "log_odds": 12.564346313476562, + "log_odds_ratio": -0.1344541609287262, + "loss": 0.257, + "rejected_geometric_mean": -13.282479286193848, + "step": 5484 + }, + { + "chosen_geometric_mean": -1.057222604751587, + "epoch": 1.36, + "grad_norm": 56.0, + "learning_rate": 1.1696843131434674e-06, + "log_odds": 4.914257049560547, + "log_odds_ratio": -0.15457962453365326, + "loss": 0.2715, + "rejected_geometric_mean": -5.62973690032959, + "step": 5485 + }, + { + "chosen_geometric_mean": -1.0452582836151123, + "epoch": 1.36, + "grad_norm": 25.125, + "learning_rate": 1.1688602083432218e-06, + "log_odds": 4.383933067321777, + "log_odds_ratio": -0.33518916368484497, + "loss": 0.2574, + "rejected_geometric_mean": -5.1564435958862305, + "step": 5486 + }, + { + "chosen_geometric_mean": -1.036220908164978, + "epoch": 1.36, + "grad_norm": 24.375, + "learning_rate": 1.1680363053757205e-06, + "log_odds": 6.591869831085205, + "log_odds_ratio": -0.09171362966299057, + "loss": 0.2991, + "rejected_geometric_mean": -7.229089736938477, + "step": 5487 + }, + { + "chosen_geometric_mean": -1.049526333808899, + "epoch": 1.36, + "grad_norm": 2.078125, + "learning_rate": 1.1672126043658882e-06, + "log_odds": 12.657733917236328, + "log_odds_ratio": -0.0016920507187023759, + "loss": 0.2644, + "rejected_geometric_mean": -13.251511573791504, + "step": 5488 + }, + { + "chosen_geometric_mean": -0.9396386742591858, + "epoch": 1.36, + "grad_norm": 4.46875, + "learning_rate": 1.1663891054386175e-06, + "log_odds": 3.8650097846984863, + "log_odds_ratio": -0.2183564156293869, + "loss": 0.2468, + "rejected_geometric_mean": -4.4228835105896, + "step": 5489 + }, + { + "chosen_geometric_mean": -0.8375095725059509, + "epoch": 1.36, + "grad_norm": 5.5, + "learning_rate": 1.1655658087187693e-06, + "log_odds": 15.51228141784668, + "log_odds_ratio": -1.3113194654579274e-05, + "loss": 0.2269, + "rejected_geometric_mean": -15.770868301391602, + "step": 5490 + }, + { + "chosen_geometric_mean": -1.0520278215408325, + "epoch": 1.36, + "grad_norm": 6.4375, + "learning_rate": 1.1647427143311763e-06, + "log_odds": 7.856998443603516, + "log_odds_ratio": -0.20641161501407623, + "loss": 0.2354, + "rejected_geometric_mean": -8.629739761352539, + "step": 5491 + }, + { + "chosen_geometric_mean": -0.9278661012649536, + "epoch": 1.36, + "grad_norm": 33.75, + "learning_rate": 1.16391982240064e-06, + "log_odds": 4.293978691101074, + "log_odds_ratio": -0.08955594897270203, + "loss": 0.256, + "rejected_geometric_mean": -4.734055519104004, + "step": 5492 + }, + { + "chosen_geometric_mean": -0.9334282875061035, + "epoch": 1.36, + "grad_norm": 25.625, + "learning_rate": 1.1630971330519286e-06, + "log_odds": 4.299685478210449, + "log_odds_ratio": -0.15290988981723785, + "loss": 0.2809, + "rejected_geometric_mean": -4.845776557922363, + "step": 5493 + }, + { + "chosen_geometric_mean": -1.0854970216751099, + "epoch": 1.36, + "grad_norm": 6.34375, + "learning_rate": 1.162274646409784e-06, + "log_odds": 1.5285205841064453, + "log_odds_ratio": -0.3433336615562439, + "loss": 0.3091, + "rejected_geometric_mean": -2.338550329208374, + "step": 5494 + }, + { + "chosen_geometric_mean": -1.0798873901367188, + "epoch": 1.36, + "grad_norm": 12.375, + "learning_rate": 1.1614523625989135e-06, + "log_odds": 7.5589070320129395, + "log_odds_ratio": -0.15301795303821564, + "loss": 0.2436, + "rejected_geometric_mean": -8.269667625427246, + "step": 5495 + }, + { + "chosen_geometric_mean": -0.9878411293029785, + "epoch": 1.36, + "grad_norm": 3.484375, + "learning_rate": 1.1606302817439935e-06, + "log_odds": 11.01403522491455, + "log_odds_ratio": -0.15590330958366394, + "loss": 0.2581, + "rejected_geometric_mean": -11.629773139953613, + "step": 5496 + }, + { + "chosen_geometric_mean": -1.262068271636963, + "epoch": 1.36, + "grad_norm": 5.71875, + "learning_rate": 1.1598084039696746e-06, + "log_odds": 3.7629103660583496, + "log_odds_ratio": -0.3197355270385742, + "loss": 0.2769, + "rejected_geometric_mean": -4.847822189331055, + "step": 5497 + }, + { + "chosen_geometric_mean": -0.978240966796875, + "epoch": 1.36, + "grad_norm": 23.0, + "learning_rate": 1.1589867294005717e-06, + "log_odds": 10.58588695526123, + "log_odds_ratio": -0.09203023463487625, + "loss": 0.2654, + "rejected_geometric_mean": -11.158233642578125, + "step": 5498 + }, + { + "chosen_geometric_mean": -1.1878314018249512, + "epoch": 1.36, + "grad_norm": 11.6875, + "learning_rate": 1.158165258161269e-06, + "log_odds": 13.348297119140625, + "log_odds_ratio": -0.10874820500612259, + "loss": 0.3164, + "rejected_geometric_mean": -14.232807159423828, + "step": 5499 + }, + { + "chosen_geometric_mean": -1.012376070022583, + "epoch": 1.36, + "grad_norm": 2.09375, + "learning_rate": 1.1573439903763236e-06, + "log_odds": 3.6541805267333984, + "log_odds_ratio": -0.2339240163564682, + "loss": 0.257, + "rejected_geometric_mean": -4.358506202697754, + "step": 5500 + }, + { + "chosen_geometric_mean": -0.7198750972747803, + "epoch": 1.36, + "grad_norm": 14.0, + "learning_rate": 1.156522926170257e-06, + "log_odds": 6.8597307205200195, + "log_odds_ratio": -0.22164253890514374, + "loss": 0.2733, + "rejected_geometric_mean": -7.051097869873047, + "step": 5501 + }, + { + "chosen_geometric_mean": -0.8734937906265259, + "epoch": 1.36, + "grad_norm": 2.0625, + "learning_rate": 1.155702065667564e-06, + "log_odds": 4.356527328491211, + "log_odds_ratio": -0.2917092442512512, + "loss": 0.2245, + "rejected_geometric_mean": -4.89249324798584, + "step": 5502 + }, + { + "chosen_geometric_mean": -1.1395539045333862, + "epoch": 1.36, + "grad_norm": 5.90625, + "learning_rate": 1.1548814089927068e-06, + "log_odds": 6.3501434326171875, + "log_odds_ratio": -0.005749610252678394, + "loss": 0.2463, + "rejected_geometric_mean": -7.080260276794434, + "step": 5503 + }, + { + "chosen_geometric_mean": -1.1271286010742188, + "epoch": 1.36, + "grad_norm": 25.0, + "learning_rate": 1.1540609562701164e-06, + "log_odds": 4.4626030921936035, + "log_odds_ratio": -0.23258927464485168, + "loss": 0.2574, + "rejected_geometric_mean": -5.331212043762207, + "step": 5504 + }, + { + "chosen_geometric_mean": -1.093193769454956, + "epoch": 1.36, + "grad_norm": 3.265625, + "learning_rate": 1.1532407076241917e-06, + "log_odds": 9.696527481079102, + "log_odds_ratio": -0.1569804847240448, + "loss": 0.2315, + "rejected_geometric_mean": -10.501330375671387, + "step": 5505 + }, + { + "chosen_geometric_mean": -1.0965287685394287, + "epoch": 1.36, + "grad_norm": 15.8125, + "learning_rate": 1.1524206631793044e-06, + "log_odds": 5.200136661529541, + "log_odds_ratio": -0.02099672518670559, + "loss": 0.2566, + "rejected_geometric_mean": -5.845930099487305, + "step": 5506 + }, + { + "chosen_geometric_mean": -0.9747902154922485, + "epoch": 1.36, + "grad_norm": 5.25, + "learning_rate": 1.1516008230597906e-06, + "log_odds": 7.076814651489258, + "log_odds_ratio": -0.08308286964893341, + "loss": 0.1992, + "rejected_geometric_mean": -7.618160247802734, + "step": 5507 + }, + { + "chosen_geometric_mean": -1.0315439701080322, + "epoch": 1.36, + "grad_norm": 2.515625, + "learning_rate": 1.1507811873899588e-06, + "log_odds": 9.491092681884766, + "log_odds_ratio": -0.0024084243923425674, + "loss": 0.2751, + "rejected_geometric_mean": -10.066720962524414, + "step": 5508 + }, + { + "chosen_geometric_mean": -0.9262397289276123, + "epoch": 1.36, + "grad_norm": 5.21875, + "learning_rate": 1.1499617562940868e-06, + "log_odds": 2.888108253479004, + "log_odds_ratio": -0.26298415660858154, + "loss": 0.2713, + "rejected_geometric_mean": -3.431504726409912, + "step": 5509 + }, + { + "chosen_geometric_mean": -1.0156261920928955, + "epoch": 1.36, + "grad_norm": 8.8125, + "learning_rate": 1.1491425298964177e-06, + "log_odds": 4.2833099365234375, + "log_odds_ratio": -0.038300905376672745, + "loss": 0.2657, + "rejected_geometric_mean": -4.868925094604492, + "step": 5510 + }, + { + "chosen_geometric_mean": -0.889630138874054, + "epoch": 1.36, + "grad_norm": 2.4375, + "learning_rate": 1.1483235083211678e-06, + "log_odds": 12.09214973449707, + "log_odds_ratio": -0.012447604909539223, + "loss": 0.2295, + "rejected_geometric_mean": -12.424213409423828, + "step": 5511 + }, + { + "chosen_geometric_mean": -1.1888139247894287, + "epoch": 1.36, + "grad_norm": 39.5, + "learning_rate": 1.1475046916925185e-06, + "log_odds": 10.007987022399902, + "log_odds_ratio": -0.1575884073972702, + "loss": 0.2998, + "rejected_geometric_mean": -10.906599998474121, + "step": 5512 + }, + { + "chosen_geometric_mean": -0.9915916919708252, + "epoch": 1.36, + "grad_norm": 13.25, + "learning_rate": 1.1466860801346241e-06, + "log_odds": 4.061917304992676, + "log_odds_ratio": -0.07440522313117981, + "loss": 0.2465, + "rejected_geometric_mean": -4.608470439910889, + "step": 5513 + }, + { + "chosen_geometric_mean": -0.9041963815689087, + "epoch": 1.37, + "grad_norm": 5.375, + "learning_rate": 1.145867673771604e-06, + "log_odds": 6.228906154632568, + "log_odds_ratio": -0.13265059888362885, + "loss": 0.2361, + "rejected_geometric_mean": -6.691286087036133, + "step": 5514 + }, + { + "chosen_geometric_mean": -0.9325692653656006, + "epoch": 1.37, + "grad_norm": 23.625, + "learning_rate": 1.14504947272755e-06, + "log_odds": 10.299639701843262, + "log_odds_ratio": -0.0001778388541424647, + "loss": 0.2791, + "rejected_geometric_mean": -10.709905624389648, + "step": 5515 + }, + { + "chosen_geometric_mean": -1.1052799224853516, + "epoch": 1.37, + "grad_norm": 2.671875, + "learning_rate": 1.144231477126519e-06, + "log_odds": 4.015106201171875, + "log_odds_ratio": -0.03772430494427681, + "loss": 0.1934, + "rejected_geometric_mean": -4.729938983917236, + "step": 5516 + }, + { + "chosen_geometric_mean": -1.0571398735046387, + "epoch": 1.37, + "grad_norm": 4.53125, + "learning_rate": 1.1434136870925408e-06, + "log_odds": 10.028587341308594, + "log_odds_ratio": -0.000524483504705131, + "loss": 0.2582, + "rejected_geometric_mean": -10.630102157592773, + "step": 5517 + }, + { + "chosen_geometric_mean": -0.8899089097976685, + "epoch": 1.37, + "grad_norm": 2.03125, + "learning_rate": 1.1425961027496099e-06, + "log_odds": 10.316449165344238, + "log_odds_ratio": -0.18400955200195312, + "loss": 0.2246, + "rejected_geometric_mean": -10.74056625366211, + "step": 5518 + }, + { + "chosen_geometric_mean": -0.9110040068626404, + "epoch": 1.37, + "grad_norm": 18.625, + "learning_rate": 1.1417787242216932e-06, + "log_odds": 8.666685104370117, + "log_odds_ratio": -0.10288988798856735, + "loss": 0.2996, + "rejected_geometric_mean": -9.031233787536621, + "step": 5519 + }, + { + "chosen_geometric_mean": -0.7162787914276123, + "epoch": 1.37, + "grad_norm": 6.1875, + "learning_rate": 1.1409615516327253e-06, + "log_odds": 8.555804252624512, + "log_odds_ratio": -0.12619636952877045, + "loss": 0.2186, + "rejected_geometric_mean": -8.733593940734863, + "step": 5520 + }, + { + "chosen_geometric_mean": -1.0607361793518066, + "epoch": 1.37, + "grad_norm": 2.90625, + "learning_rate": 1.1401445851066084e-06, + "log_odds": 6.16648006439209, + "log_odds_ratio": -0.21992424130439758, + "loss": 0.2791, + "rejected_geometric_mean": -6.895550727844238, + "step": 5521 + }, + { + "chosen_geometric_mean": -0.8845592737197876, + "epoch": 1.37, + "grad_norm": 4.90625, + "learning_rate": 1.1393278247672132e-06, + "log_odds": 12.289743423461914, + "log_odds_ratio": -0.005003579892218113, + "loss": 0.2697, + "rejected_geometric_mean": -12.634381294250488, + "step": 5522 + }, + { + "chosen_geometric_mean": -1.0032426118850708, + "epoch": 1.37, + "grad_norm": 7.46875, + "learning_rate": 1.1385112707383813e-06, + "log_odds": 7.2379045486450195, + "log_odds_ratio": -0.019462479278445244, + "loss": 0.2655, + "rejected_geometric_mean": -7.795624732971191, + "step": 5523 + }, + { + "chosen_geometric_mean": -1.042885661125183, + "epoch": 1.37, + "grad_norm": 9.875, + "learning_rate": 1.137694923143923e-06, + "log_odds": 7.68349027633667, + "log_odds_ratio": -0.10818089544773102, + "loss": 0.2466, + "rejected_geometric_mean": -8.345248222351074, + "step": 5524 + }, + { + "chosen_geometric_mean": -0.9242053627967834, + "epoch": 1.37, + "grad_norm": 3.40625, + "learning_rate": 1.1368787821076138e-06, + "log_odds": 8.440802574157715, + "log_odds_ratio": -0.025538239628076553, + "loss": 0.2196, + "rejected_geometric_mean": -8.867830276489258, + "step": 5525 + }, + { + "chosen_geometric_mean": -1.7615078687667847, + "epoch": 1.37, + "grad_norm": 21.5, + "learning_rate": 1.1360628477532023e-06, + "log_odds": 9.706141471862793, + "log_odds_ratio": -0.002186043653637171, + "loss": 0.2988, + "rejected_geometric_mean": -11.084589004516602, + "step": 5526 + }, + { + "chosen_geometric_mean": -0.9927074909210205, + "epoch": 1.37, + "grad_norm": 3.1875, + "learning_rate": 1.1352471202044032e-06, + "log_odds": 12.873414993286133, + "log_odds_ratio": -0.003247623099014163, + "loss": 0.2372, + "rejected_geometric_mean": -13.388931274414062, + "step": 5527 + }, + { + "chosen_geometric_mean": -0.9004260301589966, + "epoch": 1.37, + "grad_norm": 2.125, + "learning_rate": 1.1344315995848992e-06, + "log_odds": 3.0791866779327393, + "log_odds_ratio": -0.31151601672172546, + "loss": 0.3054, + "rejected_geometric_mean": -3.651301383972168, + "step": 5528 + }, + { + "chosen_geometric_mean": -0.9347275495529175, + "epoch": 1.37, + "grad_norm": 14.4375, + "learning_rate": 1.1336162860183436e-06, + "log_odds": 9.485517501831055, + "log_odds_ratio": -0.12390676140785217, + "loss": 0.3341, + "rejected_geometric_mean": -10.007270812988281, + "step": 5529 + }, + { + "chosen_geometric_mean": -0.9578517079353333, + "epoch": 1.37, + "grad_norm": 4.21875, + "learning_rate": 1.1328011796283584e-06, + "log_odds": 9.926423072814941, + "log_odds_ratio": -0.06119921803474426, + "loss": 0.2666, + "rejected_geometric_mean": -10.42914867401123, + "step": 5530 + }, + { + "chosen_geometric_mean": -0.8898786306381226, + "epoch": 1.37, + "grad_norm": 3.671875, + "learning_rate": 1.1319862805385318e-06, + "log_odds": 5.705065727233887, + "log_odds_ratio": -0.18365278840065002, + "loss": 0.2214, + "rejected_geometric_mean": -6.177114486694336, + "step": 5531 + }, + { + "chosen_geometric_mean": -0.9563998579978943, + "epoch": 1.37, + "grad_norm": 23.0, + "learning_rate": 1.1311715888724233e-06, + "log_odds": 4.370951175689697, + "log_odds_ratio": -0.12314838916063309, + "loss": 0.2729, + "rejected_geometric_mean": -4.882421016693115, + "step": 5532 + }, + { + "chosen_geometric_mean": -0.8392297029495239, + "epoch": 1.37, + "grad_norm": 2.4375, + "learning_rate": 1.1303571047535587e-06, + "log_odds": 14.68881607055664, + "log_odds_ratio": -5.172665260033682e-05, + "loss": 0.2576, + "rejected_geometric_mean": -14.934826850891113, + "step": 5533 + }, + { + "chosen_geometric_mean": -1.0929306745529175, + "epoch": 1.37, + "grad_norm": 2.015625, + "learning_rate": 1.1295428283054336e-06, + "log_odds": 8.898710250854492, + "log_odds_ratio": -0.010979902930557728, + "loss": 0.256, + "rejected_geometric_mean": -9.542108535766602, + "step": 5534 + }, + { + "chosen_geometric_mean": -0.8369307518005371, + "epoch": 1.37, + "grad_norm": 5.375, + "learning_rate": 1.128728759651513e-06, + "log_odds": 6.376556873321533, + "log_odds_ratio": -0.1775771528482437, + "loss": 0.2712, + "rejected_geometric_mean": -6.771132946014404, + "step": 5535 + }, + { + "chosen_geometric_mean": -0.87486732006073, + "epoch": 1.37, + "grad_norm": 3.859375, + "learning_rate": 1.1279148989152274e-06, + "log_odds": 1.9808605909347534, + "log_odds_ratio": -0.1663118600845337, + "loss": 0.2498, + "rejected_geometric_mean": -2.380337715148926, + "step": 5536 + }, + { + "chosen_geometric_mean": -0.9707993268966675, + "epoch": 1.37, + "grad_norm": 2.421875, + "learning_rate": 1.1271012462199796e-06, + "log_odds": 3.151383638381958, + "log_odds_ratio": -0.15812982618808746, + "loss": 0.2953, + "rejected_geometric_mean": -3.7055888175964355, + "step": 5537 + }, + { + "chosen_geometric_mean": -0.9678552746772766, + "epoch": 1.37, + "grad_norm": 4.90625, + "learning_rate": 1.1262878016891376e-06, + "log_odds": 15.187609672546387, + "log_odds_ratio": -3.093597842962481e-05, + "loss": 0.243, + "rejected_geometric_mean": -15.657510757446289, + "step": 5538 + }, + { + "chosen_geometric_mean": -0.9955285787582397, + "epoch": 1.37, + "grad_norm": 19.625, + "learning_rate": 1.1254745654460387e-06, + "log_odds": 2.8770129680633545, + "log_odds_ratio": -0.2697615325450897, + "loss": 0.2922, + "rejected_geometric_mean": -3.5798749923706055, + "step": 5539 + }, + { + "chosen_geometric_mean": -0.9287533760070801, + "epoch": 1.37, + "grad_norm": 5.09375, + "learning_rate": 1.1246615376139893e-06, + "log_odds": 5.042215824127197, + "log_odds_ratio": -0.2553909122943878, + "loss": 0.239, + "rejected_geometric_mean": -5.629467010498047, + "step": 5540 + }, + { + "chosen_geometric_mean": -0.8860131502151489, + "epoch": 1.37, + "grad_norm": 19.75, + "learning_rate": 1.1238487183162652e-06, + "log_odds": 9.087775230407715, + "log_odds_ratio": -0.13401465117931366, + "loss": 0.2552, + "rejected_geometric_mean": -9.483424186706543, + "step": 5541 + }, + { + "chosen_geometric_mean": -0.8703140616416931, + "epoch": 1.37, + "grad_norm": 3.90625, + "learning_rate": 1.123036107676108e-06, + "log_odds": 8.03272819519043, + "log_odds_ratio": -0.08674665540456772, + "loss": 0.244, + "rejected_geometric_mean": -8.423049926757812, + "step": 5542 + }, + { + "chosen_geometric_mean": -0.8893972039222717, + "epoch": 1.37, + "grad_norm": 4.84375, + "learning_rate": 1.1222237058167299e-06, + "log_odds": 4.970322132110596, + "log_odds_ratio": -0.16035096347332, + "loss": 0.2311, + "rejected_geometric_mean": -5.380014419555664, + "step": 5543 + }, + { + "chosen_geometric_mean": -0.9592532515525818, + "epoch": 1.37, + "grad_norm": 17.875, + "learning_rate": 1.1214115128613101e-06, + "log_odds": 4.060795783996582, + "log_odds_ratio": -0.268909752368927, + "loss": 0.2645, + "rejected_geometric_mean": -4.748607635498047, + "step": 5544 + }, + { + "chosen_geometric_mean": -1.1420599222183228, + "epoch": 1.37, + "grad_norm": 4.25, + "learning_rate": 1.1205995289329948e-06, + "log_odds": 1.9455255270004272, + "log_odds_ratio": -0.21320176124572754, + "loss": 0.2759, + "rejected_geometric_mean": -2.756410598754883, + "step": 5545 + }, + { + "chosen_geometric_mean": -0.925366997718811, + "epoch": 1.37, + "grad_norm": 3.90625, + "learning_rate": 1.1197877541549038e-06, + "log_odds": 3.4196150302886963, + "log_odds_ratio": -0.22483599185943604, + "loss": 0.2546, + "rejected_geometric_mean": -3.94142746925354, + "step": 5546 + }, + { + "chosen_geometric_mean": -0.8935316801071167, + "epoch": 1.37, + "grad_norm": 3.0625, + "learning_rate": 1.1189761886501196e-06, + "log_odds": 5.958181381225586, + "log_odds_ratio": -0.15562745928764343, + "loss": 0.2474, + "rejected_geometric_mean": -6.419137001037598, + "step": 5547 + }, + { + "chosen_geometric_mean": -1.0825464725494385, + "epoch": 1.37, + "grad_norm": 4.21875, + "learning_rate": 1.1181648325416941e-06, + "log_odds": 6.321728706359863, + "log_odds_ratio": -0.3167739510536194, + "loss": 0.2565, + "rejected_geometric_mean": -7.165645599365234, + "step": 5548 + }, + { + "chosen_geometric_mean": -0.9592514038085938, + "epoch": 1.37, + "grad_norm": 7.5, + "learning_rate": 1.1173536859526507e-06, + "log_odds": 6.828318119049072, + "log_odds_ratio": -0.0909719318151474, + "loss": 0.2765, + "rejected_geometric_mean": -7.370797157287598, + "step": 5549 + }, + { + "chosen_geometric_mean": -1.022066593170166, + "epoch": 1.37, + "grad_norm": 4.28125, + "learning_rate": 1.1165427490059762e-06, + "log_odds": 3.6061148643493652, + "log_odds_ratio": -0.2631065249443054, + "loss": 0.2652, + "rejected_geometric_mean": -4.350371837615967, + "step": 5550 + }, + { + "chosen_geometric_mean": -0.8117688894271851, + "epoch": 1.37, + "grad_norm": 22.25, + "learning_rate": 1.1157320218246297e-06, + "log_odds": 7.297163009643555, + "log_odds_ratio": -0.14283819496631622, + "loss": 0.2422, + "rejected_geometric_mean": -7.641977310180664, + "step": 5551 + }, + { + "chosen_geometric_mean": -1.442025899887085, + "epoch": 1.37, + "grad_norm": 12.8125, + "learning_rate": 1.1149215045315373e-06, + "log_odds": 6.333072185516357, + "log_odds_ratio": -0.1368417739868164, + "loss": 0.3156, + "rejected_geometric_mean": -7.540823459625244, + "step": 5552 + }, + { + "chosen_geometric_mean": -0.9774426817893982, + "epoch": 1.37, + "grad_norm": 42.25, + "learning_rate": 1.1141111972495921e-06, + "log_odds": 5.284224510192871, + "log_odds_ratio": -0.13099053502082825, + "loss": 0.3235, + "rejected_geometric_mean": -5.8430585861206055, + "step": 5553 + }, + { + "chosen_geometric_mean": -0.8946282267570496, + "epoch": 1.38, + "grad_norm": 5.0, + "learning_rate": 1.1133011001016554e-06, + "log_odds": 6.256185531616211, + "log_odds_ratio": -0.15125901997089386, + "loss": 0.235, + "rejected_geometric_mean": -6.719231605529785, + "step": 5554 + }, + { + "chosen_geometric_mean": -1.0651001930236816, + "epoch": 1.38, + "grad_norm": 8.375, + "learning_rate": 1.112491213210559e-06, + "log_odds": 14.281743049621582, + "log_odds_ratio": -0.022765211760997772, + "loss": 0.2514, + "rejected_geometric_mean": -14.906694412231445, + "step": 5555 + }, + { + "chosen_geometric_mean": -1.1346607208251953, + "epoch": 1.38, + "grad_norm": 3.09375, + "learning_rate": 1.111681536699099e-06, + "log_odds": 7.273444175720215, + "log_odds_ratio": -0.1067744567990303, + "loss": 0.2198, + "rejected_geometric_mean": -8.067276000976562, + "step": 5556 + }, + { + "chosen_geometric_mean": -0.8339940309524536, + "epoch": 1.38, + "grad_norm": 2.90625, + "learning_rate": 1.1108720706900436e-06, + "log_odds": 11.816886901855469, + "log_odds_ratio": -0.2229139506816864, + "loss": 0.2484, + "rejected_geometric_mean": -12.241971969604492, + "step": 5557 + }, + { + "chosen_geometric_mean": -0.9290375709533691, + "epoch": 1.38, + "grad_norm": 12.0, + "learning_rate": 1.1100628153061273e-06, + "log_odds": 7.0861053466796875, + "log_odds_ratio": -0.049688670784235, + "loss": 0.2576, + "rejected_geometric_mean": -7.530492305755615, + "step": 5558 + }, + { + "chosen_geometric_mean": -0.967207670211792, + "epoch": 1.38, + "grad_norm": 7.375, + "learning_rate": 1.109253770670052e-06, + "log_odds": 8.681811332702637, + "log_odds_ratio": -0.0013932002475485206, + "loss": 0.2652, + "rejected_geometric_mean": -9.152968406677246, + "step": 5559 + }, + { + "chosen_geometric_mean": -0.8754937052726746, + "epoch": 1.38, + "grad_norm": 26.125, + "learning_rate": 1.1084449369044883e-06, + "log_odds": 15.859563827514648, + "log_odds_ratio": -3.278256031080673e-07, + "loss": 0.2462, + "rejected_geometric_mean": -16.17576789855957, + "step": 5560 + }, + { + "chosen_geometric_mean": -1.020899772644043, + "epoch": 1.38, + "grad_norm": 4.03125, + "learning_rate": 1.1076363141320749e-06, + "log_odds": 6.73635721206665, + "log_odds_ratio": -0.09190824627876282, + "loss": 0.255, + "rejected_geometric_mean": -7.3720855712890625, + "step": 5561 + }, + { + "chosen_geometric_mean": -1.2002513408660889, + "epoch": 1.38, + "grad_norm": 28.25, + "learning_rate": 1.1068279024754177e-06, + "log_odds": 5.294758319854736, + "log_odds_ratio": -0.1369401514530182, + "loss": 0.2397, + "rejected_geometric_mean": -6.178908348083496, + "step": 5562 + }, + { + "chosen_geometric_mean": -1.033850073814392, + "epoch": 1.38, + "grad_norm": 29.25, + "learning_rate": 1.1060197020570936e-06, + "log_odds": 6.821220397949219, + "log_odds_ratio": -0.21365468204021454, + "loss": 0.3372, + "rejected_geometric_mean": -7.564279079437256, + "step": 5563 + }, + { + "chosen_geometric_mean": -0.7752368450164795, + "epoch": 1.38, + "grad_norm": 4.9375, + "learning_rate": 1.1052117129996435e-06, + "log_odds": 6.94435977935791, + "log_odds_ratio": -0.0824996680021286, + "loss": 0.2273, + "rejected_geometric_mean": -7.181069374084473, + "step": 5564 + }, + { + "chosen_geometric_mean": -0.991104006767273, + "epoch": 1.38, + "grad_norm": 20.0, + "learning_rate": 1.104403935425577e-06, + "log_odds": 8.679444313049316, + "log_odds_ratio": -0.25971344113349915, + "loss": 0.2734, + "rejected_geometric_mean": -9.369985580444336, + "step": 5565 + }, + { + "chosen_geometric_mean": -0.7640877366065979, + "epoch": 1.38, + "grad_norm": 2.109375, + "learning_rate": 1.1035963694573749e-06, + "log_odds": 4.79808235168457, + "log_odds_ratio": -0.11486005038022995, + "loss": 0.2418, + "rejected_geometric_mean": -5.030275344848633, + "step": 5566 + }, + { + "chosen_geometric_mean": -1.0396665334701538, + "epoch": 1.38, + "grad_norm": 6.03125, + "learning_rate": 1.1027890152174811e-06, + "log_odds": 6.388664722442627, + "log_odds_ratio": -0.06552507728338242, + "loss": 0.2567, + "rejected_geometric_mean": -7.024629592895508, + "step": 5567 + }, + { + "chosen_geometric_mean": -0.8954952955245972, + "epoch": 1.38, + "grad_norm": 2.046875, + "learning_rate": 1.1019818728283113e-06, + "log_odds": 6.756279468536377, + "log_odds_ratio": -0.2550792694091797, + "loss": 0.239, + "rejected_geometric_mean": -7.293424129486084, + "step": 5568 + }, + { + "chosen_geometric_mean": -1.2069947719573975, + "epoch": 1.38, + "grad_norm": 43.25, + "learning_rate": 1.1011749424122483e-06, + "log_odds": 5.695042133331299, + "log_odds_ratio": -0.14320631325244904, + "loss": 0.3101, + "rejected_geometric_mean": -6.55874490737915, + "step": 5569 + }, + { + "chosen_geometric_mean": -0.9084302186965942, + "epoch": 1.38, + "grad_norm": 4.71875, + "learning_rate": 1.1003682240916414e-06, + "log_odds": 3.9926886558532715, + "log_odds_ratio": -0.18450139462947845, + "loss": 0.2733, + "rejected_geometric_mean": -4.539282321929932, + "step": 5570 + }, + { + "chosen_geometric_mean": -1.4299616813659668, + "epoch": 1.38, + "grad_norm": 4.65625, + "learning_rate": 1.0995617179888074e-06, + "log_odds": 8.784096717834473, + "log_odds_ratio": -0.04113791137933731, + "loss": 0.2645, + "rejected_geometric_mean": -9.907809257507324, + "step": 5571 + }, + { + "chosen_geometric_mean": -1.0320556163787842, + "epoch": 1.38, + "grad_norm": 19.75, + "learning_rate": 1.098755424226033e-06, + "log_odds": 9.188520431518555, + "log_odds_ratio": -0.08013635873794556, + "loss": 0.2735, + "rejected_geometric_mean": -9.761588096618652, + "step": 5572 + }, + { + "chosen_geometric_mean": -0.8870276808738708, + "epoch": 1.38, + "grad_norm": 3.484375, + "learning_rate": 1.0979493429255725e-06, + "log_odds": 10.874992370605469, + "log_odds_ratio": -0.09605482220649719, + "loss": 0.2356, + "rejected_geometric_mean": -11.2604398727417, + "step": 5573 + }, + { + "chosen_geometric_mean": -1.3267724514007568, + "epoch": 1.38, + "grad_norm": 4.96875, + "learning_rate": 1.0971434742096453e-06, + "log_odds": 6.504264831542969, + "log_odds_ratio": -0.1393466591835022, + "loss": 0.287, + "rejected_geometric_mean": -7.539377212524414, + "step": 5574 + }, + { + "chosen_geometric_mean": -0.8874558806419373, + "epoch": 1.38, + "grad_norm": 10.0625, + "learning_rate": 1.0963378182004427e-06, + "log_odds": 11.513408660888672, + "log_odds_ratio": -0.14258836209774017, + "loss": 0.2638, + "rejected_geometric_mean": -11.988344192504883, + "step": 5575 + }, + { + "chosen_geometric_mean": -0.8724657297134399, + "epoch": 1.38, + "grad_norm": 10.8125, + "learning_rate": 1.09553237502012e-06, + "log_odds": 10.057903289794922, + "log_odds_ratio": -0.16120535135269165, + "loss": 0.2826, + "rejected_geometric_mean": -10.476075172424316, + "step": 5576 + }, + { + "chosen_geometric_mean": -0.98978590965271, + "epoch": 1.38, + "grad_norm": 11.8125, + "learning_rate": 1.0947271447908013e-06, + "log_odds": 1.623854160308838, + "log_odds_ratio": -0.3982817530632019, + "loss": 0.2568, + "rejected_geometric_mean": -2.359994649887085, + "step": 5577 + }, + { + "chosen_geometric_mean": -0.9232394099235535, + "epoch": 1.38, + "grad_norm": 1.921875, + "learning_rate": 1.0939221276345794e-06, + "log_odds": 6.537629127502441, + "log_odds_ratio": -0.1509031057357788, + "loss": 0.2679, + "rejected_geometric_mean": -7.033545970916748, + "step": 5578 + }, + { + "chosen_geometric_mean": -1.0301412343978882, + "epoch": 1.38, + "grad_norm": 30.875, + "learning_rate": 1.093117323673515e-06, + "log_odds": 7.531373977661133, + "log_odds_ratio": -0.044489458203315735, + "loss": 0.2589, + "rejected_geometric_mean": -8.140522003173828, + "step": 5579 + }, + { + "chosen_geometric_mean": -0.9139575362205505, + "epoch": 1.38, + "grad_norm": 9.6875, + "learning_rate": 1.0923127330296343e-06, + "log_odds": 11.408724784851074, + "log_odds_ratio": -0.07664241641759872, + "loss": 0.2259, + "rejected_geometric_mean": -11.828598022460938, + "step": 5580 + }, + { + "chosen_geometric_mean": -0.8936598896980286, + "epoch": 1.38, + "grad_norm": 10.625, + "learning_rate": 1.0915083558249343e-06, + "log_odds": 11.984365463256836, + "log_odds_ratio": -0.00070846953894943, + "loss": 0.212, + "rejected_geometric_mean": -12.346538543701172, + "step": 5581 + }, + { + "chosen_geometric_mean": -1.0926040410995483, + "epoch": 1.38, + "grad_norm": 35.25, + "learning_rate": 1.0907041921813758e-06, + "log_odds": 4.056367874145508, + "log_odds_ratio": -0.2547207772731781, + "loss": 0.3166, + "rejected_geometric_mean": -4.7662458419799805, + "step": 5582 + }, + { + "chosen_geometric_mean": -0.8560029864311218, + "epoch": 1.38, + "grad_norm": 2.015625, + "learning_rate": 1.0899002422208905e-06, + "log_odds": 12.756708145141602, + "log_odds_ratio": -0.00019367468485143036, + "loss": 0.2711, + "rejected_geometric_mean": -13.03546142578125, + "step": 5583 + }, + { + "chosen_geometric_mean": -1.0884610414505005, + "epoch": 1.38, + "grad_norm": 5.34375, + "learning_rate": 1.089096506065377e-06, + "log_odds": 8.25918197631836, + "log_odds_ratio": -0.1526220440864563, + "loss": 0.2646, + "rejected_geometric_mean": -9.013956069946289, + "step": 5584 + }, + { + "chosen_geometric_mean": -0.8902989029884338, + "epoch": 1.38, + "grad_norm": 3.359375, + "learning_rate": 1.0882929838366999e-06, + "log_odds": 13.668278694152832, + "log_odds_ratio": -0.0033507412299513817, + "loss": 0.2355, + "rejected_geometric_mean": -14.029363632202148, + "step": 5585 + }, + { + "chosen_geometric_mean": -0.828561007976532, + "epoch": 1.38, + "grad_norm": 4.71875, + "learning_rate": 1.0874896756566937e-06, + "log_odds": 3.820014238357544, + "log_odds_ratio": -0.18454374372959137, + "loss": 0.2703, + "rejected_geometric_mean": -4.181379795074463, + "step": 5586 + }, + { + "chosen_geometric_mean": -1.2119457721710205, + "epoch": 1.38, + "grad_norm": 13.5625, + "learning_rate": 1.0866865816471584e-06, + "log_odds": 5.421071529388428, + "log_odds_ratio": -0.15742678940296173, + "loss": 0.3192, + "rejected_geometric_mean": -6.349555492401123, + "step": 5587 + }, + { + "chosen_geometric_mean": -0.798091471195221, + "epoch": 1.38, + "grad_norm": 4.1875, + "learning_rate": 1.0858837019298615e-06, + "log_odds": 2.375690460205078, + "log_odds_ratio": -0.20912043750286102, + "loss": 0.2736, + "rejected_geometric_mean": -2.7506065368652344, + "step": 5588 + }, + { + "chosen_geometric_mean": -0.7468900084495544, + "epoch": 1.38, + "grad_norm": 4.46875, + "learning_rate": 1.08508103662654e-06, + "log_odds": 9.862089157104492, + "log_odds_ratio": -0.08783095329999924, + "loss": 0.2471, + "rejected_geometric_mean": -10.049986839294434, + "step": 5589 + }, + { + "chosen_geometric_mean": -1.0551633834838867, + "epoch": 1.38, + "grad_norm": 15.0625, + "learning_rate": 1.0842785858588974e-06, + "log_odds": 5.9114837646484375, + "log_odds_ratio": -0.1471853107213974, + "loss": 0.2828, + "rejected_geometric_mean": -6.61515998840332, + "step": 5590 + }, + { + "chosen_geometric_mean": -1.5824865102767944, + "epoch": 1.38, + "grad_norm": 42.25, + "learning_rate": 1.0834763497486034e-06, + "log_odds": 6.2181620597839355, + "log_odds_ratio": -0.12700484693050385, + "loss": 0.3356, + "rejected_geometric_mean": -7.3691020011901855, + "step": 5591 + }, + { + "chosen_geometric_mean": -1.4448597431182861, + "epoch": 1.38, + "grad_norm": 14.5, + "learning_rate": 1.0826743284172978e-06, + "log_odds": 7.270237922668457, + "log_odds_ratio": -0.39313262701034546, + "loss": 0.287, + "rejected_geometric_mean": -8.604220390319824, + "step": 5592 + }, + { + "chosen_geometric_mean": -1.1628971099853516, + "epoch": 1.38, + "grad_norm": 3.125, + "learning_rate": 1.0818725219865854e-06, + "log_odds": 3.5927348136901855, + "log_odds_ratio": -0.36185795068740845, + "loss": 0.2707, + "rejected_geometric_mean": -4.580324172973633, + "step": 5593 + }, + { + "chosen_geometric_mean": -1.0512439012527466, + "epoch": 1.38, + "grad_norm": 3.375, + "learning_rate": 1.081070930578037e-06, + "log_odds": 4.068544387817383, + "log_odds_ratio": -0.3826126158237457, + "loss": 0.2593, + "rejected_geometric_mean": -4.909881114959717, + "step": 5594 + }, + { + "chosen_geometric_mean": -0.9187974333763123, + "epoch": 1.39, + "grad_norm": 33.5, + "learning_rate": 1.0802695543131978e-06, + "log_odds": 6.798181533813477, + "log_odds_ratio": -0.17416587471961975, + "loss": 0.3215, + "rejected_geometric_mean": -7.349512100219727, + "step": 5595 + }, + { + "chosen_geometric_mean": -1.170434594154358, + "epoch": 1.39, + "grad_norm": 3.234375, + "learning_rate": 1.079468393313573e-06, + "log_odds": 10.63143539428711, + "log_odds_ratio": -0.07455942034721375, + "loss": 0.2369, + "rejected_geometric_mean": -11.457948684692383, + "step": 5596 + }, + { + "chosen_geometric_mean": -0.8877467513084412, + "epoch": 1.39, + "grad_norm": 1.9765625, + "learning_rate": 1.0786674477006374e-06, + "log_odds": 7.043951988220215, + "log_odds_ratio": -0.08284757286310196, + "loss": 0.2552, + "rejected_geometric_mean": -7.446131706237793, + "step": 5597 + }, + { + "chosen_geometric_mean": -1.0482466220855713, + "epoch": 1.39, + "grad_norm": 3.8125, + "learning_rate": 1.077866717595835e-06, + "log_odds": 8.61727237701416, + "log_odds_ratio": -0.19465230405330658, + "loss": 0.2855, + "rejected_geometric_mean": -9.320798873901367, + "step": 5598 + }, + { + "chosen_geometric_mean": -0.9343897700309753, + "epoch": 1.39, + "grad_norm": 4.125, + "learning_rate": 1.077066203120574e-06, + "log_odds": 9.359416007995605, + "log_odds_ratio": -0.12357695400714874, + "loss": 0.2465, + "rejected_geometric_mean": -9.810226440429688, + "step": 5599 + }, + { + "chosen_geometric_mean": -1.050498366355896, + "epoch": 1.39, + "grad_norm": 1.84375, + "learning_rate": 1.0762659043962326e-06, + "log_odds": 8.144253730773926, + "log_odds_ratio": -0.15701404213905334, + "loss": 0.2121, + "rejected_geometric_mean": -8.828965187072754, + "step": 5600 + }, + { + "chosen_geometric_mean": -0.9367071390151978, + "epoch": 1.39, + "grad_norm": 3.453125, + "learning_rate": 1.0754658215441563e-06, + "log_odds": 5.543467998504639, + "log_odds_ratio": -0.27928483486175537, + "loss": 0.2724, + "rejected_geometric_mean": -6.140832901000977, + "step": 5601 + }, + { + "chosen_geometric_mean": -0.7963312268257141, + "epoch": 1.39, + "grad_norm": 11.625, + "learning_rate": 1.0746659546856558e-06, + "log_odds": 8.457273483276367, + "log_odds_ratio": -0.02022073045372963, + "loss": 0.2907, + "rejected_geometric_mean": -8.668612480163574, + "step": 5602 + }, + { + "chosen_geometric_mean": -0.8224050998687744, + "epoch": 1.39, + "grad_norm": 3.140625, + "learning_rate": 1.0738663039420091e-06, + "log_odds": 1.7660770416259766, + "log_odds_ratio": -0.38775038719177246, + "loss": 0.2617, + "rejected_geometric_mean": -2.261730670928955, + "step": 5603 + }, + { + "chosen_geometric_mean": -0.9464318156242371, + "epoch": 1.39, + "grad_norm": 15.1875, + "learning_rate": 1.0730668694344647e-06, + "log_odds": 5.605166435241699, + "log_odds_ratio": -0.018196353688836098, + "loss": 0.3111, + "rejected_geometric_mean": -6.05638313293457, + "step": 5604 + }, + { + "chosen_geometric_mean": -0.9760357141494751, + "epoch": 1.39, + "grad_norm": 68.5, + "learning_rate": 1.072267651284234e-06, + "log_odds": 5.303591251373291, + "log_odds_ratio": -0.17267173528671265, + "loss": 0.3, + "rejected_geometric_mean": -5.955989360809326, + "step": 5605 + }, + { + "chosen_geometric_mean": -1.7619825601577759, + "epoch": 1.39, + "grad_norm": 16.25, + "learning_rate": 1.0714686496124985e-06, + "log_odds": 5.892553329467773, + "log_odds_ratio": -0.08381476998329163, + "loss": 0.3816, + "rejected_geometric_mean": -7.464226722717285, + "step": 5606 + }, + { + "chosen_geometric_mean": -1.2447210550308228, + "epoch": 1.39, + "grad_norm": 19.875, + "learning_rate": 1.0706698645404067e-06, + "log_odds": 9.760909080505371, + "log_odds_ratio": -0.1876446008682251, + "loss": 0.2472, + "rejected_geometric_mean": -10.791196823120117, + "step": 5607 + }, + { + "chosen_geometric_mean": -0.983072817325592, + "epoch": 1.39, + "grad_norm": 11.625, + "learning_rate": 1.0698712961890723e-06, + "log_odds": 8.634445190429688, + "log_odds_ratio": -0.2177790254354477, + "loss": 0.3247, + "rejected_geometric_mean": -9.26563835144043, + "step": 5608 + }, + { + "chosen_geometric_mean": -1.2520780563354492, + "epoch": 1.39, + "grad_norm": 12.25, + "learning_rate": 1.069072944679579e-06, + "log_odds": 8.047331809997559, + "log_odds_ratio": -0.0020993282087147236, + "loss": 0.2559, + "rejected_geometric_mean": -8.906646728515625, + "step": 5609 + }, + { + "chosen_geometric_mean": -1.1446912288665771, + "epoch": 1.39, + "grad_norm": 2.78125, + "learning_rate": 1.0682748101329752e-06, + "log_odds": 8.322447776794434, + "log_odds_ratio": -0.07036339491605759, + "loss": 0.2741, + "rejected_geometric_mean": -9.035725593566895, + "step": 5610 + }, + { + "chosen_geometric_mean": -0.8759020566940308, + "epoch": 1.39, + "grad_norm": 9.0625, + "learning_rate": 1.067476892670275e-06, + "log_odds": 4.85398530960083, + "log_odds_ratio": -0.29569777846336365, + "loss": 0.309, + "rejected_geometric_mean": -5.386075019836426, + "step": 5611 + }, + { + "chosen_geometric_mean": -0.8540768623352051, + "epoch": 1.39, + "grad_norm": 3.34375, + "learning_rate": 1.066679192412466e-06, + "log_odds": 7.95353889465332, + "log_odds_ratio": -0.1370847076177597, + "loss": 0.2433, + "rejected_geometric_mean": -8.351520538330078, + "step": 5612 + }, + { + "chosen_geometric_mean": -1.3497226238250732, + "epoch": 1.39, + "grad_norm": 22.0, + "learning_rate": 1.065881709480497e-06, + "log_odds": 3.005119800567627, + "log_odds_ratio": -0.29406434297561646, + "loss": 0.2968, + "rejected_geometric_mean": -4.080509662628174, + "step": 5613 + }, + { + "chosen_geometric_mean": -0.9287022352218628, + "epoch": 1.39, + "grad_norm": 5.1875, + "learning_rate": 1.0650844439952842e-06, + "log_odds": 5.458080768585205, + "log_odds_ratio": -0.1137089654803276, + "loss": 0.2465, + "rejected_geometric_mean": -5.983311653137207, + "step": 5614 + }, + { + "chosen_geometric_mean": -1.0215966701507568, + "epoch": 1.39, + "grad_norm": 15.75, + "learning_rate": 1.0642873960777146e-06, + "log_odds": 3.671100378036499, + "log_odds_ratio": -0.14179016649723053, + "loss": 0.2525, + "rejected_geometric_mean": -4.318255424499512, + "step": 5615 + }, + { + "chosen_geometric_mean": -1.212553858757019, + "epoch": 1.39, + "grad_norm": 4.625, + "learning_rate": 1.0634905658486368e-06, + "log_odds": 8.500890731811523, + "log_odds_ratio": -0.030197910964488983, + "loss": 0.2787, + "rejected_geometric_mean": -9.365751266479492, + "step": 5616 + }, + { + "chosen_geometric_mean": -0.9623217582702637, + "epoch": 1.39, + "grad_norm": 4.1875, + "learning_rate": 1.0626939534288716e-06, + "log_odds": 7.181573867797852, + "log_odds_ratio": -0.07740091532468796, + "loss": 0.2551, + "rejected_geometric_mean": -7.649561882019043, + "step": 5617 + }, + { + "chosen_geometric_mean": -0.7448341846466064, + "epoch": 1.39, + "grad_norm": 4.15625, + "learning_rate": 1.061897558939205e-06, + "log_odds": 10.864246368408203, + "log_odds_ratio": -0.0006618034094572067, + "loss": 0.2348, + "rejected_geometric_mean": -10.948315620422363, + "step": 5618 + }, + { + "chosen_geometric_mean": -1.0525768995285034, + "epoch": 1.39, + "grad_norm": 2.28125, + "learning_rate": 1.0611013825003883e-06, + "log_odds": 7.342651844024658, + "log_odds_ratio": -0.05005452036857605, + "loss": 0.2485, + "rejected_geometric_mean": -7.980960369110107, + "step": 5619 + }, + { + "chosen_geometric_mean": -0.9511587619781494, + "epoch": 1.39, + "grad_norm": 4.96875, + "learning_rate": 1.0603054242331403e-06, + "log_odds": 5.112917900085449, + "log_odds_ratio": -0.052043367177248, + "loss": 0.2929, + "rejected_geometric_mean": -5.596314430236816, + "step": 5620 + }, + { + "chosen_geometric_mean": -1.305662989616394, + "epoch": 1.39, + "grad_norm": 5.5625, + "learning_rate": 1.0595096842581485e-06, + "log_odds": 10.097307205200195, + "log_odds_ratio": -0.008707502856850624, + "loss": 0.2628, + "rejected_geometric_mean": -11.080757141113281, + "step": 5621 + }, + { + "chosen_geometric_mean": -0.961780309677124, + "epoch": 1.39, + "grad_norm": 5.1875, + "learning_rate": 1.0587141626960667e-06, + "log_odds": 7.064412593841553, + "log_odds_ratio": -0.18685509264469147, + "loss": 0.2386, + "rejected_geometric_mean": -7.723737716674805, + "step": 5622 + }, + { + "chosen_geometric_mean": -0.9081990718841553, + "epoch": 1.39, + "grad_norm": 35.5, + "learning_rate": 1.0579188596675136e-06, + "log_odds": 6.7562713623046875, + "log_odds_ratio": -0.08639850467443466, + "loss": 0.2549, + "rejected_geometric_mean": -7.182306289672852, + "step": 5623 + }, + { + "chosen_geometric_mean": -0.882953405380249, + "epoch": 1.39, + "grad_norm": 6.59375, + "learning_rate": 1.057123775293078e-06, + "log_odds": 3.7502100467681885, + "log_odds_ratio": -0.23517122864723206, + "loss": 0.2523, + "rejected_geometric_mean": -4.258712291717529, + "step": 5624 + }, + { + "chosen_geometric_mean": -1.0695502758026123, + "epoch": 1.39, + "grad_norm": 2.03125, + "learning_rate": 1.0563289096933116e-06, + "log_odds": 5.787827968597412, + "log_odds_ratio": -0.17465269565582275, + "loss": 0.28, + "rejected_geometric_mean": -6.562600135803223, + "step": 5625 + }, + { + "chosen_geometric_mean": -0.9276393055915833, + "epoch": 1.39, + "grad_norm": 5.09375, + "learning_rate": 1.0555342629887372e-06, + "log_odds": 1.1666141748428345, + "log_odds_ratio": -0.31830137968063354, + "loss": 0.2999, + "rejected_geometric_mean": -1.8048882484436035, + "step": 5626 + }, + { + "chosen_geometric_mean": -1.116342544555664, + "epoch": 1.39, + "grad_norm": 3.828125, + "learning_rate": 1.0547398352998404e-06, + "log_odds": 7.719846248626709, + "log_odds_ratio": -0.0009700893424451351, + "loss": 0.2771, + "rejected_geometric_mean": -8.430388450622559, + "step": 5627 + }, + { + "chosen_geometric_mean": -1.1290602684020996, + "epoch": 1.39, + "grad_norm": 3.453125, + "learning_rate": 1.0539456267470774e-06, + "log_odds": 11.583480834960938, + "log_odds_ratio": -0.0001875083107734099, + "loss": 0.2377, + "rejected_geometric_mean": -12.288507461547852, + "step": 5628 + }, + { + "chosen_geometric_mean": -1.097208857536316, + "epoch": 1.39, + "grad_norm": 2.984375, + "learning_rate": 1.0531516374508673e-06, + "log_odds": 11.196517944335938, + "log_odds_ratio": -0.32170403003692627, + "loss": 0.3296, + "rejected_geometric_mean": -11.99919319152832, + "step": 5629 + }, + { + "chosen_geometric_mean": -1.1077603101730347, + "epoch": 1.39, + "grad_norm": 33.5, + "learning_rate": 1.0523578675316004e-06, + "log_odds": 7.105997085571289, + "log_odds_ratio": -0.012665137648582458, + "loss": 0.2406, + "rejected_geometric_mean": -7.804035186767578, + "step": 5630 + }, + { + "chosen_geometric_mean": -0.8078314661979675, + "epoch": 1.39, + "grad_norm": 4.375, + "learning_rate": 1.0515643171096286e-06, + "log_odds": 0.6950401067733765, + "log_odds_ratio": -0.4437843859195709, + "loss": 0.2404, + "rejected_geometric_mean": -1.2653238773345947, + "step": 5631 + }, + { + "chosen_geometric_mean": -0.8744699954986572, + "epoch": 1.39, + "grad_norm": 4.1875, + "learning_rate": 1.0507709863052744e-06, + "log_odds": 7.473356246948242, + "log_odds_ratio": -0.08733484894037247, + "loss": 0.2805, + "rejected_geometric_mean": -7.836414813995361, + "step": 5632 + }, + { + "chosen_geometric_mean": -0.9466139674186707, + "epoch": 1.39, + "grad_norm": 13.8125, + "learning_rate": 1.0499778752388269e-06, + "log_odds": 8.18121337890625, + "log_odds_ratio": -0.03628015145659447, + "loss": 0.2323, + "rejected_geometric_mean": -8.646927833557129, + "step": 5633 + }, + { + "chosen_geometric_mean": -1.3576382398605347, + "epoch": 1.39, + "grad_norm": 12.875, + "learning_rate": 1.0491849840305387e-06, + "log_odds": 4.263624668121338, + "log_odds_ratio": -0.18663635849952698, + "loss": 0.2658, + "rejected_geometric_mean": -5.408717632293701, + "step": 5634 + }, + { + "chosen_geometric_mean": -1.2307556867599487, + "epoch": 1.4, + "grad_norm": 17.0, + "learning_rate": 1.0483923128006332e-06, + "log_odds": 8.581380844116211, + "log_odds_ratio": -0.0077395252883434296, + "loss": 0.2627, + "rejected_geometric_mean": -9.433819770812988, + "step": 5635 + }, + { + "chosen_geometric_mean": -1.1209962368011475, + "epoch": 1.4, + "grad_norm": 13.75, + "learning_rate": 1.0475998616692972e-06, + "log_odds": 8.980412483215332, + "log_odds_ratio": -0.08921274542808533, + "loss": 0.2545, + "rejected_geometric_mean": -9.705171585083008, + "step": 5636 + }, + { + "chosen_geometric_mean": -0.9116551280021667, + "epoch": 1.4, + "grad_norm": 2.765625, + "learning_rate": 1.0468076307566848e-06, + "log_odds": 5.760100364685059, + "log_odds_ratio": -0.08949711173772812, + "loss": 0.2362, + "rejected_geometric_mean": -6.215142726898193, + "step": 5637 + }, + { + "chosen_geometric_mean": -1.1934245824813843, + "epoch": 1.4, + "grad_norm": 26.625, + "learning_rate": 1.0460156201829178e-06, + "log_odds": 6.685970306396484, + "log_odds_ratio": -0.06573979556560516, + "loss": 0.3075, + "rejected_geometric_mean": -7.465834140777588, + "step": 5638 + }, + { + "chosen_geometric_mean": -0.9342291355133057, + "epoch": 1.4, + "grad_norm": 2.53125, + "learning_rate": 1.0452238300680847e-06, + "log_odds": 3.135714054107666, + "log_odds_ratio": -0.2493036389350891, + "loss": 0.2455, + "rejected_geometric_mean": -3.7774760723114014, + "step": 5639 + }, + { + "chosen_geometric_mean": -0.8714540004730225, + "epoch": 1.4, + "grad_norm": 2.078125, + "learning_rate": 1.0444322605322387e-06, + "log_odds": 3.2029075622558594, + "log_odds_ratio": -0.26988571882247925, + "loss": 0.2463, + "rejected_geometric_mean": -3.7328288555145264, + "step": 5640 + }, + { + "chosen_geometric_mean": -0.8486248254776001, + "epoch": 1.4, + "grad_norm": 6.03125, + "learning_rate": 1.043640911695402e-06, + "log_odds": 3.9101717472076416, + "log_odds_ratio": -0.2576802372932434, + "loss": 0.2617, + "rejected_geometric_mean": -4.388347625732422, + "step": 5641 + }, + { + "chosen_geometric_mean": -1.0147039890289307, + "epoch": 1.4, + "grad_norm": 2.296875, + "learning_rate": 1.042849783677562e-06, + "log_odds": 1.6135411262512207, + "log_odds_ratio": -0.3389092683792114, + "loss": 0.2729, + "rejected_geometric_mean": -2.360647678375244, + "step": 5642 + }, + { + "chosen_geometric_mean": -1.2277610301971436, + "epoch": 1.4, + "grad_norm": 20.625, + "learning_rate": 1.0420588765986698e-06, + "log_odds": 8.4596529006958, + "log_odds_ratio": -0.1009853184223175, + "loss": 0.295, + "rejected_geometric_mean": -9.375455856323242, + "step": 5643 + }, + { + "chosen_geometric_mean": -1.0832082033157349, + "epoch": 1.4, + "grad_norm": 8.1875, + "learning_rate": 1.04126819057865e-06, + "log_odds": 10.048858642578125, + "log_odds_ratio": -0.11462897062301636, + "loss": 0.2874, + "rejected_geometric_mean": -10.775674819946289, + "step": 5644 + }, + { + "chosen_geometric_mean": -1.1179075241088867, + "epoch": 1.4, + "grad_norm": 20.75, + "learning_rate": 1.040477725737388e-06, + "log_odds": 5.666158199310303, + "log_odds_ratio": -0.03453349322080612, + "loss": 0.2452, + "rejected_geometric_mean": -6.389951229095459, + "step": 5645 + }, + { + "chosen_geometric_mean": -0.9412726759910583, + "epoch": 1.4, + "grad_norm": 9.4375, + "learning_rate": 1.039687482194736e-06, + "log_odds": 3.4728944301605225, + "log_odds_ratio": -0.11939124763011932, + "loss": 0.2617, + "rejected_geometric_mean": -3.9972970485687256, + "step": 5646 + }, + { + "chosen_geometric_mean": -1.0415985584259033, + "epoch": 1.4, + "grad_norm": 41.75, + "learning_rate": 1.0388974600705157e-06, + "log_odds": 7.627758026123047, + "log_odds_ratio": -0.03590264543890953, + "loss": 0.2593, + "rejected_geometric_mean": -8.22179889678955, + "step": 5647 + }, + { + "chosen_geometric_mean": -0.9550824165344238, + "epoch": 1.4, + "grad_norm": 3.15625, + "learning_rate": 1.038107659484512e-06, + "log_odds": 7.364066123962402, + "log_odds_ratio": -0.056146129965782166, + "loss": 0.2381, + "rejected_geometric_mean": -7.858713150024414, + "step": 5648 + }, + { + "chosen_geometric_mean": -1.0368417501449585, + "epoch": 1.4, + "grad_norm": 5.875, + "learning_rate": 1.0373180805564781e-06, + "log_odds": 7.063640594482422, + "log_odds_ratio": -0.2907789349555969, + "loss": 0.2889, + "rejected_geometric_mean": -7.847938060760498, + "step": 5649 + }, + { + "chosen_geometric_mean": -1.4019778966903687, + "epoch": 1.4, + "grad_norm": 6.15625, + "learning_rate": 1.0365287234061341e-06, + "log_odds": 7.896646976470947, + "log_odds_ratio": -0.05309613421559334, + "loss": 0.2821, + "rejected_geometric_mean": -8.936257362365723, + "step": 5650 + }, + { + "chosen_geometric_mean": -0.9390955567359924, + "epoch": 1.4, + "grad_norm": 16.25, + "learning_rate": 1.035739588153164e-06, + "log_odds": 6.681839466094971, + "log_odds_ratio": -0.13586440682411194, + "loss": 0.2894, + "rejected_geometric_mean": -7.242310047149658, + "step": 5651 + }, + { + "chosen_geometric_mean": -0.9056637287139893, + "epoch": 1.4, + "grad_norm": 6.8125, + "learning_rate": 1.0349506749172211e-06, + "log_odds": 6.20192813873291, + "log_odds_ratio": -0.0616629496216774, + "loss": 0.2592, + "rejected_geometric_mean": -6.627512454986572, + "step": 5652 + }, + { + "chosen_geometric_mean": -0.8134655356407166, + "epoch": 1.4, + "grad_norm": 2.421875, + "learning_rate": 1.0341619838179228e-06, + "log_odds": 7.456480026245117, + "log_odds_ratio": -0.2023896723985672, + "loss": 0.2679, + "rejected_geometric_mean": -7.823797225952148, + "step": 5653 + }, + { + "chosen_geometric_mean": -0.9499629139900208, + "epoch": 1.4, + "grad_norm": 5.4375, + "learning_rate": 1.0333735149748527e-06, + "log_odds": 0.9657008647918701, + "log_odds_ratio": -0.3945198059082031, + "loss": 0.2294, + "rejected_geometric_mean": -1.6701207160949707, + "step": 5654 + }, + { + "chosen_geometric_mean": -1.0020668506622314, + "epoch": 1.4, + "grad_norm": 4.1875, + "learning_rate": 1.0325852685075627e-06, + "log_odds": 1.7644245624542236, + "log_odds_ratio": -0.2940714955329895, + "loss": 0.299, + "rejected_geometric_mean": -2.467756748199463, + "step": 5655 + }, + { + "chosen_geometric_mean": -1.0829036235809326, + "epoch": 1.4, + "grad_norm": 17.5, + "learning_rate": 1.0317972445355704e-06, + "log_odds": 3.152765989303589, + "log_odds_ratio": -0.047617942094802856, + "loss": 0.2579, + "rejected_geometric_mean": -3.835641860961914, + "step": 5656 + }, + { + "chosen_geometric_mean": -0.934283971786499, + "epoch": 1.4, + "grad_norm": 3.046875, + "learning_rate": 1.0310094431783576e-06, + "log_odds": 5.283163070678711, + "log_odds_ratio": -0.1624881625175476, + "loss": 0.2698, + "rejected_geometric_mean": -5.77857780456543, + "step": 5657 + }, + { + "chosen_geometric_mean": -1.0615602731704712, + "epoch": 1.4, + "grad_norm": 4.5625, + "learning_rate": 1.0302218645553756e-06, + "log_odds": 6.855616569519043, + "log_odds_ratio": -0.05601693317294121, + "loss": 0.2321, + "rejected_geometric_mean": -7.494964122772217, + "step": 5658 + }, + { + "chosen_geometric_mean": -1.0588464736938477, + "epoch": 1.4, + "grad_norm": 3.234375, + "learning_rate": 1.0294345087860394e-06, + "log_odds": 6.177028179168701, + "log_odds_ratio": -0.010857848450541496, + "loss": 0.3198, + "rejected_geometric_mean": -6.798402309417725, + "step": 5659 + }, + { + "chosen_geometric_mean": -1.1640775203704834, + "epoch": 1.4, + "grad_norm": 5.59375, + "learning_rate": 1.0286473759897292e-06, + "log_odds": 7.082036018371582, + "log_odds_ratio": -0.14516933262348175, + "loss": 0.2336, + "rejected_geometric_mean": -7.862256050109863, + "step": 5660 + }, + { + "chosen_geometric_mean": -0.9966185092926025, + "epoch": 1.4, + "grad_norm": 5.25, + "learning_rate": 1.0278604662857968e-06, + "log_odds": 8.842415809631348, + "log_odds_ratio": -0.031091082841157913, + "loss": 0.2233, + "rejected_geometric_mean": -9.372963905334473, + "step": 5661 + }, + { + "chosen_geometric_mean": -1.1522243022918701, + "epoch": 1.4, + "grad_norm": 2.90625, + "learning_rate": 1.0270737797935548e-06, + "log_odds": 4.037963390350342, + "log_odds_ratio": -0.061914652585983276, + "loss": 0.2344, + "rejected_geometric_mean": -4.819464206695557, + "step": 5662 + }, + { + "chosen_geometric_mean": -1.295831561088562, + "epoch": 1.4, + "grad_norm": 2.375, + "learning_rate": 1.0262873166322832e-06, + "log_odds": 7.741048812866211, + "log_odds_ratio": -0.011946620419621468, + "loss": 0.2616, + "rejected_geometric_mean": -8.68738079071045, + "step": 5663 + }, + { + "chosen_geometric_mean": -0.9457054138183594, + "epoch": 1.4, + "grad_norm": 4.1875, + "learning_rate": 1.0255010769212297e-06, + "log_odds": 2.4234743118286133, + "log_odds_ratio": -0.22104725241661072, + "loss": 0.2503, + "rejected_geometric_mean": -3.0451464653015137, + "step": 5664 + }, + { + "chosen_geometric_mean": -0.9058741331100464, + "epoch": 1.4, + "grad_norm": 3.859375, + "learning_rate": 1.024715060779606e-06, + "log_odds": 4.71503210067749, + "log_odds_ratio": -0.05655229091644287, + "loss": 0.2653, + "rejected_geometric_mean": -5.119017124176025, + "step": 5665 + }, + { + "chosen_geometric_mean": -1.1629871129989624, + "epoch": 1.4, + "grad_norm": 33.0, + "learning_rate": 1.0239292683265917e-06, + "log_odds": 3.763943910598755, + "log_odds_ratio": -0.3028552532196045, + "loss": 0.2761, + "rejected_geometric_mean": -4.649153709411621, + "step": 5666 + }, + { + "chosen_geometric_mean": -1.0430430173873901, + "epoch": 1.4, + "grad_norm": 13.5625, + "learning_rate": 1.0231436996813326e-06, + "log_odds": 8.15584659576416, + "log_odds_ratio": -0.1500289887189865, + "loss": 0.3018, + "rejected_geometric_mean": -8.86203670501709, + "step": 5667 + }, + { + "chosen_geometric_mean": -0.8846254348754883, + "epoch": 1.4, + "grad_norm": 17.75, + "learning_rate": 1.0223583549629393e-06, + "log_odds": 3.4510247707366943, + "log_odds_ratio": -0.17245757579803467, + "loss": 0.2765, + "rejected_geometric_mean": -3.8958077430725098, + "step": 5668 + }, + { + "chosen_geometric_mean": -0.94456547498703, + "epoch": 1.4, + "grad_norm": 2.28125, + "learning_rate": 1.0215732342904874e-06, + "log_odds": 13.6119384765625, + "log_odds_ratio": -0.007708394899964333, + "loss": 0.2214, + "rejected_geometric_mean": -14.065799713134766, + "step": 5669 + }, + { + "chosen_geometric_mean": -1.158700942993164, + "epoch": 1.4, + "grad_norm": 3.53125, + "learning_rate": 1.0207883377830222e-06, + "log_odds": 6.117917537689209, + "log_odds_ratio": -0.1369054615497589, + "loss": 0.3043, + "rejected_geometric_mean": -6.965373992919922, + "step": 5670 + }, + { + "chosen_geometric_mean": -0.9402025938034058, + "epoch": 1.4, + "grad_norm": 1.859375, + "learning_rate": 1.0200036655595508e-06, + "log_odds": 10.50735855102539, + "log_odds_ratio": -0.11980117857456207, + "loss": 0.238, + "rejected_geometric_mean": -10.991475105285645, + "step": 5671 + }, + { + "chosen_geometric_mean": -1.1683268547058105, + "epoch": 1.4, + "grad_norm": 5.875, + "learning_rate": 1.0192192177390494e-06, + "log_odds": 6.705317974090576, + "log_odds_ratio": -0.15309368073940277, + "loss": 0.2993, + "rejected_geometric_mean": -7.565042495727539, + "step": 5672 + }, + { + "chosen_geometric_mean": -0.9716482758522034, + "epoch": 1.4, + "grad_norm": 3.59375, + "learning_rate": 1.0184349944404603e-06, + "log_odds": 6.899722099304199, + "log_odds_ratio": -0.12317222356796265, + "loss": 0.2396, + "rejected_geometric_mean": -7.4532151222229, + "step": 5673 + }, + { + "chosen_geometric_mean": -1.3109074831008911, + "epoch": 1.4, + "grad_norm": 5.625, + "learning_rate": 1.0176509957826885e-06, + "log_odds": 5.250457763671875, + "log_odds_ratio": -0.24243125319480896, + "loss": 0.2817, + "rejected_geometric_mean": -6.342172622680664, + "step": 5674 + }, + { + "chosen_geometric_mean": -1.0707167387008667, + "epoch": 1.41, + "grad_norm": 3.5625, + "learning_rate": 1.0168672218846088e-06, + "log_odds": 2.8774619102478027, + "log_odds_ratio": -0.11004208028316498, + "loss": 0.293, + "rejected_geometric_mean": -3.57422137260437, + "step": 5675 + }, + { + "chosen_geometric_mean": -1.1345347166061401, + "epoch": 1.41, + "grad_norm": 15.4375, + "learning_rate": 1.016083672865058e-06, + "log_odds": 6.429009437561035, + "log_odds_ratio": -0.179010808467865, + "loss": 0.2613, + "rejected_geometric_mean": -7.247499465942383, + "step": 5676 + }, + { + "chosen_geometric_mean": -0.9365163445472717, + "epoch": 1.41, + "grad_norm": 2.25, + "learning_rate": 1.0153003488428433e-06, + "log_odds": 3.1845510005950928, + "log_odds_ratio": -0.1916493922472, + "loss": 0.2633, + "rejected_geometric_mean": -3.751023769378662, + "step": 5677 + }, + { + "chosen_geometric_mean": -0.8845862150192261, + "epoch": 1.41, + "grad_norm": 3.515625, + "learning_rate": 1.0145172499367337e-06, + "log_odds": 3.4881043434143066, + "log_odds_ratio": -0.10190249979496002, + "loss": 0.2479, + "rejected_geometric_mean": -3.8415167331695557, + "step": 5678 + }, + { + "chosen_geometric_mean": -0.8655236959457397, + "epoch": 1.41, + "grad_norm": 5.75, + "learning_rate": 1.013734376265467e-06, + "log_odds": 4.646472930908203, + "log_odds_ratio": -0.1881386637687683, + "loss": 0.273, + "rejected_geometric_mean": -5.099181652069092, + "step": 5679 + }, + { + "chosen_geometric_mean": -0.9161708950996399, + "epoch": 1.41, + "grad_norm": 2.8125, + "learning_rate": 1.0129517279477443e-06, + "log_odds": 6.383384704589844, + "log_odds_ratio": -0.017391812056303024, + "loss": 0.2623, + "rejected_geometric_mean": -6.788203716278076, + "step": 5680 + }, + { + "chosen_geometric_mean": -0.9002819061279297, + "epoch": 1.41, + "grad_norm": 2.4375, + "learning_rate": 1.0121693051022356e-06, + "log_odds": 8.159370422363281, + "log_odds_ratio": -0.11932966858148575, + "loss": 0.2602, + "rejected_geometric_mean": -8.588171005249023, + "step": 5681 + }, + { + "chosen_geometric_mean": -1.1786339282989502, + "epoch": 1.41, + "grad_norm": 26.25, + "learning_rate": 1.0113871078475728e-06, + "log_odds": 5.5854716300964355, + "log_odds_ratio": -0.38064518570899963, + "loss": 0.2997, + "rejected_geometric_mean": -6.608650207519531, + "step": 5682 + }, + { + "chosen_geometric_mean": -0.9461430311203003, + "epoch": 1.41, + "grad_norm": 4.46875, + "learning_rate": 1.0106051363023572e-06, + "log_odds": 1.3750455379486084, + "log_odds_ratio": -0.28983330726623535, + "loss": 0.2774, + "rejected_geometric_mean": -2.0359697341918945, + "step": 5683 + }, + { + "chosen_geometric_mean": -1.0062334537506104, + "epoch": 1.41, + "grad_norm": 13.4375, + "learning_rate": 1.0098233905851551e-06, + "log_odds": 2.800734519958496, + "log_odds_ratio": -0.16267894208431244, + "loss": 0.2871, + "rejected_geometric_mean": -3.43957781791687, + "step": 5684 + }, + { + "chosen_geometric_mean": -0.9110596179962158, + "epoch": 1.41, + "grad_norm": 2.4375, + "learning_rate": 1.0090418708144972e-06, + "log_odds": 4.277264595031738, + "log_odds_ratio": -0.033741388469934464, + "loss": 0.2516, + "rejected_geometric_mean": -4.686772346496582, + "step": 5685 + }, + { + "chosen_geometric_mean": -0.9079307317733765, + "epoch": 1.41, + "grad_norm": 7.21875, + "learning_rate": 1.0082605771088797e-06, + "log_odds": 8.475793838500977, + "log_odds_ratio": -0.002580713015049696, + "loss": 0.2621, + "rejected_geometric_mean": -8.847095489501953, + "step": 5686 + }, + { + "chosen_geometric_mean": -1.180989384651184, + "epoch": 1.41, + "grad_norm": 18.875, + "learning_rate": 1.007479509586766e-06, + "log_odds": 5.368856906890869, + "log_odds_ratio": -0.2236432284116745, + "loss": 0.2777, + "rejected_geometric_mean": -6.247251510620117, + "step": 5687 + }, + { + "chosen_geometric_mean": -1.1592929363250732, + "epoch": 1.41, + "grad_norm": 4.1875, + "learning_rate": 1.006698668366586e-06, + "log_odds": 8.08486270904541, + "log_odds_ratio": -0.21766486763954163, + "loss": 0.2075, + "rejected_geometric_mean": -8.975836753845215, + "step": 5688 + }, + { + "chosen_geometric_mean": -0.8156920671463013, + "epoch": 1.41, + "grad_norm": 7.3125, + "learning_rate": 1.005918053566732e-06, + "log_odds": 3.1306753158569336, + "log_odds_ratio": -0.14768628776073456, + "loss": 0.2368, + "rejected_geometric_mean": -3.4714787006378174, + "step": 5689 + }, + { + "chosen_geometric_mean": -0.9790393710136414, + "epoch": 1.41, + "grad_norm": 4.0625, + "learning_rate": 1.005137665305566e-06, + "log_odds": 9.273843765258789, + "log_odds_ratio": -0.03328070417046547, + "loss": 0.2809, + "rejected_geometric_mean": -9.790127754211426, + "step": 5690 + }, + { + "chosen_geometric_mean": -1.0775667428970337, + "epoch": 1.41, + "grad_norm": 41.0, + "learning_rate": 1.0043575037014123e-06, + "log_odds": 13.088929176330566, + "log_odds_ratio": -1.9729808627744205e-05, + "loss": 0.2924, + "rejected_geometric_mean": -13.736872673034668, + "step": 5691 + }, + { + "chosen_geometric_mean": -0.983617901802063, + "epoch": 1.41, + "grad_norm": 27.875, + "learning_rate": 1.0035775688725612e-06, + "log_odds": 5.901854515075684, + "log_odds_ratio": -0.17504814267158508, + "loss": 0.3072, + "rejected_geometric_mean": -6.555042266845703, + "step": 5692 + }, + { + "chosen_geometric_mean": -1.2006561756134033, + "epoch": 1.41, + "grad_norm": 37.0, + "learning_rate": 1.0027978609372712e-06, + "log_odds": 7.097930908203125, + "log_odds_ratio": -0.1082625761628151, + "loss": 0.3023, + "rejected_geometric_mean": -7.940258979797363, + "step": 5693 + }, + { + "chosen_geometric_mean": -0.937488317489624, + "epoch": 1.41, + "grad_norm": 7.28125, + "learning_rate": 1.0020183800137648e-06, + "log_odds": 9.210404396057129, + "log_odds_ratio": -0.06854964792728424, + "loss": 0.2563, + "rejected_geometric_mean": -9.687664031982422, + "step": 5694 + }, + { + "chosen_geometric_mean": -0.8869511485099792, + "epoch": 1.41, + "grad_norm": 5.0625, + "learning_rate": 1.0012391262202285e-06, + "log_odds": 4.675381660461426, + "log_odds_ratio": -0.19302906095981598, + "loss": 0.2271, + "rejected_geometric_mean": -5.196689605712891, + "step": 5695 + }, + { + "chosen_geometric_mean": -1.250991940498352, + "epoch": 1.41, + "grad_norm": 19.25, + "learning_rate": 1.0004600996748178e-06, + "log_odds": 5.943716049194336, + "log_odds_ratio": -0.14166852831840515, + "loss": 0.2564, + "rejected_geometric_mean": -6.898099899291992, + "step": 5696 + }, + { + "chosen_geometric_mean": -1.1052978038787842, + "epoch": 1.41, + "grad_norm": 29.875, + "learning_rate": 9.996813004956499e-07, + "log_odds": 8.886597633361816, + "log_odds_ratio": -0.09690771996974945, + "loss": 0.2785, + "rejected_geometric_mean": -9.577461242675781, + "step": 5697 + }, + { + "chosen_geometric_mean": -0.7641469240188599, + "epoch": 1.41, + "grad_norm": 9.3125, + "learning_rate": 9.989027288008102e-07, + "log_odds": 11.47437858581543, + "log_odds_ratio": -0.002485382603481412, + "loss": 0.2488, + "rejected_geometric_mean": -11.57166862487793, + "step": 5698 + }, + { + "chosen_geometric_mean": -0.8768778443336487, + "epoch": 1.41, + "grad_norm": 2.75, + "learning_rate": 9.981243847083501e-07, + "log_odds": 8.523458480834961, + "log_odds_ratio": -0.021601932123303413, + "loss": 0.2598, + "rejected_geometric_mean": -8.86457633972168, + "step": 5699 + }, + { + "chosen_geometric_mean": -0.9378120303153992, + "epoch": 1.41, + "grad_norm": 1.7421875, + "learning_rate": 9.973462683362834e-07, + "log_odds": 6.08432149887085, + "log_odds_ratio": -0.030608542263507843, + "loss": 0.2072, + "rejected_geometric_mean": -6.545004844665527, + "step": 5700 + }, + { + "chosen_geometric_mean": -0.9898830652236938, + "epoch": 1.41, + "grad_norm": 3.4375, + "learning_rate": 9.965683798025926e-07, + "log_odds": 8.324997901916504, + "log_odds_ratio": -0.18326804041862488, + "loss": 0.2295, + "rejected_geometric_mean": -8.927709579467773, + "step": 5701 + }, + { + "chosen_geometric_mean": -0.9861617088317871, + "epoch": 1.41, + "grad_norm": 3.5, + "learning_rate": 9.95790719225224e-07, + "log_odds": 12.411552429199219, + "log_odds_ratio": -0.008254580199718475, + "loss": 0.2458, + "rejected_geometric_mean": -12.907693862915039, + "step": 5702 + }, + { + "chosen_geometric_mean": -0.9387516379356384, + "epoch": 1.41, + "grad_norm": 2.1875, + "learning_rate": 9.950132867220879e-07, + "log_odds": 6.1487884521484375, + "log_odds_ratio": -0.2228912115097046, + "loss": 0.2849, + "rejected_geometric_mean": -6.718264102935791, + "step": 5703 + }, + { + "chosen_geometric_mean": -0.9596027135848999, + "epoch": 1.41, + "grad_norm": 19.375, + "learning_rate": 9.942360824110636e-07, + "log_odds": 4.832159042358398, + "log_odds_ratio": -0.28872430324554443, + "loss": 0.2655, + "rejected_geometric_mean": -5.497054576873779, + "step": 5704 + }, + { + "chosen_geometric_mean": -0.9554465413093567, + "epoch": 1.41, + "grad_norm": 64.0, + "learning_rate": 9.93459106409994e-07, + "log_odds": 14.04178237915039, + "log_odds_ratio": -6.913548713782802e-05, + "loss": 0.2692, + "rejected_geometric_mean": -14.497300148010254, + "step": 5705 + }, + { + "chosen_geometric_mean": -1.0821133852005005, + "epoch": 1.41, + "grad_norm": 13.0625, + "learning_rate": 9.92682358836686e-07, + "log_odds": 2.833601474761963, + "log_odds_ratio": -0.12735700607299805, + "loss": 0.2684, + "rejected_geometric_mean": -3.5645387172698975, + "step": 5706 + }, + { + "chosen_geometric_mean": -0.9942405223846436, + "epoch": 1.41, + "grad_norm": 9.25, + "learning_rate": 9.919058398089149e-07, + "log_odds": 9.124002456665039, + "log_odds_ratio": -0.18036004900932312, + "loss": 0.2354, + "rejected_geometric_mean": -9.779870986938477, + "step": 5707 + }, + { + "chosen_geometric_mean": -0.9261741638183594, + "epoch": 1.41, + "grad_norm": 2.0625, + "learning_rate": 9.911295494444183e-07, + "log_odds": 3.787642478942871, + "log_odds_ratio": -0.1987859159708023, + "loss": 0.2574, + "rejected_geometric_mean": -4.343657970428467, + "step": 5708 + }, + { + "chosen_geometric_mean": -0.9004273414611816, + "epoch": 1.41, + "grad_norm": 12.0, + "learning_rate": 9.903534878608995e-07, + "log_odds": 2.2414841651916504, + "log_odds_ratio": -0.16989897191524506, + "loss": 0.2675, + "rejected_geometric_mean": -2.7283895015716553, + "step": 5709 + }, + { + "chosen_geometric_mean": -1.076519250869751, + "epoch": 1.41, + "grad_norm": 3.09375, + "learning_rate": 9.895776551760312e-07, + "log_odds": 2.835099458694458, + "log_odds_ratio": -0.2714148759841919, + "loss": 0.2318, + "rejected_geometric_mean": -3.6741247177124023, + "step": 5710 + }, + { + "chosen_geometric_mean": -0.994147002696991, + "epoch": 1.41, + "grad_norm": 8.4375, + "learning_rate": 9.888020515074459e-07, + "log_odds": 7.857264995574951, + "log_odds_ratio": -0.12510408461093903, + "loss": 0.2613, + "rejected_geometric_mean": -8.478433609008789, + "step": 5711 + }, + { + "chosen_geometric_mean": -0.9564660787582397, + "epoch": 1.41, + "grad_norm": 2.953125, + "learning_rate": 9.880266769727438e-07, + "log_odds": 7.385960578918457, + "log_odds_ratio": -0.04176662117242813, + "loss": 0.2732, + "rejected_geometric_mean": -7.866550445556641, + "step": 5712 + }, + { + "chosen_geometric_mean": -1.0715162754058838, + "epoch": 1.41, + "grad_norm": 2.359375, + "learning_rate": 9.872515316894915e-07, + "log_odds": 1.6696606874465942, + "log_odds_ratio": -0.315983384847641, + "loss": 0.2579, + "rejected_geometric_mean": -2.489473342895508, + "step": 5713 + }, + { + "chosen_geometric_mean": -0.9399793148040771, + "epoch": 1.41, + "grad_norm": 7.375, + "learning_rate": 9.864766157752179e-07, + "log_odds": 9.231664657592773, + "log_odds_ratio": -0.021477382630109787, + "loss": 0.2907, + "rejected_geometric_mean": -9.686040878295898, + "step": 5714 + }, + { + "chosen_geometric_mean": -1.357898473739624, + "epoch": 1.41, + "grad_norm": 30.75, + "learning_rate": 9.8570192934742e-07, + "log_odds": 9.976972579956055, + "log_odds_ratio": -0.055416539311409, + "loss": 0.3144, + "rejected_geometric_mean": -11.011646270751953, + "step": 5715 + }, + { + "chosen_geometric_mean": -1.0346689224243164, + "epoch": 1.42, + "grad_norm": 26.125, + "learning_rate": 9.849274725235593e-07, + "log_odds": 3.9264917373657227, + "log_odds_ratio": -0.2196383774280548, + "loss": 0.2753, + "rejected_geometric_mean": -4.6684417724609375, + "step": 5716 + }, + { + "chosen_geometric_mean": -1.15083909034729, + "epoch": 1.42, + "grad_norm": 21.5, + "learning_rate": 9.841532454210617e-07, + "log_odds": 6.257410049438477, + "log_odds_ratio": -0.017811978235840797, + "loss": 0.2696, + "rejected_geometric_mean": -7.018210411071777, + "step": 5717 + }, + { + "chosen_geometric_mean": -1.1202033758163452, + "epoch": 1.42, + "grad_norm": 23.625, + "learning_rate": 9.833792481573176e-07, + "log_odds": 5.285161972045898, + "log_odds_ratio": -0.0387759655714035, + "loss": 0.2879, + "rejected_geometric_mean": -5.81242561340332, + "step": 5718 + }, + { + "chosen_geometric_mean": -0.7651371359825134, + "epoch": 1.42, + "grad_norm": 8.5625, + "learning_rate": 9.82605480849685e-07, + "log_odds": 3.1001970767974854, + "log_odds_ratio": -0.1409086287021637, + "loss": 0.2579, + "rejected_geometric_mean": -3.362968683242798, + "step": 5719 + }, + { + "chosen_geometric_mean": -0.7715619802474976, + "epoch": 1.42, + "grad_norm": 5.09375, + "learning_rate": 9.818319436154842e-07, + "log_odds": 7.728072166442871, + "log_odds_ratio": -0.07791808247566223, + "loss": 0.2852, + "rejected_geometric_mean": -7.958670139312744, + "step": 5720 + }, + { + "chosen_geometric_mean": -1.1997984647750854, + "epoch": 1.42, + "grad_norm": 32.75, + "learning_rate": 9.810586365720023e-07, + "log_odds": 2.0087127685546875, + "log_odds_ratio": -0.18686482310295105, + "loss": 0.3561, + "rejected_geometric_mean": -2.9433538913726807, + "step": 5721 + }, + { + "chosen_geometric_mean": -1.0084770917892456, + "epoch": 1.42, + "grad_norm": 4.875, + "learning_rate": 9.80285559836493e-07, + "log_odds": 3.2024006843566895, + "log_odds_ratio": -0.11805778741836548, + "loss": 0.2811, + "rejected_geometric_mean": -3.8317618370056152, + "step": 5722 + }, + { + "chosen_geometric_mean": -0.9305882453918457, + "epoch": 1.42, + "grad_norm": 2.609375, + "learning_rate": 9.795127135261707e-07, + "log_odds": 3.645446300506592, + "log_odds_ratio": -0.17901389300823212, + "loss": 0.2794, + "rejected_geometric_mean": -4.1612749099731445, + "step": 5723 + }, + { + "chosen_geometric_mean": -1.0592347383499146, + "epoch": 1.42, + "grad_norm": 32.0, + "learning_rate": 9.787400977582198e-07, + "log_odds": 4.773074150085449, + "log_odds_ratio": -0.21247047185897827, + "loss": 0.3192, + "rejected_geometric_mean": -5.521290302276611, + "step": 5724 + }, + { + "chosen_geometric_mean": -0.9528279900550842, + "epoch": 1.42, + "grad_norm": 2.5, + "learning_rate": 9.779677126497852e-07, + "log_odds": 4.992620468139648, + "log_odds_ratio": -0.395946204662323, + "loss": 0.2178, + "rejected_geometric_mean": -5.696216583251953, + "step": 5725 + }, + { + "chosen_geometric_mean": -0.9711277484893799, + "epoch": 1.42, + "grad_norm": 20.625, + "learning_rate": 9.771955583179803e-07, + "log_odds": 2.359848976135254, + "log_odds_ratio": -0.20357272028923035, + "loss": 0.2177, + "rejected_geometric_mean": -2.972667694091797, + "step": 5726 + }, + { + "chosen_geometric_mean": -0.8637268543243408, + "epoch": 1.42, + "grad_norm": 12.6875, + "learning_rate": 9.764236348798828e-07, + "log_odds": 6.746685981750488, + "log_odds_ratio": -0.11984456330537796, + "loss": 0.2729, + "rejected_geometric_mean": -7.1432647705078125, + "step": 5727 + }, + { + "chosen_geometric_mean": -1.028196096420288, + "epoch": 1.42, + "grad_norm": 8.25, + "learning_rate": 9.756519424525341e-07, + "log_odds": 4.038375377655029, + "log_odds_ratio": -0.0273441169410944, + "loss": 0.2733, + "rejected_geometric_mean": -4.588662147521973, + "step": 5728 + }, + { + "chosen_geometric_mean": -1.1452207565307617, + "epoch": 1.42, + "grad_norm": 2.953125, + "learning_rate": 9.748804811529403e-07, + "log_odds": 1.9263278245925903, + "log_odds_ratio": -0.40523257851600647, + "loss": 0.2312, + "rejected_geometric_mean": -2.8534018993377686, + "step": 5729 + }, + { + "chosen_geometric_mean": -1.1871858835220337, + "epoch": 1.42, + "grad_norm": 5.15625, + "learning_rate": 9.741092510980753e-07, + "log_odds": 3.8994016647338867, + "log_odds_ratio": -0.0553465336561203, + "loss": 0.2596, + "rejected_geometric_mean": -4.74230432510376, + "step": 5730 + }, + { + "chosen_geometric_mean": -0.941241979598999, + "epoch": 1.42, + "grad_norm": 2.890625, + "learning_rate": 9.733382524048742e-07, + "log_odds": 0.9036478996276855, + "log_odds_ratio": -0.35591140389442444, + "loss": 0.2493, + "rejected_geometric_mean": -1.5921140909194946, + "step": 5731 + }, + { + "chosen_geometric_mean": -0.9253379106521606, + "epoch": 1.42, + "grad_norm": 21.375, + "learning_rate": 9.725674851902403e-07, + "log_odds": 7.91008996963501, + "log_odds_ratio": -0.09103973954916, + "loss": 0.3179, + "rejected_geometric_mean": -8.350761413574219, + "step": 5732 + }, + { + "chosen_geometric_mean": -1.7166792154312134, + "epoch": 1.42, + "grad_norm": 34.5, + "learning_rate": 9.717969495710406e-07, + "log_odds": 5.163813591003418, + "log_odds_ratio": -0.10853950679302216, + "loss": 0.2792, + "rejected_geometric_mean": -6.62239408493042, + "step": 5733 + }, + { + "chosen_geometric_mean": -0.8835998773574829, + "epoch": 1.42, + "grad_norm": 6.125, + "learning_rate": 9.710266456641063e-07, + "log_odds": 2.422940731048584, + "log_odds_ratio": -0.2736645042896271, + "loss": 0.2473, + "rejected_geometric_mean": -2.947288990020752, + "step": 5734 + }, + { + "chosen_geometric_mean": -0.8192359209060669, + "epoch": 1.42, + "grad_norm": 18.75, + "learning_rate": 9.70256573586233e-07, + "log_odds": 10.313419342041016, + "log_odds_ratio": -0.022685952484607697, + "loss": 0.2922, + "rejected_geometric_mean": -10.549309730529785, + "step": 5735 + }, + { + "chosen_geometric_mean": -0.9444303512573242, + "epoch": 1.42, + "grad_norm": 12.1875, + "learning_rate": 9.694867334541827e-07, + "log_odds": 13.015154838562012, + "log_odds_ratio": -0.00011018823715858161, + "loss": 0.2375, + "rejected_geometric_mean": -13.452466011047363, + "step": 5736 + }, + { + "chosen_geometric_mean": -1.1145708560943604, + "epoch": 1.42, + "grad_norm": 2.34375, + "learning_rate": 9.68717125384683e-07, + "log_odds": 6.859209060668945, + "log_odds_ratio": -0.20487168431282043, + "loss": 0.2946, + "rejected_geometric_mean": -7.642160892486572, + "step": 5737 + }, + { + "chosen_geometric_mean": -0.8513879179954529, + "epoch": 1.42, + "grad_norm": 20.375, + "learning_rate": 9.67947749494423e-07, + "log_odds": 4.614263534545898, + "log_odds_ratio": -0.1584656983613968, + "loss": 0.2349, + "rejected_geometric_mean": -5.039849281311035, + "step": 5738 + }, + { + "chosen_geometric_mean": -0.8875975012779236, + "epoch": 1.42, + "grad_norm": 18.0, + "learning_rate": 9.671786059000604e-07, + "log_odds": 6.209115982055664, + "log_odds_ratio": -0.09560702741146088, + "loss": 0.2402, + "rejected_geometric_mean": -6.629333972930908, + "step": 5739 + }, + { + "chosen_geometric_mean": -0.9286024570465088, + "epoch": 1.42, + "grad_norm": 3.15625, + "learning_rate": 9.664096947182147e-07, + "log_odds": 6.759682655334473, + "log_odds_ratio": -0.0582234188914299, + "loss": 0.2432, + "rejected_geometric_mean": -7.212812900543213, + "step": 5740 + }, + { + "chosen_geometric_mean": -0.9147520661354065, + "epoch": 1.42, + "grad_norm": 5.40625, + "learning_rate": 9.656410160654706e-07, + "log_odds": 3.8120882511138916, + "log_odds_ratio": -0.28110653162002563, + "loss": 0.2639, + "rejected_geometric_mean": -4.373449325561523, + "step": 5741 + }, + { + "chosen_geometric_mean": -0.8401765823364258, + "epoch": 1.42, + "grad_norm": 2.203125, + "learning_rate": 9.64872570058379e-07, + "log_odds": 7.245377540588379, + "log_odds_ratio": -0.12274625897407532, + "loss": 0.2368, + "rejected_geometric_mean": -7.56947660446167, + "step": 5742 + }, + { + "chosen_geometric_mean": -0.9812713265419006, + "epoch": 1.42, + "grad_norm": 3.9375, + "learning_rate": 9.64104356813456e-07, + "log_odds": 4.594330787658691, + "log_odds_ratio": -0.11254853010177612, + "loss": 0.2872, + "rejected_geometric_mean": -5.150453567504883, + "step": 5743 + }, + { + "chosen_geometric_mean": -1.148460865020752, + "epoch": 1.42, + "grad_norm": 5.28125, + "learning_rate": 9.63336376447179e-07, + "log_odds": 0.6303375959396362, + "log_odds_ratio": -0.43391016125679016, + "loss": 0.2734, + "rejected_geometric_mean": -1.628735065460205, + "step": 5744 + }, + { + "chosen_geometric_mean": -1.0022244453430176, + "epoch": 1.42, + "grad_norm": 3.953125, + "learning_rate": 9.625686290759942e-07, + "log_odds": 4.0086822509765625, + "log_odds_ratio": -0.32077673077583313, + "loss": 0.2736, + "rejected_geometric_mean": -4.7518815994262695, + "step": 5745 + }, + { + "chosen_geometric_mean": -0.9334660768508911, + "epoch": 1.42, + "grad_norm": 3.078125, + "learning_rate": 9.618011148163086e-07, + "log_odds": 5.614086151123047, + "log_odds_ratio": -0.1585458219051361, + "loss": 0.2686, + "rejected_geometric_mean": -6.146788120269775, + "step": 5746 + }, + { + "chosen_geometric_mean": -1.0233806371688843, + "epoch": 1.42, + "grad_norm": 14.6875, + "learning_rate": 9.610338337844971e-07, + "log_odds": 9.122640609741211, + "log_odds_ratio": -0.0007502674707211554, + "loss": 0.2335, + "rejected_geometric_mean": -9.688097953796387, + "step": 5747 + }, + { + "chosen_geometric_mean": -1.0471832752227783, + "epoch": 1.42, + "grad_norm": 7.9375, + "learning_rate": 9.602667860968984e-07, + "log_odds": 2.266453266143799, + "log_odds_ratio": -0.3819889724254608, + "loss": 0.2766, + "rejected_geometric_mean": -3.1228246688842773, + "step": 5748 + }, + { + "chosen_geometric_mean": -0.9369300603866577, + "epoch": 1.42, + "grad_norm": 4.25, + "learning_rate": 9.594999718698134e-07, + "log_odds": 6.54334831237793, + "log_odds_ratio": -0.20292769372463226, + "loss": 0.2745, + "rejected_geometric_mean": -7.081852912902832, + "step": 5749 + }, + { + "chosen_geometric_mean": -0.9785465002059937, + "epoch": 1.42, + "grad_norm": 4.40625, + "learning_rate": 9.58733391219512e-07, + "log_odds": 3.9894156455993652, + "log_odds_ratio": -0.2598613500595093, + "loss": 0.217, + "rejected_geometric_mean": -4.652613162994385, + "step": 5750 + }, + { + "chosen_geometric_mean": -1.072485089302063, + "epoch": 1.42, + "grad_norm": 4.25, + "learning_rate": 9.579670442622243e-07, + "log_odds": 8.811450958251953, + "log_odds_ratio": -0.03328458592295647, + "loss": 0.2393, + "rejected_geometric_mean": -9.474104881286621, + "step": 5751 + }, + { + "chosen_geometric_mean": -1.0282347202301025, + "epoch": 1.42, + "grad_norm": 7.71875, + "learning_rate": 9.572009311141472e-07, + "log_odds": 6.485209941864014, + "log_odds_ratio": -0.12904317677021027, + "loss": 0.2678, + "rejected_geometric_mean": -7.138476371765137, + "step": 5752 + }, + { + "chosen_geometric_mean": -1.1161885261535645, + "epoch": 1.42, + "grad_norm": 2.515625, + "learning_rate": 9.564350518914419e-07, + "log_odds": 6.324364185333252, + "log_odds_ratio": -0.1862819641828537, + "loss": 0.3127, + "rejected_geometric_mean": -7.123967170715332, + "step": 5753 + }, + { + "chosen_geometric_mean": -0.9495292901992798, + "epoch": 1.42, + "grad_norm": 11.8125, + "learning_rate": 9.556694067102354e-07, + "log_odds": 4.1680474281311035, + "log_odds_ratio": -0.11144731938838959, + "loss": 0.2559, + "rejected_geometric_mean": -4.690718173980713, + "step": 5754 + }, + { + "chosen_geometric_mean": -0.9621355533599854, + "epoch": 1.42, + "grad_norm": 3.828125, + "learning_rate": 9.549039956866156e-07, + "log_odds": 9.516207695007324, + "log_odds_ratio": -0.0010410540271550417, + "loss": 0.2318, + "rejected_geometric_mean": -9.9937744140625, + "step": 5755 + }, + { + "chosen_geometric_mean": -0.8513596057891846, + "epoch": 1.43, + "grad_norm": 3.546875, + "learning_rate": 9.541388189366396e-07, + "log_odds": 1.3853662014007568, + "log_odds_ratio": -0.337266206741333, + "loss": 0.2626, + "rejected_geometric_mean": -1.9129890203475952, + "step": 5756 + }, + { + "chosen_geometric_mean": -1.5468586683273315, + "epoch": 1.43, + "grad_norm": 30.875, + "learning_rate": 9.533738765763254e-07, + "log_odds": 9.008953094482422, + "log_odds_ratio": -0.06587256491184235, + "loss": 0.3057, + "rejected_geometric_mean": -10.241951942443848, + "step": 5757 + }, + { + "chosen_geometric_mean": -0.9617010951042175, + "epoch": 1.43, + "grad_norm": 4.5, + "learning_rate": 9.526091687216549e-07, + "log_odds": 0.8352069854736328, + "log_odds_ratio": -0.37157467007637024, + "loss": 0.2198, + "rejected_geometric_mean": -1.5516916513442993, + "step": 5758 + }, + { + "chosen_geometric_mean": -0.7883751392364502, + "epoch": 1.43, + "grad_norm": 2.609375, + "learning_rate": 9.518446954885794e-07, + "log_odds": 6.513290882110596, + "log_odds_ratio": -0.1274152249097824, + "loss": 0.2445, + "rejected_geometric_mean": -6.843648910522461, + "step": 5759 + }, + { + "chosen_geometric_mean": -1.0094966888427734, + "epoch": 1.43, + "grad_norm": 2.703125, + "learning_rate": 9.510804569930099e-07, + "log_odds": 11.823346138000488, + "log_odds_ratio": -0.0017213401151821017, + "loss": 0.2408, + "rejected_geometric_mean": -12.326088905334473, + "step": 5760 + }, + { + "chosen_geometric_mean": -0.9378706216812134, + "epoch": 1.43, + "grad_norm": 4.25, + "learning_rate": 9.503164533508224e-07, + "log_odds": 4.97632360458374, + "log_odds_ratio": -0.06890132278203964, + "loss": 0.2284, + "rejected_geometric_mean": -5.461367607116699, + "step": 5761 + }, + { + "chosen_geometric_mean": -0.9149595499038696, + "epoch": 1.43, + "grad_norm": 7.3125, + "learning_rate": 9.495526846778599e-07, + "log_odds": 6.8535308837890625, + "log_odds_ratio": -0.12848018109798431, + "loss": 0.2264, + "rejected_geometric_mean": -7.367019176483154, + "step": 5762 + }, + { + "chosen_geometric_mean": -1.1101405620574951, + "epoch": 1.43, + "grad_norm": 21.875, + "learning_rate": 9.487891510899264e-07, + "log_odds": 3.3680267333984375, + "log_odds_ratio": -0.09074056893587112, + "loss": 0.2748, + "rejected_geometric_mean": -4.109830856323242, + "step": 5763 + }, + { + "chosen_geometric_mean": -1.125942587852478, + "epoch": 1.43, + "grad_norm": 7.53125, + "learning_rate": 9.480258527027927e-07, + "log_odds": 8.288726806640625, + "log_odds_ratio": -0.11479654908180237, + "loss": 0.254, + "rejected_geometric_mean": -9.056550979614258, + "step": 5764 + }, + { + "chosen_geometric_mean": -0.9312331080436707, + "epoch": 1.43, + "grad_norm": 33.25, + "learning_rate": 9.47262789632194e-07, + "log_odds": 11.456514358520508, + "log_odds_ratio": -0.05489322543144226, + "loss": 0.3054, + "rejected_geometric_mean": -11.912444114685059, + "step": 5765 + }, + { + "chosen_geometric_mean": -0.9342254400253296, + "epoch": 1.43, + "grad_norm": 7.28125, + "learning_rate": 9.464999619938283e-07, + "log_odds": 6.163266181945801, + "log_odds_ratio": -0.09278131276369095, + "loss": 0.2465, + "rejected_geometric_mean": -6.667010307312012, + "step": 5766 + }, + { + "chosen_geometric_mean": -0.8223817348480225, + "epoch": 1.43, + "grad_norm": 13.25, + "learning_rate": 9.457373699033576e-07, + "log_odds": 6.010794162750244, + "log_odds_ratio": -0.20980656147003174, + "loss": 0.2317, + "rejected_geometric_mean": -6.399113655090332, + "step": 5767 + }, + { + "chosen_geometric_mean": -1.2923648357391357, + "epoch": 1.43, + "grad_norm": 2.984375, + "learning_rate": 9.449750134764108e-07, + "log_odds": 7.305416584014893, + "log_odds_ratio": -0.03156646713614464, + "loss": 0.2851, + "rejected_geometric_mean": -8.276494979858398, + "step": 5768 + }, + { + "chosen_geometric_mean": -0.8697506785392761, + "epoch": 1.43, + "grad_norm": 3.21875, + "learning_rate": 9.442128928285777e-07, + "log_odds": 7.475764274597168, + "log_odds_ratio": -0.03981407359242439, + "loss": 0.2455, + "rejected_geometric_mean": -7.8060503005981445, + "step": 5769 + }, + { + "chosen_geometric_mean": -1.2491742372512817, + "epoch": 1.43, + "grad_norm": 17.625, + "learning_rate": 9.43451008075415e-07, + "log_odds": 6.49740743637085, + "log_odds_ratio": -0.15681342780590057, + "loss": 0.2578, + "rejected_geometric_mean": -7.503584861755371, + "step": 5770 + }, + { + "chosen_geometric_mean": -1.1248646974563599, + "epoch": 1.43, + "grad_norm": 5.5, + "learning_rate": 9.426893593324437e-07, + "log_odds": 6.826526165008545, + "log_odds_ratio": -0.008258175104856491, + "loss": 0.2117, + "rejected_geometric_mean": -7.557161331176758, + "step": 5771 + }, + { + "chosen_geometric_mean": -1.1254487037658691, + "epoch": 1.43, + "grad_norm": 24.0, + "learning_rate": 9.419279467151463e-07, + "log_odds": 12.273438453674316, + "log_odds_ratio": -0.006095962133258581, + "loss": 0.2915, + "rejected_geometric_mean": -13.007635116577148, + "step": 5772 + }, + { + "chosen_geometric_mean": -1.0206378698349, + "epoch": 1.43, + "grad_norm": 39.5, + "learning_rate": 9.411667703389729e-07, + "log_odds": 6.400586128234863, + "log_odds_ratio": -0.066500224173069, + "loss": 0.2751, + "rejected_geometric_mean": -6.9932403564453125, + "step": 5773 + }, + { + "chosen_geometric_mean": -1.0232574939727783, + "epoch": 1.43, + "grad_norm": 6.34375, + "learning_rate": 9.40405830319335e-07, + "log_odds": 1.8935058116912842, + "log_odds_ratio": -0.38524627685546875, + "loss": 0.2917, + "rejected_geometric_mean": -2.7354512214660645, + "step": 5774 + }, + { + "chosen_geometric_mean": -1.0307092666625977, + "epoch": 1.43, + "grad_norm": 2.421875, + "learning_rate": 9.39645126771608e-07, + "log_odds": 12.979681015014648, + "log_odds_ratio": -0.1268354058265686, + "loss": 0.2433, + "rejected_geometric_mean": -13.674504280090332, + "step": 5775 + }, + { + "chosen_geometric_mean": -1.063959002494812, + "epoch": 1.43, + "grad_norm": 24.875, + "learning_rate": 9.388846598111362e-07, + "log_odds": 3.6888303756713867, + "log_odds_ratio": -0.06702733039855957, + "loss": 0.2868, + "rejected_geometric_mean": -4.360086917877197, + "step": 5776 + }, + { + "chosen_geometric_mean": -0.9250457286834717, + "epoch": 1.43, + "grad_norm": 36.75, + "learning_rate": 9.38124429553223e-07, + "log_odds": 2.3954691886901855, + "log_odds_ratio": -0.1581265777349472, + "loss": 0.2455, + "rejected_geometric_mean": -2.9414560794830322, + "step": 5777 + }, + { + "chosen_geometric_mean": -0.952320396900177, + "epoch": 1.43, + "grad_norm": 2.3125, + "learning_rate": 9.373644361131363e-07, + "log_odds": 9.166688919067383, + "log_odds_ratio": -0.0033929774072021246, + "loss": 0.2552, + "rejected_geometric_mean": -9.616158485412598, + "step": 5778 + }, + { + "chosen_geometric_mean": -0.989811360836029, + "epoch": 1.43, + "grad_norm": 7.875, + "learning_rate": 9.366046796061112e-07, + "log_odds": 4.600900650024414, + "log_odds_ratio": -0.029925838112831116, + "loss": 0.2612, + "rejected_geometric_mean": -5.128781318664551, + "step": 5779 + }, + { + "chosen_geometric_mean": -1.0473809242248535, + "epoch": 1.43, + "grad_norm": 17.875, + "learning_rate": 9.358451601473436e-07, + "log_odds": 4.5642852783203125, + "log_odds_ratio": -0.15090057253837585, + "loss": 0.2694, + "rejected_geometric_mean": -5.251101493835449, + "step": 5780 + }, + { + "chosen_geometric_mean": -0.7393121719360352, + "epoch": 1.43, + "grad_norm": 3.125, + "learning_rate": 9.350858778519956e-07, + "log_odds": 13.008232116699219, + "log_odds_ratio": -0.02369181253015995, + "loss": 0.2899, + "rejected_geometric_mean": -13.08740234375, + "step": 5781 + }, + { + "chosen_geometric_mean": -0.9560400247573853, + "epoch": 1.43, + "grad_norm": 2.546875, + "learning_rate": 9.343268328351935e-07, + "log_odds": 6.403623104095459, + "log_odds_ratio": -0.16977454721927643, + "loss": 0.2953, + "rejected_geometric_mean": -6.950991630554199, + "step": 5782 + }, + { + "chosen_geometric_mean": -1.063714623451233, + "epoch": 1.43, + "grad_norm": 45.25, + "learning_rate": 9.335680252120258e-07, + "log_odds": 5.00214958190918, + "log_odds_ratio": -0.18594582378864288, + "loss": 0.2758, + "rejected_geometric_mean": -5.757931709289551, + "step": 5783 + }, + { + "chosen_geometric_mean": -0.9094733595848083, + "epoch": 1.43, + "grad_norm": 1.9609375, + "learning_rate": 9.32809455097545e-07, + "log_odds": 5.396552085876465, + "log_odds_ratio": -0.032588135451078415, + "loss": 0.2386, + "rejected_geometric_mean": -5.803339958190918, + "step": 5784 + }, + { + "chosen_geometric_mean": -1.1968189477920532, + "epoch": 1.43, + "grad_norm": 18.75, + "learning_rate": 9.320511226067697e-07, + "log_odds": 2.1220901012420654, + "log_odds_ratio": -0.2748647630214691, + "loss": 0.3191, + "rejected_geometric_mean": -3.0718801021575928, + "step": 5785 + }, + { + "chosen_geometric_mean": -0.8218611478805542, + "epoch": 1.43, + "grad_norm": 3.4375, + "learning_rate": 9.312930278546816e-07, + "log_odds": 4.328180313110352, + "log_odds_ratio": -0.23780183494091034, + "loss": 0.2999, + "rejected_geometric_mean": -4.775562286376953, + "step": 5786 + }, + { + "chosen_geometric_mean": -0.9913219213485718, + "epoch": 1.43, + "grad_norm": 7.125, + "learning_rate": 9.305351709562252e-07, + "log_odds": 10.319791793823242, + "log_odds_ratio": -0.025083154439926147, + "loss": 0.2546, + "rejected_geometric_mean": -10.787282943725586, + "step": 5787 + }, + { + "chosen_geometric_mean": -0.9532071948051453, + "epoch": 1.43, + "grad_norm": 1.9296875, + "learning_rate": 9.297775520263108e-07, + "log_odds": 4.847223281860352, + "log_odds_ratio": -0.18227756023406982, + "loss": 0.2409, + "rejected_geometric_mean": -5.440128803253174, + "step": 5788 + }, + { + "chosen_geometric_mean": -0.9111902713775635, + "epoch": 1.43, + "grad_norm": 12.8125, + "learning_rate": 9.290201711798103e-07, + "log_odds": 13.934457778930664, + "log_odds_ratio": -0.002184682758525014, + "loss": 0.2805, + "rejected_geometric_mean": -14.325774192810059, + "step": 5789 + }, + { + "chosen_geometric_mean": -0.7539734840393066, + "epoch": 1.43, + "grad_norm": 3.953125, + "learning_rate": 9.282630285315622e-07, + "log_odds": 6.34173583984375, + "log_odds_ratio": -0.11320999264717102, + "loss": 0.2687, + "rejected_geometric_mean": -6.554521560668945, + "step": 5790 + }, + { + "chosen_geometric_mean": -0.9765728712081909, + "epoch": 1.43, + "grad_norm": 5.21875, + "learning_rate": 9.27506124196366e-07, + "log_odds": 5.442963123321533, + "log_odds_ratio": -0.1475931853055954, + "loss": 0.2797, + "rejected_geometric_mean": -6.047276973724365, + "step": 5791 + }, + { + "chosen_geometric_mean": -1.0004255771636963, + "epoch": 1.43, + "grad_norm": 2.75, + "learning_rate": 9.267494582889883e-07, + "log_odds": 7.438923358917236, + "log_odds_ratio": -0.0013178101507946849, + "loss": 0.2549, + "rejected_geometric_mean": -7.964406967163086, + "step": 5792 + }, + { + "chosen_geometric_mean": -1.0755418539047241, + "epoch": 1.43, + "grad_norm": 6.96875, + "learning_rate": 9.259930309241561e-07, + "log_odds": 8.965916633605957, + "log_odds_ratio": -0.07709234207868576, + "loss": 0.2737, + "rejected_geometric_mean": -9.666572570800781, + "step": 5793 + }, + { + "chosen_geometric_mean": -0.952979564666748, + "epoch": 1.43, + "grad_norm": 18.0, + "learning_rate": 9.252368422165633e-07, + "log_odds": 6.472631454467773, + "log_odds_ratio": -0.08746089786291122, + "loss": 0.257, + "rejected_geometric_mean": -6.9657368659973145, + "step": 5794 + }, + { + "chosen_geometric_mean": -0.8950139284133911, + "epoch": 1.43, + "grad_norm": 20.375, + "learning_rate": 9.244808922808652e-07, + "log_odds": 3.199061393737793, + "log_odds_ratio": -0.10601171851158142, + "loss": 0.2806, + "rejected_geometric_mean": -3.6449880599975586, + "step": 5795 + }, + { + "chosen_geometric_mean": -0.8687801361083984, + "epoch": 1.44, + "grad_norm": 2.25, + "learning_rate": 9.237251812316822e-07, + "log_odds": 4.227023124694824, + "log_odds_ratio": -0.031779732555150986, + "loss": 0.2978, + "rejected_geometric_mean": -4.570065021514893, + "step": 5796 + }, + { + "chosen_geometric_mean": -0.9892706274986267, + "epoch": 1.44, + "grad_norm": 12.25, + "learning_rate": 9.229697091835996e-07, + "log_odds": 3.6244633197784424, + "log_odds_ratio": -0.2932095527648926, + "loss": 0.2729, + "rejected_geometric_mean": -4.333415985107422, + "step": 5797 + }, + { + "chosen_geometric_mean": -0.8999322652816772, + "epoch": 1.44, + "grad_norm": 2.1875, + "learning_rate": 9.222144762511629e-07, + "log_odds": 7.898476600646973, + "log_odds_ratio": -0.019994139671325684, + "loss": 0.2352, + "rejected_geometric_mean": -8.259765625, + "step": 5798 + }, + { + "chosen_geometric_mean": -0.9300979971885681, + "epoch": 1.44, + "grad_norm": 4.9375, + "learning_rate": 9.214594825488857e-07, + "log_odds": 8.448535919189453, + "log_odds_ratio": -0.31561124324798584, + "loss": 0.2668, + "rejected_geometric_mean": -9.077271461486816, + "step": 5799 + }, + { + "chosen_geometric_mean": -0.7681248784065247, + "epoch": 1.44, + "grad_norm": 5.0, + "learning_rate": 9.207047281912421e-07, + "log_odds": 14.171428680419922, + "log_odds_ratio": -4.420054756337777e-05, + "loss": 0.2458, + "rejected_geometric_mean": -14.30320930480957, + "step": 5800 + }, + { + "chosen_geometric_mean": -0.992431104183197, + "epoch": 1.44, + "grad_norm": 2.0, + "learning_rate": 9.199502132926699e-07, + "log_odds": 4.255707740783691, + "log_odds_ratio": -0.054056767374277115, + "loss": 0.2433, + "rejected_geometric_mean": -4.765100479125977, + "step": 5801 + }, + { + "chosen_geometric_mean": -1.0318478345870972, + "epoch": 1.44, + "grad_norm": 2.125, + "learning_rate": 9.191959379675727e-07, + "log_odds": 13.100727081298828, + "log_odds_ratio": -0.0003058324218727648, + "loss": 0.28, + "rejected_geometric_mean": -13.640617370605469, + "step": 5802 + }, + { + "chosen_geometric_mean": -1.0249543190002441, + "epoch": 1.44, + "grad_norm": 3.75, + "learning_rate": 9.184419023303179e-07, + "log_odds": 4.264364719390869, + "log_odds_ratio": -0.18725019693374634, + "loss": 0.2492, + "rejected_geometric_mean": -4.964134216308594, + "step": 5803 + }, + { + "chosen_geometric_mean": -0.9383898973464966, + "epoch": 1.44, + "grad_norm": 36.25, + "learning_rate": 9.176881064952333e-07, + "log_odds": 2.2398946285247803, + "log_odds_ratio": -0.31499436497688293, + "loss": 0.2361, + "rejected_geometric_mean": -2.8768041133880615, + "step": 5804 + }, + { + "chosen_geometric_mean": -0.9781514406204224, + "epoch": 1.44, + "grad_norm": 12.0, + "learning_rate": 9.169345505766144e-07, + "log_odds": 8.946645736694336, + "log_odds_ratio": -0.10044138878583908, + "loss": 0.25, + "rejected_geometric_mean": -9.421232223510742, + "step": 5805 + }, + { + "chosen_geometric_mean": -1.024577021598816, + "epoch": 1.44, + "grad_norm": 6.625, + "learning_rate": 9.161812346887175e-07, + "log_odds": 12.956963539123535, + "log_odds_ratio": -0.030515626072883606, + "loss": 0.2772, + "rejected_geometric_mean": -13.549046516418457, + "step": 5806 + }, + { + "chosen_geometric_mean": -1.133923888206482, + "epoch": 1.44, + "grad_norm": 8.4375, + "learning_rate": 9.154281589457614e-07, + "log_odds": 5.18082332611084, + "log_odds_ratio": -0.19192074239253998, + "loss": 0.204, + "rejected_geometric_mean": -6.015374660491943, + "step": 5807 + }, + { + "chosen_geometric_mean": -0.9651508331298828, + "epoch": 1.44, + "grad_norm": 10.4375, + "learning_rate": 9.14675323461934e-07, + "log_odds": 11.307641983032227, + "log_odds_ratio": -0.1603795737028122, + "loss": 0.2771, + "rejected_geometric_mean": -11.899337768554688, + "step": 5808 + }, + { + "chosen_geometric_mean": -0.8821999430656433, + "epoch": 1.44, + "grad_norm": 4.875, + "learning_rate": 9.139227283513819e-07, + "log_odds": 9.477926254272461, + "log_odds_ratio": -0.12778286635875702, + "loss": 0.2373, + "rejected_geometric_mean": -9.8035306930542, + "step": 5809 + }, + { + "chosen_geometric_mean": -0.8741213083267212, + "epoch": 1.44, + "grad_norm": 2.921875, + "learning_rate": 9.13170373728215e-07, + "log_odds": 9.19642448425293, + "log_odds_ratio": -0.11923456192016602, + "loss": 0.292, + "rejected_geometric_mean": -9.491626739501953, + "step": 5810 + }, + { + "chosen_geometric_mean": -1.1334350109100342, + "epoch": 1.44, + "grad_norm": 5.1875, + "learning_rate": 9.124182597065107e-07, + "log_odds": 6.735640048980713, + "log_odds_ratio": -0.1523420363664627, + "loss": 0.274, + "rejected_geometric_mean": -7.5450286865234375, + "step": 5811 + }, + { + "chosen_geometric_mean": -1.1004111766815186, + "epoch": 1.44, + "grad_norm": 20.5, + "learning_rate": 9.116663864003053e-07, + "log_odds": 4.479401111602783, + "log_odds_ratio": -0.08488951623439789, + "loss": 0.3039, + "rejected_geometric_mean": -5.193843841552734, + "step": 5812 + }, + { + "chosen_geometric_mean": -0.9150639176368713, + "epoch": 1.44, + "grad_norm": 4.8125, + "learning_rate": 9.109147539236022e-07, + "log_odds": 3.753056764602661, + "log_odds_ratio": -0.21347655355930328, + "loss": 0.2725, + "rejected_geometric_mean": -4.259346008300781, + "step": 5813 + }, + { + "chosen_geometric_mean": -1.064902901649475, + "epoch": 1.44, + "grad_norm": 3.5, + "learning_rate": 9.101633623903675e-07, + "log_odds": 5.1038055419921875, + "log_odds_ratio": -0.13925203680992126, + "loss": 0.1948, + "rejected_geometric_mean": -5.800354480743408, + "step": 5814 + }, + { + "chosen_geometric_mean": -0.9903674721717834, + "epoch": 1.44, + "grad_norm": 3.9375, + "learning_rate": 9.094122119145282e-07, + "log_odds": 5.604782581329346, + "log_odds_ratio": -0.10484886914491653, + "loss": 0.3175, + "rejected_geometric_mean": -6.179615497589111, + "step": 5815 + }, + { + "chosen_geometric_mean": -1.1448208093643188, + "epoch": 1.44, + "grad_norm": 3.625, + "learning_rate": 9.086613026099791e-07, + "log_odds": 6.935081958770752, + "log_odds_ratio": -0.07231873273849487, + "loss": 0.2338, + "rejected_geometric_mean": -7.701630592346191, + "step": 5816 + }, + { + "chosen_geometric_mean": -0.9467054009437561, + "epoch": 1.44, + "grad_norm": 4.78125, + "learning_rate": 9.079106345905747e-07, + "log_odds": 8.400240898132324, + "log_odds_ratio": -0.02855486050248146, + "loss": 0.2071, + "rejected_geometric_mean": -8.805015563964844, + "step": 5817 + }, + { + "chosen_geometric_mean": -1.1170389652252197, + "epoch": 1.44, + "grad_norm": 24.625, + "learning_rate": 9.071602079701336e-07, + "log_odds": 5.918178558349609, + "log_odds_ratio": -0.14033955335617065, + "loss": 0.237, + "rejected_geometric_mean": -6.723259925842285, + "step": 5818 + }, + { + "chosen_geometric_mean": -1.0737204551696777, + "epoch": 1.44, + "grad_norm": 5.5, + "learning_rate": 9.064100228624395e-07, + "log_odds": 6.764087677001953, + "log_odds_ratio": -0.17609326541423798, + "loss": 0.2852, + "rejected_geometric_mean": -7.50346565246582, + "step": 5819 + }, + { + "chosen_geometric_mean": -0.9008660912513733, + "epoch": 1.44, + "grad_norm": 4.0, + "learning_rate": 9.056600793812393e-07, + "log_odds": 12.310273170471191, + "log_odds_ratio": -3.1919931643642485e-05, + "loss": 0.2301, + "rejected_geometric_mean": -12.652048110961914, + "step": 5820 + }, + { + "chosen_geometric_mean": -1.009448528289795, + "epoch": 1.44, + "grad_norm": 3.5, + "learning_rate": 9.049103776402404e-07, + "log_odds": 9.807626724243164, + "log_odds_ratio": -0.1178898811340332, + "loss": 0.248, + "rejected_geometric_mean": -10.451709747314453, + "step": 5821 + }, + { + "chosen_geometric_mean": -1.3586751222610474, + "epoch": 1.44, + "grad_norm": 9.3125, + "learning_rate": 9.041609177531177e-07, + "log_odds": 9.261127471923828, + "log_odds_ratio": -0.09515391290187836, + "loss": 0.2413, + "rejected_geometric_mean": -10.338552474975586, + "step": 5822 + }, + { + "chosen_geometric_mean": -0.8597664833068848, + "epoch": 1.44, + "grad_norm": 3.484375, + "learning_rate": 9.03411699833506e-07, + "log_odds": 4.434179306030273, + "log_odds_ratio": -0.29427364468574524, + "loss": 0.2622, + "rejected_geometric_mean": -4.907504081726074, + "step": 5823 + }, + { + "chosen_geometric_mean": -1.0180859565734863, + "epoch": 1.44, + "grad_norm": 2.8125, + "learning_rate": 9.026627239950034e-07, + "log_odds": 8.990653991699219, + "log_odds_ratio": -0.03625255450606346, + "loss": 0.2562, + "rejected_geometric_mean": -9.536474227905273, + "step": 5824 + }, + { + "chosen_geometric_mean": -0.8490396738052368, + "epoch": 1.44, + "grad_norm": 4.125, + "learning_rate": 9.019139903511759e-07, + "log_odds": 10.158063888549805, + "log_odds_ratio": -0.06713548302650452, + "loss": 0.2289, + "rejected_geometric_mean": -10.467947959899902, + "step": 5825 + }, + { + "chosen_geometric_mean": -0.8884097337722778, + "epoch": 1.44, + "grad_norm": 2.734375, + "learning_rate": 9.011654990155478e-07, + "log_odds": 5.541289806365967, + "log_odds_ratio": -0.30602172017097473, + "loss": 0.3256, + "rejected_geometric_mean": -6.128233432769775, + "step": 5826 + }, + { + "chosen_geometric_mean": -0.8449010848999023, + "epoch": 1.44, + "grad_norm": 4.28125, + "learning_rate": 9.004172501016076e-07, + "log_odds": 5.730278491973877, + "log_odds_ratio": -0.16702991724014282, + "loss": 0.221, + "rejected_geometric_mean": -6.111297130584717, + "step": 5827 + }, + { + "chosen_geometric_mean": -1.0454243421554565, + "epoch": 1.44, + "grad_norm": 15.125, + "learning_rate": 8.996692437228091e-07, + "log_odds": 8.479122161865234, + "log_odds_ratio": -0.03317568451166153, + "loss": 0.2303, + "rejected_geometric_mean": -9.082536697387695, + "step": 5828 + }, + { + "chosen_geometric_mean": -0.8896318674087524, + "epoch": 1.44, + "grad_norm": 5.25, + "learning_rate": 8.989214799925669e-07, + "log_odds": 7.945653438568115, + "log_odds_ratio": -0.2434198260307312, + "loss": 0.2639, + "rejected_geometric_mean": -8.48315715789795, + "step": 5829 + }, + { + "chosen_geometric_mean": -0.9826264381408691, + "epoch": 1.44, + "grad_norm": 33.0, + "learning_rate": 8.981739590242602e-07, + "log_odds": 7.287312030792236, + "log_odds_ratio": -0.14518874883651733, + "loss": 0.2468, + "rejected_geometric_mean": -7.833484649658203, + "step": 5830 + }, + { + "chosen_geometric_mean": -0.885666012763977, + "epoch": 1.44, + "grad_norm": 2.4375, + "learning_rate": 8.974266809312327e-07, + "log_odds": 7.252978324890137, + "log_odds_ratio": -0.15947315096855164, + "loss": 0.2273, + "rejected_geometric_mean": -7.768660068511963, + "step": 5831 + }, + { + "chosen_geometric_mean": -1.326682209968567, + "epoch": 1.44, + "grad_norm": 27.375, + "learning_rate": 8.966796458267882e-07, + "log_odds": 7.130317687988281, + "log_odds_ratio": -0.1529347449541092, + "loss": 0.3333, + "rejected_geometric_mean": -8.227350234985352, + "step": 5832 + }, + { + "chosen_geometric_mean": -1.0633186101913452, + "epoch": 1.44, + "grad_norm": 5.0, + "learning_rate": 8.959328538241943e-07, + "log_odds": 2.420159101486206, + "log_odds_ratio": -0.33223336935043335, + "loss": 0.2356, + "rejected_geometric_mean": -3.2466864585876465, + "step": 5833 + }, + { + "chosen_geometric_mean": -1.1059836149215698, + "epoch": 1.44, + "grad_norm": 15.8125, + "learning_rate": 8.951863050366847e-07, + "log_odds": 6.364516258239746, + "log_odds_ratio": -0.10669320821762085, + "loss": 0.2927, + "rejected_geometric_mean": -7.108564376831055, + "step": 5834 + }, + { + "chosen_geometric_mean": -1.2186739444732666, + "epoch": 1.44, + "grad_norm": 21.0, + "learning_rate": 8.944399995774522e-07, + "log_odds": 1.8601329326629639, + "log_odds_ratio": -0.21982744336128235, + "loss": 0.2912, + "rejected_geometric_mean": -2.8338565826416016, + "step": 5835 + }, + { + "chosen_geometric_mean": -0.9281919002532959, + "epoch": 1.44, + "grad_norm": 32.5, + "learning_rate": 8.936939375596554e-07, + "log_odds": 6.689152717590332, + "log_odds_ratio": -0.01510896161198616, + "loss": 0.2697, + "rejected_geometric_mean": -7.111261367797852, + "step": 5836 + }, + { + "chosen_geometric_mean": -0.9302011728286743, + "epoch": 1.45, + "grad_norm": 2.578125, + "learning_rate": 8.929481190964162e-07, + "log_odds": 12.920915603637695, + "log_odds_ratio": -0.00015784852439537644, + "loss": 0.2155, + "rejected_geometric_mean": -13.341352462768555, + "step": 5837 + }, + { + "chosen_geometric_mean": -0.8659085631370544, + "epoch": 1.45, + "grad_norm": 2.6875, + "learning_rate": 8.92202544300817e-07, + "log_odds": 1.8237751722335815, + "log_odds_ratio": -0.32373762130737305, + "loss": 0.2692, + "rejected_geometric_mean": -2.428807020187378, + "step": 5838 + }, + { + "chosen_geometric_mean": -0.977096438407898, + "epoch": 1.45, + "grad_norm": 4.46875, + "learning_rate": 8.914572132859062e-07, + "log_odds": 10.786504745483398, + "log_odds_ratio": -0.0001864973019110039, + "loss": 0.2318, + "rejected_geometric_mean": -11.284246444702148, + "step": 5839 + }, + { + "chosen_geometric_mean": -0.9832801818847656, + "epoch": 1.45, + "grad_norm": 2.265625, + "learning_rate": 8.907121261646923e-07, + "log_odds": 8.477843284606934, + "log_odds_ratio": -0.15747417509555817, + "loss": 0.2732, + "rejected_geometric_mean": -9.126605987548828, + "step": 5840 + }, + { + "chosen_geometric_mean": -0.9607177376747131, + "epoch": 1.45, + "grad_norm": 4.5625, + "learning_rate": 8.899672830501501e-07, + "log_odds": 8.16593074798584, + "log_odds_ratio": -0.10979949682950974, + "loss": 0.2444, + "rejected_geometric_mean": -8.725866317749023, + "step": 5841 + }, + { + "chosen_geometric_mean": -0.9871131181716919, + "epoch": 1.45, + "grad_norm": 2.390625, + "learning_rate": 8.892226840552143e-07, + "log_odds": 8.440730094909668, + "log_odds_ratio": -0.10273212939500809, + "loss": 0.2399, + "rejected_geometric_mean": -8.997359275817871, + "step": 5842 + }, + { + "chosen_geometric_mean": -0.9901952743530273, + "epoch": 1.45, + "grad_norm": 2.546875, + "learning_rate": 8.884783292927854e-07, + "log_odds": 8.165141105651855, + "log_odds_ratio": -0.21558792889118195, + "loss": 0.2925, + "rejected_geometric_mean": -8.90183162689209, + "step": 5843 + }, + { + "chosen_geometric_mean": -0.9015055894851685, + "epoch": 1.45, + "grad_norm": 3.40625, + "learning_rate": 8.877342188757235e-07, + "log_odds": 4.716901779174805, + "log_odds_ratio": -0.12216715514659882, + "loss": 0.2662, + "rejected_geometric_mean": -5.202498912811279, + "step": 5844 + }, + { + "chosen_geometric_mean": -0.8048420548439026, + "epoch": 1.45, + "grad_norm": 7.34375, + "learning_rate": 8.869903529168558e-07, + "log_odds": 11.09245491027832, + "log_odds_ratio": -0.15012717247009277, + "loss": 0.2615, + "rejected_geometric_mean": -11.45814037322998, + "step": 5845 + }, + { + "chosen_geometric_mean": -0.993143618106842, + "epoch": 1.45, + "grad_norm": 3.5625, + "learning_rate": 8.86246731528968e-07, + "log_odds": 10.119245529174805, + "log_odds_ratio": -0.006528593134135008, + "loss": 0.2348, + "rejected_geometric_mean": -10.632078170776367, + "step": 5846 + }, + { + "chosen_geometric_mean": -0.8670281171798706, + "epoch": 1.45, + "grad_norm": 3.96875, + "learning_rate": 8.855033548248123e-07, + "log_odds": 13.823323249816895, + "log_odds_ratio": -0.1771429032087326, + "loss": 0.2967, + "rejected_geometric_mean": -14.252178192138672, + "step": 5847 + }, + { + "chosen_geometric_mean": -1.074161410331726, + "epoch": 1.45, + "grad_norm": 3.296875, + "learning_rate": 8.847602229171032e-07, + "log_odds": 3.435563802719116, + "log_odds_ratio": -0.25607410073280334, + "loss": 0.2567, + "rejected_geometric_mean": -4.276905536651611, + "step": 5848 + }, + { + "chosen_geometric_mean": -0.9554505348205566, + "epoch": 1.45, + "grad_norm": 25.5, + "learning_rate": 8.840173359185164e-07, + "log_odds": 7.920963287353516, + "log_odds_ratio": -0.10463099181652069, + "loss": 0.2863, + "rejected_geometric_mean": -8.440909385681152, + "step": 5849 + }, + { + "chosen_geometric_mean": -1.0822256803512573, + "epoch": 1.45, + "grad_norm": 2.609375, + "learning_rate": 8.832746939416903e-07, + "log_odds": 8.296140670776367, + "log_odds_ratio": -0.0024674853775650263, + "loss": 0.2376, + "rejected_geometric_mean": -8.963481903076172, + "step": 5850 + }, + { + "chosen_geometric_mean": -1.0035024881362915, + "epoch": 1.45, + "grad_norm": 20.0, + "learning_rate": 8.825322970992284e-07, + "log_odds": 2.8047878742218018, + "log_odds_ratio": -0.320046067237854, + "loss": 0.2928, + "rejected_geometric_mean": -3.592855453491211, + "step": 5851 + }, + { + "chosen_geometric_mean": -1.3425945043563843, + "epoch": 1.45, + "grad_norm": 17.625, + "learning_rate": 8.817901455036967e-07, + "log_odds": 8.91170597076416, + "log_odds_ratio": -0.11137232929468155, + "loss": 0.2425, + "rejected_geometric_mean": -9.982372283935547, + "step": 5852 + }, + { + "chosen_geometric_mean": -1.0801745653152466, + "epoch": 1.45, + "grad_norm": 3.421875, + "learning_rate": 8.810482392676217e-07, + "log_odds": 11.78818416595459, + "log_odds_ratio": -0.021164825186133385, + "loss": 0.2039, + "rejected_geometric_mean": -12.454445838928223, + "step": 5853 + }, + { + "chosen_geometric_mean": -0.8854854106903076, + "epoch": 1.45, + "grad_norm": 2.546875, + "learning_rate": 8.803065785034953e-07, + "log_odds": 3.799123764038086, + "log_odds_ratio": -0.21685582399368286, + "loss": 0.2681, + "rejected_geometric_mean": -4.263649940490723, + "step": 5854 + }, + { + "chosen_geometric_mean": -0.9544827938079834, + "epoch": 1.45, + "grad_norm": 14.5625, + "learning_rate": 8.795651633237709e-07, + "log_odds": 5.761816024780273, + "log_odds_ratio": -0.09143394231796265, + "loss": 0.2396, + "rejected_geometric_mean": -6.2867655754089355, + "step": 5855 + }, + { + "chosen_geometric_mean": -0.8447505831718445, + "epoch": 1.45, + "grad_norm": 2.125, + "learning_rate": 8.788239938408635e-07, + "log_odds": 7.4113969802856445, + "log_odds_ratio": -0.17204168438911438, + "loss": 0.2318, + "rejected_geometric_mean": -7.829319000244141, + "step": 5856 + }, + { + "chosen_geometric_mean": -1.0599132776260376, + "epoch": 1.45, + "grad_norm": 15.9375, + "learning_rate": 8.780830701671534e-07, + "log_odds": 2.065929889678955, + "log_odds_ratio": -0.3025364875793457, + "loss": 0.3143, + "rejected_geometric_mean": -2.8283417224884033, + "step": 5857 + }, + { + "chosen_geometric_mean": -1.0916016101837158, + "epoch": 1.45, + "grad_norm": 10.375, + "learning_rate": 8.773423924149826e-07, + "log_odds": 4.394280910491943, + "log_odds_ratio": -0.10871341079473495, + "loss": 0.2692, + "rejected_geometric_mean": -5.093137264251709, + "step": 5858 + }, + { + "chosen_geometric_mean": -0.8767033219337463, + "epoch": 1.45, + "grad_norm": 3.578125, + "learning_rate": 8.766019606966548e-07, + "log_odds": 12.156310081481934, + "log_odds_ratio": -0.0009715917985886335, + "loss": 0.2172, + "rejected_geometric_mean": -12.47146224975586, + "step": 5859 + }, + { + "chosen_geometric_mean": -1.1589343547821045, + "epoch": 1.45, + "grad_norm": 36.5, + "learning_rate": 8.758617751244383e-07, + "log_odds": 3.800506830215454, + "log_odds_ratio": -0.18734145164489746, + "loss": 0.2939, + "rejected_geometric_mean": -4.661901950836182, + "step": 5860 + }, + { + "chosen_geometric_mean": -0.8899134397506714, + "epoch": 1.45, + "grad_norm": 36.0, + "learning_rate": 8.751218358105617e-07, + "log_odds": 14.112471580505371, + "log_odds_ratio": -0.01272854208946228, + "loss": 0.3043, + "rejected_geometric_mean": -14.465600967407227, + "step": 5861 + }, + { + "chosen_geometric_mean": -1.0143109560012817, + "epoch": 1.45, + "grad_norm": 7.3125, + "learning_rate": 8.743821428672183e-07, + "log_odds": 4.414839267730713, + "log_odds_ratio": -0.3836274743080139, + "loss": 0.273, + "rejected_geometric_mean": -5.152422904968262, + "step": 5862 + }, + { + "chosen_geometric_mean": -0.8189296722412109, + "epoch": 1.45, + "grad_norm": 2.265625, + "learning_rate": 8.736426964065639e-07, + "log_odds": 14.110074996948242, + "log_odds_ratio": -8.374585377168842e-06, + "loss": 0.1982, + "rejected_geometric_mean": -14.345209121704102, + "step": 5863 + }, + { + "chosen_geometric_mean": -0.872266948223114, + "epoch": 1.45, + "grad_norm": 2.453125, + "learning_rate": 8.729034965407148e-07, + "log_odds": 7.762662410736084, + "log_odds_ratio": -0.11439172178506851, + "loss": 0.256, + "rejected_geometric_mean": -8.179971694946289, + "step": 5864 + }, + { + "chosen_geometric_mean": -0.9596013426780701, + "epoch": 1.45, + "grad_norm": 23.625, + "learning_rate": 8.721645433817533e-07, + "log_odds": 5.237873554229736, + "log_odds_ratio": -0.15212643146514893, + "loss": 0.2157, + "rejected_geometric_mean": -5.759753704071045, + "step": 5865 + }, + { + "chosen_geometric_mean": -0.9355504512786865, + "epoch": 1.45, + "grad_norm": 27.625, + "learning_rate": 8.714258370417214e-07, + "log_odds": 6.271329879760742, + "log_odds_ratio": -0.10027514398097992, + "loss": 0.3689, + "rejected_geometric_mean": -6.776569366455078, + "step": 5866 + }, + { + "chosen_geometric_mean": -1.1347736120224, + "epoch": 1.45, + "grad_norm": 12.625, + "learning_rate": 8.706873776326242e-07, + "log_odds": 4.739266395568848, + "log_odds_ratio": -0.20385928452014923, + "loss": 0.2359, + "rejected_geometric_mean": -5.594201564788818, + "step": 5867 + }, + { + "chosen_geometric_mean": -0.988236665725708, + "epoch": 1.45, + "grad_norm": 18.75, + "learning_rate": 8.699491652664299e-07, + "log_odds": 7.5578203201293945, + "log_odds_ratio": -0.02653346210718155, + "loss": 0.2757, + "rejected_geometric_mean": -8.053678512573242, + "step": 5868 + }, + { + "chosen_geometric_mean": -0.9620712995529175, + "epoch": 1.45, + "grad_norm": 5.125, + "learning_rate": 8.69211200055071e-07, + "log_odds": 8.766998291015625, + "log_odds_ratio": -0.01675144024193287, + "loss": 0.2549, + "rejected_geometric_mean": -9.250191688537598, + "step": 5869 + }, + { + "chosen_geometric_mean": -1.030829906463623, + "epoch": 1.45, + "grad_norm": 9.5, + "learning_rate": 8.684734821104385e-07, + "log_odds": 8.924456596374512, + "log_odds_ratio": -0.001563570462167263, + "loss": 0.2551, + "rejected_geometric_mean": -9.50040054321289, + "step": 5870 + }, + { + "chosen_geometric_mean": -0.8856868147850037, + "epoch": 1.45, + "grad_norm": 26.625, + "learning_rate": 8.6773601154439e-07, + "log_odds": 8.12229061126709, + "log_odds_ratio": -0.1818845123052597, + "loss": 0.2897, + "rejected_geometric_mean": -8.539922714233398, + "step": 5871 + }, + { + "chosen_geometric_mean": -0.9196110963821411, + "epoch": 1.45, + "grad_norm": 80.0, + "learning_rate": 8.669987884687428e-07, + "log_odds": 6.256815433502197, + "log_odds_ratio": -0.012365762144327164, + "loss": 0.2645, + "rejected_geometric_mean": -6.654028415679932, + "step": 5872 + }, + { + "chosen_geometric_mean": -1.2358382940292358, + "epoch": 1.45, + "grad_norm": 3.984375, + "learning_rate": 8.66261812995276e-07, + "log_odds": 1.003664493560791, + "log_odds_ratio": -0.3374258875846863, + "loss": 0.3617, + "rejected_geometric_mean": -2.063305377960205, + "step": 5873 + }, + { + "chosen_geometric_mean": -1.10536527633667, + "epoch": 1.45, + "grad_norm": 9.375, + "learning_rate": 8.655250852357363e-07, + "log_odds": 4.839698791503906, + "log_odds_ratio": -0.1255197674036026, + "loss": 0.266, + "rejected_geometric_mean": -5.58376407623291, + "step": 5874 + }, + { + "chosen_geometric_mean": -1.0389922857284546, + "epoch": 1.45, + "grad_norm": 26.0, + "learning_rate": 8.647886053018273e-07, + "log_odds": 7.741528511047363, + "log_odds_ratio": -0.12326417118310928, + "loss": 0.3146, + "rejected_geometric_mean": -8.40032958984375, + "step": 5875 + }, + { + "chosen_geometric_mean": -0.9122442007064819, + "epoch": 1.45, + "grad_norm": 2.640625, + "learning_rate": 8.640523733052167e-07, + "log_odds": 8.211870193481445, + "log_odds_ratio": -0.0688396468758583, + "loss": 0.2431, + "rejected_geometric_mean": -8.598188400268555, + "step": 5876 + }, + { + "chosen_geometric_mean": -0.8982742428779602, + "epoch": 1.46, + "grad_norm": 51.5, + "learning_rate": 8.633163893575361e-07, + "log_odds": 11.586212158203125, + "log_odds_ratio": -0.13534462451934814, + "loss": 0.2636, + "rejected_geometric_mean": -12.070050239562988, + "step": 5877 + }, + { + "chosen_geometric_mean": -1.1305959224700928, + "epoch": 1.46, + "grad_norm": 33.25, + "learning_rate": 8.625806535703768e-07, + "log_odds": 11.092617988586426, + "log_odds_ratio": -0.000380896293791011, + "loss": 0.3038, + "rejected_geometric_mean": -11.81484603881836, + "step": 5878 + }, + { + "chosen_geometric_mean": -0.8595353960990906, + "epoch": 1.46, + "grad_norm": 17.875, + "learning_rate": 8.618451660552948e-07, + "log_odds": 2.801604747772217, + "log_odds_ratio": -0.34393125772476196, + "loss": 0.2762, + "rejected_geometric_mean": -3.3516318798065186, + "step": 5879 + }, + { + "chosen_geometric_mean": -1.009975552558899, + "epoch": 1.46, + "grad_norm": 30.25, + "learning_rate": 8.611099269238088e-07, + "log_odds": 5.7047224044799805, + "log_odds_ratio": -0.023757563903927803, + "loss": 0.2639, + "rejected_geometric_mean": -6.253537654876709, + "step": 5880 + }, + { + "chosen_geometric_mean": -1.0345299243927002, + "epoch": 1.46, + "grad_norm": 6.84375, + "learning_rate": 8.603749362873975e-07, + "log_odds": 2.3595993518829346, + "log_odds_ratio": -0.36856943368911743, + "loss": 0.2451, + "rejected_geometric_mean": -3.1361117362976074, + "step": 5881 + }, + { + "chosen_geometric_mean": -0.9679640531539917, + "epoch": 1.46, + "grad_norm": 1.6640625, + "learning_rate": 8.596401942575025e-07, + "log_odds": 9.305723190307617, + "log_odds_ratio": -0.0010672679636627436, + "loss": 0.1962, + "rejected_geometric_mean": -9.756046295166016, + "step": 5882 + }, + { + "chosen_geometric_mean": -0.9458796977996826, + "epoch": 1.46, + "grad_norm": 3.375, + "learning_rate": 8.589057009455301e-07, + "log_odds": 7.690526008605957, + "log_odds_ratio": -0.22349153459072113, + "loss": 0.2327, + "rejected_geometric_mean": -8.245349884033203, + "step": 5883 + }, + { + "chosen_geometric_mean": -0.9301333427429199, + "epoch": 1.46, + "grad_norm": 24.375, + "learning_rate": 8.581714564628452e-07, + "log_odds": 10.0329008102417, + "log_odds_ratio": -0.0055397311225533485, + "loss": 0.298, + "rejected_geometric_mean": -10.436079025268555, + "step": 5884 + }, + { + "chosen_geometric_mean": -0.9358785152435303, + "epoch": 1.46, + "grad_norm": 2.234375, + "learning_rate": 8.574374609207778e-07, + "log_odds": 7.988339424133301, + "log_odds_ratio": -0.2660627067089081, + "loss": 0.3058, + "rejected_geometric_mean": -8.632620811462402, + "step": 5885 + }, + { + "chosen_geometric_mean": -1.0878515243530273, + "epoch": 1.46, + "grad_norm": 2.953125, + "learning_rate": 8.5670371443062e-07, + "log_odds": 3.8821754455566406, + "log_odds_ratio": -0.1005401462316513, + "loss": 0.228, + "rejected_geometric_mean": -4.587921142578125, + "step": 5886 + }, + { + "chosen_geometric_mean": -0.7759636640548706, + "epoch": 1.46, + "grad_norm": 3.390625, + "learning_rate": 8.559702171036241e-07, + "log_odds": 8.504390716552734, + "log_odds_ratio": -0.0031282564159482718, + "loss": 0.2195, + "rejected_geometric_mean": -8.64389419555664, + "step": 5887 + }, + { + "chosen_geometric_mean": -0.9636608362197876, + "epoch": 1.46, + "grad_norm": 3.140625, + "learning_rate": 8.552369690510071e-07, + "log_odds": 6.254481792449951, + "log_odds_ratio": -0.2543748617172241, + "loss": 0.2444, + "rejected_geometric_mean": -6.875221252441406, + "step": 5888 + }, + { + "chosen_geometric_mean": -0.8719372153282166, + "epoch": 1.46, + "grad_norm": 20.375, + "learning_rate": 8.545039703839452e-07, + "log_odds": 5.9462995529174805, + "log_odds_ratio": -0.056476060301065445, + "loss": 0.3146, + "rejected_geometric_mean": -6.307744979858398, + "step": 5889 + }, + { + "chosen_geometric_mean": -0.9792805910110474, + "epoch": 1.46, + "grad_norm": 27.375, + "learning_rate": 8.537712212135802e-07, + "log_odds": 11.090155601501465, + "log_odds_ratio": -0.007580871693789959, + "loss": 0.3042, + "rejected_geometric_mean": -11.583681106567383, + "step": 5890 + }, + { + "chosen_geometric_mean": -0.9630740284919739, + "epoch": 1.46, + "grad_norm": 2.921875, + "learning_rate": 8.530387216510147e-07, + "log_odds": 3.6165924072265625, + "log_odds_ratio": -0.24002966284751892, + "loss": 0.291, + "rejected_geometric_mean": -4.232431411743164, + "step": 5891 + }, + { + "chosen_geometric_mean": -0.9015710353851318, + "epoch": 1.46, + "grad_norm": 24.875, + "learning_rate": 8.523064718073123e-07, + "log_odds": 13.035036087036133, + "log_odds_ratio": -0.019258277490735054, + "loss": 0.2894, + "rejected_geometric_mean": -13.416630744934082, + "step": 5892 + }, + { + "chosen_geometric_mean": -1.2659034729003906, + "epoch": 1.46, + "grad_norm": 9.3125, + "learning_rate": 8.515744717934993e-07, + "log_odds": 5.698404312133789, + "log_odds_ratio": -0.15403001010417938, + "loss": 0.2538, + "rejected_geometric_mean": -6.698596000671387, + "step": 5893 + }, + { + "chosen_geometric_mean": -0.8015834093093872, + "epoch": 1.46, + "grad_norm": 4.5625, + "learning_rate": 8.508427217205659e-07, + "log_odds": 2.9709746837615967, + "log_odds_ratio": -0.1431155800819397, + "loss": 0.2547, + "rejected_geometric_mean": -3.2767562866210938, + "step": 5894 + }, + { + "chosen_geometric_mean": -1.0427496433258057, + "epoch": 1.46, + "grad_norm": 49.25, + "learning_rate": 8.501112216994611e-07, + "log_odds": 8.270744323730469, + "log_odds_ratio": -0.015649840235710144, + "loss": 0.2811, + "rejected_geometric_mean": -8.838972091674805, + "step": 5895 + }, + { + "chosen_geometric_mean": -1.0813159942626953, + "epoch": 1.46, + "grad_norm": 2.5, + "learning_rate": 8.493799718410991e-07, + "log_odds": 9.615190505981445, + "log_odds_ratio": -0.18853160738945007, + "loss": 0.3097, + "rejected_geometric_mean": -10.412540435791016, + "step": 5896 + }, + { + "chosen_geometric_mean": -0.9216580986976624, + "epoch": 1.46, + "grad_norm": 4.21875, + "learning_rate": 8.486489722563554e-07, + "log_odds": 4.904610633850098, + "log_odds_ratio": -0.03163190186023712, + "loss": 0.2543, + "rejected_geometric_mean": -5.32073974609375, + "step": 5897 + }, + { + "chosen_geometric_mean": -1.0522034168243408, + "epoch": 1.46, + "grad_norm": 2.671875, + "learning_rate": 8.479182230560665e-07, + "log_odds": 3.2414321899414062, + "log_odds_ratio": -0.16991516947746277, + "loss": 0.254, + "rejected_geometric_mean": -3.9522461891174316, + "step": 5898 + }, + { + "chosen_geometric_mean": -1.2106351852416992, + "epoch": 1.46, + "grad_norm": 25.75, + "learning_rate": 8.471877243510304e-07, + "log_odds": 5.077915668487549, + "log_odds_ratio": -0.033451665192842484, + "loss": 0.2561, + "rejected_geometric_mean": -5.803336143493652, + "step": 5899 + }, + { + "chosen_geometric_mean": -1.0834087133407593, + "epoch": 1.46, + "grad_norm": 8.0625, + "learning_rate": 8.464574762520094e-07, + "log_odds": 4.5127482414245605, + "log_odds_ratio": -0.21186739206314087, + "loss": 0.2333, + "rejected_geometric_mean": -5.278713226318359, + "step": 5900 + }, + { + "chosen_geometric_mean": -0.9735618233680725, + "epoch": 1.46, + "grad_norm": 1.9453125, + "learning_rate": 8.457274788697272e-07, + "log_odds": 9.96857738494873, + "log_odds_ratio": -0.020997457206249237, + "loss": 0.2572, + "rejected_geometric_mean": -10.439329147338867, + "step": 5901 + }, + { + "chosen_geometric_mean": -1.037923812866211, + "epoch": 1.46, + "grad_norm": 19.125, + "learning_rate": 8.449977323148675e-07, + "log_odds": 12.783510208129883, + "log_odds_ratio": -0.0010471384739503264, + "loss": 0.2539, + "rejected_geometric_mean": -13.32928466796875, + "step": 5902 + }, + { + "chosen_geometric_mean": -0.9812785387039185, + "epoch": 1.46, + "grad_norm": 6.46875, + "learning_rate": 8.442682366980789e-07, + "log_odds": 5.058088302612305, + "log_odds_ratio": -0.11850964277982712, + "loss": 0.2536, + "rejected_geometric_mean": -5.658207416534424, + "step": 5903 + }, + { + "chosen_geometric_mean": -1.203965425491333, + "epoch": 1.46, + "grad_norm": 5.75, + "learning_rate": 8.435389921299699e-07, + "log_odds": 10.674504280090332, + "log_odds_ratio": -0.007214879151433706, + "loss": 0.2381, + "rejected_geometric_mean": -11.466010093688965, + "step": 5904 + }, + { + "chosen_geometric_mean": -0.9783322215080261, + "epoch": 1.46, + "grad_norm": 2.234375, + "learning_rate": 8.428099987211102e-07, + "log_odds": 3.7456393241882324, + "log_odds_ratio": -0.28115442395210266, + "loss": 0.2805, + "rejected_geometric_mean": -4.409360408782959, + "step": 5905 + }, + { + "chosen_geometric_mean": -1.1509912014007568, + "epoch": 1.46, + "grad_norm": 9.0, + "learning_rate": 8.420812565820339e-07, + "log_odds": 1.7883294820785522, + "log_odds_ratio": -0.22366563975811005, + "loss": 0.259, + "rejected_geometric_mean": -2.6752636432647705, + "step": 5906 + }, + { + "chosen_geometric_mean": -1.0456576347351074, + "epoch": 1.46, + "grad_norm": 6.28125, + "learning_rate": 8.413527658232363e-07, + "log_odds": 8.662994384765625, + "log_odds_ratio": -0.057229701429605484, + "loss": 0.2395, + "rejected_geometric_mean": -9.2884521484375, + "step": 5907 + }, + { + "chosen_geometric_mean": -1.0808919668197632, + "epoch": 1.46, + "grad_norm": 31.125, + "learning_rate": 8.406245265551727e-07, + "log_odds": 9.767478942871094, + "log_odds_ratio": -0.05412304773926735, + "loss": 0.2569, + "rejected_geometric_mean": -10.447102546691895, + "step": 5908 + }, + { + "chosen_geometric_mean": -1.2017292976379395, + "epoch": 1.46, + "grad_norm": 5.875, + "learning_rate": 8.398965388882633e-07, + "log_odds": 3.69923734664917, + "log_odds_ratio": -0.15929464995861053, + "loss": 0.2737, + "rejected_geometric_mean": -4.613778591156006, + "step": 5909 + }, + { + "chosen_geometric_mean": -0.8727991580963135, + "epoch": 1.46, + "grad_norm": 9.25, + "learning_rate": 8.391688029328865e-07, + "log_odds": 6.075287342071533, + "log_odds_ratio": -0.06944189965724945, + "loss": 0.2887, + "rejected_geometric_mean": -6.4443359375, + "step": 5910 + }, + { + "chosen_geometric_mean": -0.9457736015319824, + "epoch": 1.46, + "grad_norm": 2.953125, + "learning_rate": 8.384413187993856e-07, + "log_odds": 5.263183116912842, + "log_odds_ratio": -0.12267462909221649, + "loss": 0.2421, + "rejected_geometric_mean": -5.801482200622559, + "step": 5911 + }, + { + "chosen_geometric_mean": -0.9477752447128296, + "epoch": 1.46, + "grad_norm": 12.1875, + "learning_rate": 8.377140865980657e-07, + "log_odds": 5.6311445236206055, + "log_odds_ratio": -0.07491442561149597, + "loss": 0.2334, + "rejected_geometric_mean": -6.1392621994018555, + "step": 5912 + }, + { + "chosen_geometric_mean": -0.908053994178772, + "epoch": 1.46, + "grad_norm": 6.71875, + "learning_rate": 8.36987106439191e-07, + "log_odds": 8.44334602355957, + "log_odds_ratio": -0.18260574340820312, + "loss": 0.3002, + "rejected_geometric_mean": -8.864019393920898, + "step": 5913 + }, + { + "chosen_geometric_mean": -0.8546228408813477, + "epoch": 1.46, + "grad_norm": 11.8125, + "learning_rate": 8.362603784329903e-07, + "log_odds": 2.6147775650024414, + "log_odds_ratio": -0.21120081841945648, + "loss": 0.2737, + "rejected_geometric_mean": -3.0064492225646973, + "step": 5914 + }, + { + "chosen_geometric_mean": -0.9022471308708191, + "epoch": 1.46, + "grad_norm": 2.046875, + "learning_rate": 8.355339026896525e-07, + "log_odds": 7.550411224365234, + "log_odds_ratio": -0.013544711284339428, + "loss": 0.2735, + "rejected_geometric_mean": -7.925389289855957, + "step": 5915 + }, + { + "chosen_geometric_mean": -0.8418288230895996, + "epoch": 1.46, + "grad_norm": 5.25, + "learning_rate": 8.348076793193283e-07, + "log_odds": 8.400886535644531, + "log_odds_ratio": -0.10432145744562149, + "loss": 0.2877, + "rejected_geometric_mean": -8.735313415527344, + "step": 5916 + }, + { + "chosen_geometric_mean": -1.163907766342163, + "epoch": 1.46, + "grad_norm": 2.515625, + "learning_rate": 8.340817084321306e-07, + "log_odds": 11.55761432647705, + "log_odds_ratio": -0.06005604565143585, + "loss": 0.2742, + "rejected_geometric_mean": -12.34546184539795, + "step": 5917 + }, + { + "chosen_geometric_mean": -0.9523482322692871, + "epoch": 1.47, + "grad_norm": 20.125, + "learning_rate": 8.333559901381355e-07, + "log_odds": 8.137014389038086, + "log_odds_ratio": -0.07511411607265472, + "loss": 0.2654, + "rejected_geometric_mean": -8.65073299407959, + "step": 5918 + }, + { + "chosen_geometric_mean": -0.8860527873039246, + "epoch": 1.47, + "grad_norm": 5.84375, + "learning_rate": 8.326305245473776e-07, + "log_odds": 8.161436080932617, + "log_odds_ratio": -0.03263045847415924, + "loss": 0.2226, + "rejected_geometric_mean": -8.527772903442383, + "step": 5919 + }, + { + "chosen_geometric_mean": -0.9788206219673157, + "epoch": 1.47, + "grad_norm": 36.75, + "learning_rate": 8.319053117698561e-07, + "log_odds": 1.5370475053787231, + "log_odds_ratio": -0.3339768648147583, + "loss": 0.3019, + "rejected_geometric_mean": -2.2503914833068848, + "step": 5920 + }, + { + "chosen_geometric_mean": -0.8174690008163452, + "epoch": 1.47, + "grad_norm": 1.953125, + "learning_rate": 8.311803519155303e-07, + "log_odds": 6.396218299865723, + "log_odds_ratio": -0.004178783390671015, + "loss": 0.2408, + "rejected_geometric_mean": -6.601495742797852, + "step": 5921 + }, + { + "chosen_geometric_mean": -0.9439946413040161, + "epoch": 1.47, + "grad_norm": 8.625, + "learning_rate": 8.304556450943199e-07, + "log_odds": 3.227186679840088, + "log_odds_ratio": -0.35018837451934814, + "loss": 0.2462, + "rejected_geometric_mean": -3.9749088287353516, + "step": 5922 + }, + { + "chosen_geometric_mean": -1.0092146396636963, + "epoch": 1.47, + "grad_norm": 2.34375, + "learning_rate": 8.297311914161108e-07, + "log_odds": 5.801434516906738, + "log_odds_ratio": -0.10407702624797821, + "loss": 0.2606, + "rejected_geometric_mean": -6.410576343536377, + "step": 5923 + }, + { + "chosen_geometric_mean": -1.1285221576690674, + "epoch": 1.47, + "grad_norm": 14.0, + "learning_rate": 8.290069909907458e-07, + "log_odds": 7.49111270904541, + "log_odds_ratio": -0.33447781205177307, + "loss": 0.2283, + "rejected_geometric_mean": -8.355610847473145, + "step": 5924 + }, + { + "chosen_geometric_mean": -0.7941755652427673, + "epoch": 1.47, + "grad_norm": 1.9609375, + "learning_rate": 8.28283043928031e-07, + "log_odds": 10.274568557739258, + "log_odds_ratio": -0.09553929418325424, + "loss": 0.2422, + "rejected_geometric_mean": -10.557581901550293, + "step": 5925 + }, + { + "chosen_geometric_mean": -1.057636022567749, + "epoch": 1.47, + "grad_norm": 5.6875, + "learning_rate": 8.27559350337735e-07, + "log_odds": 5.695667743682861, + "log_odds_ratio": -0.11084204912185669, + "loss": 0.2818, + "rejected_geometric_mean": -6.372281074523926, + "step": 5926 + }, + { + "chosen_geometric_mean": -1.0280609130859375, + "epoch": 1.47, + "grad_norm": 50.0, + "learning_rate": 8.268359103295859e-07, + "log_odds": 10.018157958984375, + "log_odds_ratio": -0.057980433106422424, + "loss": 0.317, + "rejected_geometric_mean": -10.602460861206055, + "step": 5927 + }, + { + "chosen_geometric_mean": -0.9964452981948853, + "epoch": 1.47, + "grad_norm": 3.40625, + "learning_rate": 8.261127240132752e-07, + "log_odds": 3.267026901245117, + "log_odds_ratio": -0.15453919768333435, + "loss": 0.1966, + "rejected_geometric_mean": -3.88818621635437, + "step": 5928 + }, + { + "chosen_geometric_mean": -0.9556744694709778, + "epoch": 1.47, + "grad_norm": 10.75, + "learning_rate": 8.253897914984562e-07, + "log_odds": 7.468293190002441, + "log_odds_ratio": -0.011816326528787613, + "loss": 0.2691, + "rejected_geometric_mean": -7.907891273498535, + "step": 5929 + }, + { + "chosen_geometric_mean": -0.945297360420227, + "epoch": 1.47, + "grad_norm": 3.515625, + "learning_rate": 8.246671128947422e-07, + "log_odds": 4.66060209274292, + "log_odds_ratio": -0.22146376967430115, + "loss": 0.2852, + "rejected_geometric_mean": -5.2721028327941895, + "step": 5930 + }, + { + "chosen_geometric_mean": -1.0710985660552979, + "epoch": 1.47, + "grad_norm": 7.8125, + "learning_rate": 8.239446883117075e-07, + "log_odds": 11.040470123291016, + "log_odds_ratio": -0.047026924788951874, + "loss": 0.2504, + "rejected_geometric_mean": -11.717232704162598, + "step": 5931 + }, + { + "chosen_geometric_mean": -1.0001252889633179, + "epoch": 1.47, + "grad_norm": 18.625, + "learning_rate": 8.232225178588907e-07, + "log_odds": 3.6975910663604736, + "log_odds_ratio": -0.1037493348121643, + "loss": 0.3092, + "rejected_geometric_mean": -4.243579864501953, + "step": 5932 + }, + { + "chosen_geometric_mean": -0.8206497430801392, + "epoch": 1.47, + "grad_norm": 17.5, + "learning_rate": 8.225006016457887e-07, + "log_odds": 5.833014965057373, + "log_odds_ratio": -0.1398126184940338, + "loss": 0.2772, + "rejected_geometric_mean": -6.166007995605469, + "step": 5933 + }, + { + "chosen_geometric_mean": -0.9489657282829285, + "epoch": 1.47, + "grad_norm": 19.25, + "learning_rate": 8.217789397818621e-07, + "log_odds": 8.664973258972168, + "log_odds_ratio": -0.004835600033402443, + "loss": 0.2567, + "rejected_geometric_mean": -9.101722717285156, + "step": 5934 + }, + { + "chosen_geometric_mean": -0.9893418550491333, + "epoch": 1.47, + "grad_norm": 12.0625, + "learning_rate": 8.210575323765327e-07, + "log_odds": 4.398378372192383, + "log_odds_ratio": -0.12605570256710052, + "loss": 0.2817, + "rejected_geometric_mean": -5.011000156402588, + "step": 5935 + }, + { + "chosen_geometric_mean": -0.963677167892456, + "epoch": 1.47, + "grad_norm": 8.75, + "learning_rate": 8.203363795391819e-07, + "log_odds": 12.483261108398438, + "log_odds_ratio": -0.0003407514013815671, + "loss": 0.2263, + "rejected_geometric_mean": -12.96163558959961, + "step": 5936 + }, + { + "chosen_geometric_mean": -0.9218798875808716, + "epoch": 1.47, + "grad_norm": 2.546875, + "learning_rate": 8.196154813791554e-07, + "log_odds": 7.728872776031494, + "log_odds_ratio": -0.02925463393330574, + "loss": 0.2893, + "rejected_geometric_mean": -8.101192474365234, + "step": 5937 + }, + { + "chosen_geometric_mean": -0.941434919834137, + "epoch": 1.47, + "grad_norm": 60.5, + "learning_rate": 8.188948380057576e-07, + "log_odds": 4.25311279296875, + "log_odds_ratio": -0.30761805176734924, + "loss": 0.3395, + "rejected_geometric_mean": -4.7370195388793945, + "step": 5938 + }, + { + "chosen_geometric_mean": -1.2034916877746582, + "epoch": 1.47, + "grad_norm": 8.4375, + "learning_rate": 8.18174449528254e-07, + "log_odds": 5.123941898345947, + "log_odds_ratio": -0.13826985657215118, + "loss": 0.283, + "rejected_geometric_mean": -6.012077808380127, + "step": 5939 + }, + { + "chosen_geometric_mean": -1.1662267446517944, + "epoch": 1.47, + "grad_norm": 2.8125, + "learning_rate": 8.174543160558757e-07, + "log_odds": 4.556910991668701, + "log_odds_ratio": -0.1294531524181366, + "loss": 0.2945, + "rejected_geometric_mean": -5.372443675994873, + "step": 5940 + }, + { + "chosen_geometric_mean": -1.0402413606643677, + "epoch": 1.47, + "grad_norm": 37.75, + "learning_rate": 8.167344376978109e-07, + "log_odds": 6.998071670532227, + "log_odds_ratio": -0.12281034886837006, + "loss": 0.3231, + "rejected_geometric_mean": -7.638932704925537, + "step": 5941 + }, + { + "chosen_geometric_mean": -0.8450401425361633, + "epoch": 1.47, + "grad_norm": 11.4375, + "learning_rate": 8.160148145632093e-07, + "log_odds": 9.505607604980469, + "log_odds_ratio": -0.020315976813435555, + "loss": 0.2466, + "rejected_geometric_mean": -9.792793273925781, + "step": 5942 + }, + { + "chosen_geometric_mean": -1.0649688243865967, + "epoch": 1.47, + "grad_norm": 3.15625, + "learning_rate": 8.152954467611852e-07, + "log_odds": 7.497740268707275, + "log_odds_ratio": -0.022073186933994293, + "loss": 0.2554, + "rejected_geometric_mean": -8.13418197631836, + "step": 5943 + }, + { + "chosen_geometric_mean": -0.7942551374435425, + "epoch": 1.47, + "grad_norm": 1.53125, + "learning_rate": 8.145763344008098e-07, + "log_odds": 12.85757064819336, + "log_odds_ratio": -0.0008874362101778388, + "loss": 0.1756, + "rejected_geometric_mean": -13.03131103515625, + "step": 5944 + }, + { + "chosen_geometric_mean": -1.0178252458572388, + "epoch": 1.47, + "grad_norm": 27.75, + "learning_rate": 8.138574775911187e-07, + "log_odds": 6.700131416320801, + "log_odds_ratio": -0.11172420531511307, + "loss": 0.2524, + "rejected_geometric_mean": -7.30714225769043, + "step": 5945 + }, + { + "chosen_geometric_mean": -0.8997820019721985, + "epoch": 1.47, + "grad_norm": 1.921875, + "learning_rate": 8.131388764411091e-07, + "log_odds": 6.077775001525879, + "log_odds_ratio": -0.08186153322458267, + "loss": 0.2389, + "rejected_geometric_mean": -6.494749069213867, + "step": 5946 + }, + { + "chosen_geometric_mean": -1.0734397172927856, + "epoch": 1.47, + "grad_norm": 2.609375, + "learning_rate": 8.124205310597372e-07, + "log_odds": 9.521881103515625, + "log_odds_ratio": -0.0018717453349381685, + "loss": 0.2436, + "rejected_geometric_mean": -10.158890724182129, + "step": 5947 + }, + { + "chosen_geometric_mean": -1.20361328125, + "epoch": 1.47, + "grad_norm": 120.5, + "learning_rate": 8.117024415559205e-07, + "log_odds": 5.009880065917969, + "log_odds_ratio": -0.06521429866552353, + "loss": 0.265, + "rejected_geometric_mean": -5.879212856292725, + "step": 5948 + }, + { + "chosen_geometric_mean": -0.9488468170166016, + "epoch": 1.47, + "grad_norm": 2.03125, + "learning_rate": 8.109846080385392e-07, + "log_odds": 5.184266090393066, + "log_odds_ratio": -0.043784309178590775, + "loss": 0.2496, + "rejected_geometric_mean": -5.650538444519043, + "step": 5949 + }, + { + "chosen_geometric_mean": -1.2659103870391846, + "epoch": 1.47, + "grad_norm": 4.4375, + "learning_rate": 8.102670306164354e-07, + "log_odds": 3.211972951889038, + "log_odds_ratio": -0.1443958431482315, + "loss": 0.2379, + "rejected_geometric_mean": -4.167110443115234, + "step": 5950 + }, + { + "chosen_geometric_mean": -1.5162365436553955, + "epoch": 1.47, + "grad_norm": 16.25, + "learning_rate": 8.09549709398409e-07, + "log_odds": 9.931085586547852, + "log_odds_ratio": -0.5562803745269775, + "loss": 0.3051, + "rejected_geometric_mean": -11.268987655639648, + "step": 5951 + }, + { + "chosen_geometric_mean": -1.1009255647659302, + "epoch": 1.47, + "grad_norm": 31.375, + "learning_rate": 8.088326444932246e-07, + "log_odds": 4.010543346405029, + "log_odds_ratio": -0.268475204706192, + "loss": 0.3131, + "rejected_geometric_mean": -4.8099164962768555, + "step": 5952 + }, + { + "chosen_geometric_mean": -0.8787766098976135, + "epoch": 1.47, + "grad_norm": 2.03125, + "learning_rate": 8.081158360096056e-07, + "log_odds": 9.869195938110352, + "log_odds_ratio": -0.0424514040350914, + "loss": 0.2308, + "rejected_geometric_mean": -10.241307258605957, + "step": 5953 + }, + { + "chosen_geometric_mean": -1.2427769899368286, + "epoch": 1.47, + "grad_norm": 11.6875, + "learning_rate": 8.07399284056238e-07, + "log_odds": 16.3195743560791, + "log_odds_ratio": -5.387339842854999e-05, + "loss": 0.3027, + "rejected_geometric_mean": -17.197999954223633, + "step": 5954 + }, + { + "chosen_geometric_mean": -1.08761727809906, + "epoch": 1.47, + "grad_norm": 6.28125, + "learning_rate": 8.066829887417677e-07, + "log_odds": 3.2691078186035156, + "log_odds_ratio": -0.3884023427963257, + "loss": 0.2936, + "rejected_geometric_mean": -4.129631519317627, + "step": 5955 + }, + { + "chosen_geometric_mean": -1.1927059888839722, + "epoch": 1.47, + "grad_norm": 3.515625, + "learning_rate": 8.059669501748032e-07, + "log_odds": 5.291702747344971, + "log_odds_ratio": -0.07974293828010559, + "loss": 0.249, + "rejected_geometric_mean": -6.139941692352295, + "step": 5956 + }, + { + "chosen_geometric_mean": -1.158949851989746, + "epoch": 1.47, + "grad_norm": 4.84375, + "learning_rate": 8.052511684639116e-07, + "log_odds": 2.5468642711639404, + "log_odds_ratio": -0.2348608374595642, + "loss": 0.2686, + "rejected_geometric_mean": -3.4592435359954834, + "step": 5957 + }, + { + "chosen_geometric_mean": -0.9167704582214355, + "epoch": 1.48, + "grad_norm": 2.125, + "learning_rate": 8.045356437176244e-07, + "log_odds": 4.831411838531494, + "log_odds_ratio": -0.1202869638800621, + "loss": 0.2398, + "rejected_geometric_mean": -5.324919700622559, + "step": 5958 + }, + { + "chosen_geometric_mean": -0.9991213083267212, + "epoch": 1.48, + "grad_norm": 2.015625, + "learning_rate": 8.038203760444304e-07, + "log_odds": 12.214558601379395, + "log_odds_ratio": -0.008567818440496922, + "loss": 0.2422, + "rejected_geometric_mean": -12.741777420043945, + "step": 5959 + }, + { + "chosen_geometric_mean": -1.0849376916885376, + "epoch": 1.48, + "grad_norm": 7.1875, + "learning_rate": 8.031053655527823e-07, + "log_odds": 4.58046293258667, + "log_odds_ratio": -0.09004232287406921, + "loss": 0.2851, + "rejected_geometric_mean": -5.280356407165527, + "step": 5960 + }, + { + "chosen_geometric_mean": -1.1107521057128906, + "epoch": 1.48, + "grad_norm": 31.875, + "learning_rate": 8.023906123510941e-07, + "log_odds": 6.894007205963135, + "log_odds_ratio": -0.022328950464725494, + "loss": 0.3192, + "rejected_geometric_mean": -7.613884925842285, + "step": 5961 + }, + { + "chosen_geometric_mean": -1.0198297500610352, + "epoch": 1.48, + "grad_norm": 2.21875, + "learning_rate": 8.016761165477371e-07, + "log_odds": 5.7122955322265625, + "log_odds_ratio": -0.1339864879846573, + "loss": 0.2836, + "rejected_geometric_mean": -6.3598151206970215, + "step": 5962 + }, + { + "chosen_geometric_mean": -1.1053389310836792, + "epoch": 1.48, + "grad_norm": 20.75, + "learning_rate": 8.009618782510484e-07, + "log_odds": 5.425360679626465, + "log_odds_ratio": -0.11083883047103882, + "loss": 0.2966, + "rejected_geometric_mean": -6.168674945831299, + "step": 5963 + }, + { + "chosen_geometric_mean": -1.0767799615859985, + "epoch": 1.48, + "grad_norm": 6.0625, + "learning_rate": 8.002478975693223e-07, + "log_odds": 0.6590449213981628, + "log_odds_ratio": -0.4310106337070465, + "loss": 0.2762, + "rejected_geometric_mean": -1.5776631832122803, + "step": 5964 + }, + { + "chosen_geometric_mean": -0.9369574785232544, + "epoch": 1.48, + "grad_norm": 25.625, + "learning_rate": 7.995341746108148e-07, + "log_odds": 3.6238696575164795, + "log_odds_ratio": -0.16293133795261383, + "loss": 0.2964, + "rejected_geometric_mean": -4.158458232879639, + "step": 5965 + }, + { + "chosen_geometric_mean": -0.8434280157089233, + "epoch": 1.48, + "grad_norm": 16.125, + "learning_rate": 7.988207094837444e-07, + "log_odds": 8.809602737426758, + "log_odds_ratio": -0.05323730409145355, + "loss": 0.2848, + "rejected_geometric_mean": -9.116752624511719, + "step": 5966 + }, + { + "chosen_geometric_mean": -1.0108383893966675, + "epoch": 1.48, + "grad_norm": 30.5, + "learning_rate": 7.981075022962903e-07, + "log_odds": 8.175653457641602, + "log_odds_ratio": -0.14186428487300873, + "loss": 0.2525, + "rejected_geometric_mean": -8.821492195129395, + "step": 5967 + }, + { + "chosen_geometric_mean": -1.025537133216858, + "epoch": 1.48, + "grad_norm": 1.9375, + "learning_rate": 7.973945531565899e-07, + "log_odds": 11.034547805786133, + "log_odds_ratio": -0.15438929200172424, + "loss": 0.2417, + "rejected_geometric_mean": -11.678024291992188, + "step": 5968 + }, + { + "chosen_geometric_mean": -0.9990077614784241, + "epoch": 1.48, + "grad_norm": 2.703125, + "learning_rate": 7.966818621727457e-07, + "log_odds": 10.717065811157227, + "log_odds_ratio": -0.017438970506191254, + "loss": 0.3265, + "rejected_geometric_mean": -11.203393936157227, + "step": 5969 + }, + { + "chosen_geometric_mean": -1.0430328845977783, + "epoch": 1.48, + "grad_norm": 6.84375, + "learning_rate": 7.95969429452817e-07, + "log_odds": 12.078572273254395, + "log_odds_ratio": -0.0049683223478496075, + "loss": 0.2469, + "rejected_geometric_mean": -12.651632308959961, + "step": 5970 + }, + { + "chosen_geometric_mean": -1.0014679431915283, + "epoch": 1.48, + "grad_norm": 3.890625, + "learning_rate": 7.952572551048249e-07, + "log_odds": 13.089356422424316, + "log_odds_ratio": -0.020384149625897408, + "loss": 0.2196, + "rejected_geometric_mean": -13.600762367248535, + "step": 5971 + }, + { + "chosen_geometric_mean": -0.9584890604019165, + "epoch": 1.48, + "grad_norm": 2.75, + "learning_rate": 7.945453392367547e-07, + "log_odds": 10.034233093261719, + "log_odds_ratio": -0.017298532649874687, + "loss": 0.269, + "rejected_geometric_mean": -10.492864608764648, + "step": 5972 + }, + { + "chosen_geometric_mean": -0.9770687222480774, + "epoch": 1.48, + "grad_norm": 2.53125, + "learning_rate": 7.938336819565489e-07, + "log_odds": 3.0467476844787598, + "log_odds_ratio": -0.22393584251403809, + "loss": 0.2436, + "rejected_geometric_mean": -3.7038865089416504, + "step": 5973 + }, + { + "chosen_geometric_mean": -1.079166293144226, + "epoch": 1.48, + "grad_norm": 3.296875, + "learning_rate": 7.931222833721108e-07, + "log_odds": 3.1084036827087402, + "log_odds_ratio": -0.2144378125667572, + "loss": 0.2531, + "rejected_geometric_mean": -3.8788933753967285, + "step": 5974 + }, + { + "chosen_geometric_mean": -0.9014660120010376, + "epoch": 1.48, + "grad_norm": 4.875, + "learning_rate": 7.92411143591307e-07, + "log_odds": 3.987959384918213, + "log_odds_ratio": -0.3007808327674866, + "loss": 0.2007, + "rejected_geometric_mean": -4.60983419418335, + "step": 5975 + }, + { + "chosen_geometric_mean": -1.6768308877944946, + "epoch": 1.48, + "grad_norm": 36.0, + "learning_rate": 7.917002627219623e-07, + "log_odds": 7.364082336425781, + "log_odds_ratio": -0.027891820296645164, + "loss": 0.3317, + "rejected_geometric_mean": -8.743396759033203, + "step": 5976 + }, + { + "chosen_geometric_mean": -0.8760076761245728, + "epoch": 1.48, + "grad_norm": 10.875, + "learning_rate": 7.909896408718631e-07, + "log_odds": 2.1865437030792236, + "log_odds_ratio": -0.2855151295661926, + "loss": 0.218, + "rejected_geometric_mean": -2.693129777908325, + "step": 5977 + }, + { + "chosen_geometric_mean": -1.1195887327194214, + "epoch": 1.48, + "grad_norm": 2.71875, + "learning_rate": 7.902792781487581e-07, + "log_odds": 6.290968894958496, + "log_odds_ratio": -0.27369415760040283, + "loss": 0.2631, + "rejected_geometric_mean": -7.155967712402344, + "step": 5978 + }, + { + "chosen_geometric_mean": -1.0825101137161255, + "epoch": 1.48, + "grad_norm": 22.375, + "learning_rate": 7.89569174660354e-07, + "log_odds": 8.303064346313477, + "log_odds_ratio": -0.0006920796586200595, + "loss": 0.323, + "rejected_geometric_mean": -8.968817710876465, + "step": 5979 + }, + { + "chosen_geometric_mean": -0.9845795631408691, + "epoch": 1.48, + "grad_norm": 3.828125, + "learning_rate": 7.888593305143208e-07, + "log_odds": 6.393582344055176, + "log_odds_ratio": -0.1441221982240677, + "loss": 0.2893, + "rejected_geometric_mean": -6.956228256225586, + "step": 5980 + }, + { + "chosen_geometric_mean": -0.8233965635299683, + "epoch": 1.48, + "grad_norm": 12.4375, + "learning_rate": 7.881497458182871e-07, + "log_odds": 4.799233436584473, + "log_odds_ratio": -0.12028506398200989, + "loss": 0.2991, + "rejected_geometric_mean": -5.130501747131348, + "step": 5981 + }, + { + "chosen_geometric_mean": -0.9761637449264526, + "epoch": 1.48, + "grad_norm": 3.59375, + "learning_rate": 7.874404206798423e-07, + "log_odds": 13.783903121948242, + "log_odds_ratio": -0.07987186312675476, + "loss": 0.2575, + "rejected_geometric_mean": -14.338866233825684, + "step": 5982 + }, + { + "chosen_geometric_mean": -0.9000308513641357, + "epoch": 1.48, + "grad_norm": 2.96875, + "learning_rate": 7.867313552065378e-07, + "log_odds": 5.410935878753662, + "log_odds_ratio": -0.027315614745020866, + "loss": 0.2406, + "rejected_geometric_mean": -5.782743453979492, + "step": 5983 + }, + { + "chosen_geometric_mean": -1.0213863849639893, + "epoch": 1.48, + "grad_norm": 7.21875, + "learning_rate": 7.86022549505886e-07, + "log_odds": 6.473123550415039, + "log_odds_ratio": -0.0690145269036293, + "loss": 0.2878, + "rejected_geometric_mean": -7.07834529876709, + "step": 5984 + }, + { + "chosen_geometric_mean": -0.9715954065322876, + "epoch": 1.48, + "grad_norm": 1.953125, + "learning_rate": 7.853140036853568e-07, + "log_odds": 10.588630676269531, + "log_odds_ratio": -0.0015958786243572831, + "loss": 0.2542, + "rejected_geometric_mean": -11.079468727111816, + "step": 5985 + }, + { + "chosen_geometric_mean": -0.9628819823265076, + "epoch": 1.48, + "grad_norm": 12.5625, + "learning_rate": 7.846057178523847e-07, + "log_odds": 4.841645240783691, + "log_odds_ratio": -0.26431649923324585, + "loss": 0.3289, + "rejected_geometric_mean": -5.46629524230957, + "step": 5986 + }, + { + "chosen_geometric_mean": -1.0201960802078247, + "epoch": 1.48, + "grad_norm": 6.375, + "learning_rate": 7.838976921143621e-07, + "log_odds": 2.1174793243408203, + "log_odds_ratio": -0.25170251727104187, + "loss": 0.2399, + "rejected_geometric_mean": -2.805295467376709, + "step": 5987 + }, + { + "chosen_geometric_mean": -0.9302248954772949, + "epoch": 1.48, + "grad_norm": 3.25, + "learning_rate": 7.831899265786408e-07, + "log_odds": 6.825082778930664, + "log_odds_ratio": -0.23871968686580658, + "loss": 0.2227, + "rejected_geometric_mean": -7.415205955505371, + "step": 5988 + }, + { + "chosen_geometric_mean": -0.9220925569534302, + "epoch": 1.48, + "grad_norm": 2.109375, + "learning_rate": 7.824824213525384e-07, + "log_odds": 12.119568824768066, + "log_odds_ratio": -0.08662384003400803, + "loss": 0.2858, + "rejected_geometric_mean": -12.534189224243164, + "step": 5989 + }, + { + "chosen_geometric_mean": -1.1090068817138672, + "epoch": 1.48, + "grad_norm": 2.375, + "learning_rate": 7.817751765433281e-07, + "log_odds": 11.822469711303711, + "log_odds_ratio": -0.0007563038961961865, + "loss": 0.2504, + "rejected_geometric_mean": -12.528751373291016, + "step": 5990 + }, + { + "chosen_geometric_mean": -1.0645008087158203, + "epoch": 1.48, + "grad_norm": 38.5, + "learning_rate": 7.810681922582444e-07, + "log_odds": 6.354259967803955, + "log_odds_ratio": -0.1093117967247963, + "loss": 0.2597, + "rejected_geometric_mean": -7.011924743652344, + "step": 5991 + }, + { + "chosen_geometric_mean": -1.2110559940338135, + "epoch": 1.48, + "grad_norm": 8.0625, + "learning_rate": 7.803614686044847e-07, + "log_odds": 7.021677494049072, + "log_odds_ratio": -0.08998140692710876, + "loss": 0.254, + "rejected_geometric_mean": -7.893213272094727, + "step": 5992 + }, + { + "chosen_geometric_mean": -1.0680104494094849, + "epoch": 1.48, + "grad_norm": 76.0, + "learning_rate": 7.796550056892036e-07, + "log_odds": 15.680578231811523, + "log_odds_ratio": -0.00038982558180578053, + "loss": 0.2439, + "rejected_geometric_mean": -16.324230194091797, + "step": 5993 + }, + { + "chosen_geometric_mean": -1.9502652883529663, + "epoch": 1.48, + "grad_norm": 14.1875, + "learning_rate": 7.789488036195186e-07, + "log_odds": 7.168771743774414, + "log_odds_ratio": -0.08810292184352875, + "loss": 0.3176, + "rejected_geometric_mean": -8.887479782104492, + "step": 5994 + }, + { + "chosen_geometric_mean": -0.8586300015449524, + "epoch": 1.48, + "grad_norm": 1.875, + "learning_rate": 7.782428625025077e-07, + "log_odds": 2.685364246368408, + "log_odds_ratio": -0.196006178855896, + "loss": 0.2226, + "rejected_geometric_mean": -3.1103599071502686, + "step": 5995 + }, + { + "chosen_geometric_mean": -0.8122494220733643, + "epoch": 1.48, + "grad_norm": 4.125, + "learning_rate": 7.775371824452077e-07, + "log_odds": 5.087601661682129, + "log_odds_ratio": -0.22719763219356537, + "loss": 0.2901, + "rejected_geometric_mean": -5.467939853668213, + "step": 5996 + }, + { + "chosen_geometric_mean": -1.022064208984375, + "epoch": 1.48, + "grad_norm": 3.296875, + "learning_rate": 7.76831763554616e-07, + "log_odds": 3.670290946960449, + "log_odds_ratio": -0.14231018722057343, + "loss": 0.2865, + "rejected_geometric_mean": -4.331482887268066, + "step": 5997 + }, + { + "chosen_geometric_mean": -1.15116548538208, + "epoch": 1.49, + "grad_norm": 5.65625, + "learning_rate": 7.761266059376926e-07, + "log_odds": 6.739300727844238, + "log_odds_ratio": -0.034672901034355164, + "loss": 0.2486, + "rejected_geometric_mean": -7.524360656738281, + "step": 5998 + }, + { + "chosen_geometric_mean": -0.7822448015213013, + "epoch": 1.49, + "grad_norm": 15.5625, + "learning_rate": 7.754217097013547e-07, + "log_odds": 4.836641311645508, + "log_odds_ratio": -0.1278408169746399, + "loss": 0.27, + "rejected_geometric_mean": -5.059728145599365, + "step": 5999 + }, + { + "chosen_geometric_mean": -1.0668576955795288, + "epoch": 1.49, + "grad_norm": 4.0, + "learning_rate": 7.747170749524821e-07, + "log_odds": 9.590333938598633, + "log_odds_ratio": -0.13813669979572296, + "loss": 0.2548, + "rejected_geometric_mean": -10.31491756439209, + "step": 6000 + }, + { + "chosen_geometric_mean": -1.0711339712142944, + "epoch": 1.49, + "grad_norm": 4.3125, + "learning_rate": 7.740127017979154e-07, + "log_odds": 8.440472602844238, + "log_odds_ratio": -0.07433398067951202, + "loss": 0.2747, + "rejected_geometric_mean": -9.117766380310059, + "step": 6001 + }, + { + "chosen_geometric_mean": -1.2596817016601562, + "epoch": 1.49, + "grad_norm": 20.75, + "learning_rate": 7.733085903444526e-07, + "log_odds": 3.3966877460479736, + "log_odds_ratio": -0.2241995632648468, + "loss": 0.278, + "rejected_geometric_mean": -4.434352874755859, + "step": 6002 + }, + { + "chosen_geometric_mean": -1.1578974723815918, + "epoch": 1.49, + "grad_norm": 4.34375, + "learning_rate": 7.72604740698856e-07, + "log_odds": 9.983820915222168, + "log_odds_ratio": -0.08288515359163284, + "loss": 0.2617, + "rejected_geometric_mean": -10.78159236907959, + "step": 6003 + }, + { + "chosen_geometric_mean": -0.7754750847816467, + "epoch": 1.49, + "grad_norm": 3.8125, + "learning_rate": 7.719011529678439e-07, + "log_odds": 6.043421268463135, + "log_odds_ratio": -0.12157559394836426, + "loss": 0.3112, + "rejected_geometric_mean": -6.304351329803467, + "step": 6004 + }, + { + "chosen_geometric_mean": -1.0515508651733398, + "epoch": 1.49, + "grad_norm": 6.0625, + "learning_rate": 7.711978272580989e-07, + "log_odds": 1.5520434379577637, + "log_odds_ratio": -0.2716345191001892, + "loss": 0.2353, + "rejected_geometric_mean": -2.368896245956421, + "step": 6005 + }, + { + "chosen_geometric_mean": -0.9352231621742249, + "epoch": 1.49, + "grad_norm": 5.5625, + "learning_rate": 7.704947636762605e-07, + "log_odds": 2.5893256664276123, + "log_odds_ratio": -0.28132882714271545, + "loss": 0.2458, + "rejected_geometric_mean": -3.122316360473633, + "step": 6006 + }, + { + "chosen_geometric_mean": -1.1233437061309814, + "epoch": 1.49, + "grad_norm": 2.4375, + "learning_rate": 7.697919623289318e-07, + "log_odds": 6.222841739654541, + "log_odds_ratio": -0.11228509992361069, + "loss": 0.2823, + "rejected_geometric_mean": -6.9945878982543945, + "step": 6007 + }, + { + "chosen_geometric_mean": -0.9223887324333191, + "epoch": 1.49, + "grad_norm": 27.875, + "learning_rate": 7.690894233226726e-07, + "log_odds": 8.472879409790039, + "log_odds_ratio": -0.10258550941944122, + "loss": 0.2411, + "rejected_geometric_mean": -8.978379249572754, + "step": 6008 + }, + { + "chosen_geometric_mean": -0.9290062189102173, + "epoch": 1.49, + "grad_norm": 31.0, + "learning_rate": 7.683871467640061e-07, + "log_odds": 2.415346384048462, + "log_odds_ratio": -0.09110648930072784, + "loss": 0.2261, + "rejected_geometric_mean": -2.8725430965423584, + "step": 6009 + }, + { + "chosen_geometric_mean": -1.2222000360488892, + "epoch": 1.49, + "grad_norm": 25.125, + "learning_rate": 7.676851327594128e-07, + "log_odds": 3.73187518119812, + "log_odds_ratio": -0.06777233630418777, + "loss": 0.3708, + "rejected_geometric_mean": -4.601058483123779, + "step": 6010 + }, + { + "chosen_geometric_mean": -1.0277214050292969, + "epoch": 1.49, + "grad_norm": 6.6875, + "learning_rate": 7.669833814153357e-07, + "log_odds": 5.298809051513672, + "log_odds_ratio": -0.12171884626150131, + "loss": 0.276, + "rejected_geometric_mean": -5.929156303405762, + "step": 6011 + }, + { + "chosen_geometric_mean": -0.9849964380264282, + "epoch": 1.49, + "grad_norm": 2.53125, + "learning_rate": 7.662818928381782e-07, + "log_odds": 10.353033065795898, + "log_odds_ratio": -0.133623406291008, + "loss": 0.2336, + "rejected_geometric_mean": -10.943304061889648, + "step": 6012 + }, + { + "chosen_geometric_mean": -0.9489074945449829, + "epoch": 1.49, + "grad_norm": 4.6875, + "learning_rate": 7.655806671343016e-07, + "log_odds": 6.477869033813477, + "log_odds_ratio": -0.23511506617069244, + "loss": 0.2936, + "rejected_geometric_mean": -7.082276344299316, + "step": 6013 + }, + { + "chosen_geometric_mean": -0.9998724460601807, + "epoch": 1.49, + "grad_norm": 31.25, + "learning_rate": 7.648797044100282e-07, + "log_odds": 8.086711883544922, + "log_odds_ratio": -0.07321489602327347, + "loss": 0.2527, + "rejected_geometric_mean": -8.627461433410645, + "step": 6014 + }, + { + "chosen_geometric_mean": -1.171602725982666, + "epoch": 1.49, + "grad_norm": 15.5, + "learning_rate": 7.641790047716411e-07, + "log_odds": 4.666102886199951, + "log_odds_ratio": -0.2063104808330536, + "loss": 0.2507, + "rejected_geometric_mean": -5.527041912078857, + "step": 6015 + }, + { + "chosen_geometric_mean": -1.0310349464416504, + "epoch": 1.49, + "grad_norm": 2.734375, + "learning_rate": 7.634785683253839e-07, + "log_odds": 6.588556289672852, + "log_odds_ratio": -0.23766180872917175, + "loss": 0.2569, + "rejected_geometric_mean": -7.343247890472412, + "step": 6016 + }, + { + "chosen_geometric_mean": -0.8513404130935669, + "epoch": 1.49, + "grad_norm": 7.125, + "learning_rate": 7.627783951774587e-07, + "log_odds": 7.375577449798584, + "log_odds_ratio": -0.13067704439163208, + "loss": 0.2924, + "rejected_geometric_mean": -7.764362335205078, + "step": 6017 + }, + { + "chosen_geometric_mean": -0.7943251132965088, + "epoch": 1.49, + "grad_norm": 29.375, + "learning_rate": 7.620784854340294e-07, + "log_odds": 7.191873073577881, + "log_odds_ratio": -0.1517706662416458, + "loss": 0.2502, + "rejected_geometric_mean": -7.484129428863525, + "step": 6018 + }, + { + "chosen_geometric_mean": -1.0235923528671265, + "epoch": 1.49, + "grad_norm": 4.75, + "learning_rate": 7.61378839201219e-07, + "log_odds": 7.849921226501465, + "log_odds_ratio": -0.12113810330629349, + "loss": 0.2758, + "rejected_geometric_mean": -8.492378234863281, + "step": 6019 + }, + { + "chosen_geometric_mean": -0.9138349294662476, + "epoch": 1.49, + "grad_norm": 2.4375, + "learning_rate": 7.606794565851092e-07, + "log_odds": 9.502581596374512, + "log_odds_ratio": -0.01886793039739132, + "loss": 0.238, + "rejected_geometric_mean": -9.908843040466309, + "step": 6020 + }, + { + "chosen_geometric_mean": -0.893094003200531, + "epoch": 1.49, + "grad_norm": 2.9375, + "learning_rate": 7.599803376917441e-07, + "log_odds": 3.7226600646972656, + "log_odds_ratio": -0.18496644496917725, + "loss": 0.2811, + "rejected_geometric_mean": -4.250132083892822, + "step": 6021 + }, + { + "chosen_geometric_mean": -0.9095430374145508, + "epoch": 1.49, + "grad_norm": 3.359375, + "learning_rate": 7.592814826271283e-07, + "log_odds": 8.078302383422852, + "log_odds_ratio": -0.002034785458818078, + "loss": 0.2919, + "rejected_geometric_mean": -8.466178894042969, + "step": 6022 + }, + { + "chosen_geometric_mean": -1.2304184436798096, + "epoch": 1.49, + "grad_norm": 5.84375, + "learning_rate": 7.58582891497223e-07, + "log_odds": 5.637333393096924, + "log_odds_ratio": -0.04487389326095581, + "loss": 0.2684, + "rejected_geometric_mean": -6.50672721862793, + "step": 6023 + }, + { + "chosen_geometric_mean": -1.043870210647583, + "epoch": 1.49, + "grad_norm": 2.3125, + "learning_rate": 7.578845644079528e-07, + "log_odds": 4.187673568725586, + "log_odds_ratio": -0.12863531708717346, + "loss": 0.2722, + "rejected_geometric_mean": -4.879688739776611, + "step": 6024 + }, + { + "chosen_geometric_mean": -0.9574868679046631, + "epoch": 1.49, + "grad_norm": 1.7109375, + "learning_rate": 7.571865014651994e-07, + "log_odds": 7.916731357574463, + "log_odds_ratio": -0.01635470986366272, + "loss": 0.2315, + "rejected_geometric_mean": -8.353343963623047, + "step": 6025 + }, + { + "chosen_geometric_mean": -1.726778507232666, + "epoch": 1.49, + "grad_norm": 27.875, + "learning_rate": 7.564887027748066e-07, + "log_odds": 4.008205413818359, + "log_odds_ratio": -0.024164322763681412, + "loss": 0.3263, + "rejected_geometric_mean": -5.429448127746582, + "step": 6026 + }, + { + "chosen_geometric_mean": -1.0489213466644287, + "epoch": 1.49, + "grad_norm": 4.3125, + "learning_rate": 7.557911684425787e-07, + "log_odds": 7.594581604003906, + "log_odds_ratio": -0.08866684883832932, + "loss": 0.2558, + "rejected_geometric_mean": -8.27505111694336, + "step": 6027 + }, + { + "chosen_geometric_mean": -0.9303761720657349, + "epoch": 1.49, + "grad_norm": 4.3125, + "learning_rate": 7.550938985742765e-07, + "log_odds": 6.20156192779541, + "log_odds_ratio": -0.09816662222146988, + "loss": 0.2522, + "rejected_geometric_mean": -6.690161228179932, + "step": 6028 + }, + { + "chosen_geometric_mean": -1.026761531829834, + "epoch": 1.49, + "grad_norm": 4.90625, + "learning_rate": 7.543968932756246e-07, + "log_odds": 4.167169570922852, + "log_odds_ratio": -0.07436856627464294, + "loss": 0.2407, + "rejected_geometric_mean": -4.7640509605407715, + "step": 6029 + }, + { + "chosen_geometric_mean": -1.2013719081878662, + "epoch": 1.49, + "grad_norm": 7.875, + "learning_rate": 7.537001526523053e-07, + "log_odds": 1.2974271774291992, + "log_odds_ratio": -0.3089328706264496, + "loss": 0.2949, + "rejected_geometric_mean": -2.3083314895629883, + "step": 6030 + }, + { + "chosen_geometric_mean": -1.0270949602127075, + "epoch": 1.49, + "grad_norm": 2.4375, + "learning_rate": 7.530036768099597e-07, + "log_odds": 15.376908302307129, + "log_odds_ratio": -0.0013463321374729276, + "loss": 0.2527, + "rejected_geometric_mean": -15.943035125732422, + "step": 6031 + }, + { + "chosen_geometric_mean": -1.0420870780944824, + "epoch": 1.49, + "grad_norm": 2.90625, + "learning_rate": 7.523074658541918e-07, + "log_odds": 2.0163400173187256, + "log_odds_ratio": -0.29620569944381714, + "loss": 0.2722, + "rejected_geometric_mean": -2.7899169921875, + "step": 6032 + }, + { + "chosen_geometric_mean": -0.9286708235740662, + "epoch": 1.49, + "grad_norm": 36.0, + "learning_rate": 7.516115198905641e-07, + "log_odds": 10.708536148071289, + "log_odds_ratio": -0.17376549541950226, + "loss": 0.2615, + "rejected_geometric_mean": -11.290084838867188, + "step": 6033 + }, + { + "chosen_geometric_mean": -1.1106243133544922, + "epoch": 1.49, + "grad_norm": 4.3125, + "learning_rate": 7.509158390245975e-07, + "log_odds": 7.187507152557373, + "log_odds_ratio": -0.10681603103876114, + "loss": 0.247, + "rejected_geometric_mean": -7.908228397369385, + "step": 6034 + }, + { + "chosen_geometric_mean": -0.9325851202011108, + "epoch": 1.49, + "grad_norm": 6.40625, + "learning_rate": 7.502204233617752e-07, + "log_odds": 6.15877628326416, + "log_odds_ratio": -0.06908780336380005, + "loss": 0.2749, + "rejected_geometric_mean": -6.607546806335449, + "step": 6035 + }, + { + "chosen_geometric_mean": -0.8919496536254883, + "epoch": 1.49, + "grad_norm": 12.0625, + "learning_rate": 7.495252730075387e-07, + "log_odds": 9.50721549987793, + "log_odds_ratio": -0.009463696740567684, + "loss": 0.2493, + "rejected_geometric_mean": -9.868997573852539, + "step": 6036 + }, + { + "chosen_geometric_mean": -1.1167782545089722, + "epoch": 1.49, + "grad_norm": 1.9765625, + "learning_rate": 7.488303880672871e-07, + "log_odds": 5.142791748046875, + "log_odds_ratio": -0.08026894181966782, + "loss": 0.2371, + "rejected_geometric_mean": -5.9049177169799805, + "step": 6037 + }, + { + "chosen_geometric_mean": -1.0165446996688843, + "epoch": 1.49, + "grad_norm": 27.25, + "learning_rate": 7.481357686463853e-07, + "log_odds": 3.532843589782715, + "log_odds_ratio": -0.15074968338012695, + "loss": 0.3307, + "rejected_geometric_mean": -4.113533973693848, + "step": 6038 + }, + { + "chosen_geometric_mean": -1.0405166149139404, + "epoch": 1.5, + "grad_norm": 7.375, + "learning_rate": 7.474414148501524e-07, + "log_odds": 6.41500186920166, + "log_odds_ratio": -0.11719737946987152, + "loss": 0.2145, + "rejected_geometric_mean": -7.087080955505371, + "step": 6039 + }, + { + "chosen_geometric_mean": -1.0653640031814575, + "epoch": 1.5, + "grad_norm": 8.125, + "learning_rate": 7.467473267838688e-07, + "log_odds": 1.6914448738098145, + "log_odds_ratio": -0.32706812024116516, + "loss": 0.2583, + "rejected_geometric_mean": -2.5201258659362793, + "step": 6040 + }, + { + "chosen_geometric_mean": -1.0840833187103271, + "epoch": 1.5, + "grad_norm": 4.78125, + "learning_rate": 7.460535045527759e-07, + "log_odds": 9.053890228271484, + "log_odds_ratio": -0.031028248369693756, + "loss": 0.2043, + "rejected_geometric_mean": -9.7352876663208, + "step": 6041 + }, + { + "chosen_geometric_mean": -1.1259493827819824, + "epoch": 1.5, + "grad_norm": 2.9375, + "learning_rate": 7.453599482620727e-07, + "log_odds": 5.177282333374023, + "log_odds_ratio": -0.06398826837539673, + "loss": 0.2134, + "rejected_geometric_mean": -5.933742523193359, + "step": 6042 + }, + { + "chosen_geometric_mean": -0.8777327537536621, + "epoch": 1.5, + "grad_norm": 44.25, + "learning_rate": 7.446666580169193e-07, + "log_odds": 3.2523608207702637, + "log_odds_ratio": -0.11245714128017426, + "loss": 0.2743, + "rejected_geometric_mean": -3.663229465484619, + "step": 6043 + }, + { + "chosen_geometric_mean": -0.9620596170425415, + "epoch": 1.5, + "grad_norm": 17.125, + "learning_rate": 7.439736339224365e-07, + "log_odds": 7.10926628112793, + "log_odds_ratio": -0.23670732975006104, + "loss": 0.2564, + "rejected_geometric_mean": -7.711596488952637, + "step": 6044 + }, + { + "chosen_geometric_mean": -0.8846636414527893, + "epoch": 1.5, + "grad_norm": 1.8359375, + "learning_rate": 7.432808760837023e-07, + "log_odds": 7.920078277587891, + "log_odds_ratio": -0.006870911456644535, + "loss": 0.2421, + "rejected_geometric_mean": -8.215767860412598, + "step": 6045 + }, + { + "chosen_geometric_mean": -0.9432224631309509, + "epoch": 1.5, + "grad_norm": 48.25, + "learning_rate": 7.425883846057543e-07, + "log_odds": 8.00001049041748, + "log_odds_ratio": -0.1386995017528534, + "loss": 0.2607, + "rejected_geometric_mean": -8.539432525634766, + "step": 6046 + }, + { + "chosen_geometric_mean": -0.848017156124115, + "epoch": 1.5, + "grad_norm": 2.140625, + "learning_rate": 7.418961595935934e-07, + "log_odds": 5.710597991943359, + "log_odds_ratio": -0.21399961411952972, + "loss": 0.2258, + "rejected_geometric_mean": -6.148221015930176, + "step": 6047 + }, + { + "chosen_geometric_mean": -1.0442676544189453, + "epoch": 1.5, + "grad_norm": 2.046875, + "learning_rate": 7.412042011521747e-07, + "log_odds": 3.6795365810394287, + "log_odds_ratio": -0.23984047770500183, + "loss": 0.2495, + "rejected_geometric_mean": -4.378798007965088, + "step": 6048 + }, + { + "chosen_geometric_mean": -1.0587533712387085, + "epoch": 1.5, + "grad_norm": 3.5625, + "learning_rate": 7.405125093864174e-07, + "log_odds": 8.654424667358398, + "log_odds_ratio": -0.04304513707756996, + "loss": 0.2592, + "rejected_geometric_mean": -9.297405242919922, + "step": 6049 + }, + { + "chosen_geometric_mean": -0.9332501292228699, + "epoch": 1.5, + "grad_norm": 1.875, + "learning_rate": 7.398210844011988e-07, + "log_odds": 9.194478988647461, + "log_odds_ratio": -0.0013048307737335563, + "loss": 0.2215, + "rejected_geometric_mean": -9.615633964538574, + "step": 6050 + }, + { + "chosen_geometric_mean": -0.9726139903068542, + "epoch": 1.5, + "grad_norm": 3.03125, + "learning_rate": 7.391299263013546e-07, + "log_odds": 14.649084091186523, + "log_odds_ratio": -0.0009537786827422678, + "loss": 0.2553, + "rejected_geometric_mean": -15.122655868530273, + "step": 6051 + }, + { + "chosen_geometric_mean": -0.7590426802635193, + "epoch": 1.5, + "grad_norm": 3.390625, + "learning_rate": 7.384390351916818e-07, + "log_odds": 3.826878070831299, + "log_odds_ratio": -0.3099191188812256, + "loss": 0.2365, + "rejected_geometric_mean": -4.169875621795654, + "step": 6052 + }, + { + "chosen_geometric_mean": -0.8912371397018433, + "epoch": 1.5, + "grad_norm": 2.015625, + "learning_rate": 7.377484111769348e-07, + "log_odds": 7.6084818840026855, + "log_odds_ratio": -0.10837610065937042, + "loss": 0.2211, + "rejected_geometric_mean": -8.030497550964355, + "step": 6053 + }, + { + "chosen_geometric_mean": -0.8920722007751465, + "epoch": 1.5, + "grad_norm": 15.625, + "learning_rate": 7.3705805436183e-07, + "log_odds": 1.2629001140594482, + "log_odds_ratio": -0.3300703167915344, + "loss": 0.2741, + "rejected_geometric_mean": -1.891804575920105, + "step": 6054 + }, + { + "chosen_geometric_mean": -0.7618789672851562, + "epoch": 1.5, + "grad_norm": 5.25, + "learning_rate": 7.363679648510422e-07, + "log_odds": 3.431333541870117, + "log_odds_ratio": -0.1424785852432251, + "loss": 0.2368, + "rejected_geometric_mean": -3.6528677940368652, + "step": 6055 + }, + { + "chosen_geometric_mean": -0.973887026309967, + "epoch": 1.5, + "grad_norm": 4.46875, + "learning_rate": 7.356781427492049e-07, + "log_odds": 5.068210124969482, + "log_odds_ratio": -0.1113492101430893, + "loss": 0.2505, + "rejected_geometric_mean": -5.6144914627075195, + "step": 6056 + }, + { + "chosen_geometric_mean": -1.0094239711761475, + "epoch": 1.5, + "grad_norm": 21.625, + "learning_rate": 7.349885881609112e-07, + "log_odds": 0.5318278074264526, + "log_odds_ratio": -0.48105573654174805, + "loss": 0.2608, + "rejected_geometric_mean": -1.3915663957595825, + "step": 6057 + }, + { + "chosen_geometric_mean": -0.9985302686691284, + "epoch": 1.5, + "grad_norm": 4.0, + "learning_rate": 7.342993011907154e-07, + "log_odds": 5.9431071281433105, + "log_odds_ratio": -0.0976143479347229, + "loss": 0.305, + "rejected_geometric_mean": -6.518382549285889, + "step": 6058 + }, + { + "chosen_geometric_mean": -1.0137431621551514, + "epoch": 1.5, + "grad_norm": 3.546875, + "learning_rate": 7.336102819431285e-07, + "log_odds": 7.2050018310546875, + "log_odds_ratio": -0.1695561558008194, + "loss": 0.2618, + "rejected_geometric_mean": -7.91994571685791, + "step": 6059 + }, + { + "chosen_geometric_mean": -1.3726437091827393, + "epoch": 1.5, + "grad_norm": 9.625, + "learning_rate": 7.329215305226231e-07, + "log_odds": 7.835705757141113, + "log_odds_ratio": -0.0075159138068556786, + "loss": 0.2574, + "rejected_geometric_mean": -8.84510612487793, + "step": 6060 + }, + { + "chosen_geometric_mean": -0.9676477909088135, + "epoch": 1.5, + "grad_norm": 4.625, + "learning_rate": 7.322330470336314e-07, + "log_odds": 4.348413944244385, + "log_odds_ratio": -0.11728613078594208, + "loss": 0.2882, + "rejected_geometric_mean": -4.8672194480896, + "step": 6061 + }, + { + "chosen_geometric_mean": -0.8939499855041504, + "epoch": 1.5, + "grad_norm": 4.65625, + "learning_rate": 7.315448315805429e-07, + "log_odds": 1.3637843132019043, + "log_odds_ratio": -0.2845917344093323, + "loss": 0.2943, + "rejected_geometric_mean": -1.8975756168365479, + "step": 6062 + }, + { + "chosen_geometric_mean": -0.9075145125389099, + "epoch": 1.5, + "grad_norm": 2.96875, + "learning_rate": 7.308568842677071e-07, + "log_odds": 7.547788143157959, + "log_odds_ratio": -0.5175336003303528, + "loss": 0.2787, + "rejected_geometric_mean": -8.37790298461914, + "step": 6063 + }, + { + "chosen_geometric_mean": -1.0476315021514893, + "epoch": 1.5, + "grad_norm": 4.59375, + "learning_rate": 7.301692051994341e-07, + "log_odds": 3.6217212677001953, + "log_odds_ratio": -0.04171382635831833, + "loss": 0.2549, + "rejected_geometric_mean": -4.234462738037109, + "step": 6064 + }, + { + "chosen_geometric_mean": -1.0055919885635376, + "epoch": 1.5, + "grad_norm": 2.015625, + "learning_rate": 7.294817944799934e-07, + "log_odds": 9.673245429992676, + "log_odds_ratio": -0.0026721376925706863, + "loss": 0.2513, + "rejected_geometric_mean": -10.218803405761719, + "step": 6065 + }, + { + "chosen_geometric_mean": -0.6662165522575378, + "epoch": 1.5, + "grad_norm": 2.140625, + "learning_rate": 7.287946522136108e-07, + "log_odds": 13.984821319580078, + "log_odds_ratio": -0.005033968482166529, + "loss": 0.2013, + "rejected_geometric_mean": -13.920181274414062, + "step": 6066 + }, + { + "chosen_geometric_mean": -0.9108524918556213, + "epoch": 1.5, + "grad_norm": 2.125, + "learning_rate": 7.28107778504476e-07, + "log_odds": 4.150095462799072, + "log_odds_ratio": -0.09185972064733505, + "loss": 0.2299, + "rejected_geometric_mean": -4.611310005187988, + "step": 6067 + }, + { + "chosen_geometric_mean": -0.9861097931861877, + "epoch": 1.5, + "grad_norm": 27.5, + "learning_rate": 7.274211734567341e-07, + "log_odds": 7.317150592803955, + "log_odds_ratio": -0.10401835292577744, + "loss": 0.3699, + "rejected_geometric_mean": -7.850996494293213, + "step": 6068 + }, + { + "chosen_geometric_mean": -0.8897936344146729, + "epoch": 1.5, + "grad_norm": 14.9375, + "learning_rate": 7.267348371744903e-07, + "log_odds": 9.843949317932129, + "log_odds_ratio": -0.050616778433322906, + "loss": 0.2759, + "rejected_geometric_mean": -10.236333847045898, + "step": 6069 + }, + { + "chosen_geometric_mean": -0.8501133918762207, + "epoch": 1.5, + "grad_norm": 5.1875, + "learning_rate": 7.260487697618107e-07, + "log_odds": 7.1341047286987305, + "log_odds_ratio": -0.11551129817962646, + "loss": 0.2487, + "rejected_geometric_mean": -7.52390193939209, + "step": 6070 + }, + { + "chosen_geometric_mean": -0.8590530753135681, + "epoch": 1.5, + "grad_norm": 16.375, + "learning_rate": 7.253629713227201e-07, + "log_odds": 3.6897993087768555, + "log_odds_ratio": -0.03226298838853836, + "loss": 0.2358, + "rejected_geometric_mean": -3.975806713104248, + "step": 6071 + }, + { + "chosen_geometric_mean": -0.936664342880249, + "epoch": 1.5, + "grad_norm": 1.984375, + "learning_rate": 7.246774419612004e-07, + "log_odds": 3.6470870971679688, + "log_odds_ratio": -0.22046400606632233, + "loss": 0.3018, + "rejected_geometric_mean": -4.220987319946289, + "step": 6072 + }, + { + "chosen_geometric_mean": -1.097886085510254, + "epoch": 1.5, + "grad_norm": 18.5, + "learning_rate": 7.239921817811959e-07, + "log_odds": 6.891483306884766, + "log_odds_ratio": -0.08130384236574173, + "loss": 0.2959, + "rejected_geometric_mean": -7.59453010559082, + "step": 6073 + }, + { + "chosen_geometric_mean": -1.0502424240112305, + "epoch": 1.5, + "grad_norm": 2.875, + "learning_rate": 7.233071908866073e-07, + "log_odds": 9.51860523223877, + "log_odds_ratio": -0.024178436025977135, + "loss": 0.2511, + "rejected_geometric_mean": -10.137786865234375, + "step": 6074 + }, + { + "chosen_geometric_mean": -0.8487473726272583, + "epoch": 1.5, + "grad_norm": 4.03125, + "learning_rate": 7.226224693812959e-07, + "log_odds": 11.805815696716309, + "log_odds_ratio": -0.000582832028158009, + "loss": 0.24, + "rejected_geometric_mean": -12.049558639526367, + "step": 6075 + }, + { + "chosen_geometric_mean": -1.0376648902893066, + "epoch": 1.5, + "grad_norm": 5.09375, + "learning_rate": 7.219380173690832e-07, + "log_odds": 5.770246982574463, + "log_odds_ratio": -0.041034918278455734, + "loss": 0.2074, + "rejected_geometric_mean": -6.347987174987793, + "step": 6076 + }, + { + "chosen_geometric_mean": -1.1557152271270752, + "epoch": 1.5, + "grad_norm": 12.625, + "learning_rate": 7.212538349537465e-07, + "log_odds": 5.323200225830078, + "log_odds_ratio": -0.21084287762641907, + "loss": 0.2425, + "rejected_geometric_mean": -6.201105117797852, + "step": 6077 + }, + { + "chosen_geometric_mean": -1.0098657608032227, + "epoch": 1.5, + "grad_norm": 11.75, + "learning_rate": 7.205699222390261e-07, + "log_odds": 8.613327026367188, + "log_odds_ratio": -0.015351897105574608, + "loss": 0.3061, + "rejected_geometric_mean": -9.158204078674316, + "step": 6078 + }, + { + "chosen_geometric_mean": -0.8606743216514587, + "epoch": 1.51, + "grad_norm": 12.3125, + "learning_rate": 7.198862793286188e-07, + "log_odds": 8.308256149291992, + "log_odds_ratio": -0.0003486787318252027, + "loss": 0.2617, + "rejected_geometric_mean": -8.611210823059082, + "step": 6079 + }, + { + "chosen_geometric_mean": -0.9732804298400879, + "epoch": 1.51, + "grad_norm": 29.0, + "learning_rate": 7.192029063261807e-07, + "log_odds": 9.631302833557129, + "log_odds_ratio": -0.18807435035705566, + "loss": 0.3277, + "rejected_geometric_mean": -10.247647285461426, + "step": 6080 + }, + { + "chosen_geometric_mean": -0.9396228790283203, + "epoch": 1.51, + "grad_norm": 2.921875, + "learning_rate": 7.185198033353278e-07, + "log_odds": 6.119045257568359, + "log_odds_ratio": -0.16363006830215454, + "loss": 0.2527, + "rejected_geometric_mean": -6.657581329345703, + "step": 6081 + }, + { + "chosen_geometric_mean": -0.8634689450263977, + "epoch": 1.51, + "grad_norm": 3.96875, + "learning_rate": 7.178369704596361e-07, + "log_odds": 2.3175084590911865, + "log_odds_ratio": -0.15951816737651825, + "loss": 0.219, + "rejected_geometric_mean": -2.7143378257751465, + "step": 6082 + }, + { + "chosen_geometric_mean": -1.042973279953003, + "epoch": 1.51, + "grad_norm": 4.3125, + "learning_rate": 7.171544078026379e-07, + "log_odds": 5.718307971954346, + "log_odds_ratio": -0.17527134716510773, + "loss": 0.2459, + "rejected_geometric_mean": -6.408468246459961, + "step": 6083 + }, + { + "chosen_geometric_mean": -1.0996110439300537, + "epoch": 1.51, + "grad_norm": 4.96875, + "learning_rate": 7.164721154678275e-07, + "log_odds": 5.961694717407227, + "log_odds_ratio": -0.18340608477592468, + "loss": 0.2982, + "rejected_geometric_mean": -6.754881858825684, + "step": 6084 + }, + { + "chosen_geometric_mean": -0.8910198211669922, + "epoch": 1.51, + "grad_norm": 16.25, + "learning_rate": 7.157900935586559e-07, + "log_odds": 9.239728927612305, + "log_odds_ratio": -0.1505262851715088, + "loss": 0.2357, + "rejected_geometric_mean": -9.714533805847168, + "step": 6085 + }, + { + "chosen_geometric_mean": -0.7890645265579224, + "epoch": 1.51, + "grad_norm": 5.96875, + "learning_rate": 7.151083421785327e-07, + "log_odds": 9.676918029785156, + "log_odds_ratio": -0.11352651566267014, + "loss": 0.2803, + "rejected_geometric_mean": -9.950257301330566, + "step": 6086 + }, + { + "chosen_geometric_mean": -0.8047250509262085, + "epoch": 1.51, + "grad_norm": 35.25, + "learning_rate": 7.144268614308308e-07, + "log_odds": 4.6213059425354, + "log_odds_ratio": -0.27514541149139404, + "loss": 0.2533, + "rejected_geometric_mean": -5.071735382080078, + "step": 6087 + }, + { + "chosen_geometric_mean": -0.9217466711997986, + "epoch": 1.51, + "grad_norm": 24.0, + "learning_rate": 7.137456514188773e-07, + "log_odds": 7.287089824676514, + "log_odds_ratio": -0.12773197889328003, + "loss": 0.2881, + "rejected_geometric_mean": -7.791821002960205, + "step": 6088 + }, + { + "chosen_geometric_mean": -1.316200852394104, + "epoch": 1.51, + "grad_norm": 2.15625, + "learning_rate": 7.130647122459597e-07, + "log_odds": 4.727381706237793, + "log_odds_ratio": -0.17653700709342957, + "loss": 0.2667, + "rejected_geometric_mean": -5.805876731872559, + "step": 6089 + }, + { + "chosen_geometric_mean": -1.0036280155181885, + "epoch": 1.51, + "grad_norm": 21.875, + "learning_rate": 7.123840440153257e-07, + "log_odds": 6.1363348960876465, + "log_odds_ratio": -0.2195805162191391, + "loss": 0.2402, + "rejected_geometric_mean": -6.82827091217041, + "step": 6090 + }, + { + "chosen_geometric_mean": -1.151702880859375, + "epoch": 1.51, + "grad_norm": 11.3125, + "learning_rate": 7.117036468301796e-07, + "log_odds": 9.228572845458984, + "log_odds_ratio": -0.1324121505022049, + "loss": 0.3448, + "rejected_geometric_mean": -10.046615600585938, + "step": 6091 + }, + { + "chosen_geometric_mean": -1.2465488910675049, + "epoch": 1.51, + "grad_norm": 3.078125, + "learning_rate": 7.11023520793687e-07, + "log_odds": 5.840628147125244, + "log_odds_ratio": -0.02799975499510765, + "loss": 0.2497, + "rejected_geometric_mean": -6.758912563323975, + "step": 6092 + }, + { + "chosen_geometric_mean": -1.2379025220870972, + "epoch": 1.51, + "grad_norm": 8.8125, + "learning_rate": 7.103436660089716e-07, + "log_odds": 7.225482940673828, + "log_odds_ratio": -0.28461509943008423, + "loss": 0.3038, + "rejected_geometric_mean": -8.254202842712402, + "step": 6093 + }, + { + "chosen_geometric_mean": -1.0490840673446655, + "epoch": 1.51, + "grad_norm": 5.78125, + "learning_rate": 7.096640825791152e-07, + "log_odds": 5.095314025878906, + "log_odds_ratio": -0.12712185084819794, + "loss": 0.2608, + "rejected_geometric_mean": -5.796187400817871, + "step": 6094 + }, + { + "chosen_geometric_mean": -1.1142712831497192, + "epoch": 1.51, + "grad_norm": 23.875, + "learning_rate": 7.089847706071584e-07, + "log_odds": 8.952479362487793, + "log_odds_ratio": -0.0008871641475707293, + "loss": 0.2461, + "rejected_geometric_mean": -9.619503021240234, + "step": 6095 + }, + { + "chosen_geometric_mean": -1.2118120193481445, + "epoch": 1.51, + "grad_norm": 25.75, + "learning_rate": 7.083057301961024e-07, + "log_odds": 4.643720626831055, + "log_odds_ratio": -0.18710413575172424, + "loss": 0.2526, + "rejected_geometric_mean": -5.534722328186035, + "step": 6096 + }, + { + "chosen_geometric_mean": -0.8130389451980591, + "epoch": 1.51, + "grad_norm": 8.625, + "learning_rate": 7.076269614489046e-07, + "log_odds": 10.701733589172363, + "log_odds_ratio": -0.00036839136737398803, + "loss": 0.2598, + "rejected_geometric_mean": -10.910319328308105, + "step": 6097 + }, + { + "chosen_geometric_mean": -1.2715535163879395, + "epoch": 1.51, + "grad_norm": 26.0, + "learning_rate": 7.069484644684834e-07, + "log_odds": 6.86664342880249, + "log_odds_ratio": -0.21636395156383514, + "loss": 0.2461, + "rejected_geometric_mean": -7.724743366241455, + "step": 6098 + }, + { + "chosen_geometric_mean": -0.9198358058929443, + "epoch": 1.51, + "grad_norm": 2.234375, + "learning_rate": 7.062702393577162e-07, + "log_odds": 10.513981819152832, + "log_odds_ratio": -0.0011361666256561875, + "loss": 0.2816, + "rejected_geometric_mean": -10.896590232849121, + "step": 6099 + }, + { + "chosen_geometric_mean": -1.168775200843811, + "epoch": 1.51, + "grad_norm": 2.140625, + "learning_rate": 7.055922862194361e-07, + "log_odds": 5.301558971405029, + "log_odds_ratio": -0.13577109575271606, + "loss": 0.2537, + "rejected_geometric_mean": -6.188323497772217, + "step": 6100 + }, + { + "chosen_geometric_mean": -0.9342028498649597, + "epoch": 1.51, + "grad_norm": 24.875, + "learning_rate": 7.049146051564393e-07, + "log_odds": 12.914816856384277, + "log_odds_ratio": -0.001687048003077507, + "loss": 0.3124, + "rejected_geometric_mean": -13.316635131835938, + "step": 6101 + }, + { + "chosen_geometric_mean": -0.9895851612091064, + "epoch": 1.51, + "grad_norm": 3.171875, + "learning_rate": 7.042371962714767e-07, + "log_odds": 5.225582122802734, + "log_odds_ratio": -0.2557748556137085, + "loss": 0.2561, + "rejected_geometric_mean": -5.925067901611328, + "step": 6102 + }, + { + "chosen_geometric_mean": -0.9343057870864868, + "epoch": 1.51, + "grad_norm": 2.015625, + "learning_rate": 7.035600596672604e-07, + "log_odds": 7.035367488861084, + "log_odds_ratio": -0.11535053700208664, + "loss": 0.2423, + "rejected_geometric_mean": -7.542530059814453, + "step": 6103 + }, + { + "chosen_geometric_mean": -0.9280281066894531, + "epoch": 1.51, + "grad_norm": 10.25, + "learning_rate": 7.028831954464613e-07, + "log_odds": 14.549627304077148, + "log_odds_ratio": -0.06942497938871384, + "loss": 0.2745, + "rejected_geometric_mean": -14.997980117797852, + "step": 6104 + }, + { + "chosen_geometric_mean": -0.7221823930740356, + "epoch": 1.51, + "grad_norm": 35.0, + "learning_rate": 7.022066037117078e-07, + "log_odds": 5.118778228759766, + "log_odds_ratio": -0.1426180750131607, + "loss": 0.2715, + "rejected_geometric_mean": -5.297785758972168, + "step": 6105 + }, + { + "chosen_geometric_mean": -0.9731431007385254, + "epoch": 1.51, + "grad_norm": 2.78125, + "learning_rate": 7.015302845655864e-07, + "log_odds": 9.32989501953125, + "log_odds_ratio": -0.12508457899093628, + "loss": 0.2496, + "rejected_geometric_mean": -9.913625717163086, + "step": 6106 + }, + { + "chosen_geometric_mean": -0.9236481189727783, + "epoch": 1.51, + "grad_norm": 2.6875, + "learning_rate": 7.008542381106451e-07, + "log_odds": 4.7662858963012695, + "log_odds_ratio": -0.12169256806373596, + "loss": 0.3029, + "rejected_geometric_mean": -5.176072597503662, + "step": 6107 + }, + { + "chosen_geometric_mean": -0.9725267291069031, + "epoch": 1.51, + "grad_norm": 23.625, + "learning_rate": 7.00178464449387e-07, + "log_odds": 4.691272735595703, + "log_odds_ratio": -0.227196604013443, + "loss": 0.2703, + "rejected_geometric_mean": -5.297872543334961, + "step": 6108 + }, + { + "chosen_geometric_mean": -1.0037841796875, + "epoch": 1.51, + "grad_norm": 37.75, + "learning_rate": 6.995029636842762e-07, + "log_odds": 9.051910400390625, + "log_odds_ratio": -0.019145019352436066, + "loss": 0.2976, + "rejected_geometric_mean": -9.597574234008789, + "step": 6109 + }, + { + "chosen_geometric_mean": -0.8760758638381958, + "epoch": 1.51, + "grad_norm": 4.75, + "learning_rate": 6.988277359177362e-07, + "log_odds": 4.311115264892578, + "log_odds_ratio": -0.27145200967788696, + "loss": 0.2688, + "rejected_geometric_mean": -4.777669429779053, + "step": 6110 + }, + { + "chosen_geometric_mean": -0.9384473562240601, + "epoch": 1.51, + "grad_norm": 2.828125, + "learning_rate": 6.981527812521465e-07, + "log_odds": 7.80970573425293, + "log_odds_ratio": -0.0938805639743805, + "loss": 0.2657, + "rejected_geometric_mean": -8.241218566894531, + "step": 6111 + }, + { + "chosen_geometric_mean": -0.8751479387283325, + "epoch": 1.51, + "grad_norm": 11.875, + "learning_rate": 6.974780997898459e-07, + "log_odds": 11.1558837890625, + "log_odds_ratio": -0.0006200882489793003, + "loss": 0.2701, + "rejected_geometric_mean": -11.4854736328125, + "step": 6112 + }, + { + "chosen_geometric_mean": -0.8675056099891663, + "epoch": 1.51, + "grad_norm": 4.84375, + "learning_rate": 6.968036916331328e-07, + "log_odds": 7.599260330200195, + "log_odds_ratio": -0.05058303475379944, + "loss": 0.2634, + "rejected_geometric_mean": -7.946162223815918, + "step": 6113 + }, + { + "chosen_geometric_mean": -0.9616692066192627, + "epoch": 1.51, + "grad_norm": 2.484375, + "learning_rate": 6.961295568842647e-07, + "log_odds": 6.848840236663818, + "log_odds_ratio": -0.04786231368780136, + "loss": 0.2337, + "rejected_geometric_mean": -7.347807884216309, + "step": 6114 + }, + { + "chosen_geometric_mean": -1.0587749481201172, + "epoch": 1.51, + "grad_norm": 3.421875, + "learning_rate": 6.954556956454547e-07, + "log_odds": 4.202944278717041, + "log_odds_ratio": -0.3009999990463257, + "loss": 0.2439, + "rejected_geometric_mean": -5.020287036895752, + "step": 6115 + }, + { + "chosen_geometric_mean": -1.0142117738723755, + "epoch": 1.51, + "grad_norm": 2.25, + "learning_rate": 6.947821080188782e-07, + "log_odds": 11.710607528686523, + "log_odds_ratio": -0.05671071261167526, + "loss": 0.2525, + "rejected_geometric_mean": -12.254594802856445, + "step": 6116 + }, + { + "chosen_geometric_mean": -0.8111039996147156, + "epoch": 1.51, + "grad_norm": 31.375, + "learning_rate": 6.94108794106666e-07, + "log_odds": 9.68327808380127, + "log_odds_ratio": -0.1012006625533104, + "loss": 0.2618, + "rejected_geometric_mean": -9.95354175567627, + "step": 6117 + }, + { + "chosen_geometric_mean": -1.0457048416137695, + "epoch": 1.51, + "grad_norm": 28.625, + "learning_rate": 6.934357540109097e-07, + "log_odds": 6.451163291931152, + "log_odds_ratio": -0.24956119060516357, + "loss": 0.2575, + "rejected_geometric_mean": -7.23244047164917, + "step": 6118 + }, + { + "chosen_geometric_mean": -1.6481709480285645, + "epoch": 1.51, + "grad_norm": 29.5, + "learning_rate": 6.927629878336567e-07, + "log_odds": 7.02506685256958, + "log_odds_ratio": -0.17976462841033936, + "loss": 0.2878, + "rejected_geometric_mean": -8.509845733642578, + "step": 6119 + }, + { + "chosen_geometric_mean": -1.234861969947815, + "epoch": 1.52, + "grad_norm": 22.0, + "learning_rate": 6.920904956769165e-07, + "log_odds": 9.316165924072266, + "log_odds_ratio": -0.2791697382926941, + "loss": 0.2706, + "rejected_geometric_mean": -10.161401748657227, + "step": 6120 + }, + { + "chosen_geometric_mean": -1.1083076000213623, + "epoch": 1.52, + "grad_norm": 48.25, + "learning_rate": 6.914182776426534e-07, + "log_odds": 8.893394470214844, + "log_odds_ratio": -0.015552244149148464, + "loss": 0.2508, + "rejected_geometric_mean": -9.575286865234375, + "step": 6121 + }, + { + "chosen_geometric_mean": -0.8659946322441101, + "epoch": 1.52, + "grad_norm": 33.25, + "learning_rate": 6.90746333832793e-07, + "log_odds": 12.56692886352539, + "log_odds_ratio": -0.08249066025018692, + "loss": 0.3266, + "rejected_geometric_mean": -12.92999267578125, + "step": 6122 + }, + { + "chosen_geometric_mean": -0.8775883913040161, + "epoch": 1.52, + "grad_norm": 3.71875, + "learning_rate": 6.900746643492168e-07, + "log_odds": 10.436275482177734, + "log_odds_ratio": -0.0005437413929030299, + "loss": 0.2863, + "rejected_geometric_mean": -10.765134811401367, + "step": 6123 + }, + { + "chosen_geometric_mean": -1.96002197265625, + "epoch": 1.52, + "grad_norm": 26.625, + "learning_rate": 6.894032692937669e-07, + "log_odds": 2.3693785667419434, + "log_odds_ratio": -0.20155836641788483, + "loss": 0.295, + "rejected_geometric_mean": -3.995335102081299, + "step": 6124 + }, + { + "chosen_geometric_mean": -1.0218052864074707, + "epoch": 1.52, + "grad_norm": 2.40625, + "learning_rate": 6.887321487682435e-07, + "log_odds": 3.7373862266540527, + "log_odds_ratio": -0.23037178814411163, + "loss": 0.2804, + "rejected_geometric_mean": -4.478748798370361, + "step": 6125 + }, + { + "chosen_geometric_mean": -0.764311671257019, + "epoch": 1.52, + "grad_norm": 2.046875, + "learning_rate": 6.880613028744032e-07, + "log_odds": 1.0956473350524902, + "log_odds_ratio": -0.4307192265987396, + "loss": 0.2436, + "rejected_geometric_mean": -1.6321924924850464, + "step": 6126 + }, + { + "chosen_geometric_mean": -0.9883790016174316, + "epoch": 1.52, + "grad_norm": 4.3125, + "learning_rate": 6.873907317139636e-07, + "log_odds": 5.452241897583008, + "log_odds_ratio": -0.26906001567840576, + "loss": 0.2496, + "rejected_geometric_mean": -6.079672336578369, + "step": 6127 + }, + { + "chosen_geometric_mean": -1.1919002532958984, + "epoch": 1.52, + "grad_norm": 2.859375, + "learning_rate": 6.86720435388599e-07, + "log_odds": 4.330938339233398, + "log_odds_ratio": -0.15803107619285583, + "loss": 0.2333, + "rejected_geometric_mean": -5.233386993408203, + "step": 6128 + }, + { + "chosen_geometric_mean": -1.175026297569275, + "epoch": 1.52, + "grad_norm": 1.84375, + "learning_rate": 6.860504139999413e-07, + "log_odds": 6.606998920440674, + "log_odds_ratio": -0.03605694696307182, + "loss": 0.2374, + "rejected_geometric_mean": -7.423635959625244, + "step": 6129 + }, + { + "chosen_geometric_mean": -0.917259693145752, + "epoch": 1.52, + "grad_norm": 2.796875, + "learning_rate": 6.853806676495828e-07, + "log_odds": 12.450571060180664, + "log_odds_ratio": -0.0007484099478460848, + "loss": 0.2395, + "rejected_geometric_mean": -12.839456558227539, + "step": 6130 + }, + { + "chosen_geometric_mean": -0.9713843464851379, + "epoch": 1.52, + "grad_norm": 2.5625, + "learning_rate": 6.847111964390738e-07, + "log_odds": 7.0227837562561035, + "log_odds_ratio": -0.24347487092018127, + "loss": 0.2314, + "rejected_geometric_mean": -7.6342949867248535, + "step": 6131 + }, + { + "chosen_geometric_mean": -1.0212393999099731, + "epoch": 1.52, + "grad_norm": 2.9375, + "learning_rate": 6.840420004699208e-07, + "log_odds": 4.510601043701172, + "log_odds_ratio": -0.16401304304599762, + "loss": 0.2546, + "rejected_geometric_mean": -5.16731595993042, + "step": 6132 + }, + { + "chosen_geometric_mean": -1.0758965015411377, + "epoch": 1.52, + "grad_norm": 3.796875, + "learning_rate": 6.833730798435911e-07, + "log_odds": 6.756828308105469, + "log_odds_ratio": -0.011961241252720356, + "loss": 0.2839, + "rejected_geometric_mean": -7.409084320068359, + "step": 6133 + }, + { + "chosen_geometric_mean": -1.3034013509750366, + "epoch": 1.52, + "grad_norm": 65.0, + "learning_rate": 6.827044346615088e-07, + "log_odds": 6.537988662719727, + "log_odds_ratio": -0.42811936140060425, + "loss": 0.3459, + "rejected_geometric_mean": -7.437896251678467, + "step": 6134 + }, + { + "chosen_geometric_mean": -0.8714821338653564, + "epoch": 1.52, + "grad_norm": 14.6875, + "learning_rate": 6.820360650250552e-07, + "log_odds": 7.5407795906066895, + "log_odds_ratio": -0.15213902294635773, + "loss": 0.2599, + "rejected_geometric_mean": -7.911226272583008, + "step": 6135 + }, + { + "chosen_geometric_mean": -1.0747724771499634, + "epoch": 1.52, + "grad_norm": 18.5, + "learning_rate": 6.813679710355736e-07, + "log_odds": 6.906991958618164, + "log_odds_ratio": -0.05894390866160393, + "loss": 0.2097, + "rejected_geometric_mean": -7.594542026519775, + "step": 6136 + }, + { + "chosen_geometric_mean": -0.9309757351875305, + "epoch": 1.52, + "grad_norm": 2.046875, + "learning_rate": 6.807001527943618e-07, + "log_odds": 13.683865547180176, + "log_odds_ratio": -0.003842171747237444, + "loss": 0.2469, + "rejected_geometric_mean": -14.087379455566406, + "step": 6137 + }, + { + "chosen_geometric_mean": -1.239567518234253, + "epoch": 1.52, + "grad_norm": 33.5, + "learning_rate": 6.800326104026767e-07, + "log_odds": 2.399066925048828, + "log_odds_ratio": -0.5678151249885559, + "loss": 0.3355, + "rejected_geometric_mean": -3.47145414352417, + "step": 6138 + }, + { + "chosen_geometric_mean": -1.1301319599151611, + "epoch": 1.52, + "grad_norm": 2.328125, + "learning_rate": 6.793653439617348e-07, + "log_odds": 3.588700532913208, + "log_odds_ratio": -0.2667275369167328, + "loss": 0.2892, + "rejected_geometric_mean": -4.466738224029541, + "step": 6139 + }, + { + "chosen_geometric_mean": -1.1527990102767944, + "epoch": 1.52, + "grad_norm": 25.5, + "learning_rate": 6.786983535727084e-07, + "log_odds": 6.726864337921143, + "log_odds_ratio": -0.24819602072238922, + "loss": 0.3464, + "rejected_geometric_mean": -7.630380630493164, + "step": 6140 + }, + { + "chosen_geometric_mean": -1.3296293020248413, + "epoch": 1.52, + "grad_norm": 9.125, + "learning_rate": 6.7803163933673e-07, + "log_odds": 4.578091144561768, + "log_odds_ratio": -0.12633545696735382, + "loss": 0.3075, + "rejected_geometric_mean": -5.585524559020996, + "step": 6141 + }, + { + "chosen_geometric_mean": -1.051277995109558, + "epoch": 1.52, + "grad_norm": 1.859375, + "learning_rate": 6.773652013548901e-07, + "log_odds": 3.2910187244415283, + "log_odds_ratio": -0.05683629959821701, + "loss": 0.1757, + "rejected_geometric_mean": -3.924710512161255, + "step": 6142 + }, + { + "chosen_geometric_mean": -0.8016029596328735, + "epoch": 1.52, + "grad_norm": 2.0, + "learning_rate": 6.766990397282364e-07, + "log_odds": 5.004078388214111, + "log_odds_ratio": -0.1764608472585678, + "loss": 0.2433, + "rejected_geometric_mean": -5.320828914642334, + "step": 6143 + }, + { + "chosen_geometric_mean": -0.8750492334365845, + "epoch": 1.52, + "grad_norm": 11.5, + "learning_rate": 6.760331545577734e-07, + "log_odds": 6.3535919189453125, + "log_odds_ratio": -0.02093362621963024, + "loss": 0.2497, + "rejected_geometric_mean": -6.6963372230529785, + "step": 6144 + }, + { + "chosen_geometric_mean": -1.1041158437728882, + "epoch": 1.52, + "grad_norm": 6.46875, + "learning_rate": 6.753675459444675e-07, + "log_odds": 9.29983901977539, + "log_odds_ratio": -0.10253650695085526, + "loss": 0.2764, + "rejected_geometric_mean": -10.052950859069824, + "step": 6145 + }, + { + "chosen_geometric_mean": -0.97456294298172, + "epoch": 1.52, + "grad_norm": 2.34375, + "learning_rate": 6.747022139892393e-07, + "log_odds": 8.456454277038574, + "log_odds_ratio": -0.12118059396743774, + "loss": 0.2797, + "rejected_geometric_mean": -8.96216869354248, + "step": 6146 + }, + { + "chosen_geometric_mean": -0.7524466514587402, + "epoch": 1.52, + "grad_norm": 3.671875, + "learning_rate": 6.740371587929695e-07, + "log_odds": 2.4885318279266357, + "log_odds_ratio": -0.2807062864303589, + "loss": 0.2412, + "rejected_geometric_mean": -2.8463196754455566, + "step": 6147 + }, + { + "chosen_geometric_mean": -0.8199472427368164, + "epoch": 1.52, + "grad_norm": 4.21875, + "learning_rate": 6.733723804564976e-07, + "log_odds": 6.27767276763916, + "log_odds_ratio": -0.2556186020374298, + "loss": 0.2654, + "rejected_geometric_mean": -6.75175666809082, + "step": 6148 + }, + { + "chosen_geometric_mean": -0.9477128982543945, + "epoch": 1.52, + "grad_norm": 2.28125, + "learning_rate": 6.727078790806183e-07, + "log_odds": 9.583724021911621, + "log_odds_ratio": -0.008248571306467056, + "loss": 0.2798, + "rejected_geometric_mean": -9.998298645019531, + "step": 6149 + }, + { + "chosen_geometric_mean": -1.073594093322754, + "epoch": 1.52, + "grad_norm": 2.484375, + "learning_rate": 6.720436547660875e-07, + "log_odds": 10.474653244018555, + "log_odds_ratio": -0.017035773023962975, + "loss": 0.2595, + "rejected_geometric_mean": -11.112527847290039, + "step": 6150 + }, + { + "chosen_geometric_mean": -1.0158839225769043, + "epoch": 1.52, + "grad_norm": 5.46875, + "learning_rate": 6.713797076136166e-07, + "log_odds": 4.015351295471191, + "log_odds_ratio": -0.19109591841697693, + "loss": 0.2747, + "rejected_geometric_mean": -4.692099571228027, + "step": 6151 + }, + { + "chosen_geometric_mean": -1.0274569988250732, + "epoch": 1.52, + "grad_norm": 20.0, + "learning_rate": 6.707160377238747e-07, + "log_odds": 5.0947771072387695, + "log_odds_ratio": -0.16434074938297272, + "loss": 0.2646, + "rejected_geometric_mean": -5.734914302825928, + "step": 6152 + }, + { + "chosen_geometric_mean": -0.9836362600326538, + "epoch": 1.52, + "grad_norm": 2.71875, + "learning_rate": 6.700526451974928e-07, + "log_odds": 3.8513741493225098, + "log_odds_ratio": -0.24937257170677185, + "loss": 0.2769, + "rejected_geometric_mean": -4.478184700012207, + "step": 6153 + }, + { + "chosen_geometric_mean": -0.8859370350837708, + "epoch": 1.52, + "grad_norm": 9.0625, + "learning_rate": 6.693895301350562e-07, + "log_odds": 3.9750308990478516, + "log_odds_ratio": -0.13351504504680634, + "loss": 0.2446, + "rejected_geometric_mean": -4.425739765167236, + "step": 6154 + }, + { + "chosen_geometric_mean": -0.9928217530250549, + "epoch": 1.52, + "grad_norm": 6.71875, + "learning_rate": 6.687266926371075e-07, + "log_odds": 7.02731990814209, + "log_odds_ratio": -0.18243877589702606, + "loss": 0.2385, + "rejected_geometric_mean": -7.632694244384766, + "step": 6155 + }, + { + "chosen_geometric_mean": -0.7639585733413696, + "epoch": 1.52, + "grad_norm": 5.09375, + "learning_rate": 6.680641328041507e-07, + "log_odds": 5.459330081939697, + "log_odds_ratio": -0.26175743341445923, + "loss": 0.2258, + "rejected_geometric_mean": -5.83972692489624, + "step": 6156 + }, + { + "chosen_geometric_mean": -0.9615417718887329, + "epoch": 1.52, + "grad_norm": 3.5625, + "learning_rate": 6.674018507366445e-07, + "log_odds": 4.705794334411621, + "log_odds_ratio": -0.17544348537921906, + "loss": 0.2699, + "rejected_geometric_mean": -5.2593994140625, + "step": 6157 + }, + { + "chosen_geometric_mean": -0.9383000731468201, + "epoch": 1.52, + "grad_norm": 6.46875, + "learning_rate": 6.66739846535007e-07, + "log_odds": 5.046330451965332, + "log_odds_ratio": -0.16310743987560272, + "loss": 0.2694, + "rejected_geometric_mean": -5.61994743347168, + "step": 6158 + }, + { + "chosen_geometric_mean": -0.83451247215271, + "epoch": 1.52, + "grad_norm": 3.640625, + "learning_rate": 6.66078120299615e-07, + "log_odds": 2.728760242462158, + "log_odds_ratio": -0.20084896683692932, + "loss": 0.2271, + "rejected_geometric_mean": -3.1700408458709717, + "step": 6159 + }, + { + "chosen_geometric_mean": -1.2044179439544678, + "epoch": 1.53, + "grad_norm": 17.25, + "learning_rate": 6.654166721308014e-07, + "log_odds": 8.212900161743164, + "log_odds_ratio": -0.11735890805721283, + "loss": 0.2364, + "rejected_geometric_mean": -9.067427635192871, + "step": 6160 + }, + { + "chosen_geometric_mean": -1.0639171600341797, + "epoch": 1.53, + "grad_norm": 4.90625, + "learning_rate": 6.647555021288568e-07, + "log_odds": 13.975393295288086, + "log_odds_ratio": -3.7402969610411674e-05, + "loss": 0.2531, + "rejected_geometric_mean": -14.604340553283691, + "step": 6161 + }, + { + "chosen_geometric_mean": -0.8492801785469055, + "epoch": 1.53, + "grad_norm": 2.96875, + "learning_rate": 6.640946103940315e-07, + "log_odds": 2.379232406616211, + "log_odds_ratio": -0.1953449249267578, + "loss": 0.2301, + "rejected_geometric_mean": -2.792673110961914, + "step": 6162 + }, + { + "chosen_geometric_mean": -0.8770065307617188, + "epoch": 1.53, + "grad_norm": 9.5625, + "learning_rate": 6.634339970265319e-07, + "log_odds": 6.514602184295654, + "log_odds_ratio": -0.11257299035787582, + "loss": 0.2638, + "rejected_geometric_mean": -6.94964599609375, + "step": 6163 + }, + { + "chosen_geometric_mean": -0.9151816368103027, + "epoch": 1.53, + "grad_norm": 3.109375, + "learning_rate": 6.627736621265229e-07, + "log_odds": 6.264472961425781, + "log_odds_ratio": -0.08206407725811005, + "loss": 0.262, + "rejected_geometric_mean": -6.6752495765686035, + "step": 6164 + }, + { + "chosen_geometric_mean": -1.0196176767349243, + "epoch": 1.53, + "grad_norm": 2.703125, + "learning_rate": 6.621136057941282e-07, + "log_odds": 11.167553901672363, + "log_odds_ratio": -0.04587993770837784, + "loss": 0.2534, + "rejected_geometric_mean": -11.713687896728516, + "step": 6165 + }, + { + "chosen_geometric_mean": -0.9429512023925781, + "epoch": 1.53, + "grad_norm": 17.625, + "learning_rate": 6.614538281294264e-07, + "log_odds": 2.513462543487549, + "log_odds_ratio": -0.21492360532283783, + "loss": 0.2443, + "rejected_geometric_mean": -3.1054511070251465, + "step": 6166 + }, + { + "chosen_geometric_mean": -0.984097957611084, + "epoch": 1.53, + "grad_norm": 3.6875, + "learning_rate": 6.607943292324576e-07, + "log_odds": 4.202661991119385, + "log_odds_ratio": -0.19190850853919983, + "loss": 0.2463, + "rejected_geometric_mean": -4.759979248046875, + "step": 6167 + }, + { + "chosen_geometric_mean": -0.9678237438201904, + "epoch": 1.53, + "grad_norm": 3.078125, + "learning_rate": 6.601351092032157e-07, + "log_odds": 7.718642234802246, + "log_odds_ratio": -0.16620543599128723, + "loss": 0.2628, + "rejected_geometric_mean": -8.31084156036377, + "step": 6168 + }, + { + "chosen_geometric_mean": -1.039962649345398, + "epoch": 1.53, + "grad_norm": 17.875, + "learning_rate": 6.594761681416559e-07, + "log_odds": 8.878971099853516, + "log_odds_ratio": -0.19932380318641663, + "loss": 0.2761, + "rejected_geometric_mean": -9.462052345275879, + "step": 6169 + }, + { + "chosen_geometric_mean": -1.135746955871582, + "epoch": 1.53, + "grad_norm": 3.65625, + "learning_rate": 6.588175061476882e-07, + "log_odds": 7.179551124572754, + "log_odds_ratio": -0.2013939917087555, + "loss": 0.2272, + "rejected_geometric_mean": -7.991276741027832, + "step": 6170 + }, + { + "chosen_geometric_mean": -0.9884614944458008, + "epoch": 1.53, + "grad_norm": 6.28125, + "learning_rate": 6.581591233211826e-07, + "log_odds": 7.665334701538086, + "log_odds_ratio": -0.0622207373380661, + "loss": 0.247, + "rejected_geometric_mean": -8.20351791381836, + "step": 6171 + }, + { + "chosen_geometric_mean": -1.1051037311553955, + "epoch": 1.53, + "grad_norm": 8.1875, + "learning_rate": 6.57501019761965e-07, + "log_odds": 6.8344340324401855, + "log_odds_ratio": -0.061075128614902496, + "loss": 0.2765, + "rejected_geometric_mean": -7.558255195617676, + "step": 6172 + }, + { + "chosen_geometric_mean": -1.025876760482788, + "epoch": 1.53, + "grad_norm": 4.34375, + "learning_rate": 6.568431955698204e-07, + "log_odds": 4.1827168464660645, + "log_odds_ratio": -0.20521706342697144, + "loss": 0.2656, + "rejected_geometric_mean": -4.911194801330566, + "step": 6173 + }, + { + "chosen_geometric_mean": -1.3477030992507935, + "epoch": 1.53, + "grad_norm": 11.125, + "learning_rate": 6.561856508444897e-07, + "log_odds": 7.684525012969971, + "log_odds_ratio": -0.08747491240501404, + "loss": 0.2806, + "rejected_geometric_mean": -8.748739242553711, + "step": 6174 + }, + { + "chosen_geometric_mean": -0.8877643346786499, + "epoch": 1.53, + "grad_norm": 11.875, + "learning_rate": 6.555283856856731e-07, + "log_odds": 12.213162422180176, + "log_odds_ratio": -0.058983832597732544, + "loss": 0.297, + "rejected_geometric_mean": -12.611452102661133, + "step": 6175 + }, + { + "chosen_geometric_mean": -1.1513913869857788, + "epoch": 1.53, + "grad_norm": 8.5, + "learning_rate": 6.548714001930284e-07, + "log_odds": 10.74705982208252, + "log_odds_ratio": -0.04820114001631737, + "loss": 0.2761, + "rejected_geometric_mean": -11.477767944335938, + "step": 6176 + }, + { + "chosen_geometric_mean": -1.2010672092437744, + "epoch": 1.53, + "grad_norm": 3.5625, + "learning_rate": 6.5421469446617e-07, + "log_odds": 5.36405086517334, + "log_odds_ratio": -0.22168146073818207, + "loss": 0.3112, + "rejected_geometric_mean": -6.301599502563477, + "step": 6177 + }, + { + "chosen_geometric_mean": -1.1260838508605957, + "epoch": 1.53, + "grad_norm": 1.96875, + "learning_rate": 6.535582686046691e-07, + "log_odds": 12.083734512329102, + "log_odds_ratio": -0.004377218894660473, + "loss": 0.2346, + "rejected_geometric_mean": -12.797226905822754, + "step": 6178 + }, + { + "chosen_geometric_mean": -1.122817873954773, + "epoch": 1.53, + "grad_norm": 6.65625, + "learning_rate": 6.529021227080564e-07, + "log_odds": 10.850818634033203, + "log_odds_ratio": -0.007336248178035021, + "loss": 0.3227, + "rejected_geometric_mean": -11.571126937866211, + "step": 6179 + }, + { + "chosen_geometric_mean": -0.879773736000061, + "epoch": 1.53, + "grad_norm": 83.5, + "learning_rate": 6.522462568758206e-07, + "log_odds": 2.566311836242676, + "log_odds_ratio": -0.14152392745018005, + "loss": 0.2994, + "rejected_geometric_mean": -3.0013575553894043, + "step": 6180 + }, + { + "chosen_geometric_mean": -1.052779197692871, + "epoch": 1.53, + "grad_norm": 4.28125, + "learning_rate": 6.515906712074047e-07, + "log_odds": 5.204607963562012, + "log_odds_ratio": -0.1261066198348999, + "loss": 0.2243, + "rejected_geometric_mean": -5.806063175201416, + "step": 6181 + }, + { + "chosen_geometric_mean": -1.1662442684173584, + "epoch": 1.53, + "grad_norm": 12.9375, + "learning_rate": 6.509353658022127e-07, + "log_odds": 1.8026427030563354, + "log_odds_ratio": -0.28848138451576233, + "loss": 0.2716, + "rejected_geometric_mean": -2.7046327590942383, + "step": 6182 + }, + { + "chosen_geometric_mean": -0.8966230750083923, + "epoch": 1.53, + "grad_norm": 26.0, + "learning_rate": 6.50280340759604e-07, + "log_odds": 12.177661895751953, + "log_odds_ratio": -0.0011985921300947666, + "loss": 0.2313, + "rejected_geometric_mean": -12.545624732971191, + "step": 6183 + }, + { + "chosen_geometric_mean": -1.2322874069213867, + "epoch": 1.53, + "grad_norm": 2.421875, + "learning_rate": 6.496255961788947e-07, + "log_odds": 4.531904697418213, + "log_odds_ratio": -0.2011551558971405, + "loss": 0.2963, + "rejected_geometric_mean": -5.502676963806152, + "step": 6184 + }, + { + "chosen_geometric_mean": -0.8842349648475647, + "epoch": 1.53, + "grad_norm": 5.125, + "learning_rate": 6.489711321593628e-07, + "log_odds": 9.362751007080078, + "log_odds_ratio": -0.15905825793743134, + "loss": 0.2523, + "rejected_geometric_mean": -9.815069198608398, + "step": 6185 + }, + { + "chosen_geometric_mean": -1.144158124923706, + "epoch": 1.53, + "grad_norm": 58.0, + "learning_rate": 6.483169488002394e-07, + "log_odds": 7.362290382385254, + "log_odds_ratio": -0.10499252378940582, + "loss": 0.3195, + "rejected_geometric_mean": -8.191946983337402, + "step": 6186 + }, + { + "chosen_geometric_mean": -1.122934341430664, + "epoch": 1.53, + "grad_norm": 13.0625, + "learning_rate": 6.476630462007133e-07, + "log_odds": 8.327022552490234, + "log_odds_ratio": -0.1858653575181961, + "loss": 0.2491, + "rejected_geometric_mean": -9.054709434509277, + "step": 6187 + }, + { + "chosen_geometric_mean": -0.9734897017478943, + "epoch": 1.53, + "grad_norm": 3.140625, + "learning_rate": 6.470094244599334e-07, + "log_odds": 3.4715237617492676, + "log_odds_ratio": -0.06549365073442459, + "loss": 0.2625, + "rejected_geometric_mean": -3.9974284172058105, + "step": 6188 + }, + { + "chosen_geometric_mean": -0.8607712984085083, + "epoch": 1.53, + "grad_norm": 2.40625, + "learning_rate": 6.46356083677003e-07, + "log_odds": 9.945130348205566, + "log_odds_ratio": -0.14092300832271576, + "loss": 0.2396, + "rejected_geometric_mean": -10.278064727783203, + "step": 6189 + }, + { + "chosen_geometric_mean": -0.8131383657455444, + "epoch": 1.53, + "grad_norm": 5.40625, + "learning_rate": 6.457030239509854e-07, + "log_odds": 4.606978893280029, + "log_odds_ratio": -0.2476176768541336, + "loss": 0.2277, + "rejected_geometric_mean": -4.989137172698975, + "step": 6190 + }, + { + "chosen_geometric_mean": -0.9429183006286621, + "epoch": 1.53, + "grad_norm": 7.53125, + "learning_rate": 6.450502453809001e-07, + "log_odds": 9.476271629333496, + "log_odds_ratio": -0.0028489050455391407, + "loss": 0.2532, + "rejected_geometric_mean": -9.892004013061523, + "step": 6191 + }, + { + "chosen_geometric_mean": -0.8878937363624573, + "epoch": 1.53, + "grad_norm": 33.5, + "learning_rate": 6.443977480657232e-07, + "log_odds": 7.566629886627197, + "log_odds_ratio": -0.10544810444116592, + "loss": 0.2848, + "rejected_geometric_mean": -8.004064559936523, + "step": 6192 + }, + { + "chosen_geometric_mean": -1.1090673208236694, + "epoch": 1.53, + "grad_norm": 19.625, + "learning_rate": 6.437455321043898e-07, + "log_odds": 5.616553783416748, + "log_odds_ratio": -0.24931147694587708, + "loss": 0.269, + "rejected_geometric_mean": -6.464827537536621, + "step": 6193 + }, + { + "chosen_geometric_mean": -0.8854265213012695, + "epoch": 1.53, + "grad_norm": 7.375, + "learning_rate": 6.430935975957913e-07, + "log_odds": 5.453248500823975, + "log_odds_ratio": -0.23821118474006653, + "loss": 0.2952, + "rejected_geometric_mean": -5.976290225982666, + "step": 6194 + }, + { + "chosen_geometric_mean": -0.7564077377319336, + "epoch": 1.53, + "grad_norm": 7.46875, + "learning_rate": 6.424419446387758e-07, + "log_odds": 8.226923942565918, + "log_odds_ratio": -0.03419633209705353, + "loss": 0.226, + "rejected_geometric_mean": -8.37950611114502, + "step": 6195 + }, + { + "chosen_geometric_mean": -0.9392813444137573, + "epoch": 1.53, + "grad_norm": 3.078125, + "learning_rate": 6.417905733321503e-07, + "log_odds": 10.789263725280762, + "log_odds_ratio": -0.07442012429237366, + "loss": 0.2638, + "rejected_geometric_mean": -11.248093605041504, + "step": 6196 + }, + { + "chosen_geometric_mean": -1.1566540002822876, + "epoch": 1.53, + "grad_norm": 9.625, + "learning_rate": 6.411394837746787e-07, + "log_odds": 5.657427787780762, + "log_odds_ratio": -0.01669112965464592, + "loss": 0.2613, + "rejected_geometric_mean": -6.408775329589844, + "step": 6197 + }, + { + "chosen_geometric_mean": -0.9928619861602783, + "epoch": 1.53, + "grad_norm": 2.796875, + "learning_rate": 6.404886760650806e-07, + "log_odds": 8.482053756713867, + "log_odds_ratio": -0.11604595184326172, + "loss": 0.2647, + "rejected_geometric_mean": -9.079116821289062, + "step": 6198 + }, + { + "chosen_geometric_mean": -0.8177382946014404, + "epoch": 1.53, + "grad_norm": 2.609375, + "learning_rate": 6.398381503020354e-07, + "log_odds": 2.5607237815856934, + "log_odds_ratio": -0.26497575640678406, + "loss": 0.2187, + "rejected_geometric_mean": -3.0046746730804443, + "step": 6199 + }, + { + "chosen_geometric_mean": -1.0103304386138916, + "epoch": 1.54, + "grad_norm": 5.0, + "learning_rate": 6.391879065841777e-07, + "log_odds": 6.6812214851379395, + "log_odds_ratio": -0.26487964391708374, + "loss": 0.2726, + "rejected_geometric_mean": -7.3996171951293945, + "step": 6200 + }, + { + "chosen_geometric_mean": -1.170741319656372, + "epoch": 1.54, + "grad_norm": 2.3125, + "learning_rate": 6.38537945010099e-07, + "log_odds": 4.025711536407471, + "log_odds_ratio": -0.20645540952682495, + "loss": 0.2632, + "rejected_geometric_mean": -4.900312423706055, + "step": 6201 + }, + { + "chosen_geometric_mean": -0.9290640354156494, + "epoch": 1.54, + "grad_norm": 2.484375, + "learning_rate": 6.378882656783514e-07, + "log_odds": 9.105655670166016, + "log_odds_ratio": -0.10579729825258255, + "loss": 0.2724, + "rejected_geometric_mean": -9.60318374633789, + "step": 6202 + }, + { + "chosen_geometric_mean": -1.2395154237747192, + "epoch": 1.54, + "grad_norm": 11.75, + "learning_rate": 6.372388686874409e-07, + "log_odds": 10.831805229187012, + "log_odds_ratio": -0.0171279925853014, + "loss": 0.266, + "rejected_geometric_mean": -11.666923522949219, + "step": 6203 + }, + { + "chosen_geometric_mean": -1.1487326622009277, + "epoch": 1.54, + "grad_norm": 22.875, + "learning_rate": 6.365897541358307e-07, + "log_odds": 8.887199401855469, + "log_odds_ratio": -0.15800827741622925, + "loss": 0.2905, + "rejected_geometric_mean": -9.729269027709961, + "step": 6204 + }, + { + "chosen_geometric_mean": -0.8831887245178223, + "epoch": 1.54, + "grad_norm": 1.8203125, + "learning_rate": 6.359409221219437e-07, + "log_odds": 11.641530990600586, + "log_odds_ratio": -0.008349807001650333, + "loss": 0.2177, + "rejected_geometric_mean": -11.963485717773438, + "step": 6205 + }, + { + "chosen_geometric_mean": -1.0577749013900757, + "epoch": 1.54, + "grad_norm": 11.625, + "learning_rate": 6.352923727441568e-07, + "log_odds": 9.309001922607422, + "log_odds_ratio": -0.18154852092266083, + "loss": 0.268, + "rejected_geometric_mean": -9.987653732299805, + "step": 6206 + }, + { + "chosen_geometric_mean": -1.296362280845642, + "epoch": 1.54, + "grad_norm": 15.4375, + "learning_rate": 6.346441061008063e-07, + "log_odds": 1.1922509670257568, + "log_odds_ratio": -0.27979469299316406, + "loss": 0.2999, + "rejected_geometric_mean": -2.2807934284210205, + "step": 6207 + }, + { + "chosen_geometric_mean": -0.8742795586585999, + "epoch": 1.54, + "grad_norm": 23.625, + "learning_rate": 6.339961222901861e-07, + "log_odds": 5.828732490539551, + "log_odds_ratio": -0.007979393005371094, + "loss": 0.2542, + "rejected_geometric_mean": -6.139828681945801, + "step": 6208 + }, + { + "chosen_geometric_mean": -1.1209428310394287, + "epoch": 1.54, + "grad_norm": 14.25, + "learning_rate": 6.333484214105454e-07, + "log_odds": 10.906793594360352, + "log_odds_ratio": -0.0001901490322779864, + "loss": 0.262, + "rejected_geometric_mean": -11.63090705871582, + "step": 6209 + }, + { + "chosen_geometric_mean": -1.0260587930679321, + "epoch": 1.54, + "grad_norm": 5.46875, + "learning_rate": 6.327010035600898e-07, + "log_odds": 2.990452289581299, + "log_odds_ratio": -0.23931433260440826, + "loss": 0.2612, + "rejected_geometric_mean": -3.7237534523010254, + "step": 6210 + }, + { + "chosen_geometric_mean": -1.0261718034744263, + "epoch": 1.54, + "grad_norm": 7.09375, + "learning_rate": 6.320538688369853e-07, + "log_odds": 3.2154393196105957, + "log_odds_ratio": -0.16198226809501648, + "loss": 0.273, + "rejected_geometric_mean": -3.8863325119018555, + "step": 6211 + }, + { + "chosen_geometric_mean": -0.9877878427505493, + "epoch": 1.54, + "grad_norm": 3.421875, + "learning_rate": 6.314070173393519e-07, + "log_odds": 7.865127086639404, + "log_odds_ratio": -0.007163272704929113, + "loss": 0.268, + "rejected_geometric_mean": -8.389259338378906, + "step": 6212 + }, + { + "chosen_geometric_mean": -1.0873008966445923, + "epoch": 1.54, + "grad_norm": 2.359375, + "learning_rate": 6.30760449165268e-07, + "log_odds": 8.013876914978027, + "log_odds_ratio": -0.289355993270874, + "loss": 0.2578, + "rejected_geometric_mean": -8.840478897094727, + "step": 6213 + }, + { + "chosen_geometric_mean": -1.0730527639389038, + "epoch": 1.54, + "grad_norm": 35.0, + "learning_rate": 6.301141644127701e-07, + "log_odds": 12.913084983825684, + "log_odds_ratio": -0.08119446039199829, + "loss": 0.2893, + "rejected_geometric_mean": -13.604876518249512, + "step": 6214 + }, + { + "chosen_geometric_mean": -1.0524002313613892, + "epoch": 1.54, + "grad_norm": 2.09375, + "learning_rate": 6.294681631798483e-07, + "log_odds": 2.652278184890747, + "log_odds_ratio": -0.25924837589263916, + "loss": 0.2453, + "rejected_geometric_mean": -3.363417625427246, + "step": 6215 + }, + { + "chosen_geometric_mean": -1.1424589157104492, + "epoch": 1.54, + "grad_norm": 1.96875, + "learning_rate": 6.28822445564454e-07, + "log_odds": 11.600724220275879, + "log_odds_ratio": -4.440731572685763e-05, + "loss": 0.2571, + "rejected_geometric_mean": -12.346521377563477, + "step": 6216 + }, + { + "chosen_geometric_mean": -0.8735841512680054, + "epoch": 1.54, + "grad_norm": 4.40625, + "learning_rate": 6.28177011664492e-07, + "log_odds": 3.7489254474639893, + "log_odds_ratio": -0.0884842574596405, + "loss": 0.2721, + "rejected_geometric_mean": -4.106400966644287, + "step": 6217 + }, + { + "chosen_geometric_mean": -1.2187408208847046, + "epoch": 1.54, + "grad_norm": 7.59375, + "learning_rate": 6.275318615778258e-07, + "log_odds": 0.4963464140892029, + "log_odds_ratio": -0.48065996170043945, + "loss": 0.2828, + "rejected_geometric_mean": -1.5895676612854004, + "step": 6218 + }, + { + "chosen_geometric_mean": -1.5671319961547852, + "epoch": 1.54, + "grad_norm": 41.25, + "learning_rate": 6.268869954022768e-07, + "log_odds": 5.2337212562561035, + "log_odds_ratio": -0.16958776116371155, + "loss": 0.3139, + "rejected_geometric_mean": -6.409735202789307, + "step": 6219 + }, + { + "chosen_geometric_mean": -1.079376459121704, + "epoch": 1.54, + "grad_norm": 4.09375, + "learning_rate": 6.262424132356215e-07, + "log_odds": 11.283008575439453, + "log_odds_ratio": -0.0008763864752836525, + "loss": 0.255, + "rejected_geometric_mean": -11.945379257202148, + "step": 6220 + }, + { + "chosen_geometric_mean": -1.301939845085144, + "epoch": 1.54, + "grad_norm": 3.40625, + "learning_rate": 6.25598115175593e-07, + "log_odds": 3.2193968296051025, + "log_odds_ratio": -0.26384595036506653, + "loss": 0.2814, + "rejected_geometric_mean": -4.287981986999512, + "step": 6221 + }, + { + "chosen_geometric_mean": -1.0194330215454102, + "epoch": 1.54, + "grad_norm": 26.375, + "learning_rate": 6.249541013198837e-07, + "log_odds": 4.910477638244629, + "log_odds_ratio": -0.08903581649065018, + "loss": 0.2142, + "rejected_geometric_mean": -5.511723518371582, + "step": 6222 + }, + { + "chosen_geometric_mean": -0.9212368130683899, + "epoch": 1.54, + "grad_norm": 37.75, + "learning_rate": 6.243103717661408e-07, + "log_odds": 2.5767147541046143, + "log_odds_ratio": -0.2587606906890869, + "loss": 0.2805, + "rejected_geometric_mean": -3.193718433380127, + "step": 6223 + }, + { + "chosen_geometric_mean": -1.176830768585205, + "epoch": 1.54, + "grad_norm": 16.75, + "learning_rate": 6.236669266119691e-07, + "log_odds": 13.551898002624512, + "log_odds_ratio": -1.710707147140056e-05, + "loss": 0.2566, + "rejected_geometric_mean": -14.254000663757324, + "step": 6224 + }, + { + "chosen_geometric_mean": -0.8014987707138062, + "epoch": 1.54, + "grad_norm": 2.90625, + "learning_rate": 6.230237659549318e-07, + "log_odds": 10.722295761108398, + "log_odds_ratio": -0.010630477219820023, + "loss": 0.2092, + "rejected_geometric_mean": -10.929435729980469, + "step": 6225 + }, + { + "chosen_geometric_mean": -1.007954716682434, + "epoch": 1.54, + "grad_norm": 13.1875, + "learning_rate": 6.223808898925459e-07, + "log_odds": 7.181632995605469, + "log_odds_ratio": -0.0940483957529068, + "loss": 0.2515, + "rejected_geometric_mean": -7.771320343017578, + "step": 6226 + }, + { + "chosen_geometric_mean": -1.0099648237228394, + "epoch": 1.54, + "grad_norm": 3.125, + "learning_rate": 6.21738298522287e-07, + "log_odds": 2.175657272338867, + "log_odds_ratio": -0.240949347615242, + "loss": 0.2716, + "rejected_geometric_mean": -2.8788623809814453, + "step": 6227 + }, + { + "chosen_geometric_mean": -1.0587756633758545, + "epoch": 1.54, + "grad_norm": 2.3125, + "learning_rate": 6.210959919415874e-07, + "log_odds": 7.302731990814209, + "log_odds_ratio": -0.19738426804542542, + "loss": 0.239, + "rejected_geometric_mean": -8.029526710510254, + "step": 6228 + }, + { + "chosen_geometric_mean": -1.4013981819152832, + "epoch": 1.54, + "grad_norm": 26.75, + "learning_rate": 6.204539702478374e-07, + "log_odds": 3.169515609741211, + "log_odds_ratio": -0.27930790185928345, + "loss": 0.3207, + "rejected_geometric_mean": -4.373570442199707, + "step": 6229 + }, + { + "chosen_geometric_mean": -1.068912148475647, + "epoch": 1.54, + "grad_norm": 4.1875, + "learning_rate": 6.19812233538381e-07, + "log_odds": 12.679597854614258, + "log_odds_ratio": -0.00014948238094802946, + "loss": 0.2509, + "rejected_geometric_mean": -13.31125545501709, + "step": 6230 + }, + { + "chosen_geometric_mean": -1.1190180778503418, + "epoch": 1.54, + "grad_norm": 27.0, + "learning_rate": 6.191707819105228e-07, + "log_odds": 2.5836057662963867, + "log_odds_ratio": -0.14195263385772705, + "loss": 0.2816, + "rejected_geometric_mean": -3.3440370559692383, + "step": 6231 + }, + { + "chosen_geometric_mean": -0.93301922082901, + "epoch": 1.54, + "grad_norm": 2.984375, + "learning_rate": 6.185296154615211e-07, + "log_odds": 11.686120986938477, + "log_odds_ratio": -0.03744509071111679, + "loss": 0.286, + "rejected_geometric_mean": -12.141510009765625, + "step": 6232 + }, + { + "chosen_geometric_mean": -1.0140340328216553, + "epoch": 1.54, + "grad_norm": 7.125, + "learning_rate": 6.17888734288592e-07, + "log_odds": 8.974289894104004, + "log_odds_ratio": -0.14704373478889465, + "loss": 0.2578, + "rejected_geometric_mean": -9.5992431640625, + "step": 6233 + }, + { + "chosen_geometric_mean": -1.0005629062652588, + "epoch": 1.54, + "grad_norm": 8.75, + "learning_rate": 6.172481384889084e-07, + "log_odds": 5.0610222816467285, + "log_odds_ratio": -0.22440490126609802, + "loss": 0.2542, + "rejected_geometric_mean": -5.686450004577637, + "step": 6234 + }, + { + "chosen_geometric_mean": -1.0956106185913086, + "epoch": 1.54, + "grad_norm": 9.1875, + "learning_rate": 6.166078281596016e-07, + "log_odds": 9.475597381591797, + "log_odds_ratio": -0.05230061337351799, + "loss": 0.226, + "rejected_geometric_mean": -10.178217887878418, + "step": 6235 + }, + { + "chosen_geometric_mean": -0.9490762948989868, + "epoch": 1.54, + "grad_norm": 10.4375, + "learning_rate": 6.159678033977559e-07, + "log_odds": 3.7640912532806396, + "log_odds_ratio": -0.18597093224525452, + "loss": 0.2651, + "rejected_geometric_mean": -4.274338245391846, + "step": 6236 + }, + { + "chosen_geometric_mean": -0.8163474202156067, + "epoch": 1.54, + "grad_norm": 3.34375, + "learning_rate": 6.153280643004158e-07, + "log_odds": 9.28711986541748, + "log_odds_ratio": -0.1613655984401703, + "loss": 0.264, + "rejected_geometric_mean": -9.586406707763672, + "step": 6237 + }, + { + "chosen_geometric_mean": -1.0140269994735718, + "epoch": 1.54, + "grad_norm": 2.28125, + "learning_rate": 6.146886109645803e-07, + "log_odds": 2.840726375579834, + "log_odds_ratio": -0.32362374663352966, + "loss": 0.2562, + "rejected_geometric_mean": -3.5617480278015137, + "step": 6238 + }, + { + "chosen_geometric_mean": -1.1694848537445068, + "epoch": 1.54, + "grad_norm": 2.015625, + "learning_rate": 6.140494434872063e-07, + "log_odds": 1.7465592622756958, + "log_odds_ratio": -0.2493383288383484, + "loss": 0.2418, + "rejected_geometric_mean": -2.687185287475586, + "step": 6239 + }, + { + "chosen_geometric_mean": -0.9451684355735779, + "epoch": 1.54, + "grad_norm": 14.0625, + "learning_rate": 6.134105619652073e-07, + "log_odds": 0.9290950298309326, + "log_odds_ratio": -0.3735888600349426, + "loss": 0.2463, + "rejected_geometric_mean": -1.5910162925720215, + "step": 6240 + }, + { + "chosen_geometric_mean": -1.1452504396438599, + "epoch": 1.55, + "grad_norm": 55.75, + "learning_rate": 6.127719664954521e-07, + "log_odds": 7.965916156768799, + "log_odds_ratio": -0.022022590041160583, + "loss": 0.2581, + "rejected_geometric_mean": -8.723973274230957, + "step": 6241 + }, + { + "chosen_geometric_mean": -1.0795843601226807, + "epoch": 1.55, + "grad_norm": 9.8125, + "learning_rate": 6.121336571747682e-07, + "log_odds": 10.95167350769043, + "log_odds_ratio": -0.04408527538180351, + "loss": 0.2636, + "rejected_geometric_mean": -11.556509017944336, + "step": 6242 + }, + { + "chosen_geometric_mean": -0.8560720682144165, + "epoch": 1.55, + "grad_norm": 8.8125, + "learning_rate": 6.114956340999384e-07, + "log_odds": 3.3982553482055664, + "log_odds_ratio": -0.25745218992233276, + "loss": 0.2706, + "rejected_geometric_mean": -3.874882936477661, + "step": 6243 + }, + { + "chosen_geometric_mean": -0.8617008328437805, + "epoch": 1.55, + "grad_norm": 9.3125, + "learning_rate": 6.108578973677009e-07, + "log_odds": 2.5696403980255127, + "log_odds_ratio": -0.2639697194099426, + "loss": 0.2417, + "rejected_geometric_mean": -3.0928494930267334, + "step": 6244 + }, + { + "chosen_geometric_mean": -0.9974082112312317, + "epoch": 1.55, + "grad_norm": 6.4375, + "learning_rate": 6.102204470747534e-07, + "log_odds": 7.515807628631592, + "log_odds_ratio": -0.03964705765247345, + "loss": 0.2352, + "rejected_geometric_mean": -8.061470031738281, + "step": 6245 + }, + { + "chosen_geometric_mean": -1.0392903089523315, + "epoch": 1.55, + "grad_norm": 4.21875, + "learning_rate": 6.095832833177484e-07, + "log_odds": 3.2921621799468994, + "log_odds_ratio": -0.1902766227722168, + "loss": 0.2587, + "rejected_geometric_mean": -4.012852668762207, + "step": 6246 + }, + { + "chosen_geometric_mean": -0.7683805227279663, + "epoch": 1.55, + "grad_norm": 5.9375, + "learning_rate": 6.089464061932945e-07, + "log_odds": 4.811224937438965, + "log_odds_ratio": -0.15110301971435547, + "loss": 0.2498, + "rejected_geometric_mean": -5.003907680511475, + "step": 6247 + }, + { + "chosen_geometric_mean": -0.7378065586090088, + "epoch": 1.55, + "grad_norm": 5.03125, + "learning_rate": 6.083098157979591e-07, + "log_odds": 11.685080528259277, + "log_odds_ratio": -0.006031775381416082, + "loss": 0.2318, + "rejected_geometric_mean": -11.739888191223145, + "step": 6248 + }, + { + "chosen_geometric_mean": -0.9735363721847534, + "epoch": 1.55, + "grad_norm": 1.9375, + "learning_rate": 6.076735122282634e-07, + "log_odds": 7.031648635864258, + "log_odds_ratio": -0.11899472773075104, + "loss": 0.2623, + "rejected_geometric_mean": -7.538437843322754, + "step": 6249 + }, + { + "chosen_geometric_mean": -0.9899756908416748, + "epoch": 1.55, + "grad_norm": 8.5, + "learning_rate": 6.070374955806852e-07, + "log_odds": 1.867624044418335, + "log_odds_ratio": -0.3613687753677368, + "loss": 0.301, + "rejected_geometric_mean": -2.6111972332000732, + "step": 6250 + }, + { + "chosen_geometric_mean": -0.940338671207428, + "epoch": 1.55, + "grad_norm": 1.8125, + "learning_rate": 6.064017659516625e-07, + "log_odds": 15.189065933227539, + "log_odds_ratio": -0.00022055988665670156, + "loss": 0.2248, + "rejected_geometric_mean": -15.621543884277344, + "step": 6251 + }, + { + "chosen_geometric_mean": -0.8581480979919434, + "epoch": 1.55, + "grad_norm": 7.03125, + "learning_rate": 6.057663234375857e-07, + "log_odds": 16.824338912963867, + "log_odds_ratio": -0.0005071983323432505, + "loss": 0.2446, + "rejected_geometric_mean": -17.11783790588379, + "step": 6252 + }, + { + "chosen_geometric_mean": -1.012296438217163, + "epoch": 1.55, + "grad_norm": 2.4375, + "learning_rate": 6.05131168134803e-07, + "log_odds": 4.1752777099609375, + "log_odds_ratio": -0.07575826346874237, + "loss": 0.2645, + "rejected_geometric_mean": -4.774599075317383, + "step": 6253 + }, + { + "chosen_geometric_mean": -1.0522161722183228, + "epoch": 1.55, + "grad_norm": 5.46875, + "learning_rate": 6.044963001396198e-07, + "log_odds": 6.878336429595947, + "log_odds_ratio": -0.22261321544647217, + "loss": 0.2897, + "rejected_geometric_mean": -7.556858539581299, + "step": 6254 + }, + { + "chosen_geometric_mean": -0.90578293800354, + "epoch": 1.55, + "grad_norm": 41.25, + "learning_rate": 6.038617195482965e-07, + "log_odds": 4.997945785522461, + "log_odds_ratio": -0.20598658919334412, + "loss": 0.2572, + "rejected_geometric_mean": -5.483620643615723, + "step": 6255 + }, + { + "chosen_geometric_mean": -1.031475305557251, + "epoch": 1.55, + "grad_norm": 3.25, + "learning_rate": 6.032274264570512e-07, + "log_odds": 4.932662487030029, + "log_odds_ratio": -0.16484057903289795, + "loss": 0.238, + "rejected_geometric_mean": -5.6172380447387695, + "step": 6256 + }, + { + "chosen_geometric_mean": -0.9355114102363586, + "epoch": 1.55, + "grad_norm": 2.234375, + "learning_rate": 6.025934209620588e-07, + "log_odds": 15.514188766479492, + "log_odds_ratio": -0.0018632826395332813, + "loss": 0.2504, + "rejected_geometric_mean": -15.93013858795166, + "step": 6257 + }, + { + "chosen_geometric_mean": -1.7210826873779297, + "epoch": 1.55, + "grad_norm": 27.625, + "learning_rate": 6.01959703159449e-07, + "log_odds": 12.485492706298828, + "log_odds_ratio": -0.13160008192062378, + "loss": 0.3416, + "rejected_geometric_mean": -13.967073440551758, + "step": 6258 + }, + { + "chosen_geometric_mean": -1.0172183513641357, + "epoch": 1.55, + "grad_norm": 1.8984375, + "learning_rate": 6.013262731453076e-07, + "log_odds": 7.103445053100586, + "log_odds_ratio": -0.05664464831352234, + "loss": 0.2448, + "rejected_geometric_mean": -7.704832077026367, + "step": 6259 + }, + { + "chosen_geometric_mean": -0.9213602542877197, + "epoch": 1.55, + "grad_norm": 3.078125, + "learning_rate": 6.006931310156799e-07, + "log_odds": 3.630401372909546, + "log_odds_ratio": -0.1527085304260254, + "loss": 0.2576, + "rejected_geometric_mean": -4.138535499572754, + "step": 6260 + }, + { + "chosen_geometric_mean": -0.9378657937049866, + "epoch": 1.55, + "grad_norm": 1.9375, + "learning_rate": 6.000602768665634e-07, + "log_odds": 5.345371246337891, + "log_odds_ratio": -0.013477369211614132, + "loss": 0.2242, + "rejected_geometric_mean": -5.777911186218262, + "step": 6261 + }, + { + "chosen_geometric_mean": -0.9407353401184082, + "epoch": 1.55, + "grad_norm": 62.5, + "learning_rate": 5.994277107939151e-07, + "log_odds": 9.95569133758545, + "log_odds_ratio": -0.2534642517566681, + "loss": 0.3224, + "rejected_geometric_mean": -10.621899604797363, + "step": 6262 + }, + { + "chosen_geometric_mean": -1.0347623825073242, + "epoch": 1.55, + "grad_norm": 1.7890625, + "learning_rate": 5.987954328936476e-07, + "log_odds": 6.315889358520508, + "log_odds_ratio": -0.047998059540987015, + "loss": 0.2491, + "rejected_geometric_mean": -6.936776638031006, + "step": 6263 + }, + { + "chosen_geometric_mean": -0.9151195287704468, + "epoch": 1.55, + "grad_norm": 20.25, + "learning_rate": 5.981634432616281e-07, + "log_odds": 7.668002605438232, + "log_odds_ratio": -0.1983027160167694, + "loss": 0.2487, + "rejected_geometric_mean": -8.214139938354492, + "step": 6264 + }, + { + "chosen_geometric_mean": -0.966702938079834, + "epoch": 1.55, + "grad_norm": 2.40625, + "learning_rate": 5.97531741993683e-07, + "log_odds": 3.677272319793701, + "log_odds_ratio": -0.06588428467512131, + "loss": 0.2551, + "rejected_geometric_mean": -4.171588897705078, + "step": 6265 + }, + { + "chosen_geometric_mean": -0.8914597034454346, + "epoch": 1.55, + "grad_norm": 24.125, + "learning_rate": 5.969003291855916e-07, + "log_odds": 8.431745529174805, + "log_odds_ratio": -0.002747736405581236, + "loss": 0.2387, + "rejected_geometric_mean": -8.781227111816406, + "step": 6266 + }, + { + "chosen_geometric_mean": -1.4860764741897583, + "epoch": 1.55, + "grad_norm": 30.875, + "learning_rate": 5.962692049330923e-07, + "log_odds": 3.675124406814575, + "log_odds_ratio": -0.2623138725757599, + "loss": 0.2571, + "rejected_geometric_mean": -4.975559234619141, + "step": 6267 + }, + { + "chosen_geometric_mean": -1.1201719045639038, + "epoch": 1.55, + "grad_norm": 2.0625, + "learning_rate": 5.956383693318791e-07, + "log_odds": 1.6097443103790283, + "log_odds_ratio": -0.33003202080726624, + "loss": 0.2684, + "rejected_geometric_mean": -2.5019490718841553, + "step": 6268 + }, + { + "chosen_geometric_mean": -0.9467689990997314, + "epoch": 1.55, + "grad_norm": 6.28125, + "learning_rate": 5.950078224776012e-07, + "log_odds": 10.44908332824707, + "log_odds_ratio": -0.2442975789308548, + "loss": 0.2518, + "rejected_geometric_mean": -11.042540550231934, + "step": 6269 + }, + { + "chosen_geometric_mean": -1.0262959003448486, + "epoch": 1.55, + "grad_norm": 2.15625, + "learning_rate": 5.943775644658642e-07, + "log_odds": 9.74270248413086, + "log_odds_ratio": -0.02660755254328251, + "loss": 0.2568, + "rejected_geometric_mean": -10.324278831481934, + "step": 6270 + }, + { + "chosen_geometric_mean": -1.2256077527999878, + "epoch": 1.55, + "grad_norm": 32.75, + "learning_rate": 5.937475953922314e-07, + "log_odds": 7.356338024139404, + "log_odds_ratio": -0.14680764079093933, + "loss": 0.2917, + "rejected_geometric_mean": -8.29747200012207, + "step": 6271 + }, + { + "chosen_geometric_mean": -0.8964639902114868, + "epoch": 1.55, + "grad_norm": 18.625, + "learning_rate": 5.931179153522199e-07, + "log_odds": 5.888848304748535, + "log_odds_ratio": -0.14881554245948792, + "loss": 0.314, + "rejected_geometric_mean": -6.377964019775391, + "step": 6272 + }, + { + "chosen_geometric_mean": -1.09745192527771, + "epoch": 1.55, + "grad_norm": 8.625, + "learning_rate": 5.924885244413054e-07, + "log_odds": 5.423371315002441, + "log_odds_ratio": -0.2764781415462494, + "loss": 0.2784, + "rejected_geometric_mean": -6.2796430587768555, + "step": 6273 + }, + { + "chosen_geometric_mean": -1.0007672309875488, + "epoch": 1.55, + "grad_norm": 10.1875, + "learning_rate": 5.918594227549187e-07, + "log_odds": 6.711174964904785, + "log_odds_ratio": -0.4920337498188019, + "loss": 0.2668, + "rejected_geometric_mean": -7.611208915710449, + "step": 6274 + }, + { + "chosen_geometric_mean": -0.9555980563163757, + "epoch": 1.55, + "grad_norm": 1.9375, + "learning_rate": 5.912306103884463e-07, + "log_odds": 6.89729642868042, + "log_odds_ratio": -0.1582740694284439, + "loss": 0.2436, + "rejected_geometric_mean": -7.475679874420166, + "step": 6275 + }, + { + "chosen_geometric_mean": -0.7929933667182922, + "epoch": 1.55, + "grad_norm": 8.0, + "learning_rate": 5.90602087437231e-07, + "log_odds": 10.040663719177246, + "log_odds_ratio": -0.04209558293223381, + "loss": 0.271, + "rejected_geometric_mean": -10.227477073669434, + "step": 6276 + }, + { + "chosen_geometric_mean": -1.083179235458374, + "epoch": 1.55, + "grad_norm": 4.4375, + "learning_rate": 5.899738539965718e-07, + "log_odds": 5.136664867401123, + "log_odds_ratio": -0.08243949711322784, + "loss": 0.2213, + "rejected_geometric_mean": -5.830521583557129, + "step": 6277 + }, + { + "chosen_geometric_mean": -1.1420572996139526, + "epoch": 1.55, + "grad_norm": 4.15625, + "learning_rate": 5.893459101617255e-07, + "log_odds": 4.4602251052856445, + "log_odds_ratio": -0.11226523667573929, + "loss": 0.2784, + "rejected_geometric_mean": -5.280709266662598, + "step": 6278 + }, + { + "chosen_geometric_mean": -0.9282435178756714, + "epoch": 1.55, + "grad_norm": 7.5625, + "learning_rate": 5.887182560279015e-07, + "log_odds": 5.402690887451172, + "log_odds_ratio": -0.2531469762325287, + "loss": 0.305, + "rejected_geometric_mean": -5.976728439331055, + "step": 6279 + }, + { + "chosen_geometric_mean": -0.8743792176246643, + "epoch": 1.55, + "grad_norm": 27.625, + "learning_rate": 5.880908916902689e-07, + "log_odds": 1.5651123523712158, + "log_odds_ratio": -0.28447994589805603, + "loss": 0.2102, + "rejected_geometric_mean": -2.1349010467529297, + "step": 6280 + }, + { + "chosen_geometric_mean": -0.9885382652282715, + "epoch": 1.56, + "grad_norm": 62.25, + "learning_rate": 5.874638172439497e-07, + "log_odds": 1.621307611465454, + "log_odds_ratio": -0.29957449436187744, + "loss": 0.4133, + "rejected_geometric_mean": -2.271326780319214, + "step": 6281 + }, + { + "chosen_geometric_mean": -0.9724436402320862, + "epoch": 1.56, + "grad_norm": 7.1875, + "learning_rate": 5.86837032784025e-07, + "log_odds": 8.931192398071289, + "log_odds_ratio": -0.03723318502306938, + "loss": 0.2003, + "rejected_geometric_mean": -9.435622215270996, + "step": 6282 + }, + { + "chosen_geometric_mean": -0.94660484790802, + "epoch": 1.56, + "grad_norm": 17.625, + "learning_rate": 5.862105384055289e-07, + "log_odds": 8.95292854309082, + "log_odds_ratio": -0.16896028816699982, + "loss": 0.2537, + "rejected_geometric_mean": -9.544562339782715, + "step": 6283 + }, + { + "chosen_geometric_mean": -1.1261987686157227, + "epoch": 1.56, + "grad_norm": 21.75, + "learning_rate": 5.855843342034542e-07, + "log_odds": 8.15275764465332, + "log_odds_ratio": -0.18355143070220947, + "loss": 0.3268, + "rejected_geometric_mean": -8.976205825805664, + "step": 6284 + }, + { + "chosen_geometric_mean": -1.3220030069351196, + "epoch": 1.56, + "grad_norm": 20.0, + "learning_rate": 5.849584202727471e-07, + "log_odds": 3.874697685241699, + "log_odds_ratio": -0.2118736356496811, + "loss": 0.2698, + "rejected_geometric_mean": -4.936086654663086, + "step": 6285 + }, + { + "chosen_geometric_mean": -0.6665326356887817, + "epoch": 1.56, + "grad_norm": 2.265625, + "learning_rate": 5.843327967083129e-07, + "log_odds": 5.879706382751465, + "log_odds_ratio": -0.16132421791553497, + "loss": 0.2811, + "rejected_geometric_mean": -5.997241973876953, + "step": 6286 + }, + { + "chosen_geometric_mean": -1.1000161170959473, + "epoch": 1.56, + "grad_norm": 7.53125, + "learning_rate": 5.837074636050094e-07, + "log_odds": 1.50417160987854, + "log_odds_ratio": -0.2831423282623291, + "loss": 0.2734, + "rejected_geometric_mean": -2.3176286220550537, + "step": 6287 + }, + { + "chosen_geometric_mean": -1.1403224468231201, + "epoch": 1.56, + "grad_norm": 20.625, + "learning_rate": 5.83082421057653e-07, + "log_odds": 16.11174774169922, + "log_odds_ratio": -0.03165711462497711, + "loss": 0.2544, + "rejected_geometric_mean": -16.855636596679688, + "step": 6288 + }, + { + "chosen_geometric_mean": -0.9435388445854187, + "epoch": 1.56, + "grad_norm": 46.0, + "learning_rate": 5.824576691610157e-07, + "log_odds": 3.9368271827697754, + "log_odds_ratio": -0.13343167304992676, + "loss": 0.2951, + "rejected_geometric_mean": -4.4832444190979, + "step": 6289 + }, + { + "chosen_geometric_mean": -0.8714510202407837, + "epoch": 1.56, + "grad_norm": 2.1875, + "learning_rate": 5.818332080098235e-07, + "log_odds": 9.724214553833008, + "log_odds_ratio": -0.2388468086719513, + "loss": 0.2624, + "rejected_geometric_mean": -10.145683288574219, + "step": 6290 + }, + { + "chosen_geometric_mean": -0.8049703240394592, + "epoch": 1.56, + "grad_norm": 1.8671875, + "learning_rate": 5.812090376987614e-07, + "log_odds": 8.498152732849121, + "log_odds_ratio": -0.027988292276859283, + "loss": 0.2298, + "rejected_geometric_mean": -8.72899341583252, + "step": 6291 + }, + { + "chosen_geometric_mean": -0.9361659288406372, + "epoch": 1.56, + "grad_norm": 10.5625, + "learning_rate": 5.805851583224675e-07, + "log_odds": 5.734046936035156, + "log_odds_ratio": -0.3415687382221222, + "loss": 0.2871, + "rejected_geometric_mean": -6.387340545654297, + "step": 6292 + }, + { + "chosen_geometric_mean": -0.9386919736862183, + "epoch": 1.56, + "grad_norm": 12.75, + "learning_rate": 5.799615699755367e-07, + "log_odds": 1.3274213075637817, + "log_odds_ratio": -0.3464556932449341, + "loss": 0.2394, + "rejected_geometric_mean": -1.9413650035858154, + "step": 6293 + }, + { + "chosen_geometric_mean": -0.8600941300392151, + "epoch": 1.56, + "grad_norm": 11.75, + "learning_rate": 5.793382727525201e-07, + "log_odds": 4.550358772277832, + "log_odds_ratio": -0.09945780038833618, + "loss": 0.2751, + "rejected_geometric_mean": -4.91959285736084, + "step": 6294 + }, + { + "chosen_geometric_mean": -1.011706829071045, + "epoch": 1.56, + "grad_norm": 18.25, + "learning_rate": 5.787152667479254e-07, + "log_odds": 2.505094051361084, + "log_odds_ratio": -0.3614639639854431, + "loss": 0.2516, + "rejected_geometric_mean": -3.290182113647461, + "step": 6295 + }, + { + "chosen_geometric_mean": -0.9554513692855835, + "epoch": 1.56, + "grad_norm": 3.984375, + "learning_rate": 5.780925520562142e-07, + "log_odds": 4.945939064025879, + "log_odds_ratio": -0.21105383336544037, + "loss": 0.263, + "rejected_geometric_mean": -5.490198135375977, + "step": 6296 + }, + { + "chosen_geometric_mean": -0.9019110202789307, + "epoch": 1.56, + "grad_norm": 16.125, + "learning_rate": 5.774701287718057e-07, + "log_odds": 8.973695755004883, + "log_odds_ratio": -0.008588350377976894, + "loss": 0.2548, + "rejected_geometric_mean": -9.331483840942383, + "step": 6297 + }, + { + "chosen_geometric_mean": -0.9654962420463562, + "epoch": 1.56, + "grad_norm": 77.5, + "learning_rate": 5.768479969890742e-07, + "log_odds": 3.566659688949585, + "log_odds_ratio": -0.15728063881397247, + "loss": 0.2834, + "rejected_geometric_mean": -4.136173725128174, + "step": 6298 + }, + { + "chosen_geometric_mean": -1.108988881111145, + "epoch": 1.56, + "grad_norm": 8.0, + "learning_rate": 5.76226156802348e-07, + "log_odds": 5.832399368286133, + "log_odds_ratio": -0.10312415659427643, + "loss": 0.233, + "rejected_geometric_mean": -6.603756427764893, + "step": 6299 + }, + { + "chosen_geometric_mean": -1.0582761764526367, + "epoch": 1.56, + "grad_norm": 6.96875, + "learning_rate": 5.75604608305916e-07, + "log_odds": 6.449307918548584, + "log_odds_ratio": -0.004926420282572508, + "loss": 0.2371, + "rejected_geometric_mean": -7.070901393890381, + "step": 6300 + }, + { + "chosen_geometric_mean": -0.8684717416763306, + "epoch": 1.56, + "grad_norm": 3.5625, + "learning_rate": 5.749833515940184e-07, + "log_odds": 9.951803207397461, + "log_odds_ratio": -0.20507769286632538, + "loss": 0.2511, + "rejected_geometric_mean": -10.33181095123291, + "step": 6301 + }, + { + "chosen_geometric_mean": -1.086883783340454, + "epoch": 1.56, + "grad_norm": 4.375, + "learning_rate": 5.743623867608522e-07, + "log_odds": 7.252949237823486, + "log_odds_ratio": -0.12145406007766724, + "loss": 0.245, + "rejected_geometric_mean": -7.922760963439941, + "step": 6302 + }, + { + "chosen_geometric_mean": -1.1226797103881836, + "epoch": 1.56, + "grad_norm": 14.5625, + "learning_rate": 5.737417139005713e-07, + "log_odds": 2.4380531311035156, + "log_odds_ratio": -0.19179722666740417, + "loss": 0.2459, + "rejected_geometric_mean": -3.295727252960205, + "step": 6303 + }, + { + "chosen_geometric_mean": -0.8565764427185059, + "epoch": 1.56, + "grad_norm": 40.0, + "learning_rate": 5.731213331072838e-07, + "log_odds": 10.73499870300293, + "log_odds_ratio": -0.09967765212059021, + "loss": 0.3342, + "rejected_geometric_mean": -11.104768753051758, + "step": 6304 + }, + { + "chosen_geometric_mean": -0.9359158277511597, + "epoch": 1.56, + "grad_norm": 4.6875, + "learning_rate": 5.725012444750549e-07, + "log_odds": 5.29491662979126, + "log_odds_ratio": -0.026206281036138535, + "loss": 0.274, + "rejected_geometric_mean": -5.746258735656738, + "step": 6305 + }, + { + "chosen_geometric_mean": -1.1027336120605469, + "epoch": 1.56, + "grad_norm": 2.015625, + "learning_rate": 5.718814480979054e-07, + "log_odds": 7.7884440422058105, + "log_odds_ratio": -0.010041086003184319, + "loss": 0.2462, + "rejected_geometric_mean": -8.478800773620605, + "step": 6306 + }, + { + "chosen_geometric_mean": -0.8963708877563477, + "epoch": 1.56, + "grad_norm": 10.0625, + "learning_rate": 5.712619440698111e-07, + "log_odds": 2.9064788818359375, + "log_odds_ratio": -0.29643407464027405, + "loss": 0.2809, + "rejected_geometric_mean": -3.5123584270477295, + "step": 6307 + }, + { + "chosen_geometric_mean": -0.905709981918335, + "epoch": 1.56, + "grad_norm": 2.5625, + "learning_rate": 5.706427324847022e-07, + "log_odds": 6.459295749664307, + "log_odds_ratio": -0.19233828783035278, + "loss": 0.2362, + "rejected_geometric_mean": -6.990391731262207, + "step": 6308 + }, + { + "chosen_geometric_mean": -0.9399533867835999, + "epoch": 1.56, + "grad_norm": 18.125, + "learning_rate": 5.700238134364683e-07, + "log_odds": 4.196114540100098, + "log_odds_ratio": -0.12278282642364502, + "loss": 0.2597, + "rejected_geometric_mean": -4.678736686706543, + "step": 6309 + }, + { + "chosen_geometric_mean": -1.2025344371795654, + "epoch": 1.56, + "grad_norm": 6.09375, + "learning_rate": 5.694051870189504e-07, + "log_odds": 1.4435863494873047, + "log_odds_ratio": -0.3267568349838257, + "loss": 0.2425, + "rejected_geometric_mean": -2.4448318481445312, + "step": 6310 + }, + { + "chosen_geometric_mean": -1.201391577720642, + "epoch": 1.56, + "grad_norm": 1.90625, + "learning_rate": 5.687868533259477e-07, + "log_odds": 6.276261329650879, + "log_odds_ratio": -0.0919438824057579, + "loss": 0.2523, + "rejected_geometric_mean": -7.141451358795166, + "step": 6311 + }, + { + "chosen_geometric_mean": -0.9639894962310791, + "epoch": 1.56, + "grad_norm": 2.515625, + "learning_rate": 5.681688124512158e-07, + "log_odds": 19.33737564086914, + "log_odds_ratio": -0.10220230370759964, + "loss": 0.2334, + "rejected_geometric_mean": -19.85410499572754, + "step": 6312 + }, + { + "chosen_geometric_mean": -1.0526387691497803, + "epoch": 1.56, + "grad_norm": 12.5625, + "learning_rate": 5.675510644884627e-07, + "log_odds": 3.536712884902954, + "log_odds_ratio": -0.376220166683197, + "loss": 0.2808, + "rejected_geometric_mean": -4.395048141479492, + "step": 6313 + }, + { + "chosen_geometric_mean": -1.0686982870101929, + "epoch": 1.56, + "grad_norm": 43.5, + "learning_rate": 5.669336095313552e-07, + "log_odds": 4.063085079193115, + "log_odds_ratio": -0.16400843858718872, + "loss": 0.2963, + "rejected_geometric_mean": -4.815338611602783, + "step": 6314 + }, + { + "chosen_geometric_mean": -0.8503407835960388, + "epoch": 1.56, + "grad_norm": 2.0, + "learning_rate": 5.663164476735136e-07, + "log_odds": 14.18498706817627, + "log_odds_ratio": -7.190468022599816e-05, + "loss": 0.2242, + "rejected_geometric_mean": -14.457695960998535, + "step": 6315 + }, + { + "chosen_geometric_mean": -0.9468377828598022, + "epoch": 1.56, + "grad_norm": 5.75, + "learning_rate": 5.656995790085132e-07, + "log_odds": 5.918545722961426, + "log_odds_ratio": -0.15904206037521362, + "loss": 0.2578, + "rejected_geometric_mean": -6.462344646453857, + "step": 6316 + }, + { + "chosen_geometric_mean": -1.1035007238388062, + "epoch": 1.56, + "grad_norm": 38.25, + "learning_rate": 5.650830036298888e-07, + "log_odds": 4.652861595153809, + "log_odds_ratio": -0.2398967444896698, + "loss": 0.2837, + "rejected_geometric_mean": -5.502442359924316, + "step": 6317 + }, + { + "chosen_geometric_mean": -0.7629460096359253, + "epoch": 1.56, + "grad_norm": 3.21875, + "learning_rate": 5.644667216311267e-07, + "log_odds": 13.734956741333008, + "log_odds_ratio": -0.0031434977427124977, + "loss": 0.2569, + "rejected_geometric_mean": -13.849984169006348, + "step": 6318 + }, + { + "chosen_geometric_mean": -1.0215039253234863, + "epoch": 1.56, + "grad_norm": 38.0, + "learning_rate": 5.638507331056692e-07, + "log_odds": 6.290642738342285, + "log_odds_ratio": -0.10396889597177505, + "loss": 0.2278, + "rejected_geometric_mean": -6.91572380065918, + "step": 6319 + }, + { + "chosen_geometric_mean": -1.0826103687286377, + "epoch": 1.56, + "grad_norm": 2.453125, + "learning_rate": 5.632350381469162e-07, + "log_odds": 4.123202800750732, + "log_odds_ratio": -0.20400306582450867, + "loss": 0.2504, + "rejected_geometric_mean": -4.841638565063477, + "step": 6320 + }, + { + "chosen_geometric_mean": -0.9552620649337769, + "epoch": 1.56, + "grad_norm": 5.59375, + "learning_rate": 5.626196368482209e-07, + "log_odds": 9.843976020812988, + "log_odds_ratio": -0.1720699816942215, + "loss": 0.2425, + "rejected_geometric_mean": -10.4256591796875, + "step": 6321 + }, + { + "chosen_geometric_mean": -0.9774144887924194, + "epoch": 1.57, + "grad_norm": 5.4375, + "learning_rate": 5.620045293028934e-07, + "log_odds": 11.556855201721191, + "log_odds_ratio": -0.09364423900842667, + "loss": 0.2639, + "rejected_geometric_mean": -12.090899467468262, + "step": 6322 + }, + { + "chosen_geometric_mean": -0.9917972087860107, + "epoch": 1.57, + "grad_norm": 63.5, + "learning_rate": 5.613897156041995e-07, + "log_odds": 4.06082820892334, + "log_odds_ratio": -0.2657589018344879, + "loss": 0.2886, + "rejected_geometric_mean": -4.73769474029541, + "step": 6323 + }, + { + "chosen_geometric_mean": -0.78889000415802, + "epoch": 1.57, + "grad_norm": 23.375, + "learning_rate": 5.607751958453589e-07, + "log_odds": 6.27221155166626, + "log_odds_ratio": -0.23343941569328308, + "loss": 0.2912, + "rejected_geometric_mean": -6.633687496185303, + "step": 6324 + }, + { + "chosen_geometric_mean": -0.8291608095169067, + "epoch": 1.57, + "grad_norm": 4.59375, + "learning_rate": 5.601609701195468e-07, + "log_odds": 9.106912612915039, + "log_odds_ratio": -0.007561765145510435, + "loss": 0.2045, + "rejected_geometric_mean": -9.318033218383789, + "step": 6325 + }, + { + "chosen_geometric_mean": -1.1127345561981201, + "epoch": 1.57, + "grad_norm": 9.25, + "learning_rate": 5.595470385198962e-07, + "log_odds": 5.7596259117126465, + "log_odds_ratio": -0.1368468701839447, + "loss": 0.2626, + "rejected_geometric_mean": -6.47971773147583, + "step": 6326 + }, + { + "chosen_geometric_mean": -0.8863014578819275, + "epoch": 1.57, + "grad_norm": 17.625, + "learning_rate": 5.589334011394923e-07, + "log_odds": 8.771173477172852, + "log_odds_ratio": -0.10789696872234344, + "loss": 0.2278, + "rejected_geometric_mean": -9.168502807617188, + "step": 6327 + }, + { + "chosen_geometric_mean": -0.9629392027854919, + "epoch": 1.57, + "grad_norm": 10.5, + "learning_rate": 5.583200580713779e-07, + "log_odds": 6.323599815368652, + "log_odds_ratio": -0.12038592994213104, + "loss": 0.221, + "rejected_geometric_mean": -6.858609199523926, + "step": 6328 + }, + { + "chosen_geometric_mean": -0.8706190586090088, + "epoch": 1.57, + "grad_norm": 4.875, + "learning_rate": 5.577070094085513e-07, + "log_odds": 9.272726058959961, + "log_odds_ratio": -0.04319512099027634, + "loss": 0.2264, + "rejected_geometric_mean": -9.614203453063965, + "step": 6329 + }, + { + "chosen_geometric_mean": -0.8496745824813843, + "epoch": 1.57, + "grad_norm": 3.359375, + "learning_rate": 5.57094255243964e-07, + "log_odds": 6.52512264251709, + "log_odds_ratio": -0.08325448632240295, + "loss": 0.2816, + "rejected_geometric_mean": -6.842197895050049, + "step": 6330 + }, + { + "chosen_geometric_mean": -1.0826961994171143, + "epoch": 1.57, + "grad_norm": 10.3125, + "learning_rate": 5.564817956705259e-07, + "log_odds": 7.9788103103637695, + "log_odds_ratio": -0.1312294900417328, + "loss": 0.2584, + "rejected_geometric_mean": -8.67152214050293, + "step": 6331 + }, + { + "chosen_geometric_mean": -0.9305943250656128, + "epoch": 1.57, + "grad_norm": 5.09375, + "learning_rate": 5.558696307810987e-07, + "log_odds": 3.3109922409057617, + "log_odds_ratio": -0.2608816623687744, + "loss": 0.2317, + "rejected_geometric_mean": -3.894735336303711, + "step": 6332 + }, + { + "chosen_geometric_mean": -0.8583971261978149, + "epoch": 1.57, + "grad_norm": 2.1875, + "learning_rate": 5.552577606685031e-07, + "log_odds": 10.20749282836914, + "log_odds_ratio": -0.13081112504005432, + "loss": 0.2643, + "rejected_geometric_mean": -10.53473949432373, + "step": 6333 + }, + { + "chosen_geometric_mean": -1.1875004768371582, + "epoch": 1.57, + "grad_norm": 16.125, + "learning_rate": 5.546461854255114e-07, + "log_odds": 11.273724555969238, + "log_odds_ratio": -0.12081588804721832, + "loss": 0.321, + "rejected_geometric_mean": -12.101865768432617, + "step": 6334 + }, + { + "chosen_geometric_mean": -1.0205295085906982, + "epoch": 1.57, + "grad_norm": 5.5, + "learning_rate": 5.540349051448551e-07, + "log_odds": 8.401532173156738, + "log_odds_ratio": -0.12246507406234741, + "loss": 0.2467, + "rejected_geometric_mean": -9.026656150817871, + "step": 6335 + }, + { + "chosen_geometric_mean": -1.0982271432876587, + "epoch": 1.57, + "grad_norm": 22.75, + "learning_rate": 5.534239199192173e-07, + "log_odds": 5.493276596069336, + "log_odds_ratio": -0.12244132906198502, + "loss": 0.296, + "rejected_geometric_mean": -6.178410530090332, + "step": 6336 + }, + { + "chosen_geometric_mean": -0.8297724723815918, + "epoch": 1.57, + "grad_norm": 2.359375, + "learning_rate": 5.528132298412392e-07, + "log_odds": 14.800192832946777, + "log_odds_ratio": -0.13027019798755646, + "loss": 0.271, + "rejected_geometric_mean": -15.167739868164062, + "step": 6337 + }, + { + "chosen_geometric_mean": -1.0198432207107544, + "epoch": 1.57, + "grad_norm": 43.0, + "learning_rate": 5.52202835003515e-07, + "log_odds": 9.609273910522461, + "log_odds_ratio": -0.13933323323726654, + "loss": 0.2544, + "rejected_geometric_mean": -10.241701126098633, + "step": 6338 + }, + { + "chosen_geometric_mean": -1.1891403198242188, + "epoch": 1.57, + "grad_norm": 2.40625, + "learning_rate": 5.515927354985956e-07, + "log_odds": 9.180532455444336, + "log_odds_ratio": -0.09235725551843643, + "loss": 0.2503, + "rejected_geometric_mean": -9.99599838256836, + "step": 6339 + }, + { + "chosen_geometric_mean": -0.8588511943817139, + "epoch": 1.57, + "grad_norm": 2.515625, + "learning_rate": 5.509829314189877e-07, + "log_odds": 8.378037452697754, + "log_odds_ratio": -0.10318624973297119, + "loss": 0.2349, + "rejected_geometric_mean": -8.713737487792969, + "step": 6340 + }, + { + "chosen_geometric_mean": -0.9344040155410767, + "epoch": 1.57, + "grad_norm": 97.5, + "learning_rate": 5.503734228571514e-07, + "log_odds": 10.779141426086426, + "log_odds_ratio": -0.0638059750199318, + "loss": 0.2617, + "rejected_geometric_mean": -11.262425422668457, + "step": 6341 + }, + { + "chosen_geometric_mean": -0.9823732972145081, + "epoch": 1.57, + "grad_norm": 3.109375, + "learning_rate": 5.497642099055022e-07, + "log_odds": 2.768519639968872, + "log_odds_ratio": -0.2901137173175812, + "loss": 0.2745, + "rejected_geometric_mean": -3.4748432636260986, + "step": 6342 + }, + { + "chosen_geometric_mean": -0.8178693652153015, + "epoch": 1.57, + "grad_norm": 2.453125, + "learning_rate": 5.491552926564123e-07, + "log_odds": 7.20145320892334, + "log_odds_ratio": -0.09131727367639542, + "loss": 0.2263, + "rejected_geometric_mean": -7.487997531890869, + "step": 6343 + }, + { + "chosen_geometric_mean": -1.139309287071228, + "epoch": 1.57, + "grad_norm": 2.953125, + "learning_rate": 5.485466712022084e-07, + "log_odds": 7.805916786193848, + "log_odds_ratio": -0.12697313725948334, + "loss": 0.2674, + "rejected_geometric_mean": -8.638591766357422, + "step": 6344 + }, + { + "chosen_geometric_mean": -0.9279672503471375, + "epoch": 1.57, + "grad_norm": 2.953125, + "learning_rate": 5.479383456351711e-07, + "log_odds": 7.486706733703613, + "log_odds_ratio": -0.11487038433551788, + "loss": 0.2598, + "rejected_geometric_mean": -7.993351936340332, + "step": 6345 + }, + { + "chosen_geometric_mean": -1.037539005279541, + "epoch": 1.57, + "grad_norm": 11.4375, + "learning_rate": 5.473303160475382e-07, + "log_odds": 8.945459365844727, + "log_odds_ratio": -0.13925662636756897, + "loss": 0.2654, + "rejected_geometric_mean": -9.55538558959961, + "step": 6346 + }, + { + "chosen_geometric_mean": -0.8483444452285767, + "epoch": 1.57, + "grad_norm": 6.1875, + "learning_rate": 5.467225825315015e-07, + "log_odds": 8.033791542053223, + "log_odds_ratio": -0.15564262866973877, + "loss": 0.2866, + "rejected_geometric_mean": -8.423408508300781, + "step": 6347 + }, + { + "chosen_geometric_mean": -0.9031452536582947, + "epoch": 1.57, + "grad_norm": 2.171875, + "learning_rate": 5.461151451792057e-07, + "log_odds": 5.502626419067383, + "log_odds_ratio": -0.3037465512752533, + "loss": 0.2482, + "rejected_geometric_mean": -6.083990097045898, + "step": 6348 + }, + { + "chosen_geometric_mean": -1.0739107131958008, + "epoch": 1.57, + "grad_norm": 2.265625, + "learning_rate": 5.455080040827565e-07, + "log_odds": 8.55704402923584, + "log_odds_ratio": -0.19714337587356567, + "loss": 0.2698, + "rejected_geometric_mean": -9.337329864501953, + "step": 6349 + }, + { + "chosen_geometric_mean": -1.1735800504684448, + "epoch": 1.57, + "grad_norm": 6.96875, + "learning_rate": 5.449011593342094e-07, + "log_odds": 3.598822593688965, + "log_odds_ratio": -0.06572037935256958, + "loss": 0.2991, + "rejected_geometric_mean": -4.410150527954102, + "step": 6350 + }, + { + "chosen_geometric_mean": -0.943213164806366, + "epoch": 1.57, + "grad_norm": 1.984375, + "learning_rate": 5.442946110255756e-07, + "log_odds": 9.588143348693848, + "log_odds_ratio": -0.020165659487247467, + "loss": 0.2312, + "rejected_geometric_mean": -10.036566734313965, + "step": 6351 + }, + { + "chosen_geometric_mean": -0.8613340854644775, + "epoch": 1.57, + "grad_norm": 5.09375, + "learning_rate": 5.436883592488243e-07, + "log_odds": 8.185711860656738, + "log_odds_ratio": -0.13730835914611816, + "loss": 0.2331, + "rejected_geometric_mean": -8.572124481201172, + "step": 6352 + }, + { + "chosen_geometric_mean": -0.8563432097434998, + "epoch": 1.57, + "grad_norm": 58.75, + "learning_rate": 5.430824040958762e-07, + "log_odds": 7.360374450683594, + "log_odds_ratio": -0.024816904217004776, + "loss": 0.2416, + "rejected_geometric_mean": -7.676918983459473, + "step": 6353 + }, + { + "chosen_geometric_mean": -0.8360565304756165, + "epoch": 1.57, + "grad_norm": 18.625, + "learning_rate": 5.424767456586091e-07, + "log_odds": 2.1249349117279053, + "log_odds_ratio": -0.2203914225101471, + "loss": 0.2389, + "rejected_geometric_mean": -2.5840649604797363, + "step": 6354 + }, + { + "chosen_geometric_mean": -1.033463478088379, + "epoch": 1.57, + "grad_norm": 1.7734375, + "learning_rate": 5.418713840288564e-07, + "log_odds": 9.386974334716797, + "log_odds_ratio": -0.002510362071916461, + "loss": 0.206, + "rejected_geometric_mean": -9.981064796447754, + "step": 6355 + }, + { + "chosen_geometric_mean": -0.8579331636428833, + "epoch": 1.57, + "grad_norm": 2.375, + "learning_rate": 5.412663192984041e-07, + "log_odds": 3.765413522720337, + "log_odds_ratio": -0.30263403058052063, + "loss": 0.2973, + "rejected_geometric_mean": -4.2947611808776855, + "step": 6356 + }, + { + "chosen_geometric_mean": -0.7895275354385376, + "epoch": 1.57, + "grad_norm": 2.15625, + "learning_rate": 5.406615515589955e-07, + "log_odds": 6.0420942306518555, + "log_odds_ratio": -0.18104106187820435, + "loss": 0.2523, + "rejected_geometric_mean": -6.33303165435791, + "step": 6357 + }, + { + "chosen_geometric_mean": -1.018641471862793, + "epoch": 1.57, + "grad_norm": 4.0, + "learning_rate": 5.400570809023276e-07, + "log_odds": 9.73514175415039, + "log_odds_ratio": -0.0028796896804124117, + "loss": 0.3069, + "rejected_geometric_mean": -10.290419578552246, + "step": 6358 + }, + { + "chosen_geometric_mean": -1.0036431550979614, + "epoch": 1.57, + "grad_norm": 2.15625, + "learning_rate": 5.394529074200519e-07, + "log_odds": 10.75149154663086, + "log_odds_ratio": -0.0027803254779428244, + "loss": 0.2558, + "rejected_geometric_mean": -11.293111801147461, + "step": 6359 + }, + { + "chosen_geometric_mean": -0.9022629261016846, + "epoch": 1.57, + "grad_norm": 17.875, + "learning_rate": 5.388490312037761e-07, + "log_odds": 15.973989486694336, + "log_odds_ratio": -9.86475424724631e-06, + "loss": 0.2567, + "rejected_geometric_mean": -16.290321350097656, + "step": 6360 + }, + { + "chosen_geometric_mean": -0.932974100112915, + "epoch": 1.57, + "grad_norm": 3.0625, + "learning_rate": 5.382454523450633e-07, + "log_odds": 4.145335674285889, + "log_odds_ratio": -0.17308887839317322, + "loss": 0.296, + "rejected_geometric_mean": -4.6656036376953125, + "step": 6361 + }, + { + "chosen_geometric_mean": -1.0204036235809326, + "epoch": 1.58, + "grad_norm": 15.0, + "learning_rate": 5.37642170935429e-07, + "log_odds": 4.0684027671813965, + "log_odds_ratio": -0.4771173298358917, + "loss": 0.2495, + "rejected_geometric_mean": -4.938206195831299, + "step": 6362 + }, + { + "chosen_geometric_mean": -1.1342933177947998, + "epoch": 1.58, + "grad_norm": 19.375, + "learning_rate": 5.370391870663466e-07, + "log_odds": 2.3354387283325195, + "log_odds_ratio": -0.2694358825683594, + "loss": 0.309, + "rejected_geometric_mean": -3.21962833404541, + "step": 6363 + }, + { + "chosen_geometric_mean": -0.8607192039489746, + "epoch": 1.58, + "grad_norm": 5.46875, + "learning_rate": 5.364365008292422e-07, + "log_odds": 5.622331619262695, + "log_odds_ratio": -0.016964051872491837, + "loss": 0.2135, + "rejected_geometric_mean": -5.887213706970215, + "step": 6364 + }, + { + "chosen_geometric_mean": -1.0509788990020752, + "epoch": 1.58, + "grad_norm": 24.125, + "learning_rate": 5.358341123154962e-07, + "log_odds": 5.160652160644531, + "log_odds_ratio": -0.3027527332305908, + "loss": 0.2951, + "rejected_geometric_mean": -5.927157402038574, + "step": 6365 + }, + { + "chosen_geometric_mean": -1.0689409971237183, + "epoch": 1.58, + "grad_norm": 4.78125, + "learning_rate": 5.352320216164475e-07, + "log_odds": 3.993800401687622, + "log_odds_ratio": -0.1861579865217209, + "loss": 0.2645, + "rejected_geometric_mean": -4.716335296630859, + "step": 6366 + }, + { + "chosen_geometric_mean": -1.0019572973251343, + "epoch": 1.58, + "grad_norm": 9.5, + "learning_rate": 5.346302288233867e-07, + "log_odds": 6.934104919433594, + "log_odds_ratio": -0.25110554695129395, + "loss": 0.2966, + "rejected_geometric_mean": -7.63197135925293, + "step": 6367 + }, + { + "chosen_geometric_mean": -0.8684855699539185, + "epoch": 1.58, + "grad_norm": 2.0, + "learning_rate": 5.340287340275591e-07, + "log_odds": 8.172629356384277, + "log_odds_ratio": -0.010436379350721836, + "loss": 0.2667, + "rejected_geometric_mean": -8.4934720993042, + "step": 6368 + }, + { + "chosen_geometric_mean": -0.882300615310669, + "epoch": 1.58, + "grad_norm": 5.875, + "learning_rate": 5.334275373201669e-07, + "log_odds": 11.975372314453125, + "log_odds_ratio": -0.03534138947725296, + "loss": 0.2544, + "rejected_geometric_mean": -12.3326416015625, + "step": 6369 + }, + { + "chosen_geometric_mean": -1.5247925519943237, + "epoch": 1.58, + "grad_norm": 46.0, + "learning_rate": 5.32826638792365e-07, + "log_odds": 13.207027435302734, + "log_odds_ratio": -0.04797488823533058, + "loss": 0.3989, + "rejected_geometric_mean": -14.421985626220703, + "step": 6370 + }, + { + "chosen_geometric_mean": -0.8546290397644043, + "epoch": 1.58, + "grad_norm": 2.0625, + "learning_rate": 5.322260385352645e-07, + "log_odds": 16.19683265686035, + "log_odds_ratio": -0.12530817091464996, + "loss": 0.2423, + "rejected_geometric_mean": -16.530920028686523, + "step": 6371 + }, + { + "chosen_geometric_mean": -0.9412931203842163, + "epoch": 1.58, + "grad_norm": 37.5, + "learning_rate": 5.316257366399316e-07, + "log_odds": 8.292753219604492, + "log_odds_ratio": -0.017087124288082123, + "loss": 0.3088, + "rejected_geometric_mean": -8.73239517211914, + "step": 6372 + }, + { + "chosen_geometric_mean": -1.9179718494415283, + "epoch": 1.58, + "grad_norm": 24.5, + "learning_rate": 5.310257331973856e-07, + "log_odds": 1.7174526453018188, + "log_odds_ratio": -0.5210278630256653, + "loss": 0.3284, + "rejected_geometric_mean": -3.3347129821777344, + "step": 6373 + }, + { + "chosen_geometric_mean": -0.9569405913352966, + "epoch": 1.58, + "grad_norm": 8.0, + "learning_rate": 5.304260282986009e-07, + "log_odds": 3.3173775672912598, + "log_odds_ratio": -0.1998990923166275, + "loss": 0.2884, + "rejected_geometric_mean": -3.8862271308898926, + "step": 6374 + }, + { + "chosen_geometric_mean": -1.0850672721862793, + "epoch": 1.58, + "grad_norm": 3.453125, + "learning_rate": 5.298266220345085e-07, + "log_odds": 3.6727101802825928, + "log_odds_ratio": -0.19355200231075287, + "loss": 0.2101, + "rejected_geometric_mean": -4.492002487182617, + "step": 6375 + }, + { + "chosen_geometric_mean": -1.044757604598999, + "epoch": 1.58, + "grad_norm": 4.96875, + "learning_rate": 5.292275144959913e-07, + "log_odds": 5.865062713623047, + "log_odds_ratio": -0.23706945776939392, + "loss": 0.3154, + "rejected_geometric_mean": -6.631961822509766, + "step": 6376 + }, + { + "chosen_geometric_mean": -1.0007920265197754, + "epoch": 1.58, + "grad_norm": 2.71875, + "learning_rate": 5.286287057738893e-07, + "log_odds": 3.94838285446167, + "log_odds_ratio": -0.36286330223083496, + "loss": 0.2601, + "rejected_geometric_mean": -4.74511194229126, + "step": 6377 + }, + { + "chosen_geometric_mean": -0.8519412279129028, + "epoch": 1.58, + "grad_norm": 2.328125, + "learning_rate": 5.280301959589967e-07, + "log_odds": 7.554598331451416, + "log_odds_ratio": -0.10602867603302002, + "loss": 0.2556, + "rejected_geometric_mean": -7.913125991821289, + "step": 6378 + }, + { + "chosen_geometric_mean": -1.0498228073120117, + "epoch": 1.58, + "grad_norm": 4.5625, + "learning_rate": 5.274319851420609e-07, + "log_odds": 11.29354190826416, + "log_odds_ratio": -6.768519233446568e-05, + "loss": 0.2789, + "rejected_geometric_mean": -11.870688438415527, + "step": 6379 + }, + { + "chosen_geometric_mean": -0.9808129072189331, + "epoch": 1.58, + "grad_norm": 8.0625, + "learning_rate": 5.268340734137859e-07, + "log_odds": 1.8620355129241943, + "log_odds_ratio": -0.20158734917640686, + "loss": 0.2684, + "rejected_geometric_mean": -2.51163649559021, + "step": 6380 + }, + { + "chosen_geometric_mean": -1.0250535011291504, + "epoch": 1.58, + "grad_norm": 5.59375, + "learning_rate": 5.262364608648285e-07, + "log_odds": 6.970106601715088, + "log_odds_ratio": -0.3353308439254761, + "loss": 0.3118, + "rejected_geometric_mean": -7.797976016998291, + "step": 6381 + }, + { + "chosen_geometric_mean": -0.8919550180435181, + "epoch": 1.58, + "grad_norm": 49.5, + "learning_rate": 5.25639147585802e-07, + "log_odds": 9.808950424194336, + "log_odds_ratio": -0.09501431882381439, + "loss": 0.2734, + "rejected_geometric_mean": -10.197708129882812, + "step": 6382 + }, + { + "chosen_geometric_mean": -0.8863537311553955, + "epoch": 1.58, + "grad_norm": 6.3125, + "learning_rate": 5.250421336672734e-07, + "log_odds": 0.6916549205780029, + "log_odds_ratio": -0.4784848690032959, + "loss": 0.2476, + "rejected_geometric_mean": -1.4269607067108154, + "step": 6383 + }, + { + "chosen_geometric_mean": -1.0511140823364258, + "epoch": 1.58, + "grad_norm": 4.1875, + "learning_rate": 5.244454191997642e-07, + "log_odds": 13.480857849121094, + "log_odds_ratio": -0.09678473323583603, + "loss": 0.2402, + "rejected_geometric_mean": -14.17061710357666, + "step": 6384 + }, + { + "chosen_geometric_mean": -1.0860275030136108, + "epoch": 1.58, + "grad_norm": 9.4375, + "learning_rate": 5.238490042737499e-07, + "log_odds": 10.626466751098633, + "log_odds_ratio": -0.007754538208246231, + "loss": 0.2382, + "rejected_geometric_mean": -11.29487133026123, + "step": 6385 + }, + { + "chosen_geometric_mean": -1.117431402206421, + "epoch": 1.58, + "grad_norm": 7.53125, + "learning_rate": 5.232528889796626e-07, + "log_odds": 1.8082853555679321, + "log_odds_ratio": -0.28025108575820923, + "loss": 0.3035, + "rejected_geometric_mean": -2.689457416534424, + "step": 6386 + }, + { + "chosen_geometric_mean": -1.0923806428909302, + "epoch": 1.58, + "grad_norm": 3.53125, + "learning_rate": 5.226570734078862e-07, + "log_odds": 5.070511341094971, + "log_odds_ratio": -0.22222697734832764, + "loss": 0.3168, + "rejected_geometric_mean": -5.8849382400512695, + "step": 6387 + }, + { + "chosen_geometric_mean": -1.0181478261947632, + "epoch": 1.58, + "grad_norm": 2.578125, + "learning_rate": 5.220615576487614e-07, + "log_odds": 5.666043281555176, + "log_odds_ratio": -0.2577732503414154, + "loss": 0.2634, + "rejected_geometric_mean": -6.376159191131592, + "step": 6388 + }, + { + "chosen_geometric_mean": -0.9057741165161133, + "epoch": 1.58, + "grad_norm": 1.8359375, + "learning_rate": 5.214663417925836e-07, + "log_odds": 11.054495811462402, + "log_odds_ratio": -0.009737962856888771, + "loss": 0.2369, + "rejected_geometric_mean": -11.437394142150879, + "step": 6389 + }, + { + "chosen_geometric_mean": -1.0072767734527588, + "epoch": 1.58, + "grad_norm": 42.0, + "learning_rate": 5.208714259296007e-07, + "log_odds": 10.541475296020508, + "log_odds_ratio": -0.05753306671977043, + "loss": 0.3407, + "rejected_geometric_mean": -11.120232582092285, + "step": 6390 + }, + { + "chosen_geometric_mean": -0.8754922151565552, + "epoch": 1.58, + "grad_norm": 1.875, + "learning_rate": 5.202768101500161e-07, + "log_odds": 6.601536750793457, + "log_odds_ratio": -0.02316291630268097, + "loss": 0.2128, + "rejected_geometric_mean": -6.913959980010986, + "step": 6391 + }, + { + "chosen_geometric_mean": -0.7670143842697144, + "epoch": 1.58, + "grad_norm": 63.0, + "learning_rate": 5.19682494543988e-07, + "log_odds": 9.257225036621094, + "log_odds_ratio": -0.1284158080816269, + "loss": 0.2867, + "rejected_geometric_mean": -9.502339363098145, + "step": 6392 + }, + { + "chosen_geometric_mean": -1.0857417583465576, + "epoch": 1.58, + "grad_norm": 51.25, + "learning_rate": 5.190884792016299e-07, + "log_odds": 0.26350629329681396, + "log_odds_ratio": -0.6815292835235596, + "loss": 0.3387, + "rejected_geometric_mean": -1.3089425563812256, + "step": 6393 + }, + { + "chosen_geometric_mean": -0.8028573989868164, + "epoch": 1.58, + "grad_norm": 3.984375, + "learning_rate": 5.184947642130072e-07, + "log_odds": 6.066549301147461, + "log_odds_ratio": -0.23375551402568817, + "loss": 0.2506, + "rejected_geometric_mean": -6.4408979415893555, + "step": 6394 + }, + { + "chosen_geometric_mean": -1.0965739488601685, + "epoch": 1.58, + "grad_norm": 3.5, + "learning_rate": 5.179013496681428e-07, + "log_odds": 11.366242408752441, + "log_odds_ratio": -0.020867154002189636, + "loss": 0.2582, + "rejected_geometric_mean": -12.066944122314453, + "step": 6395 + }, + { + "chosen_geometric_mean": -1.2140214443206787, + "epoch": 1.58, + "grad_norm": 14.0, + "learning_rate": 5.173082356570119e-07, + "log_odds": 6.068665027618408, + "log_odds_ratio": -0.3215862810611725, + "loss": 0.3028, + "rejected_geometric_mean": -7.0423903465271, + "step": 6396 + }, + { + "chosen_geometric_mean": -1.1723699569702148, + "epoch": 1.58, + "grad_norm": 9.1875, + "learning_rate": 5.167154222695444e-07, + "log_odds": 5.307425498962402, + "log_odds_ratio": -0.2319403886795044, + "loss": 0.2811, + "rejected_geometric_mean": -6.203584671020508, + "step": 6397 + }, + { + "chosen_geometric_mean": -1.2726680040359497, + "epoch": 1.58, + "grad_norm": 31.5, + "learning_rate": 5.161229095956252e-07, + "log_odds": 12.28915023803711, + "log_odds_ratio": -0.014871561899781227, + "loss": 0.2732, + "rejected_geometric_mean": -13.199234962463379, + "step": 6398 + }, + { + "chosen_geometric_mean": -0.91996169090271, + "epoch": 1.58, + "grad_norm": 19.125, + "learning_rate": 5.155306977250946e-07, + "log_odds": 5.959716796875, + "log_odds_ratio": -0.15540482103824615, + "loss": 0.2438, + "rejected_geometric_mean": -6.485134124755859, + "step": 6399 + }, + { + "chosen_geometric_mean": -1.1148066520690918, + "epoch": 1.58, + "grad_norm": 8.875, + "learning_rate": 5.149387867477445e-07, + "log_odds": 9.031750679016113, + "log_odds_ratio": -0.03253629803657532, + "loss": 0.2207, + "rejected_geometric_mean": -9.744402885437012, + "step": 6400 + }, + { + "chosen_geometric_mean": -1.6348724365234375, + "epoch": 1.58, + "grad_norm": 48.5, + "learning_rate": 5.143471767533242e-07, + "log_odds": 4.8853983879089355, + "log_odds_ratio": -0.16123144328594208, + "loss": 0.2751, + "rejected_geometric_mean": -6.2308197021484375, + "step": 6401 + }, + { + "chosen_geometric_mean": -1.0934662818908691, + "epoch": 1.59, + "grad_norm": 10.9375, + "learning_rate": 5.13755867831535e-07, + "log_odds": 8.012073516845703, + "log_odds_ratio": -0.04061897099018097, + "loss": 0.2408, + "rejected_geometric_mean": -8.69311809539795, + "step": 6402 + }, + { + "chosen_geometric_mean": -0.9932231903076172, + "epoch": 1.59, + "grad_norm": 2.15625, + "learning_rate": 5.131648600720335e-07, + "log_odds": 14.878250122070312, + "log_odds_ratio": -0.0006592763238586485, + "loss": 0.2695, + "rejected_geometric_mean": -15.394245147705078, + "step": 6403 + }, + { + "chosen_geometric_mean": -0.9062315225601196, + "epoch": 1.59, + "grad_norm": 36.5, + "learning_rate": 5.125741535644319e-07, + "log_odds": 5.264066696166992, + "log_odds_ratio": -0.10315126180648804, + "loss": 0.2422, + "rejected_geometric_mean": -5.723471641540527, + "step": 6404 + }, + { + "chosen_geometric_mean": -1.0373890399932861, + "epoch": 1.59, + "grad_norm": 2.828125, + "learning_rate": 5.119837483982937e-07, + "log_odds": 5.96636438369751, + "log_odds_ratio": -0.16028988361358643, + "loss": 0.2575, + "rejected_geometric_mean": -6.651673316955566, + "step": 6405 + }, + { + "chosen_geometric_mean": -1.0990031957626343, + "epoch": 1.59, + "grad_norm": 25.25, + "learning_rate": 5.113936446631404e-07, + "log_odds": 5.794287204742432, + "log_odds_ratio": -0.07977647334337234, + "loss": 0.271, + "rejected_geometric_mean": -6.458108901977539, + "step": 6406 + }, + { + "chosen_geometric_mean": -0.6912590861320496, + "epoch": 1.59, + "grad_norm": 24.875, + "learning_rate": 5.108038424484447e-07, + "log_odds": 10.472543716430664, + "log_odds_ratio": -0.010362532921135426, + "loss": 0.2762, + "rejected_geometric_mean": -10.433740615844727, + "step": 6407 + }, + { + "chosen_geometric_mean": -0.986371636390686, + "epoch": 1.59, + "grad_norm": 16.125, + "learning_rate": 5.102143418436345e-07, + "log_odds": 8.882879257202148, + "log_odds_ratio": -0.12082589417695999, + "loss": 0.2552, + "rejected_geometric_mean": -9.401130676269531, + "step": 6408 + }, + { + "chosen_geometric_mean": -0.9878795146942139, + "epoch": 1.59, + "grad_norm": 13.0, + "learning_rate": 5.096251429380925e-07, + "log_odds": 9.212069511413574, + "log_odds_ratio": -0.011826777830719948, + "loss": 0.3083, + "rejected_geometric_mean": -9.736804962158203, + "step": 6409 + }, + { + "chosen_geometric_mean": -0.8611279726028442, + "epoch": 1.59, + "grad_norm": 1.828125, + "learning_rate": 5.090362458211565e-07, + "log_odds": 10.84792423248291, + "log_odds_ratio": -0.014107804745435715, + "loss": 0.2336, + "rejected_geometric_mean": -11.159525871276855, + "step": 6410 + }, + { + "chosen_geometric_mean": -0.954583466053009, + "epoch": 1.59, + "grad_norm": 1.796875, + "learning_rate": 5.084476505821157e-07, + "log_odds": 4.0200886726379395, + "log_odds_ratio": -0.20183581113815308, + "loss": 0.2181, + "rejected_geometric_mean": -4.653155326843262, + "step": 6411 + }, + { + "chosen_geometric_mean": -2.0083799362182617, + "epoch": 1.59, + "grad_norm": 88.5, + "learning_rate": 5.078593573102169e-07, + "log_odds": 10.056973457336426, + "log_odds_ratio": -0.18120507895946503, + "loss": 0.403, + "rejected_geometric_mean": -11.875372886657715, + "step": 6412 + }, + { + "chosen_geometric_mean": -1.4857631921768188, + "epoch": 1.59, + "grad_norm": 7.0, + "learning_rate": 5.072713660946588e-07, + "log_odds": 6.713202953338623, + "log_odds_ratio": -0.22593669593334198, + "loss": 0.2569, + "rejected_geometric_mean": -7.956160068511963, + "step": 6413 + }, + { + "chosen_geometric_mean": -0.9845130443572998, + "epoch": 1.59, + "grad_norm": 25.875, + "learning_rate": 5.066836770245933e-07, + "log_odds": 6.813336372375488, + "log_odds_ratio": -0.20866869390010834, + "loss": 0.2699, + "rejected_geometric_mean": -7.492979049682617, + "step": 6414 + }, + { + "chosen_geometric_mean": -0.8436147570610046, + "epoch": 1.59, + "grad_norm": 54.25, + "learning_rate": 5.060962901891314e-07, + "log_odds": 6.245288848876953, + "log_odds_ratio": -0.028059719130396843, + "loss": 0.3964, + "rejected_geometric_mean": -6.544929504394531, + "step": 6415 + }, + { + "chosen_geometric_mean": -1.0663117170333862, + "epoch": 1.59, + "grad_norm": 12.125, + "learning_rate": 5.055092056773334e-07, + "log_odds": 4.34403657913208, + "log_odds_ratio": -0.18360009789466858, + "loss": 0.2373, + "rejected_geometric_mean": -5.112086772918701, + "step": 6416 + }, + { + "chosen_geometric_mean": -0.9061469435691833, + "epoch": 1.59, + "grad_norm": 57.0, + "learning_rate": 5.049224235782146e-07, + "log_odds": 5.376996994018555, + "log_odds_ratio": -0.17488154768943787, + "loss": 0.23, + "rejected_geometric_mean": -5.902640342712402, + "step": 6417 + }, + { + "chosen_geometric_mean": -0.9233746528625488, + "epoch": 1.59, + "grad_norm": 6.21875, + "learning_rate": 5.043359439807469e-07, + "log_odds": 12.617387771606445, + "log_odds_ratio": -0.019567199051380157, + "loss": 0.225, + "rejected_geometric_mean": -13.022927284240723, + "step": 6418 + }, + { + "chosen_geometric_mean": -1.0982563495635986, + "epoch": 1.59, + "grad_norm": 33.5, + "learning_rate": 5.037497669738531e-07, + "log_odds": 4.6028618812561035, + "log_odds_ratio": -0.019108278676867485, + "loss": 0.2576, + "rejected_geometric_mean": -5.2509870529174805, + "step": 6419 + }, + { + "chosen_geometric_mean": -0.9279977083206177, + "epoch": 1.59, + "grad_norm": 2.703125, + "learning_rate": 5.031638926464122e-07, + "log_odds": 12.095426559448242, + "log_odds_ratio": -0.08187583088874817, + "loss": 0.2851, + "rejected_geometric_mean": -12.57021713256836, + "step": 6420 + }, + { + "chosen_geometric_mean": -0.9101187586784363, + "epoch": 1.59, + "grad_norm": 3.375, + "learning_rate": 5.025783210872578e-07, + "log_odds": 8.545618057250977, + "log_odds_ratio": -0.03282924368977547, + "loss": 0.2384, + "rejected_geometric_mean": -8.949894905090332, + "step": 6421 + }, + { + "chosen_geometric_mean": -0.9432826042175293, + "epoch": 1.59, + "grad_norm": 10.0625, + "learning_rate": 5.019930523851763e-07, + "log_odds": 4.160609245300293, + "log_odds_ratio": -0.17953945696353912, + "loss": 0.2683, + "rejected_geometric_mean": -4.705918312072754, + "step": 6422 + }, + { + "chosen_geometric_mean": -1.014845371246338, + "epoch": 1.59, + "grad_norm": 2.046875, + "learning_rate": 5.01408086628907e-07, + "log_odds": 5.983161926269531, + "log_odds_ratio": -0.1517944633960724, + "loss": 0.2277, + "rejected_geometric_mean": -6.618702411651611, + "step": 6423 + }, + { + "chosen_geometric_mean": -1.062458872795105, + "epoch": 1.59, + "grad_norm": 7.4375, + "learning_rate": 5.008234239071466e-07, + "log_odds": 6.2072649002075195, + "log_odds_ratio": -0.050334565341472626, + "loss": 0.2605, + "rejected_geometric_mean": -6.869607448577881, + "step": 6424 + }, + { + "chosen_geometric_mean": -1.0906691551208496, + "epoch": 1.59, + "grad_norm": 2.546875, + "learning_rate": 5.002390643085425e-07, + "log_odds": 3.9710636138916016, + "log_odds_ratio": -0.02710486203432083, + "loss": 0.2116, + "rejected_geometric_mean": -4.662731647491455, + "step": 6425 + }, + { + "chosen_geometric_mean": -0.8659042119979858, + "epoch": 1.59, + "grad_norm": 20.0, + "learning_rate": 4.996550079216983e-07, + "log_odds": 11.948148727416992, + "log_odds_ratio": -5.5078133300412446e-05, + "loss": 0.2933, + "rejected_geometric_mean": -12.21863079071045, + "step": 6426 + }, + { + "chosen_geometric_mean": -0.878730058670044, + "epoch": 1.59, + "grad_norm": 4.5625, + "learning_rate": 4.990712548351718e-07, + "log_odds": 6.994306564331055, + "log_odds_ratio": -0.13530847430229187, + "loss": 0.2521, + "rejected_geometric_mean": -7.389444351196289, + "step": 6427 + }, + { + "chosen_geometric_mean": -0.9781886339187622, + "epoch": 1.59, + "grad_norm": 3.890625, + "learning_rate": 4.984878051374723e-07, + "log_odds": 6.511186599731445, + "log_odds_ratio": -0.06812150776386261, + "loss": 0.2803, + "rejected_geometric_mean": -7.061297416687012, + "step": 6428 + }, + { + "chosen_geometric_mean": -0.9124699831008911, + "epoch": 1.59, + "grad_norm": 4.3125, + "learning_rate": 4.979046589170667e-07, + "log_odds": 7.6267290115356445, + "log_odds_ratio": -0.0024576978757977486, + "loss": 0.2902, + "rejected_geometric_mean": -7.988936424255371, + "step": 6429 + }, + { + "chosen_geometric_mean": -0.810677170753479, + "epoch": 1.59, + "grad_norm": 2.78125, + "learning_rate": 4.973218162623719e-07, + "log_odds": 9.275506973266602, + "log_odds_ratio": -0.013882502913475037, + "loss": 0.2425, + "rejected_geometric_mean": -9.500389099121094, + "step": 6430 + }, + { + "chosen_geometric_mean": -1.0098702907562256, + "epoch": 1.59, + "grad_norm": 2.9375, + "learning_rate": 4.967392772617621e-07, + "log_odds": 2.8049583435058594, + "log_odds_ratio": -0.17460210621356964, + "loss": 0.2563, + "rejected_geometric_mean": -3.437725782394409, + "step": 6431 + }, + { + "chosen_geometric_mean": -0.8439992666244507, + "epoch": 1.59, + "grad_norm": 4.5, + "learning_rate": 4.961570420035647e-07, + "log_odds": 10.769763946533203, + "log_odds_ratio": -0.025617633014917374, + "loss": 0.2339, + "rejected_geometric_mean": -11.05587387084961, + "step": 6432 + }, + { + "chosen_geometric_mean": -0.9572461843490601, + "epoch": 1.59, + "grad_norm": 7.5, + "learning_rate": 4.955751105760595e-07, + "log_odds": 18.23672103881836, + "log_odds_ratio": -1.9073556813964387e-06, + "loss": 0.2405, + "rejected_geometric_mean": -18.695415496826172, + "step": 6433 + }, + { + "chosen_geometric_mean": -0.9233391284942627, + "epoch": 1.59, + "grad_norm": 2.21875, + "learning_rate": 4.949934830674813e-07, + "log_odds": 4.435909748077393, + "log_odds_ratio": -0.048203758895397186, + "loss": 0.2483, + "rejected_geometric_mean": -4.841613292694092, + "step": 6434 + }, + { + "chosen_geometric_mean": -0.7727885246276855, + "epoch": 1.59, + "grad_norm": 2.953125, + "learning_rate": 4.944121595660195e-07, + "log_odds": 7.528343200683594, + "log_odds_ratio": -0.04225625470280647, + "loss": 0.2059, + "rejected_geometric_mean": -7.690518379211426, + "step": 6435 + }, + { + "chosen_geometric_mean": -0.9915480613708496, + "epoch": 1.59, + "grad_norm": 6.75, + "learning_rate": 4.938311401598156e-07, + "log_odds": 8.258551597595215, + "log_odds_ratio": -0.14562776684761047, + "loss": 0.2469, + "rejected_geometric_mean": -8.819973945617676, + "step": 6436 + }, + { + "chosen_geometric_mean": -0.9240615367889404, + "epoch": 1.59, + "grad_norm": 4.09375, + "learning_rate": 4.932504249369668e-07, + "log_odds": 8.35595703125, + "log_odds_ratio": -0.012880656868219376, + "loss": 0.2649, + "rejected_geometric_mean": -8.726009368896484, + "step": 6437 + }, + { + "chosen_geometric_mean": -0.707637369632721, + "epoch": 1.59, + "grad_norm": 3.15625, + "learning_rate": 4.926700139855242e-07, + "log_odds": 8.188861846923828, + "log_odds_ratio": -0.0014432722236961126, + "loss": 0.2811, + "rejected_geometric_mean": -8.206962585449219, + "step": 6438 + }, + { + "chosen_geometric_mean": -0.8917406797409058, + "epoch": 1.59, + "grad_norm": 2.265625, + "learning_rate": 4.920899073934912e-07, + "log_odds": 10.330592155456543, + "log_odds_ratio": -0.05985186621546745, + "loss": 0.2606, + "rejected_geometric_mean": -10.734691619873047, + "step": 6439 + }, + { + "chosen_geometric_mean": -1.0058104991912842, + "epoch": 1.59, + "grad_norm": 2.078125, + "learning_rate": 4.915101052488252e-07, + "log_odds": 3.405892848968506, + "log_odds_ratio": -0.14313200116157532, + "loss": 0.2659, + "rejected_geometric_mean": -4.040102005004883, + "step": 6440 + }, + { + "chosen_geometric_mean": -0.8786488771438599, + "epoch": 1.59, + "grad_norm": 2.90625, + "learning_rate": 4.90930607639439e-07, + "log_odds": 8.364702224731445, + "log_odds_ratio": -0.01994672603905201, + "loss": 0.2719, + "rejected_geometric_mean": -8.70230484008789, + "step": 6441 + }, + { + "chosen_geometric_mean": -0.8442327380180359, + "epoch": 1.59, + "grad_norm": 26.125, + "learning_rate": 4.903514146531987e-07, + "log_odds": 10.443230628967285, + "log_odds_ratio": -0.023599667474627495, + "loss": 0.2152, + "rejected_geometric_mean": -10.733173370361328, + "step": 6442 + }, + { + "chosen_geometric_mean": -0.9875697493553162, + "epoch": 1.6, + "grad_norm": 36.5, + "learning_rate": 4.897725263779229e-07, + "log_odds": 13.94755744934082, + "log_odds_ratio": -0.022232702001929283, + "loss": 0.244, + "rejected_geometric_mean": -14.478178024291992, + "step": 6443 + }, + { + "chosen_geometric_mean": -1.181318998336792, + "epoch": 1.6, + "grad_norm": 24.625, + "learning_rate": 4.891939429013862e-07, + "log_odds": 14.658906936645508, + "log_odds_ratio": -0.0001686781324679032, + "loss": 0.2791, + "rejected_geometric_mean": -15.420310974121094, + "step": 6444 + }, + { + "chosen_geometric_mean": -0.9818013310432434, + "epoch": 1.6, + "grad_norm": 4.40625, + "learning_rate": 4.886156643113141e-07, + "log_odds": 6.333632469177246, + "log_odds_ratio": -0.3175361156463623, + "loss": 0.2546, + "rejected_geometric_mean": -7.065258979797363, + "step": 6445 + }, + { + "chosen_geometric_mean": -1.0120207071304321, + "epoch": 1.6, + "grad_norm": 23.5, + "learning_rate": 4.88037690695389e-07, + "log_odds": 8.152095794677734, + "log_odds_ratio": -0.00583433173596859, + "loss": 0.3252, + "rejected_geometric_mean": -8.701279640197754, + "step": 6446 + }, + { + "chosen_geometric_mean": -1.045121669769287, + "epoch": 1.6, + "grad_norm": 33.25, + "learning_rate": 4.874600221412446e-07, + "log_odds": 5.185822486877441, + "log_odds_ratio": -0.0957278311252594, + "loss": 0.2995, + "rejected_geometric_mean": -5.827666282653809, + "step": 6447 + }, + { + "chosen_geometric_mean": -1.0747241973876953, + "epoch": 1.6, + "grad_norm": 22.25, + "learning_rate": 4.8688265873647e-07, + "log_odds": 3.0934228897094727, + "log_odds_ratio": -0.24702304601669312, + "loss": 0.3295, + "rejected_geometric_mean": -3.8784728050231934, + "step": 6448 + }, + { + "chosen_geometric_mean": -0.9753159284591675, + "epoch": 1.6, + "grad_norm": 2.84375, + "learning_rate": 4.863056005686068e-07, + "log_odds": 7.027986526489258, + "log_odds_ratio": -0.1127723753452301, + "loss": 0.2801, + "rejected_geometric_mean": -7.604831218719482, + "step": 6449 + }, + { + "chosen_geometric_mean": -1.0730581283569336, + "epoch": 1.6, + "grad_norm": 2.875, + "learning_rate": 4.857288477251515e-07, + "log_odds": 10.540434837341309, + "log_odds_ratio": -0.028437893837690353, + "loss": 0.2603, + "rejected_geometric_mean": -11.177929878234863, + "step": 6450 + }, + { + "chosen_geometric_mean": -0.7729775905609131, + "epoch": 1.6, + "grad_norm": 14.0, + "learning_rate": 4.851524002935526e-07, + "log_odds": 9.27518081665039, + "log_odds_ratio": -0.010226171463727951, + "loss": 0.2528, + "rejected_geometric_mean": -9.424554824829102, + "step": 6451 + }, + { + "chosen_geometric_mean": -1.7242767810821533, + "epoch": 1.6, + "grad_norm": 27.5, + "learning_rate": 4.845762583612141e-07, + "log_odds": 5.561487197875977, + "log_odds_ratio": -0.12587028741836548, + "loss": 0.3128, + "rejected_geometric_mean": -7.100529670715332, + "step": 6452 + }, + { + "chosen_geometric_mean": -0.944516658782959, + "epoch": 1.6, + "grad_norm": 4.625, + "learning_rate": 4.840004220154934e-07, + "log_odds": 12.943836212158203, + "log_odds_ratio": -0.012322387658059597, + "loss": 0.2981, + "rejected_geometric_mean": -13.393623352050781, + "step": 6453 + }, + { + "chosen_geometric_mean": -0.9407511949539185, + "epoch": 1.6, + "grad_norm": 5.40625, + "learning_rate": 4.834248913437001e-07, + "log_odds": 5.7965779304504395, + "log_odds_ratio": -0.12953539192676544, + "loss": 0.2551, + "rejected_geometric_mean": -6.34171724319458, + "step": 6454 + }, + { + "chosen_geometric_mean": -0.9534629583358765, + "epoch": 1.6, + "grad_norm": 30.25, + "learning_rate": 4.828496664330998e-07, + "log_odds": 4.465114593505859, + "log_odds_ratio": -0.10709331929683685, + "loss": 0.291, + "rejected_geometric_mean": -4.9349822998046875, + "step": 6455 + }, + { + "chosen_geometric_mean": -1.0956859588623047, + "epoch": 1.6, + "grad_norm": 1.8984375, + "learning_rate": 4.822747473709094e-07, + "log_odds": 10.317242622375488, + "log_odds_ratio": -0.004869035445153713, + "loss": 0.2209, + "rejected_geometric_mean": -11.003108978271484, + "step": 6456 + }, + { + "chosen_geometric_mean": -1.1274725198745728, + "epoch": 1.6, + "grad_norm": 91.0, + "learning_rate": 4.817001342442998e-07, + "log_odds": 6.042176723480225, + "log_odds_ratio": -0.21223941445350647, + "loss": 0.2982, + "rejected_geometric_mean": -6.9198174476623535, + "step": 6457 + }, + { + "chosen_geometric_mean": -0.8877460956573486, + "epoch": 1.6, + "grad_norm": 18.0, + "learning_rate": 4.811258271403973e-07, + "log_odds": 8.322863578796387, + "log_odds_ratio": -0.0016498398035764694, + "loss": 0.2851, + "rejected_geometric_mean": -8.666387557983398, + "step": 6458 + }, + { + "chosen_geometric_mean": -1.1636443138122559, + "epoch": 1.6, + "grad_norm": 15.4375, + "learning_rate": 4.805518261462807e-07, + "log_odds": 2.507458209991455, + "log_odds_ratio": -0.19270257651805878, + "loss": 0.2493, + "rejected_geometric_mean": -3.4114067554473877, + "step": 6459 + }, + { + "chosen_geometric_mean": -0.7345435619354248, + "epoch": 1.6, + "grad_norm": 2.171875, + "learning_rate": 4.799781313489812e-07, + "log_odds": 9.219001770019531, + "log_odds_ratio": -0.006951472721993923, + "loss": 0.2691, + "rejected_geometric_mean": -9.257930755615234, + "step": 6460 + }, + { + "chosen_geometric_mean": -1.1198909282684326, + "epoch": 1.6, + "grad_norm": 10.75, + "learning_rate": 4.794047428354865e-07, + "log_odds": 5.683648109436035, + "log_odds_ratio": -0.26188695430755615, + "loss": 0.259, + "rejected_geometric_mean": -6.552356719970703, + "step": 6461 + }, + { + "chosen_geometric_mean": -0.9711821675300598, + "epoch": 1.6, + "grad_norm": 1.9765625, + "learning_rate": 4.788316606927346e-07, + "log_odds": 6.3240065574646, + "log_odds_ratio": -0.05036921054124832, + "loss": 0.2893, + "rejected_geometric_mean": -6.825741767883301, + "step": 6462 + }, + { + "chosen_geometric_mean": -0.7121270298957825, + "epoch": 1.6, + "grad_norm": 16.75, + "learning_rate": 4.782588850076178e-07, + "log_odds": 4.6923041343688965, + "log_odds_ratio": -0.1044684499502182, + "loss": 0.2407, + "rejected_geometric_mean": -4.803178310394287, + "step": 6463 + }, + { + "chosen_geometric_mean": -0.9438678026199341, + "epoch": 1.6, + "grad_norm": 2.65625, + "learning_rate": 4.776864158669848e-07, + "log_odds": 1.7513349056243896, + "log_odds_ratio": -0.2608643174171448, + "loss": 0.2617, + "rejected_geometric_mean": -2.3867897987365723, + "step": 6464 + }, + { + "chosen_geometric_mean": -1.1920818090438843, + "epoch": 1.6, + "grad_norm": 2.171875, + "learning_rate": 4.771142533576348e-07, + "log_odds": 2.3561484813690186, + "log_odds_ratio": -0.2581561505794525, + "loss": 0.2369, + "rejected_geometric_mean": -3.3047783374786377, + "step": 6465 + }, + { + "chosen_geometric_mean": -1.0839757919311523, + "epoch": 1.6, + "grad_norm": 18.25, + "learning_rate": 4.7654239756632053e-07, + "log_odds": 5.109453201293945, + "log_odds_ratio": -0.19047676026821136, + "loss": 0.2621, + "rejected_geometric_mean": -5.891104221343994, + "step": 6466 + }, + { + "chosen_geometric_mean": -0.9920927286148071, + "epoch": 1.6, + "grad_norm": 4.125, + "learning_rate": 4.759708485797501e-07, + "log_odds": 9.751646041870117, + "log_odds_ratio": -0.009225092828273773, + "loss": 0.2631, + "rejected_geometric_mean": -10.246594429016113, + "step": 6467 + }, + { + "chosen_geometric_mean": -1.5922808647155762, + "epoch": 1.6, + "grad_norm": 32.25, + "learning_rate": 4.753996064845834e-07, + "log_odds": 8.178695678710938, + "log_odds_ratio": -0.12388944625854492, + "loss": 0.2832, + "rejected_geometric_mean": -9.445710182189941, + "step": 6468 + }, + { + "chosen_geometric_mean": -0.9431255459785461, + "epoch": 1.6, + "grad_norm": 4.625, + "learning_rate": 4.748286713674344e-07, + "log_odds": 3.7184433937072754, + "log_odds_ratio": -0.2908017635345459, + "loss": 0.2725, + "rejected_geometric_mean": -4.393738746643066, + "step": 6469 + }, + { + "chosen_geometric_mean": -0.8192750811576843, + "epoch": 1.6, + "grad_norm": 10.5, + "learning_rate": 4.7425804331487164e-07, + "log_odds": 9.250328063964844, + "log_odds_ratio": -0.00025230139726772904, + "loss": 0.2719, + "rejected_geometric_mean": -9.47021770477295, + "step": 6470 + }, + { + "chosen_geometric_mean": -1.0275800228118896, + "epoch": 1.6, + "grad_norm": 44.0, + "learning_rate": 4.7368772241341507e-07, + "log_odds": 0.1986067146062851, + "log_odds_ratio": -0.6250991821289062, + "loss": 0.3039, + "rejected_geometric_mean": -1.1099998950958252, + "step": 6471 + }, + { + "chosen_geometric_mean": -1.0757149457931519, + "epoch": 1.6, + "grad_norm": 25.75, + "learning_rate": 4.731177087495389e-07, + "log_odds": 6.93419075012207, + "log_odds_ratio": -0.11264947056770325, + "loss": 0.2903, + "rejected_geometric_mean": -7.658595085144043, + "step": 6472 + }, + { + "chosen_geometric_mean": -0.9976602792739868, + "epoch": 1.6, + "grad_norm": 3.4375, + "learning_rate": 4.7254800240967155e-07, + "log_odds": 5.875669956207275, + "log_odds_ratio": -0.2831271290779114, + "loss": 0.2495, + "rejected_geometric_mean": -6.571796417236328, + "step": 6473 + }, + { + "chosen_geometric_mean": -0.9155887365341187, + "epoch": 1.6, + "grad_norm": 2.375, + "learning_rate": 4.719786034801932e-07, + "log_odds": 4.611110210418701, + "log_odds_ratio": -0.17278647422790527, + "loss": 0.2508, + "rejected_geometric_mean": -5.120733261108398, + "step": 6474 + }, + { + "chosen_geometric_mean": -1.0566004514694214, + "epoch": 1.6, + "grad_norm": 21.25, + "learning_rate": 4.7140951204743934e-07, + "log_odds": 4.993922233581543, + "log_odds_ratio": -0.07897905260324478, + "loss": 0.2996, + "rejected_geometric_mean": -5.640424728393555, + "step": 6475 + }, + { + "chosen_geometric_mean": -1.2101871967315674, + "epoch": 1.6, + "grad_norm": 16.125, + "learning_rate": 4.708407281976979e-07, + "log_odds": 10.29681396484375, + "log_odds_ratio": -0.2178627997636795, + "loss": 0.3304, + "rejected_geometric_mean": -11.145097732543945, + "step": 6476 + }, + { + "chosen_geometric_mean": -0.9692996740341187, + "epoch": 1.6, + "grad_norm": 16.0, + "learning_rate": 4.702722520172093e-07, + "log_odds": 7.744129657745361, + "log_odds_ratio": -0.315436989068985, + "loss": 0.3, + "rejected_geometric_mean": -8.465331077575684, + "step": 6477 + }, + { + "chosen_geometric_mean": -1.0864778757095337, + "epoch": 1.6, + "grad_norm": 2.140625, + "learning_rate": 4.697040835921693e-07, + "log_odds": 13.040775299072266, + "log_odds_ratio": -6.14718155702576e-05, + "loss": 0.2365, + "rejected_geometric_mean": -13.709888458251953, + "step": 6478 + }, + { + "chosen_geometric_mean": -0.9847680330276489, + "epoch": 1.6, + "grad_norm": 4.21875, + "learning_rate": 4.6913622300872543e-07, + "log_odds": 6.140202522277832, + "log_odds_ratio": -0.10945454239845276, + "loss": 0.239, + "rejected_geometric_mean": -6.732649803161621, + "step": 6479 + }, + { + "chosen_geometric_mean": -1.0666455030441284, + "epoch": 1.6, + "grad_norm": 4.71875, + "learning_rate": 4.685686703529774e-07, + "log_odds": 4.564675807952881, + "log_odds_ratio": -0.17448723316192627, + "loss": 0.2494, + "rejected_geometric_mean": -5.346477508544922, + "step": 6480 + }, + { + "chosen_geometric_mean": -1.0274004936218262, + "epoch": 1.6, + "grad_norm": 5.75, + "learning_rate": 4.6800142571098245e-07, + "log_odds": 12.454014778137207, + "log_odds_ratio": -0.0574934296309948, + "loss": 0.2584, + "rejected_geometric_mean": -13.055915832519531, + "step": 6481 + }, + { + "chosen_geometric_mean": -1.2058180570602417, + "epoch": 1.6, + "grad_norm": 26.625, + "learning_rate": 4.674344891687477e-07, + "log_odds": 2.567318916320801, + "log_odds_ratio": -0.3386842608451843, + "loss": 0.3092, + "rejected_geometric_mean": -3.5373716354370117, + "step": 6482 + }, + { + "chosen_geometric_mean": -1.0488835573196411, + "epoch": 1.61, + "grad_norm": 2.140625, + "learning_rate": 4.668678608122329e-07, + "log_odds": 2.070439338684082, + "log_odds_ratio": -0.28548184037208557, + "loss": 0.2665, + "rejected_geometric_mean": -2.82521390914917, + "step": 6483 + }, + { + "chosen_geometric_mean": -0.7810866832733154, + "epoch": 1.61, + "grad_norm": 25.25, + "learning_rate": 4.6630154072735414e-07, + "log_odds": 7.619174003601074, + "log_odds_ratio": -0.23124273121356964, + "loss": 0.254, + "rejected_geometric_mean": -7.9795823097229, + "step": 6484 + }, + { + "chosen_geometric_mean": -0.8808490633964539, + "epoch": 1.61, + "grad_norm": 11.6875, + "learning_rate": 4.657355289999782e-07, + "log_odds": 7.640430450439453, + "log_odds_ratio": -0.18659329414367676, + "loss": 0.2609, + "rejected_geometric_mean": -8.11003303527832, + "step": 6485 + }, + { + "chosen_geometric_mean": -0.9595608711242676, + "epoch": 1.61, + "grad_norm": 1.875, + "learning_rate": 4.6516982571592625e-07, + "log_odds": 9.976025581359863, + "log_odds_ratio": -0.0036673815920948982, + "loss": 0.2091, + "rejected_geometric_mean": -10.422566413879395, + "step": 6486 + }, + { + "chosen_geometric_mean": -1.0328623056411743, + "epoch": 1.61, + "grad_norm": 21.375, + "learning_rate": 4.6460443096097317e-07, + "log_odds": 4.588038921356201, + "log_odds_ratio": -0.10627363622188568, + "loss": 0.2834, + "rejected_geometric_mean": -5.240396499633789, + "step": 6487 + }, + { + "chosen_geometric_mean": -0.8358256220817566, + "epoch": 1.61, + "grad_norm": 4.375, + "learning_rate": 4.640393448208458e-07, + "log_odds": 2.5891480445861816, + "log_odds_ratio": -0.2320709526538849, + "loss": 0.2666, + "rejected_geometric_mean": -2.9799158573150635, + "step": 6488 + }, + { + "chosen_geometric_mean": -1.0068151950836182, + "epoch": 1.61, + "grad_norm": 5.125, + "learning_rate": 4.634745673812244e-07, + "log_odds": 3.392991542816162, + "log_odds_ratio": -0.18362370133399963, + "loss": 0.2484, + "rejected_geometric_mean": -4.048587799072266, + "step": 6489 + }, + { + "chosen_geometric_mean": -0.9033052325248718, + "epoch": 1.61, + "grad_norm": 13.25, + "learning_rate": 4.629100987277435e-07, + "log_odds": 4.225421905517578, + "log_odds_ratio": -0.13551832735538483, + "loss": 0.2439, + "rejected_geometric_mean": -4.730833530426025, + "step": 6490 + }, + { + "chosen_geometric_mean": -0.9400553107261658, + "epoch": 1.61, + "grad_norm": 17.5, + "learning_rate": 4.6234593894598947e-07, + "log_odds": 9.989628791809082, + "log_odds_ratio": -0.021486278623342514, + "loss": 0.2912, + "rejected_geometric_mean": -10.445289611816406, + "step": 6491 + }, + { + "chosen_geometric_mean": -0.8777005672454834, + "epoch": 1.61, + "grad_norm": 5.59375, + "learning_rate": 4.617820881215024e-07, + "log_odds": 7.183403015136719, + "log_odds_ratio": -0.07450350373983383, + "loss": 0.2496, + "rejected_geometric_mean": -7.539072036743164, + "step": 6492 + }, + { + "chosen_geometric_mean": -1.138943076133728, + "epoch": 1.61, + "grad_norm": 35.25, + "learning_rate": 4.6121854633977696e-07, + "log_odds": 5.363945960998535, + "log_odds_ratio": -0.27586397528648376, + "loss": 0.2947, + "rejected_geometric_mean": -6.186772346496582, + "step": 6493 + }, + { + "chosen_geometric_mean": -1.1820027828216553, + "epoch": 1.61, + "grad_norm": 18.375, + "learning_rate": 4.6065531368625767e-07, + "log_odds": 8.969281196594238, + "log_odds_ratio": -0.2529463768005371, + "loss": 0.2859, + "rejected_geometric_mean": -9.838025093078613, + "step": 6494 + }, + { + "chosen_geometric_mean": -0.7894318103790283, + "epoch": 1.61, + "grad_norm": 1.7421875, + "learning_rate": 4.600923902463461e-07, + "log_odds": 7.118216514587402, + "log_odds_ratio": -0.0745563954114914, + "loss": 0.2068, + "rejected_geometric_mean": -7.375306606292725, + "step": 6495 + }, + { + "chosen_geometric_mean": -0.8954409956932068, + "epoch": 1.61, + "grad_norm": 2.890625, + "learning_rate": 4.5952977610539294e-07, + "log_odds": 6.4680585861206055, + "log_odds_ratio": -0.005080613307654858, + "loss": 0.2343, + "rejected_geometric_mean": -6.833745002746582, + "step": 6496 + }, + { + "chosen_geometric_mean": -1.023909091949463, + "epoch": 1.61, + "grad_norm": 1.953125, + "learning_rate": 4.589674713487058e-07, + "log_odds": 10.454829216003418, + "log_odds_ratio": -0.15304192900657654, + "loss": 0.2501, + "rejected_geometric_mean": -11.119315147399902, + "step": 6497 + }, + { + "chosen_geometric_mean": -0.8017226457595825, + "epoch": 1.61, + "grad_norm": 14.8125, + "learning_rate": 4.584054760615422e-07, + "log_odds": 7.091731071472168, + "log_odds_ratio": -0.18088960647583008, + "loss": 0.2686, + "rejected_geometric_mean": -7.448272705078125, + "step": 6498 + }, + { + "chosen_geometric_mean": -1.0611592531204224, + "epoch": 1.61, + "grad_norm": 2.65625, + "learning_rate": 4.5784379032911525e-07, + "log_odds": 6.177127838134766, + "log_odds_ratio": -0.1799003630876541, + "loss": 0.2615, + "rejected_geometric_mean": -6.93335485458374, + "step": 6499 + }, + { + "chosen_geometric_mean": -1.1552295684814453, + "epoch": 1.61, + "grad_norm": 7.65625, + "learning_rate": 4.572824142365889e-07, + "log_odds": 4.276722431182861, + "log_odds_ratio": -0.0638263002038002, + "loss": 0.2592, + "rejected_geometric_mean": -5.049464225769043, + "step": 6500 + }, + { + "chosen_geometric_mean": -2.4550602436065674, + "epoch": 1.61, + "grad_norm": 77.5, + "learning_rate": 4.567213478690824e-07, + "log_odds": 11.407524108886719, + "log_odds_ratio": -0.03803527355194092, + "loss": 0.3742, + "rejected_geometric_mean": -13.471570014953613, + "step": 6501 + }, + { + "chosen_geometric_mean": -0.9597477316856384, + "epoch": 1.61, + "grad_norm": 19.375, + "learning_rate": 4.561605913116654e-07, + "log_odds": 10.728342056274414, + "log_odds_ratio": -0.11154039204120636, + "loss": 0.283, + "rejected_geometric_mean": -11.276815414428711, + "step": 6502 + }, + { + "chosen_geometric_mean": -0.8835587501525879, + "epoch": 1.61, + "grad_norm": 2.171875, + "learning_rate": 4.5560014464936327e-07, + "log_odds": 18.372356414794922, + "log_odds_ratio": -1.4782362995902076e-05, + "loss": 0.251, + "rejected_geometric_mean": -18.679807662963867, + "step": 6503 + }, + { + "chosen_geometric_mean": -1.3683724403381348, + "epoch": 1.61, + "grad_norm": 5.96875, + "learning_rate": 4.5504000796715323e-07, + "log_odds": 1.1772078275680542, + "log_odds_ratio": -0.44389545917510986, + "loss": 0.2587, + "rejected_geometric_mean": -2.4381136894226074, + "step": 6504 + }, + { + "chosen_geometric_mean": -0.9868055582046509, + "epoch": 1.61, + "grad_norm": 66.5, + "learning_rate": 4.5448018134996495e-07, + "log_odds": 12.137563705444336, + "log_odds_ratio": -0.0018308142898604274, + "loss": 0.276, + "rejected_geometric_mean": -12.651403427124023, + "step": 6505 + }, + { + "chosen_geometric_mean": -0.9709638357162476, + "epoch": 1.61, + "grad_norm": 2.9375, + "learning_rate": 4.539206648826811e-07, + "log_odds": 5.718322277069092, + "log_odds_ratio": -0.1892758011817932, + "loss": 0.2318, + "rejected_geometric_mean": -6.3888702392578125, + "step": 6506 + }, + { + "chosen_geometric_mean": -0.7462156414985657, + "epoch": 1.61, + "grad_norm": 10.375, + "learning_rate": 4.5336145865013836e-07, + "log_odds": 12.50814437866211, + "log_odds_ratio": -0.0014646839117631316, + "loss": 0.2602, + "rejected_geometric_mean": -12.594615936279297, + "step": 6507 + }, + { + "chosen_geometric_mean": -0.8791216611862183, + "epoch": 1.61, + "grad_norm": 2.15625, + "learning_rate": 4.5280256273712657e-07, + "log_odds": 7.01976203918457, + "log_odds_ratio": -0.07094182819128036, + "loss": 0.2682, + "rejected_geometric_mean": -7.404788494110107, + "step": 6508 + }, + { + "chosen_geometric_mean": -1.0903809070587158, + "epoch": 1.61, + "grad_norm": 7.15625, + "learning_rate": 4.5224397722838656e-07, + "log_odds": 5.674003601074219, + "log_odds_ratio": -0.3008303940296173, + "loss": 0.2778, + "rejected_geometric_mean": -6.484903335571289, + "step": 6509 + }, + { + "chosen_geometric_mean": -0.9539080858230591, + "epoch": 1.61, + "grad_norm": 32.0, + "learning_rate": 4.5168570220861407e-07, + "log_odds": 10.386695861816406, + "log_odds_ratio": -0.013778732158243656, + "loss": 0.2748, + "rejected_geometric_mean": -10.83751106262207, + "step": 6510 + }, + { + "chosen_geometric_mean": -0.9318784475326538, + "epoch": 1.61, + "grad_norm": 8.3125, + "learning_rate": 4.511277377624568e-07, + "log_odds": 3.959416389465332, + "log_odds_ratio": -0.14731627702713013, + "loss": 0.2262, + "rejected_geometric_mean": -4.477858543395996, + "step": 6511 + }, + { + "chosen_geometric_mean": -1.1065406799316406, + "epoch": 1.61, + "grad_norm": 24.75, + "learning_rate": 4.505700839745142e-07, + "log_odds": 10.601573944091797, + "log_odds_ratio": -0.01524385903030634, + "loss": 0.2792, + "rejected_geometric_mean": -11.30554485321045, + "step": 6512 + }, + { + "chosen_geometric_mean": -1.0013575553894043, + "epoch": 1.61, + "grad_norm": 7.5625, + "learning_rate": 4.5001274092934256e-07, + "log_odds": 6.250397682189941, + "log_odds_ratio": -0.039019882678985596, + "loss": 0.2307, + "rejected_geometric_mean": -6.807597637176514, + "step": 6513 + }, + { + "chosen_geometric_mean": -0.989467203617096, + "epoch": 1.61, + "grad_norm": 5.59375, + "learning_rate": 4.4945570871144706e-07, + "log_odds": 10.943531036376953, + "log_odds_ratio": -0.13719424605369568, + "loss": 0.2637, + "rejected_geometric_mean": -11.556998252868652, + "step": 6514 + }, + { + "chosen_geometric_mean": -0.8024815320968628, + "epoch": 1.61, + "grad_norm": 18.875, + "learning_rate": 4.4889898740528656e-07, + "log_odds": 3.02919602394104, + "log_odds_ratio": -0.22263196110725403, + "loss": 0.2773, + "rejected_geometric_mean": -3.4189600944519043, + "step": 6515 + }, + { + "chosen_geometric_mean": -1.074406385421753, + "epoch": 1.61, + "grad_norm": 15.75, + "learning_rate": 4.4834257709527456e-07, + "log_odds": 4.146306991577148, + "log_odds_ratio": -0.2070542722940445, + "loss": 0.2849, + "rejected_geometric_mean": -4.870652198791504, + "step": 6516 + }, + { + "chosen_geometric_mean": -1.0056753158569336, + "epoch": 1.61, + "grad_norm": 3.1875, + "learning_rate": 4.477864778657751e-07, + "log_odds": 2.7326340675354004, + "log_odds_ratio": -0.18690069019794464, + "loss": 0.2849, + "rejected_geometric_mean": -3.37418270111084, + "step": 6517 + }, + { + "chosen_geometric_mean": -0.8255532383918762, + "epoch": 1.61, + "grad_norm": 16.375, + "learning_rate": 4.4723068980110695e-07, + "log_odds": 4.563954830169678, + "log_odds_ratio": -0.13588237762451172, + "loss": 0.3362, + "rejected_geometric_mean": -4.923957347869873, + "step": 6518 + }, + { + "chosen_geometric_mean": -0.9924182295799255, + "epoch": 1.61, + "grad_norm": 4.96875, + "learning_rate": 4.466752129855412e-07, + "log_odds": 2.398761749267578, + "log_odds_ratio": -0.1351294368505478, + "loss": 0.2815, + "rejected_geometric_mean": -3.0022835731506348, + "step": 6519 + }, + { + "chosen_geometric_mean": -1.0546131134033203, + "epoch": 1.61, + "grad_norm": 2.234375, + "learning_rate": 4.4612004750330035e-07, + "log_odds": 4.135183811187744, + "log_odds_ratio": -0.20751993358135223, + "loss": 0.2627, + "rejected_geometric_mean": -4.845316410064697, + "step": 6520 + }, + { + "chosen_geometric_mean": -1.0817114114761353, + "epoch": 1.61, + "grad_norm": 10.4375, + "learning_rate": 4.455651934385621e-07, + "log_odds": 6.812297821044922, + "log_odds_ratio": -0.09599542617797852, + "loss": 0.2335, + "rejected_geometric_mean": -7.506895065307617, + "step": 6521 + }, + { + "chosen_geometric_mean": -0.9599459171295166, + "epoch": 1.61, + "grad_norm": 2.625, + "learning_rate": 4.450106508754551e-07, + "log_odds": 1.8726779222488403, + "log_odds_ratio": -0.37620460987091064, + "loss": 0.2304, + "rejected_geometric_mean": -2.595111131668091, + "step": 6522 + }, + { + "chosen_geometric_mean": -1.0227632522583008, + "epoch": 1.62, + "grad_norm": 7.21875, + "learning_rate": 4.444564198980605e-07, + "log_odds": 10.176472663879395, + "log_odds_ratio": -0.01112128235399723, + "loss": 0.248, + "rejected_geometric_mean": -10.71769905090332, + "step": 6523 + }, + { + "chosen_geometric_mean": -0.9048404097557068, + "epoch": 1.62, + "grad_norm": 64.5, + "learning_rate": 4.439025005904138e-07, + "log_odds": 4.3171610832214355, + "log_odds_ratio": -0.1404827982187271, + "loss": 0.3144, + "rejected_geometric_mean": -4.801167011260986, + "step": 6524 + }, + { + "chosen_geometric_mean": -0.8975468277931213, + "epoch": 1.62, + "grad_norm": 22.625, + "learning_rate": 4.433488930365029e-07, + "log_odds": 11.545604705810547, + "log_odds_ratio": -0.11936014890670776, + "loss": 0.2719, + "rejected_geometric_mean": -11.971858978271484, + "step": 6525 + }, + { + "chosen_geometric_mean": -0.9545064568519592, + "epoch": 1.62, + "grad_norm": 2.03125, + "learning_rate": 4.4279559732026716e-07, + "log_odds": 6.1386003494262695, + "log_odds_ratio": -0.1490778923034668, + "loss": 0.2599, + "rejected_geometric_mean": -6.653377532958984, + "step": 6526 + }, + { + "chosen_geometric_mean": -0.8389087915420532, + "epoch": 1.62, + "grad_norm": 1.84375, + "learning_rate": 4.4224261352560027e-07, + "log_odds": 7.284709930419922, + "log_odds_ratio": -0.09227611869573593, + "loss": 0.2311, + "rejected_geometric_mean": -7.548970699310303, + "step": 6527 + }, + { + "chosen_geometric_mean": -0.8707991242408752, + "epoch": 1.62, + "grad_norm": 13.125, + "learning_rate": 4.4168994173634737e-07, + "log_odds": 9.160575866699219, + "log_odds_ratio": -0.04120016470551491, + "loss": 0.2374, + "rejected_geometric_mean": -9.480268478393555, + "step": 6528 + }, + { + "chosen_geometric_mean": -0.8410084843635559, + "epoch": 1.62, + "grad_norm": 20.375, + "learning_rate": 4.4113758203630564e-07, + "log_odds": 6.590407848358154, + "log_odds_ratio": -0.01717264950275421, + "loss": 0.2852, + "rejected_geometric_mean": -6.84367036819458, + "step": 6529 + }, + { + "chosen_geometric_mean": -1.2357956171035767, + "epoch": 1.62, + "grad_norm": 2.90625, + "learning_rate": 4.405855345092283e-07, + "log_odds": 5.349365234375, + "log_odds_ratio": -0.09595535695552826, + "loss": 0.2456, + "rejected_geometric_mean": -6.279346942901611, + "step": 6530 + }, + { + "chosen_geometric_mean": -0.949049711227417, + "epoch": 1.62, + "grad_norm": 22.5, + "learning_rate": 4.4003379923881815e-07, + "log_odds": 6.917105197906494, + "log_odds_ratio": -0.12903738021850586, + "loss": 0.2647, + "rejected_geometric_mean": -7.450320720672607, + "step": 6531 + }, + { + "chosen_geometric_mean": -0.8254138827323914, + "epoch": 1.62, + "grad_norm": 2.5, + "learning_rate": 4.394823763087308e-07, + "log_odds": 8.29331111907959, + "log_odds_ratio": -0.0029967937152832747, + "loss": 0.2199, + "rejected_geometric_mean": -8.536014556884766, + "step": 6532 + }, + { + "chosen_geometric_mean": -0.9550511837005615, + "epoch": 1.62, + "grad_norm": 12.5625, + "learning_rate": 4.38931265802576e-07, + "log_odds": 2.897038459777832, + "log_odds_ratio": -0.21098975837230682, + "loss": 0.2677, + "rejected_geometric_mean": -3.480374336242676, + "step": 6533 + }, + { + "chosen_geometric_mean": -1.4685635566711426, + "epoch": 1.62, + "grad_norm": 19.25, + "learning_rate": 4.3838046780391467e-07, + "log_odds": 7.2625532150268555, + "log_odds_ratio": -0.04047878831624985, + "loss": 0.2772, + "rejected_geometric_mean": -8.43281078338623, + "step": 6534 + }, + { + "chosen_geometric_mean": -0.9846349954605103, + "epoch": 1.62, + "grad_norm": 16.375, + "learning_rate": 4.3782998239626116e-07, + "log_odds": 8.255727767944336, + "log_odds_ratio": -0.11920365691184998, + "loss": 0.2881, + "rejected_geometric_mean": -8.764450073242188, + "step": 6535 + }, + { + "chosen_geometric_mean": -0.8796947598457336, + "epoch": 1.62, + "grad_norm": 2.203125, + "learning_rate": 4.372798096630831e-07, + "log_odds": 7.368724822998047, + "log_odds_ratio": -0.12667641043663025, + "loss": 0.2809, + "rejected_geometric_mean": -7.821688652038574, + "step": 6536 + }, + { + "chosen_geometric_mean": -0.9291657209396362, + "epoch": 1.62, + "grad_norm": 3.09375, + "learning_rate": 4.3672994968779943e-07, + "log_odds": 6.264719009399414, + "log_odds_ratio": -0.027547597885131836, + "loss": 0.2424, + "rejected_geometric_mean": -6.649890899658203, + "step": 6537 + }, + { + "chosen_geometric_mean": -1.0488276481628418, + "epoch": 1.62, + "grad_norm": 11.9375, + "learning_rate": 4.3618040255378136e-07, + "log_odds": 2.263596773147583, + "log_odds_ratio": -0.30961424112319946, + "loss": 0.2393, + "rejected_geometric_mean": -3.0831174850463867, + "step": 6538 + }, + { + "chosen_geometric_mean": -0.8761163949966431, + "epoch": 1.62, + "grad_norm": 2.25, + "learning_rate": 4.356311683443545e-07, + "log_odds": 5.255759239196777, + "log_odds_ratio": -0.18558812141418457, + "loss": 0.1933, + "rejected_geometric_mean": -5.656127452850342, + "step": 6539 + }, + { + "chosen_geometric_mean": -1.0172553062438965, + "epoch": 1.62, + "grad_norm": 34.25, + "learning_rate": 4.350822471427948e-07, + "log_odds": 5.881753444671631, + "log_odds_ratio": -0.22558808326721191, + "loss": 0.2489, + "rejected_geometric_mean": -6.5476975440979, + "step": 6540 + }, + { + "chosen_geometric_mean": -1.0293903350830078, + "epoch": 1.62, + "grad_norm": 10.125, + "learning_rate": 4.3453363903233266e-07, + "log_odds": 6.730210781097412, + "log_odds_ratio": -0.13103923201560974, + "loss": 0.285, + "rejected_geometric_mean": -7.411855220794678, + "step": 6541 + }, + { + "chosen_geometric_mean": -1.1699953079223633, + "epoch": 1.62, + "grad_norm": 28.0, + "learning_rate": 4.3398534409615056e-07, + "log_odds": 7.205811500549316, + "log_odds_ratio": -0.10943081229925156, + "loss": 0.2617, + "rejected_geometric_mean": -8.010623931884766, + "step": 6542 + }, + { + "chosen_geometric_mean": -0.8983734846115112, + "epoch": 1.62, + "grad_norm": 6.09375, + "learning_rate": 4.334373624173824e-07, + "log_odds": 5.268014907836914, + "log_odds_ratio": -0.19274954497814178, + "loss": 0.246, + "rejected_geometric_mean": -5.794116973876953, + "step": 6543 + }, + { + "chosen_geometric_mean": -0.9581048488616943, + "epoch": 1.62, + "grad_norm": 6.375, + "learning_rate": 4.3288969407911606e-07, + "log_odds": 6.639777660369873, + "log_odds_ratio": -0.10398544371128082, + "loss": 0.3153, + "rejected_geometric_mean": -7.1806488037109375, + "step": 6544 + }, + { + "chosen_geometric_mean": -1.0076614618301392, + "epoch": 1.62, + "grad_norm": 2.234375, + "learning_rate": 4.323423391643905e-07, + "log_odds": 8.595212936401367, + "log_odds_ratio": -0.029092948883771896, + "loss": 0.2351, + "rejected_geometric_mean": -9.153472900390625, + "step": 6545 + }, + { + "chosen_geometric_mean": -0.9120631217956543, + "epoch": 1.62, + "grad_norm": 2.1875, + "learning_rate": 4.317952977561979e-07, + "log_odds": 10.81144905090332, + "log_odds_ratio": -0.020627086982131004, + "loss": 0.2453, + "rejected_geometric_mean": -11.209516525268555, + "step": 6546 + }, + { + "chosen_geometric_mean": -1.1600232124328613, + "epoch": 1.62, + "grad_norm": 6.1875, + "learning_rate": 4.312485699374841e-07, + "log_odds": 4.308016300201416, + "log_odds_ratio": -0.16111566126346588, + "loss": 0.2665, + "rejected_geometric_mean": -5.193574905395508, + "step": 6547 + }, + { + "chosen_geometric_mean": -1.034745216369629, + "epoch": 1.62, + "grad_norm": 3.6875, + "learning_rate": 4.3070215579114507e-07, + "log_odds": 5.356838703155518, + "log_odds_ratio": -0.1786503940820694, + "loss": 0.2388, + "rejected_geometric_mean": -6.040072917938232, + "step": 6548 + }, + { + "chosen_geometric_mean": -0.9550361037254333, + "epoch": 1.62, + "grad_norm": 37.75, + "learning_rate": 4.3015605540002975e-07, + "log_odds": 10.134539604187012, + "log_odds_ratio": -0.010071192868053913, + "loss": 0.3206, + "rejected_geometric_mean": -10.550498962402344, + "step": 6549 + }, + { + "chosen_geometric_mean": -0.9722603559494019, + "epoch": 1.62, + "grad_norm": 20.875, + "learning_rate": 4.296102688469417e-07, + "log_odds": 1.889286994934082, + "log_odds_ratio": -0.2923998534679413, + "loss": 0.2912, + "rejected_geometric_mean": -2.4576098918914795, + "step": 6550 + }, + { + "chosen_geometric_mean": -0.9892877340316772, + "epoch": 1.62, + "grad_norm": 9.8125, + "learning_rate": 4.2906479621463353e-07, + "log_odds": 5.293334484100342, + "log_odds_ratio": -0.22521984577178955, + "loss": 0.259, + "rejected_geometric_mean": -5.978354454040527, + "step": 6551 + }, + { + "chosen_geometric_mean": -0.9517562985420227, + "epoch": 1.62, + "grad_norm": 1.8515625, + "learning_rate": 4.285196375858128e-07, + "log_odds": 10.893999099731445, + "log_odds_ratio": -0.21785201132297516, + "loss": 0.2408, + "rejected_geometric_mean": -11.519898414611816, + "step": 6552 + }, + { + "chosen_geometric_mean": -1.1111485958099365, + "epoch": 1.62, + "grad_norm": 11.0, + "learning_rate": 4.279747930431394e-07, + "log_odds": 12.084362030029297, + "log_odds_ratio": -0.003192541189491749, + "loss": 0.2579, + "rejected_geometric_mean": -12.785449028015137, + "step": 6553 + }, + { + "chosen_geometric_mean": -0.8550284504890442, + "epoch": 1.62, + "grad_norm": 8.5625, + "learning_rate": 4.2743026266922396e-07, + "log_odds": 6.100780487060547, + "log_odds_ratio": -0.0499253049492836, + "loss": 0.2275, + "rejected_geometric_mean": -6.42554235458374, + "step": 6554 + }, + { + "chosen_geometric_mean": -1.0867457389831543, + "epoch": 1.62, + "grad_norm": 6.875, + "learning_rate": 4.2688604654662984e-07, + "log_odds": 11.392817497253418, + "log_odds_ratio": -0.03377017378807068, + "loss": 0.2245, + "rejected_geometric_mean": -12.07192325592041, + "step": 6555 + }, + { + "chosen_geometric_mean": -1.020784854888916, + "epoch": 1.62, + "grad_norm": 51.25, + "learning_rate": 4.2634214475787414e-07, + "log_odds": 5.85378360748291, + "log_odds_ratio": -0.16191785037517548, + "loss": 0.2465, + "rejected_geometric_mean": -6.546968936920166, + "step": 6556 + }, + { + "chosen_geometric_mean": -1.0547857284545898, + "epoch": 1.62, + "grad_norm": 30.25, + "learning_rate": 4.2579855738542563e-07, + "log_odds": 4.785436153411865, + "log_odds_ratio": -0.14623460173606873, + "loss": 0.2819, + "rejected_geometric_mean": -5.479056358337402, + "step": 6557 + }, + { + "chosen_geometric_mean": -1.015904426574707, + "epoch": 1.62, + "grad_norm": 2.484375, + "learning_rate": 4.252552845117042e-07, + "log_odds": 4.94291877746582, + "log_odds_ratio": -0.05101914331316948, + "loss": 0.2794, + "rejected_geometric_mean": -5.5118632316589355, + "step": 6558 + }, + { + "chosen_geometric_mean": -0.9344632625579834, + "epoch": 1.62, + "grad_norm": 32.25, + "learning_rate": 4.247123262190844e-07, + "log_odds": 7.668478012084961, + "log_odds_ratio": -0.29863935708999634, + "loss": 0.3329, + "rejected_geometric_mean": -8.265170097351074, + "step": 6559 + }, + { + "chosen_geometric_mean": -1.7420933246612549, + "epoch": 1.62, + "grad_norm": 37.25, + "learning_rate": 4.241696825898908e-07, + "log_odds": 5.077134132385254, + "log_odds_ratio": -0.5370946526527405, + "loss": 0.303, + "rejected_geometric_mean": -6.613752365112305, + "step": 6560 + }, + { + "chosen_geometric_mean": -1.0821222066879272, + "epoch": 1.62, + "grad_norm": 3.171875, + "learning_rate": 4.236273537064009e-07, + "log_odds": 3.9595131874084473, + "log_odds_ratio": -0.33626487851142883, + "loss": 0.2886, + "rejected_geometric_mean": -4.778049468994141, + "step": 6561 + }, + { + "chosen_geometric_mean": -0.890113115310669, + "epoch": 1.62, + "grad_norm": 30.875, + "learning_rate": 4.2308533965084497e-07, + "log_odds": 7.961070537567139, + "log_odds_ratio": -0.06661365181207657, + "loss": 0.2215, + "rejected_geometric_mean": -8.334306716918945, + "step": 6562 + }, + { + "chosen_geometric_mean": -0.9285014867782593, + "epoch": 1.62, + "grad_norm": 2.15625, + "learning_rate": 4.2254364050540645e-07, + "log_odds": 14.910789489746094, + "log_odds_ratio": -0.0007767017814330757, + "loss": 0.2254, + "rejected_geometric_mean": -15.289644241333008, + "step": 6563 + }, + { + "chosen_geometric_mean": -0.8256540894508362, + "epoch": 1.63, + "grad_norm": 9.0, + "learning_rate": 4.220022563522186e-07, + "log_odds": 2.0037248134613037, + "log_odds_ratio": -0.2726278305053711, + "loss": 0.2736, + "rejected_geometric_mean": -2.514725685119629, + "step": 6564 + }, + { + "chosen_geometric_mean": -0.8888166546821594, + "epoch": 1.63, + "grad_norm": 2.703125, + "learning_rate": 4.214611872733693e-07, + "log_odds": 3.648005962371826, + "log_odds_ratio": -0.10588301718235016, + "loss": 0.2763, + "rejected_geometric_mean": -4.077891826629639, + "step": 6565 + }, + { + "chosen_geometric_mean": -0.9842807054519653, + "epoch": 1.63, + "grad_norm": 37.25, + "learning_rate": 4.209204333508965e-07, + "log_odds": 5.326620578765869, + "log_odds_ratio": -0.29062551259994507, + "loss": 0.2426, + "rejected_geometric_mean": -6.014062881469727, + "step": 6566 + }, + { + "chosen_geometric_mean": -0.9512121677398682, + "epoch": 1.63, + "grad_norm": 2.78125, + "learning_rate": 4.203799946667922e-07, + "log_odds": 6.747199058532715, + "log_odds_ratio": -0.18963958323001862, + "loss": 0.2693, + "rejected_geometric_mean": -7.355605125427246, + "step": 6567 + }, + { + "chosen_geometric_mean": -1.1071958541870117, + "epoch": 1.63, + "grad_norm": 9.6875, + "learning_rate": 4.1983987130300024e-07, + "log_odds": 3.198922634124756, + "log_odds_ratio": -0.10409437119960785, + "loss": 0.262, + "rejected_geometric_mean": -3.959230422973633, + "step": 6568 + }, + { + "chosen_geometric_mean": -1.038061499595642, + "epoch": 1.63, + "grad_norm": 2.265625, + "learning_rate": 4.1930006334141486e-07, + "log_odds": 8.419998168945312, + "log_odds_ratio": -0.11296754330396652, + "loss": 0.2545, + "rejected_geometric_mean": -9.012248992919922, + "step": 6569 + }, + { + "chosen_geometric_mean": -0.9350972771644592, + "epoch": 1.63, + "grad_norm": 2.359375, + "learning_rate": 4.1876057086388586e-07, + "log_odds": 8.24039077758789, + "log_odds_ratio": -0.13397271931171417, + "loss": 0.2384, + "rejected_geometric_mean": -8.710925102233887, + "step": 6570 + }, + { + "chosen_geometric_mean": -1.0015919208526611, + "epoch": 1.63, + "grad_norm": 3.03125, + "learning_rate": 4.18221393952212e-07, + "log_odds": 10.734724998474121, + "log_odds_ratio": -0.08806536346673965, + "loss": 0.2209, + "rejected_geometric_mean": -11.316059112548828, + "step": 6571 + }, + { + "chosen_geometric_mean": -1.6289260387420654, + "epoch": 1.63, + "grad_norm": 14.8125, + "learning_rate": 4.1768253268814507e-07, + "log_odds": 3.6652395725250244, + "log_odds_ratio": -0.09616352617740631, + "loss": 0.2822, + "rejected_geometric_mean": -4.964033603668213, + "step": 6572 + }, + { + "chosen_geometric_mean": -0.9144811034202576, + "epoch": 1.63, + "grad_norm": 4.1875, + "learning_rate": 4.1714398715339e-07, + "log_odds": 6.794547080993652, + "log_odds_ratio": -0.03051440417766571, + "loss": 0.2549, + "rejected_geometric_mean": -7.201767921447754, + "step": 6573 + }, + { + "chosen_geometric_mean": -1.0412132740020752, + "epoch": 1.63, + "grad_norm": 3.390625, + "learning_rate": 4.16605757429604e-07, + "log_odds": 6.105778694152832, + "log_odds_ratio": -0.18446065485477448, + "loss": 0.2507, + "rejected_geometric_mean": -6.790538311004639, + "step": 6574 + }, + { + "chosen_geometric_mean": -1.0674247741699219, + "epoch": 1.63, + "grad_norm": 2.203125, + "learning_rate": 4.160678435983939e-07, + "log_odds": 10.406755447387695, + "log_odds_ratio": -0.02246975526213646, + "loss": 0.26, + "rejected_geometric_mean": -11.032913208007812, + "step": 6575 + }, + { + "chosen_geometric_mean": -1.093163013458252, + "epoch": 1.63, + "grad_norm": 3.734375, + "learning_rate": 4.155302457413218e-07, + "log_odds": 15.32917594909668, + "log_odds_ratio": -0.015194445848464966, + "loss": 0.2752, + "rejected_geometric_mean": -16.015392303466797, + "step": 6576 + }, + { + "chosen_geometric_mean": -0.9220172762870789, + "epoch": 1.63, + "grad_norm": 4.75, + "learning_rate": 4.1499296393990004e-07, + "log_odds": 2.619119644165039, + "log_odds_ratio": -0.2976186275482178, + "loss": 0.2955, + "rejected_geometric_mean": -3.234541177749634, + "step": 6577 + }, + { + "chosen_geometric_mean": -1.08482027053833, + "epoch": 1.63, + "grad_norm": 2.0, + "learning_rate": 4.144559982755922e-07, + "log_odds": 10.461836814880371, + "log_odds_ratio": -0.07892140746116638, + "loss": 0.2573, + "rejected_geometric_mean": -11.165367126464844, + "step": 6578 + }, + { + "chosen_geometric_mean": -1.2664310932159424, + "epoch": 1.63, + "grad_norm": 41.0, + "learning_rate": 4.139193488298171e-07, + "log_odds": 3.83337140083313, + "log_odds_ratio": -0.15885986387729645, + "loss": 0.2957, + "rejected_geometric_mean": -4.71792459487915, + "step": 6579 + }, + { + "chosen_geometric_mean": -1.0253920555114746, + "epoch": 1.63, + "grad_norm": 3.515625, + "learning_rate": 4.133830156839433e-07, + "log_odds": 3.9477689266204834, + "log_odds_ratio": -0.0630105510354042, + "loss": 0.2133, + "rejected_geometric_mean": -4.536860466003418, + "step": 6580 + }, + { + "chosen_geometric_mean": -0.8593745231628418, + "epoch": 1.63, + "grad_norm": 3.96875, + "learning_rate": 4.1284699891929043e-07, + "log_odds": 4.642591953277588, + "log_odds_ratio": -0.27783021330833435, + "loss": 0.2712, + "rejected_geometric_mean": -5.161302089691162, + "step": 6581 + }, + { + "chosen_geometric_mean": -1.0650233030319214, + "epoch": 1.63, + "grad_norm": 51.0, + "learning_rate": 4.123112986171329e-07, + "log_odds": 8.717663764953613, + "log_odds_ratio": -0.04487433657050133, + "loss": 0.2697, + "rejected_geometric_mean": -9.361249923706055, + "step": 6582 + }, + { + "chosen_geometric_mean": -0.8772284984588623, + "epoch": 1.63, + "grad_norm": 2.890625, + "learning_rate": 4.1177591485869474e-07, + "log_odds": 10.435628890991211, + "log_odds_ratio": -0.0659002810716629, + "loss": 0.234, + "rejected_geometric_mean": -10.818413734436035, + "step": 6583 + }, + { + "chosen_geometric_mean": -0.9669823050498962, + "epoch": 1.63, + "grad_norm": 22.25, + "learning_rate": 4.112408477251534e-07, + "log_odds": 9.125530242919922, + "log_odds_ratio": -0.14147675037384033, + "loss": 0.2934, + "rejected_geometric_mean": -9.690997123718262, + "step": 6584 + }, + { + "chosen_geometric_mean": -0.9128354787826538, + "epoch": 1.63, + "grad_norm": 2.390625, + "learning_rate": 4.1070609729763846e-07, + "log_odds": 4.448380947113037, + "log_odds_ratio": -0.28604090213775635, + "loss": 0.2779, + "rejected_geometric_mean": -5.054976463317871, + "step": 6585 + }, + { + "chosen_geometric_mean": -1.0369404554367065, + "epoch": 1.63, + "grad_norm": 2.609375, + "learning_rate": 4.101716636572303e-07, + "log_odds": 9.047514915466309, + "log_odds_ratio": -0.09208720922470093, + "loss": 0.23, + "rejected_geometric_mean": -9.71144962310791, + "step": 6586 + }, + { + "chosen_geometric_mean": -0.8653937578201294, + "epoch": 1.63, + "grad_norm": 32.25, + "learning_rate": 4.096375468849617e-07, + "log_odds": 10.67689323425293, + "log_odds_ratio": -0.03338271379470825, + "loss": 0.2705, + "rejected_geometric_mean": -11.013008117675781, + "step": 6587 + }, + { + "chosen_geometric_mean": -1.1415183544158936, + "epoch": 1.63, + "grad_norm": 14.6875, + "learning_rate": 4.091037470618184e-07, + "log_odds": 3.5483174324035645, + "log_odds_ratio": -0.3526323139667511, + "loss": 0.2719, + "rejected_geometric_mean": -4.478699684143066, + "step": 6588 + }, + { + "chosen_geometric_mean": -1.051820158958435, + "epoch": 1.63, + "grad_norm": 2.265625, + "learning_rate": 4.0857026426873615e-07, + "log_odds": 5.384104251861572, + "log_odds_ratio": -0.19560177624225616, + "loss": 0.2306, + "rejected_geometric_mean": -6.120354652404785, + "step": 6589 + }, + { + "chosen_geometric_mean": -1.4046316146850586, + "epoch": 1.63, + "grad_norm": 10.5625, + "learning_rate": 4.080370985866042e-07, + "log_odds": 8.041561126708984, + "log_odds_ratio": -0.0007457450265064836, + "loss": 0.2672, + "rejected_geometric_mean": -9.162092208862305, + "step": 6590 + }, + { + "chosen_geometric_mean": -0.8430407643318176, + "epoch": 1.63, + "grad_norm": 5.5625, + "learning_rate": 4.075042500962642e-07, + "log_odds": 12.932866096496582, + "log_odds_ratio": -0.059046268463134766, + "loss": 0.2547, + "rejected_geometric_mean": -13.263335227966309, + "step": 6591 + }, + { + "chosen_geometric_mean": -0.9462255239486694, + "epoch": 1.63, + "grad_norm": 2.546875, + "learning_rate": 4.069717188785072e-07, + "log_odds": 4.430391788482666, + "log_odds_ratio": -0.20074227452278137, + "loss": 0.256, + "rejected_geometric_mean": -4.960598468780518, + "step": 6592 + }, + { + "chosen_geometric_mean": -1.0864359140396118, + "epoch": 1.63, + "grad_norm": 3.375, + "learning_rate": 4.064395050140793e-07, + "log_odds": 10.356739044189453, + "log_odds_ratio": -0.13476276397705078, + "loss": 0.2696, + "rejected_geometric_mean": -11.10705852508545, + "step": 6593 + }, + { + "chosen_geometric_mean": -0.9071396589279175, + "epoch": 1.63, + "grad_norm": 14.0625, + "learning_rate": 4.0590760858367523e-07, + "log_odds": 2.5558509826660156, + "log_odds_ratio": -0.08349914848804474, + "loss": 0.2418, + "rejected_geometric_mean": -2.9516515731811523, + "step": 6594 + }, + { + "chosen_geometric_mean": -0.7917335629463196, + "epoch": 1.63, + "grad_norm": 24.125, + "learning_rate": 4.0537602966794434e-07, + "log_odds": 4.090603351593018, + "log_odds_ratio": -0.10932482033967972, + "loss": 0.2451, + "rejected_geometric_mean": -4.369223117828369, + "step": 6595 + }, + { + "chosen_geometric_mean": -0.8326521515846252, + "epoch": 1.63, + "grad_norm": 7.125, + "learning_rate": 4.048447683474871e-07, + "log_odds": 8.11292552947998, + "log_odds_ratio": -0.2678895890712738, + "loss": 0.2677, + "rejected_geometric_mean": -8.635699272155762, + "step": 6596 + }, + { + "chosen_geometric_mean": -0.8114111423492432, + "epoch": 1.63, + "grad_norm": 33.0, + "learning_rate": 4.0431382470285497e-07, + "log_odds": 12.506168365478516, + "log_odds_ratio": -0.017072880640625954, + "loss": 0.3152, + "rejected_geometric_mean": -12.712259292602539, + "step": 6597 + }, + { + "chosen_geometric_mean": -1.1193147897720337, + "epoch": 1.63, + "grad_norm": 6.625, + "learning_rate": 4.03783198814551e-07, + "log_odds": 9.539532661437988, + "log_odds_ratio": -0.23247945308685303, + "loss": 0.2274, + "rejected_geometric_mean": -10.385371208190918, + "step": 6598 + }, + { + "chosen_geometric_mean": -1.0497291088104248, + "epoch": 1.63, + "grad_norm": 3.015625, + "learning_rate": 4.03252890763032e-07, + "log_odds": 3.4409408569335938, + "log_odds_ratio": -0.11488306522369385, + "loss": 0.2262, + "rejected_geometric_mean": -4.066273212432861, + "step": 6599 + }, + { + "chosen_geometric_mean": -0.8934130668640137, + "epoch": 1.63, + "grad_norm": 15.8125, + "learning_rate": 4.027229006287045e-07, + "log_odds": 10.491260528564453, + "log_odds_ratio": -0.05490672215819359, + "loss": 0.2953, + "rejected_geometric_mean": -10.860723495483398, + "step": 6600 + }, + { + "chosen_geometric_mean": -0.9032198190689087, + "epoch": 1.63, + "grad_norm": 44.25, + "learning_rate": 4.0219322849192803e-07, + "log_odds": 13.50814151763916, + "log_odds_ratio": -3.409605415072292e-05, + "loss": 0.2499, + "rejected_geometric_mean": -13.87902545928955, + "step": 6601 + }, + { + "chosen_geometric_mean": -1.1295111179351807, + "epoch": 1.63, + "grad_norm": 6.03125, + "learning_rate": 4.0166387443301424e-07, + "log_odds": 1.4200876951217651, + "log_odds_ratio": -0.3752484917640686, + "loss": 0.2604, + "rejected_geometric_mean": -2.349726915359497, + "step": 6602 + }, + { + "chosen_geometric_mean": -1.232569932937622, + "epoch": 1.63, + "grad_norm": 6.125, + "learning_rate": 4.0113483853222555e-07, + "log_odds": 4.720871925354004, + "log_odds_ratio": -0.16143348813056946, + "loss": 0.3109, + "rejected_geometric_mean": -5.656042098999023, + "step": 6603 + }, + { + "chosen_geometric_mean": -1.127755880355835, + "epoch": 1.64, + "grad_norm": 5.0, + "learning_rate": 4.006061208697756e-07, + "log_odds": 7.155485153198242, + "log_odds_ratio": -0.09139414876699448, + "loss": 0.29, + "rejected_geometric_mean": -7.890316009521484, + "step": 6604 + }, + { + "chosen_geometric_mean": -1.004411220550537, + "epoch": 1.64, + "grad_norm": 3.953125, + "learning_rate": 4.000777215258314e-07, + "log_odds": 2.5163588523864746, + "log_odds_ratio": -0.34480610489845276, + "loss": 0.2704, + "rejected_geometric_mean": -3.2712864875793457, + "step": 6605 + }, + { + "chosen_geometric_mean": -0.91493821144104, + "epoch": 1.64, + "grad_norm": 2.953125, + "learning_rate": 3.995496405805116e-07, + "log_odds": 3.9912476539611816, + "log_odds_ratio": -0.05858909711241722, + "loss": 0.2722, + "rejected_geometric_mean": -4.367210388183594, + "step": 6606 + }, + { + "chosen_geometric_mean": -1.0257865190505981, + "epoch": 1.64, + "grad_norm": 2.71875, + "learning_rate": 3.9902187811388466e-07, + "log_odds": 4.732354640960693, + "log_odds_ratio": -0.1325729787349701, + "loss": 0.2747, + "rejected_geometric_mean": -5.384042739868164, + "step": 6607 + }, + { + "chosen_geometric_mean": -0.9617594480514526, + "epoch": 1.64, + "grad_norm": 12.8125, + "learning_rate": 3.984944342059735e-07, + "log_odds": 7.506627082824707, + "log_odds_ratio": -0.16864299774169922, + "loss": 0.2438, + "rejected_geometric_mean": -8.07752513885498, + "step": 6608 + }, + { + "chosen_geometric_mean": -0.886995792388916, + "epoch": 1.64, + "grad_norm": 2.203125, + "learning_rate": 3.979673089367497e-07, + "log_odds": 2.801790475845337, + "log_odds_ratio": -0.16970601677894592, + "loss": 0.2592, + "rejected_geometric_mean": -3.234926223754883, + "step": 6609 + }, + { + "chosen_geometric_mean": -0.9808973670005798, + "epoch": 1.64, + "grad_norm": 11.6875, + "learning_rate": 3.974405023861397e-07, + "log_odds": 8.454596519470215, + "log_odds_ratio": -0.05944950133562088, + "loss": 0.2315, + "rejected_geometric_mean": -8.955124855041504, + "step": 6610 + }, + { + "chosen_geometric_mean": -0.821637749671936, + "epoch": 1.64, + "grad_norm": 8.8125, + "learning_rate": 3.969140146340184e-07, + "log_odds": 5.516822814941406, + "log_odds_ratio": -0.045853886753320694, + "loss": 0.2672, + "rejected_geometric_mean": -5.748457908630371, + "step": 6611 + }, + { + "chosen_geometric_mean": -1.012817621231079, + "epoch": 1.64, + "grad_norm": 59.0, + "learning_rate": 3.963878457602155e-07, + "log_odds": 2.6462841033935547, + "log_odds_ratio": -0.1455518752336502, + "loss": 0.2646, + "rejected_geometric_mean": -3.2929201126098633, + "step": 6612 + }, + { + "chosen_geometric_mean": -1.0565392971038818, + "epoch": 1.64, + "grad_norm": 10.125, + "learning_rate": 3.9586199584450925e-07, + "log_odds": 6.9370222091674805, + "log_odds_ratio": -0.33497506380081177, + "loss": 0.2534, + "rejected_geometric_mean": -7.816258430480957, + "step": 6613 + }, + { + "chosen_geometric_mean": -0.9626716375350952, + "epoch": 1.64, + "grad_norm": 5.78125, + "learning_rate": 3.9533646496663276e-07, + "log_odds": 7.105966091156006, + "log_odds_ratio": -0.10804177075624466, + "loss": 0.3067, + "rejected_geometric_mean": -7.613674640655518, + "step": 6614 + }, + { + "chosen_geometric_mean": -0.9831121563911438, + "epoch": 1.64, + "grad_norm": 2.015625, + "learning_rate": 3.9481125320626784e-07, + "log_odds": 9.749859809875488, + "log_odds_ratio": -0.003933435771614313, + "loss": 0.2527, + "rejected_geometric_mean": -10.252178192138672, + "step": 6615 + }, + { + "chosen_geometric_mean": -1.0950891971588135, + "epoch": 1.64, + "grad_norm": 6.78125, + "learning_rate": 3.9428636064304973e-07, + "log_odds": 11.795766830444336, + "log_odds_ratio": -5.616678754449822e-05, + "loss": 0.2759, + "rejected_geometric_mean": -12.472972869873047, + "step": 6616 + }, + { + "chosen_geometric_mean": -1.0321247577667236, + "epoch": 1.64, + "grad_norm": 7.21875, + "learning_rate": 3.937617873565652e-07, + "log_odds": 2.881638526916504, + "log_odds_ratio": -0.2284483015537262, + "loss": 0.2521, + "rejected_geometric_mean": -3.5555601119995117, + "step": 6617 + }, + { + "chosen_geometric_mean": -0.8820749521255493, + "epoch": 1.64, + "grad_norm": 2.796875, + "learning_rate": 3.9323753342635106e-07, + "log_odds": 5.946177959442139, + "log_odds_ratio": -0.10833944380283356, + "loss": 0.2664, + "rejected_geometric_mean": -6.347094535827637, + "step": 6618 + }, + { + "chosen_geometric_mean": -1.2542195320129395, + "epoch": 1.64, + "grad_norm": 42.75, + "learning_rate": 3.927135989318981e-07, + "log_odds": 9.973004341125488, + "log_odds_ratio": -0.005230321548879147, + "loss": 0.2595, + "rejected_geometric_mean": -10.766141891479492, + "step": 6619 + }, + { + "chosen_geometric_mean": -0.8293066620826721, + "epoch": 1.64, + "grad_norm": 28.25, + "learning_rate": 3.9218998395264664e-07, + "log_odds": 4.685111045837402, + "log_odds_ratio": -0.20480497181415558, + "loss": 0.2896, + "rejected_geometric_mean": -5.149106502532959, + "step": 6620 + }, + { + "chosen_geometric_mean": -0.8983006477355957, + "epoch": 1.64, + "grad_norm": 5.90625, + "learning_rate": 3.916666885679887e-07, + "log_odds": 8.911112785339355, + "log_odds_ratio": -0.011030232533812523, + "loss": 0.2536, + "rejected_geometric_mean": -9.274245262145996, + "step": 6621 + }, + { + "chosen_geometric_mean": -1.1634037494659424, + "epoch": 1.64, + "grad_norm": 2.03125, + "learning_rate": 3.91143712857269e-07, + "log_odds": 2.7289204597473145, + "log_odds_ratio": -0.15024983882904053, + "loss": 0.2592, + "rejected_geometric_mean": -3.5873470306396484, + "step": 6622 + }, + { + "chosen_geometric_mean": -0.9455737471580505, + "epoch": 1.64, + "grad_norm": 44.0, + "learning_rate": 3.90621056899784e-07, + "log_odds": 6.866729259490967, + "log_odds_ratio": -0.05939701199531555, + "loss": 0.2913, + "rejected_geometric_mean": -7.353600978851318, + "step": 6623 + }, + { + "chosen_geometric_mean": -0.8786205649375916, + "epoch": 1.64, + "grad_norm": 2.578125, + "learning_rate": 3.900987207747792e-07, + "log_odds": 4.932066440582275, + "log_odds_ratio": -0.09560609608888626, + "loss": 0.2392, + "rejected_geometric_mean": -5.348973274230957, + "step": 6624 + }, + { + "chosen_geometric_mean": -0.875542938709259, + "epoch": 1.64, + "grad_norm": 25.25, + "learning_rate": 3.8957670456145513e-07, + "log_odds": 2.3287415504455566, + "log_odds_ratio": -0.30506014823913574, + "loss": 0.283, + "rejected_geometric_mean": -2.915346384048462, + "step": 6625 + }, + { + "chosen_geometric_mean": -0.8956431150436401, + "epoch": 1.64, + "grad_norm": 11.4375, + "learning_rate": 3.8905500833896105e-07, + "log_odds": 6.76082181930542, + "log_odds_ratio": -0.23995070159435272, + "loss": 0.2654, + "rejected_geometric_mean": -7.249383926391602, + "step": 6626 + }, + { + "chosen_geometric_mean": -1.044202446937561, + "epoch": 1.64, + "grad_norm": 2.578125, + "learning_rate": 3.885336321863972e-07, + "log_odds": 7.527915954589844, + "log_odds_ratio": -0.12454560399055481, + "loss": 0.2543, + "rejected_geometric_mean": -8.189781188964844, + "step": 6627 + }, + { + "chosen_geometric_mean": -0.9430168867111206, + "epoch": 1.64, + "grad_norm": 45.0, + "learning_rate": 3.8801257618281964e-07, + "log_odds": 7.661130428314209, + "log_odds_ratio": -0.15615758299827576, + "loss": 0.2709, + "rejected_geometric_mean": -8.142637252807617, + "step": 6628 + }, + { + "chosen_geometric_mean": -0.9964509010314941, + "epoch": 1.64, + "grad_norm": 2.421875, + "learning_rate": 3.874918404072314e-07, + "log_odds": 3.7527620792388916, + "log_odds_ratio": -0.04553016647696495, + "loss": 0.2757, + "rejected_geometric_mean": -4.301416397094727, + "step": 6629 + }, + { + "chosen_geometric_mean": -0.9743348360061646, + "epoch": 1.64, + "grad_norm": 13.875, + "learning_rate": 3.8697142493858806e-07, + "log_odds": 5.356239318847656, + "log_odds_ratio": -0.11062189936637878, + "loss": 0.246, + "rejected_geometric_mean": -5.920083522796631, + "step": 6630 + }, + { + "chosen_geometric_mean": -1.159395456314087, + "epoch": 1.64, + "grad_norm": 38.75, + "learning_rate": 3.8645132985579844e-07, + "log_odds": 3.077557325363159, + "log_odds_ratio": -0.24043554067611694, + "loss": 0.2781, + "rejected_geometric_mean": -3.9105639457702637, + "step": 6631 + }, + { + "chosen_geometric_mean": -0.7457523941993713, + "epoch": 1.64, + "grad_norm": 8.75, + "learning_rate": 3.8593155523771963e-07, + "log_odds": 14.896892547607422, + "log_odds_ratio": -5.434996273834258e-05, + "loss": 0.2406, + "rejected_geometric_mean": -14.96649169921875, + "step": 6632 + }, + { + "chosen_geometric_mean": -1.050160527229309, + "epoch": 1.64, + "grad_norm": 3.359375, + "learning_rate": 3.8541210116316343e-07, + "log_odds": 14.895442008972168, + "log_odds_ratio": -0.12883582711219788, + "loss": 0.269, + "rejected_geometric_mean": -15.553820610046387, + "step": 6633 + }, + { + "chosen_geometric_mean": -1.2417936325073242, + "epoch": 1.64, + "grad_norm": 3.34375, + "learning_rate": 3.848929677108915e-07, + "log_odds": 1.84438955783844, + "log_odds_ratio": -0.4898855686187744, + "loss": 0.2503, + "rejected_geometric_mean": -2.988237142562866, + "step": 6634 + }, + { + "chosen_geometric_mean": -0.9244483113288879, + "epoch": 1.64, + "grad_norm": 2.4375, + "learning_rate": 3.843741549596164e-07, + "log_odds": 4.982263088226318, + "log_odds_ratio": -0.14306116104125977, + "loss": 0.2524, + "rejected_geometric_mean": -5.481927394866943, + "step": 6635 + }, + { + "chosen_geometric_mean": -1.2812392711639404, + "epoch": 1.64, + "grad_norm": 5.90625, + "learning_rate": 3.838556629880022e-07, + "log_odds": 5.913839340209961, + "log_odds_ratio": -0.13430708646774292, + "loss": 0.2748, + "rejected_geometric_mean": -6.923954963684082, + "step": 6636 + }, + { + "chosen_geometric_mean": -1.2267593145370483, + "epoch": 1.64, + "grad_norm": 2.90625, + "learning_rate": 3.8333749187466575e-07, + "log_odds": 5.298300743103027, + "log_odds_ratio": -0.15950746834278107, + "loss": 0.2605, + "rejected_geometric_mean": -6.241020679473877, + "step": 6637 + }, + { + "chosen_geometric_mean": -1.1580888032913208, + "epoch": 1.64, + "grad_norm": 3.34375, + "learning_rate": 3.828196416981733e-07, + "log_odds": 5.540708541870117, + "log_odds_ratio": -0.09332529455423355, + "loss": 0.2836, + "rejected_geometric_mean": -6.326278209686279, + "step": 6638 + }, + { + "chosen_geometric_mean": -0.9979682564735413, + "epoch": 1.64, + "grad_norm": 3.671875, + "learning_rate": 3.823021125370438e-07, + "log_odds": 6.085366725921631, + "log_odds_ratio": -0.16864646971225739, + "loss": 0.2445, + "rejected_geometric_mean": -6.736808776855469, + "step": 6639 + }, + { + "chosen_geometric_mean": -1.1533989906311035, + "epoch": 1.64, + "grad_norm": 3.046875, + "learning_rate": 3.8178490446974745e-07, + "log_odds": 11.0083646774292, + "log_odds_ratio": -0.11303520202636719, + "loss": 0.2675, + "rejected_geometric_mean": -11.798378944396973, + "step": 6640 + }, + { + "chosen_geometric_mean": -1.0918347835540771, + "epoch": 1.64, + "grad_norm": 1.90625, + "learning_rate": 3.812680175747047e-07, + "log_odds": 2.27063250541687, + "log_odds_ratio": -0.30423423647880554, + "loss": 0.2395, + "rejected_geometric_mean": -3.1452572345733643, + "step": 6641 + }, + { + "chosen_geometric_mean": -0.8268996477127075, + "epoch": 1.64, + "grad_norm": 3.96875, + "learning_rate": 3.807514519302888e-07, + "log_odds": 10.497087478637695, + "log_odds_ratio": -0.012224869802594185, + "loss": 0.2339, + "rejected_geometric_mean": -10.747091293334961, + "step": 6642 + }, + { + "chosen_geometric_mean": -0.943697452545166, + "epoch": 1.64, + "grad_norm": 12.0625, + "learning_rate": 3.8023520761482305e-07, + "log_odds": 12.709906578063965, + "log_odds_ratio": -0.0045491233468055725, + "loss": 0.3049, + "rejected_geometric_mean": -13.160462379455566, + "step": 6643 + }, + { + "chosen_geometric_mean": -0.9091424942016602, + "epoch": 1.64, + "grad_norm": 2.78125, + "learning_rate": 3.7971928470658145e-07, + "log_odds": 9.674188613891602, + "log_odds_ratio": -0.07488080114126205, + "loss": 0.2824, + "rejected_geometric_mean": -10.098672866821289, + "step": 6644 + }, + { + "chosen_geometric_mean": -1.1578928232192993, + "epoch": 1.65, + "grad_norm": 28.375, + "learning_rate": 3.7920368328379254e-07, + "log_odds": 9.310025215148926, + "log_odds_ratio": -0.047640230506658554, + "loss": 0.2693, + "rejected_geometric_mean": -10.105207443237305, + "step": 6645 + }, + { + "chosen_geometric_mean": -0.9343633055686951, + "epoch": 1.65, + "grad_norm": 3.765625, + "learning_rate": 3.7868840342463243e-07, + "log_odds": 6.3907928466796875, + "log_odds_ratio": -0.11669318377971649, + "loss": 0.266, + "rejected_geometric_mean": -6.902791976928711, + "step": 6646 + }, + { + "chosen_geometric_mean": -1.1727068424224854, + "epoch": 1.65, + "grad_norm": 4.0625, + "learning_rate": 3.781734452072297e-07, + "log_odds": 4.3352861404418945, + "log_odds_ratio": -0.21655386686325073, + "loss": 0.2728, + "rejected_geometric_mean": -5.225929260253906, + "step": 6647 + }, + { + "chosen_geometric_mean": -0.9549340009689331, + "epoch": 1.65, + "grad_norm": 5.125, + "learning_rate": 3.7765880870966525e-07, + "log_odds": 8.780019760131836, + "log_odds_ratio": -0.10717001557350159, + "loss": 0.217, + "rejected_geometric_mean": -9.321407318115234, + "step": 6648 + }, + { + "chosen_geometric_mean": -0.8169074654579163, + "epoch": 1.65, + "grad_norm": 4.125, + "learning_rate": 3.771444940099694e-07, + "log_odds": 3.755979299545288, + "log_odds_ratio": -0.2325989156961441, + "loss": 0.2693, + "rejected_geometric_mean": -4.1773681640625, + "step": 6649 + }, + { + "chosen_geometric_mean": -0.8994942903518677, + "epoch": 1.65, + "grad_norm": 1.7578125, + "learning_rate": 3.7663050118612545e-07, + "log_odds": 4.65173864364624, + "log_odds_ratio": -0.3784536123275757, + "loss": 0.2067, + "rejected_geometric_mean": -5.319643497467041, + "step": 6650 + }, + { + "chosen_geometric_mean": -0.9397096633911133, + "epoch": 1.65, + "grad_norm": 2.453125, + "learning_rate": 3.76116830316067e-07, + "log_odds": 6.025328636169434, + "log_odds_ratio": -0.12574708461761475, + "loss": 0.2581, + "rejected_geometric_mean": -6.536411285400391, + "step": 6651 + }, + { + "chosen_geometric_mean": -0.8961325287818909, + "epoch": 1.65, + "grad_norm": 14.5, + "learning_rate": 3.756034814776785e-07, + "log_odds": 10.171316146850586, + "log_odds_ratio": -0.06942299008369446, + "loss": 0.278, + "rejected_geometric_mean": -10.554617881774902, + "step": 6652 + }, + { + "chosen_geometric_mean": -1.049647331237793, + "epoch": 1.65, + "grad_norm": 2.0625, + "learning_rate": 3.7509045474879545e-07, + "log_odds": 8.86600399017334, + "log_odds_ratio": -0.11889242380857468, + "loss": 0.2575, + "rejected_geometric_mean": -9.536650657653809, + "step": 6653 + }, + { + "chosen_geometric_mean": -0.7928269505500793, + "epoch": 1.65, + "grad_norm": 7.84375, + "learning_rate": 3.745777502072065e-07, + "log_odds": 5.628174304962158, + "log_odds_ratio": -0.24593579769134521, + "loss": 0.2321, + "rejected_geometric_mean": -6.019102096557617, + "step": 6654 + }, + { + "chosen_geometric_mean": -0.9926885962486267, + "epoch": 1.65, + "grad_norm": 5.03125, + "learning_rate": 3.7406536793064827e-07, + "log_odds": 8.407578468322754, + "log_odds_ratio": -0.18778355419635773, + "loss": 0.2769, + "rejected_geometric_mean": -9.050982475280762, + "step": 6655 + }, + { + "chosen_geometric_mean": -0.7183274030685425, + "epoch": 1.65, + "grad_norm": 37.0, + "learning_rate": 3.7355330799681096e-07, + "log_odds": 9.098087310791016, + "log_odds_ratio": -0.1298437863588333, + "loss": 0.2756, + "rejected_geometric_mean": -9.248035430908203, + "step": 6656 + }, + { + "chosen_geometric_mean": -1.196415901184082, + "epoch": 1.65, + "grad_norm": 4.8125, + "learning_rate": 3.7304157048333594e-07, + "log_odds": 4.020112991333008, + "log_odds_ratio": -0.41558343172073364, + "loss": 0.306, + "rejected_geometric_mean": -5.034281253814697, + "step": 6657 + }, + { + "chosen_geometric_mean": -1.0692936182022095, + "epoch": 1.65, + "grad_norm": 2.609375, + "learning_rate": 3.725301554678135e-07, + "log_odds": 9.050365447998047, + "log_odds_ratio": -0.0010209090542048216, + "loss": 0.2207, + "rejected_geometric_mean": -9.688040733337402, + "step": 6658 + }, + { + "chosen_geometric_mean": -0.9550216794013977, + "epoch": 1.65, + "grad_norm": 122.0, + "learning_rate": 3.7201906302778756e-07, + "log_odds": 16.090126037597656, + "log_odds_ratio": -0.0012018590932711959, + "loss": 0.2418, + "rejected_geometric_mean": -16.55065155029297, + "step": 6659 + }, + { + "chosen_geometric_mean": -1.1834888458251953, + "epoch": 1.65, + "grad_norm": 2.5, + "learning_rate": 3.7150829324075125e-07, + "log_odds": 8.853222846984863, + "log_odds_ratio": -0.017350303009152412, + "loss": 0.2717, + "rejected_geometric_mean": -9.665979385375977, + "step": 6660 + }, + { + "chosen_geometric_mean": -0.9283950924873352, + "epoch": 1.65, + "grad_norm": 2.28125, + "learning_rate": 3.7099784618415015e-07, + "log_odds": 2.348771333694458, + "log_odds_ratio": -0.4214301109313965, + "loss": 0.2979, + "rejected_geometric_mean": -3.093682289123535, + "step": 6661 + }, + { + "chosen_geometric_mean": -0.7533660531044006, + "epoch": 1.65, + "grad_norm": 2.109375, + "learning_rate": 3.704877219353797e-07, + "log_odds": 12.57576847076416, + "log_odds_ratio": -0.00996237713843584, + "loss": 0.253, + "rejected_geometric_mean": -12.676586151123047, + "step": 6662 + }, + { + "chosen_geometric_mean": -1.041718602180481, + "epoch": 1.65, + "grad_norm": 4.5, + "learning_rate": 3.6997792057178764e-07, + "log_odds": 7.2365217208862305, + "log_odds_ratio": -0.014631565660238266, + "loss": 0.2605, + "rejected_geometric_mean": -7.842450141906738, + "step": 6663 + }, + { + "chosen_geometric_mean": -1.1495403051376343, + "epoch": 1.65, + "grad_norm": 3.21875, + "learning_rate": 3.6946844217067094e-07, + "log_odds": 4.795278549194336, + "log_odds_ratio": -0.13698893785476685, + "loss": 0.2385, + "rejected_geometric_mean": -5.607585906982422, + "step": 6664 + }, + { + "chosen_geometric_mean": -0.868937611579895, + "epoch": 1.65, + "grad_norm": 3.359375, + "learning_rate": 3.689592868092803e-07, + "log_odds": 7.531396865844727, + "log_odds_ratio": -0.033215124160051346, + "loss": 0.265, + "rejected_geometric_mean": -7.826723575592041, + "step": 6665 + }, + { + "chosen_geometric_mean": -1.2405120134353638, + "epoch": 1.65, + "grad_norm": 3.703125, + "learning_rate": 3.684504545648146e-07, + "log_odds": 6.243351459503174, + "log_odds_ratio": -0.2368558794260025, + "loss": 0.2554, + "rejected_geometric_mean": -7.230045318603516, + "step": 6666 + }, + { + "chosen_geometric_mean": -1.136282205581665, + "epoch": 1.65, + "grad_norm": 4.25, + "learning_rate": 3.6794194551442536e-07, + "log_odds": 5.535374641418457, + "log_odds_ratio": -0.038135845214128494, + "loss": 0.1979, + "rejected_geometric_mean": -6.287197113037109, + "step": 6667 + }, + { + "chosen_geometric_mean": -0.9634510278701782, + "epoch": 1.65, + "grad_norm": 18.875, + "learning_rate": 3.6743375973521537e-07, + "log_odds": 11.11184310913086, + "log_odds_ratio": -0.0016406726790592074, + "loss": 0.2931, + "rejected_geometric_mean": -11.589978218078613, + "step": 6668 + }, + { + "chosen_geometric_mean": -1.1014968156814575, + "epoch": 1.65, + "grad_norm": 20.625, + "learning_rate": 3.669258973042378e-07, + "log_odds": 9.272924423217773, + "log_odds_ratio": -0.15755002200603485, + "loss": 0.2856, + "rejected_geometric_mean": -10.028472900390625, + "step": 6669 + }, + { + "chosen_geometric_mean": -1.0132198333740234, + "epoch": 1.65, + "grad_norm": 6.59375, + "learning_rate": 3.6641835829849536e-07, + "log_odds": 10.770009994506836, + "log_odds_ratio": -0.09469909965991974, + "loss": 0.2345, + "rejected_geometric_mean": -11.309843063354492, + "step": 6670 + }, + { + "chosen_geometric_mean": -1.1630312204360962, + "epoch": 1.65, + "grad_norm": 45.0, + "learning_rate": 3.659111427949441e-07, + "log_odds": 3.618748664855957, + "log_odds_ratio": -0.04319716617465019, + "loss": 0.2695, + "rejected_geometric_mean": -4.394020080566406, + "step": 6671 + }, + { + "chosen_geometric_mean": -1.1491049528121948, + "epoch": 1.65, + "grad_norm": 28.75, + "learning_rate": 3.654042508704908e-07, + "log_odds": 7.378636360168457, + "log_odds_ratio": -0.09471099823713303, + "loss": 0.2185, + "rejected_geometric_mean": -8.166290283203125, + "step": 6672 + }, + { + "chosen_geometric_mean": -0.8906328678131104, + "epoch": 1.65, + "grad_norm": 3.453125, + "learning_rate": 3.648976826019912e-07, + "log_odds": 4.142910003662109, + "log_odds_ratio": -0.1656661480665207, + "loss": 0.2896, + "rejected_geometric_mean": -4.611066818237305, + "step": 6673 + }, + { + "chosen_geometric_mean": -1.2064857482910156, + "epoch": 1.65, + "grad_norm": 18.125, + "learning_rate": 3.643914380662544e-07, + "log_odds": 2.22898006439209, + "log_odds_ratio": -0.2101999968290329, + "loss": 0.2963, + "rejected_geometric_mean": -3.152050256729126, + "step": 6674 + }, + { + "chosen_geometric_mean": -0.9924007654190063, + "epoch": 1.65, + "grad_norm": 2.046875, + "learning_rate": 3.6388551734003825e-07, + "log_odds": 5.3016862869262695, + "log_odds_ratio": -0.24764639139175415, + "loss": 0.2523, + "rejected_geometric_mean": -5.982250690460205, + "step": 6675 + }, + { + "chosen_geometric_mean": -1.094815969467163, + "epoch": 1.65, + "grad_norm": 10.0625, + "learning_rate": 3.633799205000518e-07, + "log_odds": 4.188803672790527, + "log_odds_ratio": -0.32257768511772156, + "loss": 0.2593, + "rejected_geometric_mean": -5.055059432983398, + "step": 6676 + }, + { + "chosen_geometric_mean": -1.0892993211746216, + "epoch": 1.65, + "grad_norm": 17.0, + "learning_rate": 3.6287464762295805e-07, + "log_odds": 8.473669052124023, + "log_odds_ratio": -0.040851954370737076, + "loss": 0.2308, + "rejected_geometric_mean": -9.1417236328125, + "step": 6677 + }, + { + "chosen_geometric_mean": -1.0390231609344482, + "epoch": 1.65, + "grad_norm": 11.75, + "learning_rate": 3.623696987853667e-07, + "log_odds": 2.6045498847961426, + "log_odds_ratio": -0.3454078137874603, + "loss": 0.278, + "rejected_geometric_mean": -3.3942465782165527, + "step": 6678 + }, + { + "chosen_geometric_mean": -0.9462013244628906, + "epoch": 1.65, + "grad_norm": 4.34375, + "learning_rate": 3.618650740638402e-07, + "log_odds": 10.911093711853027, + "log_odds_ratio": -0.2249029576778412, + "loss": 0.2973, + "rejected_geometric_mean": -11.495936393737793, + "step": 6679 + }, + { + "chosen_geometric_mean": -0.9715782403945923, + "epoch": 1.65, + "grad_norm": 17.25, + "learning_rate": 3.6136077353489257e-07, + "log_odds": 16.22323989868164, + "log_odds_ratio": -1.2964346751687117e-05, + "loss": 0.2657, + "rejected_geometric_mean": -16.64077377319336, + "step": 6680 + }, + { + "chosen_geometric_mean": -1.0062496662139893, + "epoch": 1.65, + "grad_norm": 5.84375, + "learning_rate": 3.6085679727498686e-07, + "log_odds": 13.435765266418457, + "log_odds_ratio": -0.0015686802798882127, + "loss": 0.2653, + "rejected_geometric_mean": -13.983619689941406, + "step": 6681 + }, + { + "chosen_geometric_mean": -1.2497249841690063, + "epoch": 1.65, + "grad_norm": 7.90625, + "learning_rate": 3.6035314536053856e-07, + "log_odds": 5.083186626434326, + "log_odds_ratio": -0.17089177668094635, + "loss": 0.2348, + "rejected_geometric_mean": -6.021299362182617, + "step": 6682 + }, + { + "chosen_geometric_mean": -1.1638613939285278, + "epoch": 1.65, + "grad_norm": 7.6875, + "learning_rate": 3.598498178679136e-07, + "log_odds": 4.654229640960693, + "log_odds_ratio": -0.14910395443439484, + "loss": 0.2676, + "rejected_geometric_mean": -5.485856533050537, + "step": 6683 + }, + { + "chosen_geometric_mean": -0.9538818597793579, + "epoch": 1.65, + "grad_norm": 9.1875, + "learning_rate": 3.5934681487342806e-07, + "log_odds": 4.55727481842041, + "log_odds_ratio": -0.14091035723686218, + "loss": 0.2705, + "rejected_geometric_mean": -5.1171064376831055, + "step": 6684 + }, + { + "chosen_geometric_mean": -0.9574487209320068, + "epoch": 1.66, + "grad_norm": 3.609375, + "learning_rate": 3.5884413645334984e-07, + "log_odds": 1.8688418865203857, + "log_odds_ratio": -0.25338345766067505, + "loss": 0.241, + "rejected_geometric_mean": -2.4871132373809814, + "step": 6685 + }, + { + "chosen_geometric_mean": -1.1569263935089111, + "epoch": 1.66, + "grad_norm": 49.25, + "learning_rate": 3.583417826838967e-07, + "log_odds": 4.574066162109375, + "log_odds_ratio": -0.2963116466999054, + "loss": 0.3464, + "rejected_geometric_mean": -5.4781575202941895, + "step": 6686 + }, + { + "chosen_geometric_mean": -1.2182379961013794, + "epoch": 1.66, + "grad_norm": 34.0, + "learning_rate": 3.5783975364123675e-07, + "log_odds": 5.974949359893799, + "log_odds_ratio": -0.30341029167175293, + "loss": 0.2964, + "rejected_geometric_mean": -6.988551616668701, + "step": 6687 + }, + { + "chosen_geometric_mean": -0.9064658880233765, + "epoch": 1.66, + "grad_norm": 11.625, + "learning_rate": 3.573380494014905e-07, + "log_odds": 21.44611930847168, + "log_odds_ratio": -1.937158458531485e-06, + "loss": 0.2562, + "rejected_geometric_mean": -21.808032989501953, + "step": 6688 + }, + { + "chosen_geometric_mean": -0.9779058694839478, + "epoch": 1.66, + "grad_norm": 19.375, + "learning_rate": 3.5683667004072884e-07, + "log_odds": 9.79872989654541, + "log_odds_ratio": -0.01303063053637743, + "loss": 0.2578, + "rejected_geometric_mean": -10.268806457519531, + "step": 6689 + }, + { + "chosen_geometric_mean": -1.071698546409607, + "epoch": 1.66, + "grad_norm": 2.71875, + "learning_rate": 3.5633561563497174e-07, + "log_odds": 4.643265724182129, + "log_odds_ratio": -0.054524943232536316, + "loss": 0.2429, + "rejected_geometric_mean": -5.312901973724365, + "step": 6690 + }, + { + "chosen_geometric_mean": -0.9654791355133057, + "epoch": 1.66, + "grad_norm": 20.25, + "learning_rate": 3.558348862601921e-07, + "log_odds": 7.826903820037842, + "log_odds_ratio": -0.012564107775688171, + "loss": 0.2631, + "rejected_geometric_mean": -8.3084135055542, + "step": 6691 + }, + { + "chosen_geometric_mean": -1.0965371131896973, + "epoch": 1.66, + "grad_norm": 1.953125, + "learning_rate": 3.5533448199231243e-07, + "log_odds": 10.61423110961914, + "log_odds_ratio": -0.005939924623817205, + "loss": 0.2745, + "rejected_geometric_mean": -11.288095474243164, + "step": 6692 + }, + { + "chosen_geometric_mean": -0.9486217498779297, + "epoch": 1.66, + "grad_norm": 13.0625, + "learning_rate": 3.54834402907204e-07, + "log_odds": 7.672204971313477, + "log_odds_ratio": -0.1890673190355301, + "loss": 0.2529, + "rejected_geometric_mean": -8.259187698364258, + "step": 6693 + }, + { + "chosen_geometric_mean": -1.086775541305542, + "epoch": 1.66, + "grad_norm": 2.09375, + "learning_rate": 3.5433464908069397e-07, + "log_odds": 3.437695026397705, + "log_odds_ratio": -0.17337603867053986, + "loss": 0.2437, + "rejected_geometric_mean": -4.191305160522461, + "step": 6694 + }, + { + "chosen_geometric_mean": -0.9206794500350952, + "epoch": 1.66, + "grad_norm": 2.421875, + "learning_rate": 3.5383522058855536e-07, + "log_odds": 11.183174133300781, + "log_odds_ratio": -0.010057451203465462, + "loss": 0.2296, + "rejected_geometric_mean": -11.584887504577637, + "step": 6695 + }, + { + "chosen_geometric_mean": -0.8670698404312134, + "epoch": 1.66, + "grad_norm": 2.0625, + "learning_rate": 3.533361175065134e-07, + "log_odds": 6.744324684143066, + "log_odds_ratio": -0.1187959536910057, + "loss": 0.2292, + "rejected_geometric_mean": -7.139015197753906, + "step": 6696 + }, + { + "chosen_geometric_mean": -1.0431671142578125, + "epoch": 1.66, + "grad_norm": 2.203125, + "learning_rate": 3.528373399102447e-07, + "log_odds": 9.191681861877441, + "log_odds_ratio": -0.15117396414279938, + "loss": 0.2667, + "rejected_geometric_mean": -9.894391059875488, + "step": 6697 + }, + { + "chosen_geometric_mean": -1.070547342300415, + "epoch": 1.66, + "grad_norm": 35.75, + "learning_rate": 3.52338887875375e-07, + "log_odds": 2.7846150398254395, + "log_odds_ratio": -0.2484842836856842, + "loss": 0.2999, + "rejected_geometric_mean": -3.538586139678955, + "step": 6698 + }, + { + "chosen_geometric_mean": -0.9974098205566406, + "epoch": 1.66, + "grad_norm": 199.0, + "learning_rate": 3.518407614774821e-07, + "log_odds": 6.952264308929443, + "log_odds_ratio": -0.36496680974960327, + "loss": 0.2882, + "rejected_geometric_mean": -7.701579570770264, + "step": 6699 + }, + { + "chosen_geometric_mean": -0.9540742039680481, + "epoch": 1.66, + "grad_norm": 9.875, + "learning_rate": 3.5134296079209456e-07, + "log_odds": 9.032038688659668, + "log_odds_ratio": -0.026600943878293037, + "loss": 0.276, + "rejected_geometric_mean": -9.511308670043945, + "step": 6700 + }, + { + "chosen_geometric_mean": -1.0352485179901123, + "epoch": 1.66, + "grad_norm": 11.9375, + "learning_rate": 3.5084548589469054e-07, + "log_odds": 10.08201789855957, + "log_odds_ratio": -0.07467655092477798, + "loss": 0.3134, + "rejected_geometric_mean": -10.718656539916992, + "step": 6701 + }, + { + "chosen_geometric_mean": -0.9606642723083496, + "epoch": 1.66, + "grad_norm": 5.0, + "learning_rate": 3.503483368606983e-07, + "log_odds": 9.854806900024414, + "log_odds_ratio": -0.0014249893138185143, + "loss": 0.2554, + "rejected_geometric_mean": -10.30743408203125, + "step": 6702 + }, + { + "chosen_geometric_mean": -0.7147963643074036, + "epoch": 1.66, + "grad_norm": 5.15625, + "learning_rate": 3.49851513765499e-07, + "log_odds": 5.3055219650268555, + "log_odds_ratio": -0.06679996848106384, + "loss": 0.2574, + "rejected_geometric_mean": -5.362166404724121, + "step": 6703 + }, + { + "chosen_geometric_mean": -1.0493720769882202, + "epoch": 1.66, + "grad_norm": 2.28125, + "learning_rate": 3.4935501668442144e-07, + "log_odds": 7.904879570007324, + "log_odds_ratio": -0.004286040551960468, + "loss": 0.259, + "rejected_geometric_mean": -8.520401954650879, + "step": 6704 + }, + { + "chosen_geometric_mean": -0.9973750710487366, + "epoch": 1.66, + "grad_norm": 4.65625, + "learning_rate": 3.488588456927472e-07, + "log_odds": 15.75780963897705, + "log_odds_ratio": -0.0669216439127922, + "loss": 0.2929, + "rejected_geometric_mean": -16.337379455566406, + "step": 6705 + }, + { + "chosen_geometric_mean": -0.7821325063705444, + "epoch": 1.66, + "grad_norm": 20.0, + "learning_rate": 3.483630008657085e-07, + "log_odds": 5.539089202880859, + "log_odds_ratio": -0.07920587807893753, + "loss": 0.2797, + "rejected_geometric_mean": -5.767823219299316, + "step": 6706 + }, + { + "chosen_geometric_mean": -0.8976284265518188, + "epoch": 1.66, + "grad_norm": 8.5625, + "learning_rate": 3.478674822784861e-07, + "log_odds": 3.054300308227539, + "log_odds_ratio": -0.1384720802307129, + "loss": 0.2574, + "rejected_geometric_mean": -3.513981580734253, + "step": 6707 + }, + { + "chosen_geometric_mean": -0.8692706227302551, + "epoch": 1.66, + "grad_norm": 2.015625, + "learning_rate": 3.473722900062135e-07, + "log_odds": 4.400393962860107, + "log_odds_ratio": -0.14954417943954468, + "loss": 0.2636, + "rejected_geometric_mean": -4.79927396774292, + "step": 6708 + }, + { + "chosen_geometric_mean": -0.800017237663269, + "epoch": 1.66, + "grad_norm": 6.25, + "learning_rate": 3.468774241239725e-07, + "log_odds": 2.354766607284546, + "log_odds_ratio": -0.22411401569843292, + "loss": 0.317, + "rejected_geometric_mean": -2.74556827545166, + "step": 6709 + }, + { + "chosen_geometric_mean": -0.8884590268135071, + "epoch": 1.66, + "grad_norm": 31.875, + "learning_rate": 3.4638288470679795e-07, + "log_odds": 7.287700653076172, + "log_odds_ratio": -0.14326947927474976, + "loss": 0.2532, + "rejected_geometric_mean": -7.768532752990723, + "step": 6710 + }, + { + "chosen_geometric_mean": -1.0000735521316528, + "epoch": 1.66, + "grad_norm": 22.375, + "learning_rate": 3.458886718296728e-07, + "log_odds": 4.611156463623047, + "log_odds_ratio": -0.05920586362481117, + "loss": 0.2433, + "rejected_geometric_mean": -5.165326118469238, + "step": 6711 + }, + { + "chosen_geometric_mean": -1.049141526222229, + "epoch": 1.66, + "grad_norm": 40.75, + "learning_rate": 3.453947855675327e-07, + "log_odds": 8.298965454101562, + "log_odds_ratio": -0.007266303524374962, + "loss": 0.254, + "rejected_geometric_mean": -8.88262939453125, + "step": 6712 + }, + { + "chosen_geometric_mean": -1.019528865814209, + "epoch": 1.66, + "grad_norm": 49.75, + "learning_rate": 3.4490122599526167e-07, + "log_odds": 9.333337783813477, + "log_odds_ratio": -0.006097121629863977, + "loss": 0.2755, + "rejected_geometric_mean": -9.81429672241211, + "step": 6713 + }, + { + "chosen_geometric_mean": -0.8444885015487671, + "epoch": 1.66, + "grad_norm": 2.59375, + "learning_rate": 3.444079931876959e-07, + "log_odds": 8.004668235778809, + "log_odds_ratio": -0.13465270400047302, + "loss": 0.2125, + "rejected_geometric_mean": -8.426996231079102, + "step": 6714 + }, + { + "chosen_geometric_mean": -1.0841630697250366, + "epoch": 1.66, + "grad_norm": 13.625, + "learning_rate": 3.4391508721962065e-07, + "log_odds": 7.6420087814331055, + "log_odds_ratio": -0.1168203353881836, + "loss": 0.3142, + "rejected_geometric_mean": -8.333548545837402, + "step": 6715 + }, + { + "chosen_geometric_mean": -0.7790856957435608, + "epoch": 1.66, + "grad_norm": 7.4375, + "learning_rate": 3.434225081657727e-07, + "log_odds": 14.347453117370605, + "log_odds_ratio": -0.09770392626523972, + "loss": 0.2324, + "rejected_geometric_mean": -14.60690689086914, + "step": 6716 + }, + { + "chosen_geometric_mean": -0.8807569146156311, + "epoch": 1.66, + "grad_norm": 42.5, + "learning_rate": 3.4293025610083905e-07, + "log_odds": 0.8996785879135132, + "log_odds_ratio": -0.34844785928726196, + "loss": 0.324, + "rejected_geometric_mean": -1.5065882205963135, + "step": 6717 + }, + { + "chosen_geometric_mean": -1.145129680633545, + "epoch": 1.66, + "grad_norm": 13.25, + "learning_rate": 3.4243833109945694e-07, + "log_odds": 4.854952335357666, + "log_odds_ratio": -0.2889421880245209, + "loss": 0.2833, + "rejected_geometric_mean": -5.782201290130615, + "step": 6718 + }, + { + "chosen_geometric_mean": -1.1470091342926025, + "epoch": 1.66, + "grad_norm": 19.625, + "learning_rate": 3.4194673323621337e-07, + "log_odds": 8.795000076293945, + "log_odds_ratio": -0.028943905606865883, + "loss": 0.3153, + "rejected_geometric_mean": -9.557798385620117, + "step": 6719 + }, + { + "chosen_geometric_mean": -0.8944592475891113, + "epoch": 1.66, + "grad_norm": 8.875, + "learning_rate": 3.4145546258564646e-07, + "log_odds": 2.616513967514038, + "log_odds_ratio": -0.26793068647384644, + "loss": 0.275, + "rejected_geometric_mean": -3.1839256286621094, + "step": 6720 + }, + { + "chosen_geometric_mean": -0.9903926849365234, + "epoch": 1.66, + "grad_norm": 2.125, + "learning_rate": 3.4096451922224576e-07, + "log_odds": 7.524814605712891, + "log_odds_ratio": -0.09535211324691772, + "loss": 0.2201, + "rejected_geometric_mean": -8.090003967285156, + "step": 6721 + }, + { + "chosen_geometric_mean": -1.0080137252807617, + "epoch": 1.66, + "grad_norm": 1.9375, + "learning_rate": 3.404739032204485e-07, + "log_odds": 3.4939112663269043, + "log_odds_ratio": -0.11660926043987274, + "loss": 0.2495, + "rejected_geometric_mean": -4.109726905822754, + "step": 6722 + }, + { + "chosen_geometric_mean": -1.0796468257904053, + "epoch": 1.66, + "grad_norm": 13.125, + "learning_rate": 3.3998361465464533e-07, + "log_odds": 6.033940315246582, + "log_odds_ratio": -0.15468302369117737, + "loss": 0.2371, + "rejected_geometric_mean": -6.795094013214111, + "step": 6723 + }, + { + "chosen_geometric_mean": -0.9004780650138855, + "epoch": 1.66, + "grad_norm": 3.953125, + "learning_rate": 3.3949365359917495e-07, + "log_odds": 4.700632095336914, + "log_odds_ratio": -0.21082675457000732, + "loss": 0.2456, + "rejected_geometric_mean": -5.176426410675049, + "step": 6724 + }, + { + "chosen_geometric_mean": -1.256865382194519, + "epoch": 1.67, + "grad_norm": 6.34375, + "learning_rate": 3.390040201283265e-07, + "log_odds": 7.652734279632568, + "log_odds_ratio": -0.053250767290592194, + "loss": 0.2905, + "rejected_geometric_mean": -8.594204902648926, + "step": 6725 + }, + { + "chosen_geometric_mean": -1.1182067394256592, + "epoch": 1.67, + "grad_norm": 7.21875, + "learning_rate": 3.3851471431634143e-07, + "log_odds": 6.319588661193848, + "log_odds_ratio": -0.006949447561055422, + "loss": 0.2697, + "rejected_geometric_mean": -7.03135871887207, + "step": 6726 + }, + { + "chosen_geometric_mean": -0.8277671337127686, + "epoch": 1.67, + "grad_norm": 13.5625, + "learning_rate": 3.380257362374101e-07, + "log_odds": 7.470493316650391, + "log_odds_ratio": -0.01677771657705307, + "loss": 0.2202, + "rejected_geometric_mean": -7.717952728271484, + "step": 6727 + }, + { + "chosen_geometric_mean": -1.0631399154663086, + "epoch": 1.67, + "grad_norm": 22.625, + "learning_rate": 3.3753708596567236e-07, + "log_odds": 16.735090255737305, + "log_odds_ratio": -0.07201513648033142, + "loss": 0.314, + "rejected_geometric_mean": -17.422739028930664, + "step": 6728 + }, + { + "chosen_geometric_mean": -1.021043062210083, + "epoch": 1.67, + "grad_norm": 4.03125, + "learning_rate": 3.3704876357522063e-07, + "log_odds": 7.621922492980957, + "log_odds_ratio": -0.1419283151626587, + "loss": 0.3022, + "rejected_geometric_mean": -8.305953979492188, + "step": 6729 + }, + { + "chosen_geometric_mean": -0.8121132254600525, + "epoch": 1.67, + "grad_norm": 8.9375, + "learning_rate": 3.3656076914009515e-07, + "log_odds": 3.8706107139587402, + "log_odds_ratio": -0.05342569202184677, + "loss": 0.2832, + "rejected_geometric_mean": -4.13236141204834, + "step": 6730 + }, + { + "chosen_geometric_mean": -1.4587335586547852, + "epoch": 1.67, + "grad_norm": 20.5, + "learning_rate": 3.3607310273428783e-07, + "log_odds": 9.965900421142578, + "log_odds_ratio": -0.002633756957948208, + "loss": 0.2727, + "rejected_geometric_mean": -11.11340045928955, + "step": 6731 + }, + { + "chosen_geometric_mean": -0.8035849928855896, + "epoch": 1.67, + "grad_norm": 2.546875, + "learning_rate": 3.3558576443174173e-07, + "log_odds": 8.751046180725098, + "log_odds_ratio": -0.048695895820856094, + "loss": 0.2276, + "rejected_geometric_mean": -9.001428604125977, + "step": 6732 + }, + { + "chosen_geometric_mean": -1.078369379043579, + "epoch": 1.67, + "grad_norm": 4.8125, + "learning_rate": 3.350987543063475e-07, + "log_odds": 4.498928070068359, + "log_odds_ratio": -0.16611984372138977, + "loss": 0.244, + "rejected_geometric_mean": -5.183061599731445, + "step": 6733 + }, + { + "chosen_geometric_mean": -1.3000470399856567, + "epoch": 1.67, + "grad_norm": 26.875, + "learning_rate": 3.3461207243194874e-07, + "log_odds": 6.365464210510254, + "log_odds_ratio": -0.23398855328559875, + "loss": 0.3021, + "rejected_geometric_mean": -7.429364204406738, + "step": 6734 + }, + { + "chosen_geometric_mean": -0.8067383170127869, + "epoch": 1.67, + "grad_norm": 6.90625, + "learning_rate": 3.3412571888233767e-07, + "log_odds": 4.800475597381592, + "log_odds_ratio": -0.15926192700862885, + "loss": 0.2544, + "rejected_geometric_mean": -5.0970683097839355, + "step": 6735 + }, + { + "chosen_geometric_mean": -0.965887725353241, + "epoch": 1.67, + "grad_norm": 3.5625, + "learning_rate": 3.3363969373125637e-07, + "log_odds": 8.045877456665039, + "log_odds_ratio": -0.06328799575567245, + "loss": 0.2698, + "rejected_geometric_mean": -8.551060676574707, + "step": 6736 + }, + { + "chosen_geometric_mean": -1.1508969068527222, + "epoch": 1.67, + "grad_norm": 2.65625, + "learning_rate": 3.331539970523989e-07, + "log_odds": 8.025062561035156, + "log_odds_ratio": -0.027024205774068832, + "loss": 0.2669, + "rejected_geometric_mean": -8.781229019165039, + "step": 6737 + }, + { + "chosen_geometric_mean": -0.9624826312065125, + "epoch": 1.67, + "grad_norm": 6.3125, + "learning_rate": 3.3266862891940866e-07, + "log_odds": 5.201143264770508, + "log_odds_ratio": -0.19555026292800903, + "loss": 0.211, + "rejected_geometric_mean": -5.786465644836426, + "step": 6738 + }, + { + "chosen_geometric_mean": -0.9474264979362488, + "epoch": 1.67, + "grad_norm": 5.6875, + "learning_rate": 3.3218358940587807e-07, + "log_odds": 11.593452453613281, + "log_odds_ratio": -0.0033136592246592045, + "loss": 0.2413, + "rejected_geometric_mean": -12.035205841064453, + "step": 6739 + }, + { + "chosen_geometric_mean": -0.8453555703163147, + "epoch": 1.67, + "grad_norm": 29.25, + "learning_rate": 3.31698878585352e-07, + "log_odds": 3.9542927742004395, + "log_odds_ratio": -0.058060172945261, + "loss": 0.2531, + "rejected_geometric_mean": -4.2726335525512695, + "step": 6740 + }, + { + "chosen_geometric_mean": -1.223529577255249, + "epoch": 1.67, + "grad_norm": 29.25, + "learning_rate": 3.312144965313238e-07, + "log_odds": 8.215002059936523, + "log_odds_ratio": -0.04793839156627655, + "loss": 0.2774, + "rejected_geometric_mean": -9.072671890258789, + "step": 6741 + }, + { + "chosen_geometric_mean": -1.4401541948318481, + "epoch": 1.67, + "grad_norm": 15.0, + "learning_rate": 3.307304433172362e-07, + "log_odds": 4.338194370269775, + "log_odds_ratio": -0.14667701721191406, + "loss": 0.2614, + "rejected_geometric_mean": -5.472506046295166, + "step": 6742 + }, + { + "chosen_geometric_mean": -1.0910606384277344, + "epoch": 1.67, + "grad_norm": 8.125, + "learning_rate": 3.3024671901648516e-07, + "log_odds": 12.77410888671875, + "log_odds_ratio": -0.00011212495883228257, + "loss": 0.233, + "rejected_geometric_mean": -13.355979919433594, + "step": 6743 + }, + { + "chosen_geometric_mean": -0.974506139755249, + "epoch": 1.67, + "grad_norm": 3.453125, + "learning_rate": 3.297633237024145e-07, + "log_odds": 1.1436768770217896, + "log_odds_ratio": -0.35808056592941284, + "loss": 0.2562, + "rejected_geometric_mean": -1.8882893323898315, + "step": 6744 + }, + { + "chosen_geometric_mean": -1.0234347581863403, + "epoch": 1.67, + "grad_norm": 3.03125, + "learning_rate": 3.2928025744831725e-07, + "log_odds": 3.355743408203125, + "log_odds_ratio": -0.31209665536880493, + "loss": 0.2242, + "rejected_geometric_mean": -4.14667272567749, + "step": 6745 + }, + { + "chosen_geometric_mean": -1.027506947517395, + "epoch": 1.67, + "grad_norm": 4.0, + "learning_rate": 3.2879752032743963e-07, + "log_odds": 7.427777290344238, + "log_odds_ratio": -0.05098205804824829, + "loss": 0.2241, + "rejected_geometric_mean": -8.01404857635498, + "step": 6746 + }, + { + "chosen_geometric_mean": -0.9984298348426819, + "epoch": 1.67, + "grad_norm": 2.03125, + "learning_rate": 3.283151124129744e-07, + "log_odds": 5.386395454406738, + "log_odds_ratio": -0.028231889009475708, + "loss": 0.2454, + "rejected_geometric_mean": -5.932748794555664, + "step": 6747 + }, + { + "chosen_geometric_mean": -0.9591100215911865, + "epoch": 1.67, + "grad_norm": 20.5, + "learning_rate": 3.278330337780672e-07, + "log_odds": 1.6147509813308716, + "log_odds_ratio": -0.23944437503814697, + "loss": 0.2304, + "rejected_geometric_mean": -2.2559773921966553, + "step": 6748 + }, + { + "chosen_geometric_mean": -0.9009926319122314, + "epoch": 1.67, + "grad_norm": 7.25, + "learning_rate": 3.273512844958132e-07, + "log_odds": 4.792109489440918, + "log_odds_ratio": -0.3478425145149231, + "loss": 0.237, + "rejected_geometric_mean": -5.442840576171875, + "step": 6749 + }, + { + "chosen_geometric_mean": -1.198219656944275, + "epoch": 1.67, + "grad_norm": 15.4375, + "learning_rate": 3.268698646392565e-07, + "log_odds": 3.1422510147094727, + "log_odds_ratio": -0.5029680728912354, + "loss": 0.2595, + "rejected_geometric_mean": -4.102842807769775, + "step": 6750 + }, + { + "chosen_geometric_mean": -0.967719554901123, + "epoch": 1.67, + "grad_norm": 4.34375, + "learning_rate": 3.263887742813915e-07, + "log_odds": 3.299553155899048, + "log_odds_ratio": -0.23925277590751648, + "loss": 0.2449, + "rejected_geometric_mean": -3.8521199226379395, + "step": 6751 + }, + { + "chosen_geometric_mean": -1.1567089557647705, + "epoch": 1.67, + "grad_norm": 3.515625, + "learning_rate": 3.2590801349516395e-07, + "log_odds": 8.871926307678223, + "log_odds_ratio": -0.1961790919303894, + "loss": 0.2698, + "rejected_geometric_mean": -9.679557800292969, + "step": 6752 + }, + { + "chosen_geometric_mean": -1.1011101007461548, + "epoch": 1.67, + "grad_norm": 4.1875, + "learning_rate": 3.254275823534678e-07, + "log_odds": 15.101261138916016, + "log_odds_ratio": -0.07711456716060638, + "loss": 0.2425, + "rejected_geometric_mean": -15.807244300842285, + "step": 6753 + }, + { + "chosen_geometric_mean": -1.2027792930603027, + "epoch": 1.67, + "grad_norm": 10.125, + "learning_rate": 3.2494748092914837e-07, + "log_odds": 6.7736897468566895, + "log_odds_ratio": -0.12318868190050125, + "loss": 0.2476, + "rejected_geometric_mean": -7.650905609130859, + "step": 6754 + }, + { + "chosen_geometric_mean": -0.9242058992385864, + "epoch": 1.67, + "grad_norm": 34.5, + "learning_rate": 3.2446770929500135e-07, + "log_odds": 9.604896545410156, + "log_odds_ratio": -0.07120852172374725, + "loss": 0.2399, + "rejected_geometric_mean": -10.045638084411621, + "step": 6755 + }, + { + "chosen_geometric_mean": -0.8673975467681885, + "epoch": 1.67, + "grad_norm": 2.25, + "learning_rate": 3.2398826752377017e-07, + "log_odds": 10.753853797912598, + "log_odds_ratio": -0.14869706332683563, + "loss": 0.2653, + "rejected_geometric_mean": -11.194032669067383, + "step": 6756 + }, + { + "chosen_geometric_mean": -1.251993179321289, + "epoch": 1.67, + "grad_norm": 13.1875, + "learning_rate": 3.2350915568815087e-07, + "log_odds": 7.863519668579102, + "log_odds_ratio": -0.011957838200032711, + "loss": 0.2312, + "rejected_geometric_mean": -8.77851676940918, + "step": 6757 + }, + { + "chosen_geometric_mean": -0.8970343470573425, + "epoch": 1.67, + "grad_norm": 20.875, + "learning_rate": 3.2303037386078736e-07, + "log_odds": 5.381178855895996, + "log_odds_ratio": -0.1139884814620018, + "loss": 0.328, + "rejected_geometric_mean": -5.845885276794434, + "step": 6758 + }, + { + "chosen_geometric_mean": -0.8581749796867371, + "epoch": 1.67, + "grad_norm": 7.90625, + "learning_rate": 3.2255192211427504e-07, + "log_odds": 11.008368492126465, + "log_odds_ratio": -0.05136959254741669, + "loss": 0.3153, + "rejected_geometric_mean": -11.3018798828125, + "step": 6759 + }, + { + "chosen_geometric_mean": -1.131327509880066, + "epoch": 1.67, + "grad_norm": 2.265625, + "learning_rate": 3.2207380052115935e-07, + "log_odds": 7.405513763427734, + "log_odds_ratio": -0.0328030064702034, + "loss": 0.29, + "rejected_geometric_mean": -8.115065574645996, + "step": 6760 + }, + { + "chosen_geometric_mean": -0.9957597851753235, + "epoch": 1.67, + "grad_norm": 3.625, + "learning_rate": 3.2159600915393415e-07, + "log_odds": 8.476208686828613, + "log_odds_ratio": -0.19924800097942352, + "loss": 0.2469, + "rejected_geometric_mean": -9.06558895111084, + "step": 6761 + }, + { + "chosen_geometric_mean": -1.0956614017486572, + "epoch": 1.67, + "grad_norm": 2.1875, + "learning_rate": 3.2111854808504346e-07, + "log_odds": 6.132411956787109, + "log_odds_ratio": -0.0030003469437360764, + "loss": 0.2337, + "rejected_geometric_mean": -6.816143989562988, + "step": 6762 + }, + { + "chosen_geometric_mean": -0.9751722812652588, + "epoch": 1.67, + "grad_norm": 10.0625, + "learning_rate": 3.2064141738688325e-07, + "log_odds": 4.2140092849731445, + "log_odds_ratio": -0.24743598699569702, + "loss": 0.2357, + "rejected_geometric_mean": -4.846487998962402, + "step": 6763 + }, + { + "chosen_geometric_mean": -1.0364290475845337, + "epoch": 1.67, + "grad_norm": 13.9375, + "learning_rate": 3.201646171317968e-07, + "log_odds": 0.5755386352539062, + "log_odds_ratio": -0.48027145862579346, + "loss": 0.2551, + "rejected_geometric_mean": -1.461799144744873, + "step": 6764 + }, + { + "chosen_geometric_mean": -1.1311968564987183, + "epoch": 1.67, + "grad_norm": 3.03125, + "learning_rate": 3.1968814739207903e-07, + "log_odds": 3.9674603939056396, + "log_odds_ratio": -0.2565622627735138, + "loss": 0.2567, + "rejected_geometric_mean": -4.89765739440918, + "step": 6765 + }, + { + "chosen_geometric_mean": -1.2627825736999512, + "epoch": 1.68, + "grad_norm": 2.375, + "learning_rate": 3.192120082399744e-07, + "log_odds": 3.514946222305298, + "log_odds_ratio": -0.30005377531051636, + "loss": 0.2397, + "rejected_geometric_mean": -4.574309349060059, + "step": 6766 + }, + { + "chosen_geometric_mean": -1.0298577547073364, + "epoch": 1.68, + "grad_norm": 1.953125, + "learning_rate": 3.187361997476771e-07, + "log_odds": 10.082475662231445, + "log_odds_ratio": -0.15115691721439362, + "loss": 0.2468, + "rejected_geometric_mean": -10.753582954406738, + "step": 6767 + }, + { + "chosen_geometric_mean": -0.9989839792251587, + "epoch": 1.68, + "grad_norm": 4.59375, + "learning_rate": 3.1826072198733015e-07, + "log_odds": 8.948978424072266, + "log_odds_ratio": -0.13935133814811707, + "loss": 0.2375, + "rejected_geometric_mean": -9.551046371459961, + "step": 6768 + }, + { + "chosen_geometric_mean": -0.9148657917976379, + "epoch": 1.68, + "grad_norm": 2.734375, + "learning_rate": 3.1778557503102796e-07, + "log_odds": 9.64998722076416, + "log_odds_ratio": -0.12942636013031006, + "loss": 0.3217, + "rejected_geometric_mean": -10.08969497680664, + "step": 6769 + }, + { + "chosen_geometric_mean": -0.8130173683166504, + "epoch": 1.68, + "grad_norm": 13.0, + "learning_rate": 3.1731075895081484e-07, + "log_odds": 2.8954765796661377, + "log_odds_ratio": -0.10949641466140747, + "loss": 0.2672, + "rejected_geometric_mean": -3.1358702182769775, + "step": 6770 + }, + { + "chosen_geometric_mean": -1.0518134832382202, + "epoch": 1.68, + "grad_norm": 3.09375, + "learning_rate": 3.168362738186834e-07, + "log_odds": 13.085018157958984, + "log_odds_ratio": -0.02904244139790535, + "loss": 0.2321, + "rejected_geometric_mean": -13.710296630859375, + "step": 6771 + }, + { + "chosen_geometric_mean": -1.1319499015808105, + "epoch": 1.68, + "grad_norm": 19.125, + "learning_rate": 3.1636211970657775e-07, + "log_odds": 8.076744079589844, + "log_odds_ratio": -0.008803610689938068, + "loss": 0.2665, + "rejected_geometric_mean": -8.821202278137207, + "step": 6772 + }, + { + "chosen_geometric_mean": -1.1576035022735596, + "epoch": 1.68, + "grad_norm": 19.0, + "learning_rate": 3.1588829668638994e-07, + "log_odds": 14.129192352294922, + "log_odds_ratio": -0.000554041238501668, + "loss": 0.2535, + "rejected_geometric_mean": -14.84377670288086, + "step": 6773 + }, + { + "chosen_geometric_mean": -1.237940788269043, + "epoch": 1.68, + "grad_norm": 99.0, + "learning_rate": 3.1541480482996433e-07, + "log_odds": 7.85644006729126, + "log_odds_ratio": -0.026946932077407837, + "loss": 0.277, + "rejected_geometric_mean": -8.755735397338867, + "step": 6774 + }, + { + "chosen_geometric_mean": -1.2728132009506226, + "epoch": 1.68, + "grad_norm": 12.0625, + "learning_rate": 3.1494164420909217e-07, + "log_odds": 4.13042688369751, + "log_odds_ratio": -0.1206742599606514, + "loss": 0.2728, + "rejected_geometric_mean": -5.094328880310059, + "step": 6775 + }, + { + "chosen_geometric_mean": -0.9804302453994751, + "epoch": 1.68, + "grad_norm": 8.3125, + "learning_rate": 3.1446881489551745e-07, + "log_odds": 6.235837936401367, + "log_odds_ratio": -0.11350838094949722, + "loss": 0.2585, + "rejected_geometric_mean": -6.78414249420166, + "step": 6776 + }, + { + "chosen_geometric_mean": -0.8838472366333008, + "epoch": 1.68, + "grad_norm": 2.828125, + "learning_rate": 3.1399631696093115e-07, + "log_odds": 9.344841003417969, + "log_odds_ratio": -0.22250616550445557, + "loss": 0.2613, + "rejected_geometric_mean": -9.838464736938477, + "step": 6777 + }, + { + "chosen_geometric_mean": -0.981023907661438, + "epoch": 1.68, + "grad_norm": 7.125, + "learning_rate": 3.135241504769762e-07, + "log_odds": 1.4003281593322754, + "log_odds_ratio": -0.2848155200481415, + "loss": 0.2847, + "rejected_geometric_mean": -2.08609676361084, + "step": 6778 + }, + { + "chosen_geometric_mean": -0.9140180349349976, + "epoch": 1.68, + "grad_norm": 1.875, + "learning_rate": 3.1305231551524373e-07, + "log_odds": 6.883292198181152, + "log_odds_ratio": -0.043375469744205475, + "loss": 0.2188, + "rejected_geometric_mean": -7.2793474197387695, + "step": 6779 + }, + { + "chosen_geometric_mean": -1.1526365280151367, + "epoch": 1.68, + "grad_norm": 9.5625, + "learning_rate": 3.1258081214727536e-07, + "log_odds": 5.227941513061523, + "log_odds_ratio": -0.024496963247656822, + "loss": 0.2437, + "rejected_geometric_mean": -6.005608558654785, + "step": 6780 + }, + { + "chosen_geometric_mean": -0.8887656331062317, + "epoch": 1.68, + "grad_norm": 9.8125, + "learning_rate": 3.121096404445631e-07, + "log_odds": 10.99384593963623, + "log_odds_ratio": -0.12678499519824982, + "loss": 0.2689, + "rejected_geometric_mean": -11.419568061828613, + "step": 6781 + }, + { + "chosen_geometric_mean": -1.020765781402588, + "epoch": 1.68, + "grad_norm": 37.25, + "learning_rate": 3.116388004785467e-07, + "log_odds": 9.200103759765625, + "log_odds_ratio": -0.10454758256673813, + "loss": 0.2692, + "rejected_geometric_mean": -9.84174633026123, + "step": 6782 + }, + { + "chosen_geometric_mean": -0.8854847550392151, + "epoch": 1.68, + "grad_norm": 31.375, + "learning_rate": 3.11168292320618e-07, + "log_odds": 14.18731689453125, + "log_odds_ratio": -0.18748335540294647, + "loss": 0.2727, + "rejected_geometric_mean": -14.614574432373047, + "step": 6783 + }, + { + "chosen_geometric_mean": -1.0166194438934326, + "epoch": 1.68, + "grad_norm": 2.703125, + "learning_rate": 3.1069811604211685e-07, + "log_odds": 1.51143479347229, + "log_odds_ratio": -0.36197608709335327, + "loss": 0.2324, + "rejected_geometric_mean": -2.3316473960876465, + "step": 6784 + }, + { + "chosen_geometric_mean": -0.9834058284759521, + "epoch": 1.68, + "grad_norm": 7.1875, + "learning_rate": 3.1022827171433234e-07, + "log_odds": 5.248953342437744, + "log_odds_ratio": -0.14501000940799713, + "loss": 0.2789, + "rejected_geometric_mean": -5.864395618438721, + "step": 6785 + }, + { + "chosen_geometric_mean": -0.7835140824317932, + "epoch": 1.68, + "grad_norm": 2.59375, + "learning_rate": 3.0975875940850497e-07, + "log_odds": 11.760921478271484, + "log_odds_ratio": -0.03155055642127991, + "loss": 0.2482, + "rejected_geometric_mean": -11.956351280212402, + "step": 6786 + }, + { + "chosen_geometric_mean": -1.3769054412841797, + "epoch": 1.68, + "grad_norm": 10.4375, + "learning_rate": 3.0928957919582475e-07, + "log_odds": 10.270692825317383, + "log_odds_ratio": -0.05530279874801636, + "loss": 0.2983, + "rejected_geometric_mean": -11.24710464477539, + "step": 6787 + }, + { + "chosen_geometric_mean": -1.1148793697357178, + "epoch": 1.68, + "grad_norm": 2.390625, + "learning_rate": 3.088207311474292e-07, + "log_odds": 3.6430258750915527, + "log_odds_ratio": -0.326198935508728, + "loss": 0.2575, + "rejected_geometric_mean": -4.524179458618164, + "step": 6788 + }, + { + "chosen_geometric_mean": -1.185826063156128, + "epoch": 1.68, + "grad_norm": 5.125, + "learning_rate": 3.0835221533440847e-07, + "log_odds": 5.803979396820068, + "log_odds_ratio": -0.031113365665078163, + "loss": 0.2578, + "rejected_geometric_mean": -6.636317729949951, + "step": 6789 + }, + { + "chosen_geometric_mean": -1.1423046588897705, + "epoch": 1.68, + "grad_norm": 47.0, + "learning_rate": 3.078840318277998e-07, + "log_odds": 4.807602882385254, + "log_odds_ratio": -0.3279004693031311, + "loss": 0.3188, + "rejected_geometric_mean": -5.770905494689941, + "step": 6790 + }, + { + "chosen_geometric_mean": -0.9037653207778931, + "epoch": 1.68, + "grad_norm": 2.578125, + "learning_rate": 3.0741618069859037e-07, + "log_odds": 6.258852005004883, + "log_odds_ratio": -0.2114025056362152, + "loss": 0.2734, + "rejected_geometric_mean": -6.7575154304504395, + "step": 6791 + }, + { + "chosen_geometric_mean": -0.8785088658332825, + "epoch": 1.68, + "grad_norm": 17.5, + "learning_rate": 3.069486620177195e-07, + "log_odds": 5.280375003814697, + "log_odds_ratio": -0.022410687059164047, + "loss": 0.2422, + "rejected_geometric_mean": -5.632438659667969, + "step": 6792 + }, + { + "chosen_geometric_mean": -1.1267646551132202, + "epoch": 1.68, + "grad_norm": 18.375, + "learning_rate": 3.064814758560733e-07, + "log_odds": 4.102683067321777, + "log_odds_ratio": -0.27747541666030884, + "loss": 0.3159, + "rejected_geometric_mean": -4.97513484954834, + "step": 6793 + }, + { + "chosen_geometric_mean": -1.2466809749603271, + "epoch": 1.68, + "grad_norm": 46.5, + "learning_rate": 3.0601462228448815e-07, + "log_odds": 10.819744110107422, + "log_odds_ratio": -0.07944674789905548, + "loss": 0.2966, + "rejected_geometric_mean": -11.753911018371582, + "step": 6794 + }, + { + "chosen_geometric_mean": -0.8522571325302124, + "epoch": 1.68, + "grad_norm": 8.375, + "learning_rate": 3.055481013737507e-07, + "log_odds": 7.531734943389893, + "log_odds_ratio": -0.1547999233007431, + "loss": 0.2381, + "rejected_geometric_mean": -7.946283340454102, + "step": 6795 + }, + { + "chosen_geometric_mean": -1.1896946430206299, + "epoch": 1.68, + "grad_norm": 2.625, + "learning_rate": 3.0508191319459617e-07, + "log_odds": 3.185087203979492, + "log_odds_ratio": -0.1215241327881813, + "loss": 0.2551, + "rejected_geometric_mean": -4.040682792663574, + "step": 6796 + }, + { + "chosen_geometric_mean": -0.8750311136245728, + "epoch": 1.68, + "grad_norm": 2.59375, + "learning_rate": 3.046160578177101e-07, + "log_odds": 6.067765235900879, + "log_odds_ratio": -0.12456239014863968, + "loss": 0.2747, + "rejected_geometric_mean": -6.49221134185791, + "step": 6797 + }, + { + "chosen_geometric_mean": -1.0656417608261108, + "epoch": 1.68, + "grad_norm": 6.71875, + "learning_rate": 3.0415053531372805e-07, + "log_odds": 8.339021682739258, + "log_odds_ratio": -0.1439468264579773, + "loss": 0.2437, + "rejected_geometric_mean": -9.035839080810547, + "step": 6798 + }, + { + "chosen_geometric_mean": -0.9614616632461548, + "epoch": 1.68, + "grad_norm": 2.40625, + "learning_rate": 3.036853457532338e-07, + "log_odds": 3.767455816268921, + "log_odds_ratio": -0.18520960211753845, + "loss": 0.2686, + "rejected_geometric_mean": -4.3395867347717285, + "step": 6799 + }, + { + "chosen_geometric_mean": -1.0578148365020752, + "epoch": 1.68, + "grad_norm": 4.71875, + "learning_rate": 3.0322048920676086e-07, + "log_odds": 4.195192813873291, + "log_odds_ratio": -0.3815726339817047, + "loss": 0.2324, + "rejected_geometric_mean": -5.064517021179199, + "step": 6800 + }, + { + "chosen_geometric_mean": -0.9686850309371948, + "epoch": 1.68, + "grad_norm": 2.015625, + "learning_rate": 3.0275596574479344e-07, + "log_odds": 11.214098930358887, + "log_odds_ratio": -0.013669254258275032, + "loss": 0.2398, + "rejected_geometric_mean": -11.698185920715332, + "step": 6801 + }, + { + "chosen_geometric_mean": -0.930778980255127, + "epoch": 1.68, + "grad_norm": 41.25, + "learning_rate": 3.0229177543776385e-07, + "log_odds": 4.031875133514404, + "log_odds_ratio": -0.1809316724538803, + "loss": 0.2897, + "rejected_geometric_mean": -4.516429901123047, + "step": 6802 + }, + { + "chosen_geometric_mean": -0.8086432218551636, + "epoch": 1.68, + "grad_norm": 15.5, + "learning_rate": 3.0182791835605434e-07, + "log_odds": 9.902239799499512, + "log_odds_ratio": -0.12878675758838654, + "loss": 0.2726, + "rejected_geometric_mean": -10.209305763244629, + "step": 6803 + }, + { + "chosen_geometric_mean": -1.069769024848938, + "epoch": 1.68, + "grad_norm": 4.59375, + "learning_rate": 3.013643945699979e-07, + "log_odds": 9.937246322631836, + "log_odds_ratio": -0.07862824201583862, + "loss": 0.2408, + "rejected_geometric_mean": -10.61932373046875, + "step": 6804 + }, + { + "chosen_geometric_mean": -0.9620038866996765, + "epoch": 1.68, + "grad_norm": 5.25, + "learning_rate": 3.009012041498746e-07, + "log_odds": 4.578262805938721, + "log_odds_ratio": -0.1445295661687851, + "loss": 0.259, + "rejected_geometric_mean": -5.174807548522949, + "step": 6805 + }, + { + "chosen_geometric_mean": -0.844305157661438, + "epoch": 1.69, + "grad_norm": 2.28125, + "learning_rate": 3.004383471659161e-07, + "log_odds": 6.968283653259277, + "log_odds_ratio": -0.0225750170648098, + "loss": 0.274, + "rejected_geometric_mean": -7.238882541656494, + "step": 6806 + }, + { + "chosen_geometric_mean": -1.2822902202606201, + "epoch": 1.69, + "grad_norm": 14.625, + "learning_rate": 2.999758236883024e-07, + "log_odds": 7.611336708068848, + "log_odds_ratio": -0.05299748107790947, + "loss": 0.2775, + "rejected_geometric_mean": -8.573649406433105, + "step": 6807 + }, + { + "chosen_geometric_mean": -0.7906718254089355, + "epoch": 1.69, + "grad_norm": 3.015625, + "learning_rate": 2.995136337871621e-07, + "log_odds": 6.7648606300354, + "log_odds_ratio": -0.06555721908807755, + "loss": 0.2307, + "rejected_geometric_mean": -7.006011486053467, + "step": 6808 + }, + { + "chosen_geometric_mean": -0.9878231287002563, + "epoch": 1.69, + "grad_norm": 2.515625, + "learning_rate": 2.9905177753257605e-07, + "log_odds": 8.395174026489258, + "log_odds_ratio": -0.02792208269238472, + "loss": 0.2229, + "rejected_geometric_mean": -8.891249656677246, + "step": 6809 + }, + { + "chosen_geometric_mean": -0.8850875496864319, + "epoch": 1.69, + "grad_norm": 11.0, + "learning_rate": 2.9859025499457184e-07, + "log_odds": 6.499980926513672, + "log_odds_ratio": -0.12584617733955383, + "loss": 0.2234, + "rejected_geometric_mean": -6.9438676834106445, + "step": 6810 + }, + { + "chosen_geometric_mean": -0.8482919335365295, + "epoch": 1.69, + "grad_norm": 16.125, + "learning_rate": 2.9812906624312713e-07, + "log_odds": 10.880184173583984, + "log_odds_ratio": -0.05621248483657837, + "loss": 0.2751, + "rejected_geometric_mean": -11.153068542480469, + "step": 6811 + }, + { + "chosen_geometric_mean": -0.865350604057312, + "epoch": 1.69, + "grad_norm": 7.28125, + "learning_rate": 2.9766821134816994e-07, + "log_odds": 9.923761367797852, + "log_odds_ratio": -0.16231954097747803, + "loss": 0.2433, + "rejected_geometric_mean": -10.42223072052002, + "step": 6812 + }, + { + "chosen_geometric_mean": -0.8992680311203003, + "epoch": 1.69, + "grad_norm": 3.8125, + "learning_rate": 2.9720769037957594e-07, + "log_odds": 9.511930465698242, + "log_odds_ratio": -0.11465993523597717, + "loss": 0.2395, + "rejected_geometric_mean": -9.955997467041016, + "step": 6813 + }, + { + "chosen_geometric_mean": -0.9418597221374512, + "epoch": 1.69, + "grad_norm": 2.984375, + "learning_rate": 2.967475034071718e-07, + "log_odds": 4.051268100738525, + "log_odds_ratio": -0.19898244738578796, + "loss": 0.2972, + "rejected_geometric_mean": -4.651541233062744, + "step": 6814 + }, + { + "chosen_geometric_mean": -1.225954532623291, + "epoch": 1.69, + "grad_norm": 2.328125, + "learning_rate": 2.962876505007334e-07, + "log_odds": 9.094383239746094, + "log_odds_ratio": -0.13355374336242676, + "loss": 0.2541, + "rejected_geometric_mean": -10.020347595214844, + "step": 6815 + }, + { + "chosen_geometric_mean": -1.04262113571167, + "epoch": 1.69, + "grad_norm": 42.25, + "learning_rate": 2.9582813172998466e-07, + "log_odds": 8.553462028503418, + "log_odds_ratio": -0.04235363006591797, + "loss": 0.2558, + "rejected_geometric_mean": -9.170689582824707, + "step": 6816 + }, + { + "chosen_geometric_mean": -1.231258511543274, + "epoch": 1.69, + "grad_norm": 111.0, + "learning_rate": 2.953689471645996e-07, + "log_odds": 4.987117290496826, + "log_odds_ratio": -0.08333323895931244, + "loss": 0.2702, + "rejected_geometric_mean": -5.846590518951416, + "step": 6817 + }, + { + "chosen_geometric_mean": -1.0543136596679688, + "epoch": 1.69, + "grad_norm": 4.03125, + "learning_rate": 2.9491009687420244e-07, + "log_odds": 9.9251070022583, + "log_odds_ratio": -0.03370371088385582, + "loss": 0.2499, + "rejected_geometric_mean": -10.566939353942871, + "step": 6818 + }, + { + "chosen_geometric_mean": -1.1653939485549927, + "epoch": 1.69, + "grad_norm": 5.625, + "learning_rate": 2.944515809283649e-07, + "log_odds": 4.254029273986816, + "log_odds_ratio": -0.2603732943534851, + "loss": 0.2553, + "rejected_geometric_mean": -5.127782344818115, + "step": 6819 + }, + { + "chosen_geometric_mean": -1.0397162437438965, + "epoch": 1.69, + "grad_norm": 2.078125, + "learning_rate": 2.939933993966093e-07, + "log_odds": 16.023983001708984, + "log_odds_ratio": -0.12086272239685059, + "loss": 0.2457, + "rejected_geometric_mean": -16.66021728515625, + "step": 6820 + }, + { + "chosen_geometric_mean": -0.9753237962722778, + "epoch": 1.69, + "grad_norm": 5.4375, + "learning_rate": 2.935355523484079e-07, + "log_odds": 10.82264232635498, + "log_odds_ratio": -0.13779383897781372, + "loss": 0.2154, + "rejected_geometric_mean": -11.414212226867676, + "step": 6821 + }, + { + "chosen_geometric_mean": -1.070766568183899, + "epoch": 1.69, + "grad_norm": 2.34375, + "learning_rate": 2.9307803985317987e-07, + "log_odds": 3.2277309894561768, + "log_odds_ratio": -0.06907765567302704, + "loss": 0.2673, + "rejected_geometric_mean": -3.9117400646209717, + "step": 6822 + }, + { + "chosen_geometric_mean": -0.8425414562225342, + "epoch": 1.69, + "grad_norm": 1.921875, + "learning_rate": 2.926208619802964e-07, + "log_odds": 12.724912643432617, + "log_odds_ratio": -0.002825337927788496, + "loss": 0.2478, + "rejected_geometric_mean": -13.000174522399902, + "step": 6823 + }, + { + "chosen_geometric_mean": -1.1330101490020752, + "epoch": 1.69, + "grad_norm": 40.5, + "learning_rate": 2.9216401879907543e-07, + "log_odds": 10.3865966796875, + "log_odds_ratio": -0.05483026057481766, + "loss": 0.2524, + "rejected_geometric_mean": -11.151612281799316, + "step": 6824 + }, + { + "chosen_geometric_mean": -0.8224135637283325, + "epoch": 1.69, + "grad_norm": 35.0, + "learning_rate": 2.9170751037878644e-07, + "log_odds": 6.473151206970215, + "log_odds_ratio": -0.1536530703306198, + "loss": 0.2689, + "rejected_geometric_mean": -6.873003959655762, + "step": 6825 + }, + { + "chosen_geometric_mean": -0.8875790238380432, + "epoch": 1.69, + "grad_norm": 3.03125, + "learning_rate": 2.9125133678864606e-07, + "log_odds": 6.448122024536133, + "log_odds_ratio": -0.19582036137580872, + "loss": 0.2294, + "rejected_geometric_mean": -6.941424369812012, + "step": 6826 + }, + { + "chosen_geometric_mean": -1.1461073160171509, + "epoch": 1.69, + "grad_norm": 2.203125, + "learning_rate": 2.9079549809782226e-07, + "log_odds": 6.2898759841918945, + "log_odds_ratio": -0.20566309988498688, + "loss": 0.2659, + "rejected_geometric_mean": -7.141373157501221, + "step": 6827 + }, + { + "chosen_geometric_mean": -1.191714882850647, + "epoch": 1.69, + "grad_norm": 12.75, + "learning_rate": 2.903399943754298e-07, + "log_odds": 4.857787132263184, + "log_odds_ratio": -0.021753592416644096, + "loss": 0.2717, + "rejected_geometric_mean": -5.66567325592041, + "step": 6828 + }, + { + "chosen_geometric_mean": -0.9289839267730713, + "epoch": 1.69, + "grad_norm": 31.375, + "learning_rate": 2.8988482569053485e-07, + "log_odds": 4.38751220703125, + "log_odds_ratio": -0.3105648159980774, + "loss": 0.2697, + "rejected_geometric_mean": -5.0309038162231445, + "step": 6829 + }, + { + "chosen_geometric_mean": -1.165478229522705, + "epoch": 1.69, + "grad_norm": 43.25, + "learning_rate": 2.8942999211215213e-07, + "log_odds": 4.774385452270508, + "log_odds_ratio": -0.0980692207813263, + "loss": 0.2303, + "rejected_geometric_mean": -5.5587029457092285, + "step": 6830 + }, + { + "chosen_geometric_mean": -1.2607979774475098, + "epoch": 1.69, + "grad_norm": 6.59375, + "learning_rate": 2.8897549370924456e-07, + "log_odds": 2.0719454288482666, + "log_odds_ratio": -0.3952673375606537, + "loss": 0.2646, + "rejected_geometric_mean": -3.0713186264038086, + "step": 6831 + }, + { + "chosen_geometric_mean": -0.9614036083221436, + "epoch": 1.69, + "grad_norm": 5.65625, + "learning_rate": 2.8852133055072563e-07, + "log_odds": 1.6865144968032837, + "log_odds_ratio": -0.29456937313079834, + "loss": 0.2475, + "rejected_geometric_mean": -2.315990924835205, + "step": 6832 + }, + { + "chosen_geometric_mean": -1.1281226873397827, + "epoch": 1.69, + "grad_norm": 4.3125, + "learning_rate": 2.880675027054575e-07, + "log_odds": 9.226088523864746, + "log_odds_ratio": -0.005234663374722004, + "loss": 0.2619, + "rejected_geometric_mean": -9.965290069580078, + "step": 6833 + }, + { + "chosen_geometric_mean": -0.806020200252533, + "epoch": 1.69, + "grad_norm": 2.484375, + "learning_rate": 2.876140102422503e-07, + "log_odds": 3.576648473739624, + "log_odds_ratio": -0.12794587016105652, + "loss": 0.2451, + "rejected_geometric_mean": -3.871295690536499, + "step": 6834 + }, + { + "chosen_geometric_mean": -0.8146198987960815, + "epoch": 1.69, + "grad_norm": 3.6875, + "learning_rate": 2.8716085322986506e-07, + "log_odds": 11.050154685974121, + "log_odds_ratio": -4.64350450783968e-05, + "loss": 0.2915, + "rejected_geometric_mean": -11.244915008544922, + "step": 6835 + }, + { + "chosen_geometric_mean": -1.0368574857711792, + "epoch": 1.69, + "grad_norm": 16.5, + "learning_rate": 2.867080317370119e-07, + "log_odds": 4.886105060577393, + "log_odds_ratio": -0.2233533263206482, + "loss": 0.2555, + "rejected_geometric_mean": -5.587917804718018, + "step": 6836 + }, + { + "chosen_geometric_mean": -1.0641968250274658, + "epoch": 1.69, + "grad_norm": 11.4375, + "learning_rate": 2.862555458323482e-07, + "log_odds": 7.664483070373535, + "log_odds_ratio": -0.07784557342529297, + "loss": 0.2176, + "rejected_geometric_mean": -8.309062004089355, + "step": 6837 + }, + { + "chosen_geometric_mean": -0.923231840133667, + "epoch": 1.69, + "grad_norm": 5.34375, + "learning_rate": 2.858033955844827e-07, + "log_odds": 4.612625598907471, + "log_odds_ratio": -0.28544265031814575, + "loss": 0.319, + "rejected_geometric_mean": -5.203669548034668, + "step": 6838 + }, + { + "chosen_geometric_mean": -1.0110071897506714, + "epoch": 1.69, + "grad_norm": 1.8984375, + "learning_rate": 2.8535158106197196e-07, + "log_odds": 4.440949440002441, + "log_odds_ratio": -0.20381826162338257, + "loss": 0.2438, + "rejected_geometric_mean": -5.080418109893799, + "step": 6839 + }, + { + "chosen_geometric_mean": -0.962748646736145, + "epoch": 1.69, + "grad_norm": 3.984375, + "learning_rate": 2.849001023333209e-07, + "log_odds": 3.7575833797454834, + "log_odds_ratio": -0.13099375367164612, + "loss": 0.2274, + "rejected_geometric_mean": -4.297699928283691, + "step": 6840 + }, + { + "chosen_geometric_mean": -1.0169847011566162, + "epoch": 1.69, + "grad_norm": 3.5, + "learning_rate": 2.844489594669864e-07, + "log_odds": 4.234031677246094, + "log_odds_ratio": -0.056461308151483536, + "loss": 0.2595, + "rejected_geometric_mean": -4.8245849609375, + "step": 6841 + }, + { + "chosen_geometric_mean": -0.9435634613037109, + "epoch": 1.69, + "grad_norm": 11.5, + "learning_rate": 2.839981525313712e-07, + "log_odds": 8.660338401794434, + "log_odds_ratio": -0.04839703440666199, + "loss": 0.2566, + "rejected_geometric_mean": -9.122811317443848, + "step": 6842 + }, + { + "chosen_geometric_mean": -0.9838427901268005, + "epoch": 1.69, + "grad_norm": 1.84375, + "learning_rate": 2.835476815948285e-07, + "log_odds": 7.385172367095947, + "log_odds_ratio": -0.021727818995714188, + "loss": 0.2324, + "rejected_geometric_mean": -7.913431167602539, + "step": 6843 + }, + { + "chosen_geometric_mean": -1.1059516668319702, + "epoch": 1.69, + "grad_norm": 9.75, + "learning_rate": 2.8309754672566136e-07, + "log_odds": 7.878664970397949, + "log_odds_ratio": -0.17854636907577515, + "loss": 0.2867, + "rejected_geometric_mean": -8.636979103088379, + "step": 6844 + }, + { + "chosen_geometric_mean": -0.9523367881774902, + "epoch": 1.69, + "grad_norm": 20.875, + "learning_rate": 2.8264774799212e-07, + "log_odds": 7.2795867919921875, + "log_odds_ratio": -0.14006167650222778, + "loss": 0.2827, + "rejected_geometric_mean": -7.797086715698242, + "step": 6845 + }, + { + "chosen_geometric_mean": -0.6996254920959473, + "epoch": 1.69, + "grad_norm": 17.125, + "learning_rate": 2.82198285462405e-07, + "log_odds": 7.714956283569336, + "log_odds_ratio": -0.2970469295978546, + "loss": 0.2853, + "rejected_geometric_mean": -8.039335250854492, + "step": 6846 + }, + { + "chosen_geometric_mean": -0.9108548164367676, + "epoch": 1.7, + "grad_norm": 12.75, + "learning_rate": 2.8174915920466673e-07, + "log_odds": 4.638856887817383, + "log_odds_ratio": -0.14061239361763, + "loss": 0.2568, + "rejected_geometric_mean": -5.126791000366211, + "step": 6847 + }, + { + "chosen_geometric_mean": -1.2751957178115845, + "epoch": 1.7, + "grad_norm": 6.25, + "learning_rate": 2.8130036928700173e-07, + "log_odds": 7.970573425292969, + "log_odds_ratio": -0.1740347295999527, + "loss": 0.2902, + "rejected_geometric_mean": -8.979869842529297, + "step": 6848 + }, + { + "chosen_geometric_mean": -1.0292168855667114, + "epoch": 1.7, + "grad_norm": 2.484375, + "learning_rate": 2.808519157774589e-07, + "log_odds": 7.262734413146973, + "log_odds_ratio": -0.12965038418769836, + "loss": 0.2488, + "rejected_geometric_mean": -7.873345375061035, + "step": 6849 + }, + { + "chosen_geometric_mean": -0.8938584923744202, + "epoch": 1.7, + "grad_norm": 2.171875, + "learning_rate": 2.804037987440339e-07, + "log_odds": 10.689668655395508, + "log_odds_ratio": -0.019703473895788193, + "loss": 0.2181, + "rejected_geometric_mean": -11.051019668579102, + "step": 6850 + }, + { + "chosen_geometric_mean": -1.0710662603378296, + "epoch": 1.7, + "grad_norm": 2.578125, + "learning_rate": 2.799560182546715e-07, + "log_odds": 13.458043098449707, + "log_odds_ratio": -0.00044047628762200475, + "loss": 0.2725, + "rejected_geometric_mean": -14.083198547363281, + "step": 6851 + }, + { + "chosen_geometric_mean": -0.9159319996833801, + "epoch": 1.7, + "grad_norm": 6.90625, + "learning_rate": 2.795085743772666e-07, + "log_odds": 8.349010467529297, + "log_odds_ratio": -0.16281814873218536, + "loss": 0.296, + "rejected_geometric_mean": -8.836942672729492, + "step": 6852 + }, + { + "chosen_geometric_mean": -0.8946260213851929, + "epoch": 1.7, + "grad_norm": 17.375, + "learning_rate": 2.7906146717966266e-07, + "log_odds": 5.412448883056641, + "log_odds_ratio": -0.03099706396460533, + "loss": 0.2654, + "rejected_geometric_mean": -5.790905952453613, + "step": 6853 + }, + { + "chosen_geometric_mean": -1.068263053894043, + "epoch": 1.7, + "grad_norm": 11.4375, + "learning_rate": 2.7861469672965103e-07, + "log_odds": 11.726126670837402, + "log_odds_ratio": -0.00027444903389550745, + "loss": 0.269, + "rejected_geometric_mean": -12.369370460510254, + "step": 6854 + }, + { + "chosen_geometric_mean": -0.8790732622146606, + "epoch": 1.7, + "grad_norm": 3.984375, + "learning_rate": 2.781682630949739e-07, + "log_odds": 7.281314373016357, + "log_odds_ratio": -0.005711869802325964, + "loss": 0.2682, + "rejected_geometric_mean": -7.601261138916016, + "step": 6855 + }, + { + "chosen_geometric_mean": -0.8953499794006348, + "epoch": 1.7, + "grad_norm": 1.890625, + "learning_rate": 2.777221663433205e-07, + "log_odds": 8.564900398254395, + "log_odds_ratio": -0.003924624528735876, + "loss": 0.2547, + "rejected_geometric_mean": -8.92231559753418, + "step": 6856 + }, + { + "chosen_geometric_mean": -1.1120221614837646, + "epoch": 1.7, + "grad_norm": 8.125, + "learning_rate": 2.7727640654232946e-07, + "log_odds": 0.9789237380027771, + "log_odds_ratio": -0.3810102641582489, + "loss": 0.2498, + "rejected_geometric_mean": -1.9112787246704102, + "step": 6857 + }, + { + "chosen_geometric_mean": -0.9150846004486084, + "epoch": 1.7, + "grad_norm": 2.796875, + "learning_rate": 2.768309837595901e-07, + "log_odds": 7.489654064178467, + "log_odds_ratio": -0.07015089690685272, + "loss": 0.2269, + "rejected_geometric_mean": -7.9202165603637695, + "step": 6858 + }, + { + "chosen_geometric_mean": -1.07075035572052, + "epoch": 1.7, + "grad_norm": 18.75, + "learning_rate": 2.7638589806263824e-07, + "log_odds": 14.499502182006836, + "log_odds_ratio": -0.0002506425080355257, + "loss": 0.2798, + "rejected_geometric_mean": -15.112326622009277, + "step": 6859 + }, + { + "chosen_geometric_mean": -1.1549876928329468, + "epoch": 1.7, + "grad_norm": 2.53125, + "learning_rate": 2.759411495189593e-07, + "log_odds": 8.672011375427246, + "log_odds_ratio": -0.004148512147367001, + "loss": 0.2684, + "rejected_geometric_mean": -9.437582015991211, + "step": 6860 + }, + { + "chosen_geometric_mean": -1.0999367237091064, + "epoch": 1.7, + "grad_norm": 3.3125, + "learning_rate": 2.754967381959886e-07, + "log_odds": 9.533687591552734, + "log_odds_ratio": -0.011491960845887661, + "loss": 0.2706, + "rejected_geometric_mean": -10.171756744384766, + "step": 6861 + }, + { + "chosen_geometric_mean": -1.332786202430725, + "epoch": 1.7, + "grad_norm": 12.5625, + "learning_rate": 2.7505266416110873e-07, + "log_odds": 5.021699905395508, + "log_odds_ratio": -0.18233375251293182, + "loss": 0.239, + "rejected_geometric_mean": -6.009355545043945, + "step": 6862 + }, + { + "chosen_geometric_mean": -1.0356566905975342, + "epoch": 1.7, + "grad_norm": 44.25, + "learning_rate": 2.746089274816524e-07, + "log_odds": 3.7919468879699707, + "log_odds_ratio": -0.39050811529159546, + "loss": 0.2794, + "rejected_geometric_mean": -4.652064800262451, + "step": 6863 + }, + { + "chosen_geometric_mean": -0.8995772004127502, + "epoch": 1.7, + "grad_norm": 5.40625, + "learning_rate": 2.7416552822490144e-07, + "log_odds": 7.1095805168151855, + "log_odds_ratio": -0.19251365959644318, + "loss": 0.2574, + "rejected_geometric_mean": -7.606597423553467, + "step": 6864 + }, + { + "chosen_geometric_mean": -1.180585503578186, + "epoch": 1.7, + "grad_norm": 25.375, + "learning_rate": 2.737224664580851e-07, + "log_odds": 9.811997413635254, + "log_odds_ratio": -0.36455172300338745, + "loss": 0.3252, + "rejected_geometric_mean": -10.66512680053711, + "step": 6865 + }, + { + "chosen_geometric_mean": -1.1502710580825806, + "epoch": 1.7, + "grad_norm": 2.609375, + "learning_rate": 2.7327974224838155e-07, + "log_odds": 6.092277526855469, + "log_odds_ratio": -0.28645527362823486, + "loss": 0.2538, + "rejected_geometric_mean": -6.984203338623047, + "step": 6866 + }, + { + "chosen_geometric_mean": -0.8694016933441162, + "epoch": 1.7, + "grad_norm": 4.96875, + "learning_rate": 2.728373556629199e-07, + "log_odds": 10.198079109191895, + "log_odds_ratio": -0.2063339799642563, + "loss": 0.2035, + "rejected_geometric_mean": -10.678096771240234, + "step": 6867 + }, + { + "chosen_geometric_mean": -1.0178827047348022, + "epoch": 1.7, + "grad_norm": 3.28125, + "learning_rate": 2.7239530676877483e-07, + "log_odds": 6.166115760803223, + "log_odds_ratio": -0.21549154818058014, + "loss": 0.2304, + "rejected_geometric_mean": -6.856472492218018, + "step": 6868 + }, + { + "chosen_geometric_mean": -0.9715100526809692, + "epoch": 1.7, + "grad_norm": 14.9375, + "learning_rate": 2.7195359563297275e-07, + "log_odds": 6.956686496734619, + "log_odds_ratio": -0.0026637050323188305, + "loss": 0.2523, + "rejected_geometric_mean": -7.423366546630859, + "step": 6869 + }, + { + "chosen_geometric_mean": -1.1865425109863281, + "epoch": 1.7, + "grad_norm": 3.09375, + "learning_rate": 2.715122223224881e-07, + "log_odds": 5.597663879394531, + "log_odds_ratio": -0.048836346715688705, + "loss": 0.2578, + "rejected_geometric_mean": -6.408571243286133, + "step": 6870 + }, + { + "chosen_geometric_mean": -1.3128801584243774, + "epoch": 1.7, + "grad_norm": 8.5, + "learning_rate": 2.7107118690424205e-07, + "log_odds": 4.391483783721924, + "log_odds_ratio": -0.30764341354370117, + "loss": 0.2645, + "rejected_geometric_mean": -5.542239189147949, + "step": 6871 + }, + { + "chosen_geometric_mean": -0.9147756099700928, + "epoch": 1.7, + "grad_norm": 1.6796875, + "learning_rate": 2.7063048944510783e-07, + "log_odds": 11.88016414642334, + "log_odds_ratio": -7.878970063757151e-05, + "loss": 0.2271, + "rejected_geometric_mean": -12.275542259216309, + "step": 6872 + }, + { + "chosen_geometric_mean": -0.9350911378860474, + "epoch": 1.7, + "grad_norm": 3.265625, + "learning_rate": 2.7019013001190474e-07, + "log_odds": 10.119775772094727, + "log_odds_ratio": -0.12479380518198013, + "loss": 0.2502, + "rejected_geometric_mean": -10.633922576904297, + "step": 6873 + }, + { + "chosen_geometric_mean": -1.0623139142990112, + "epoch": 1.7, + "grad_norm": 28.5, + "learning_rate": 2.6975010867140216e-07, + "log_odds": 13.296340942382812, + "log_odds_ratio": -0.05289870500564575, + "loss": 0.2589, + "rejected_geometric_mean": -13.965110778808594, + "step": 6874 + }, + { + "chosen_geometric_mean": -1.1336514949798584, + "epoch": 1.7, + "grad_norm": 23.875, + "learning_rate": 2.6931042549031754e-07, + "log_odds": 5.1084136962890625, + "log_odds_ratio": -0.3109290897846222, + "loss": 0.2805, + "rejected_geometric_mean": -5.992652893066406, + "step": 6875 + }, + { + "chosen_geometric_mean": -1.0129778385162354, + "epoch": 1.7, + "grad_norm": 2.09375, + "learning_rate": 2.688710805353184e-07, + "log_odds": 4.365452289581299, + "log_odds_ratio": -0.10962843894958496, + "loss": 0.2286, + "rejected_geometric_mean": -4.9912519454956055, + "step": 6876 + }, + { + "chosen_geometric_mean": -1.167428970336914, + "epoch": 1.7, + "grad_norm": 6.34375, + "learning_rate": 2.684320738730184e-07, + "log_odds": 2.9233498573303223, + "log_odds_ratio": -0.28248727321624756, + "loss": 0.3369, + "rejected_geometric_mean": -3.892775535583496, + "step": 6877 + }, + { + "chosen_geometric_mean": -1.0998904705047607, + "epoch": 1.7, + "grad_norm": 5.0, + "learning_rate": 2.6799340556998316e-07, + "log_odds": 6.073338508605957, + "log_odds_ratio": -0.02082761377096176, + "loss": 0.2705, + "rejected_geometric_mean": -6.774532318115234, + "step": 6878 + }, + { + "chosen_geometric_mean": -1.0152143239974976, + "epoch": 1.7, + "grad_norm": 7.375, + "learning_rate": 2.6755507569272395e-07, + "log_odds": 4.800607204437256, + "log_odds_ratio": -0.1182067021727562, + "loss": 0.2454, + "rejected_geometric_mean": -5.429113388061523, + "step": 6879 + }, + { + "chosen_geometric_mean": -1.0336380004882812, + "epoch": 1.7, + "grad_norm": 1.8203125, + "learning_rate": 2.6711708430770286e-07, + "log_odds": 3.5299978256225586, + "log_odds_ratio": -0.3209584653377533, + "loss": 0.2299, + "rejected_geometric_mean": -4.3424553871154785, + "step": 6880 + }, + { + "chosen_geometric_mean": -0.8020198345184326, + "epoch": 1.7, + "grad_norm": 8.125, + "learning_rate": 2.666794314813298e-07, + "log_odds": 12.234077453613281, + "log_odds_ratio": -1.677904583630152e-05, + "loss": 0.2048, + "rejected_geometric_mean": -12.413434028625488, + "step": 6881 + }, + { + "chosen_geometric_mean": -0.7748472690582275, + "epoch": 1.7, + "grad_norm": 6.78125, + "learning_rate": 2.662421172799637e-07, + "log_odds": 5.161596298217773, + "log_odds_ratio": -0.05381980165839195, + "loss": 0.2553, + "rejected_geometric_mean": -5.367203712463379, + "step": 6882 + }, + { + "chosen_geometric_mean": -1.113388180732727, + "epoch": 1.7, + "grad_norm": 58.75, + "learning_rate": 2.658051417699112e-07, + "log_odds": 8.221782684326172, + "log_odds_ratio": -0.19754046201705933, + "loss": 0.2948, + "rejected_geometric_mean": -9.03823471069336, + "step": 6883 + }, + { + "chosen_geometric_mean": -0.9663295745849609, + "epoch": 1.7, + "grad_norm": 15.8125, + "learning_rate": 2.6536850501742816e-07, + "log_odds": 6.719244480133057, + "log_odds_ratio": -0.007173965685069561, + "loss": 0.3309, + "rejected_geometric_mean": -7.175583362579346, + "step": 6884 + }, + { + "chosen_geometric_mean": -0.9216336011886597, + "epoch": 1.7, + "grad_norm": 7.4375, + "learning_rate": 2.649322070887206e-07, + "log_odds": 2.1197707653045654, + "log_odds_ratio": -0.30794692039489746, + "loss": 0.2259, + "rejected_geometric_mean": -2.7953076362609863, + "step": 6885 + }, + { + "chosen_geometric_mean": -1.058234691619873, + "epoch": 1.7, + "grad_norm": 2.390625, + "learning_rate": 2.6449624804994023e-07, + "log_odds": 4.956911563873291, + "log_odds_ratio": -0.04244677722454071, + "loss": 0.2455, + "rejected_geometric_mean": -5.560024738311768, + "step": 6886 + }, + { + "chosen_geometric_mean": -1.071258544921875, + "epoch": 1.71, + "grad_norm": 13.875, + "learning_rate": 2.640606279671898e-07, + "log_odds": 9.634772300720215, + "log_odds_ratio": -0.00043028854997828603, + "loss": 0.2823, + "rejected_geometric_mean": -10.281879425048828, + "step": 6887 + }, + { + "chosen_geometric_mean": -0.8230998516082764, + "epoch": 1.71, + "grad_norm": 2.265625, + "learning_rate": 2.6362534690651974e-07, + "log_odds": 9.39592170715332, + "log_odds_ratio": -0.05218299478292465, + "loss": 0.2352, + "rejected_geometric_mean": -9.67829704284668, + "step": 6888 + }, + { + "chosen_geometric_mean": -1.1385791301727295, + "epoch": 1.71, + "grad_norm": 10.3125, + "learning_rate": 2.6319040493392815e-07, + "log_odds": 7.76857328414917, + "log_odds_ratio": -0.02063564397394657, + "loss": 0.2641, + "rejected_geometric_mean": -8.514156341552734, + "step": 6889 + }, + { + "chosen_geometric_mean": -0.8918936252593994, + "epoch": 1.71, + "grad_norm": 5.1875, + "learning_rate": 2.6275580211536337e-07, + "log_odds": 10.837209701538086, + "log_odds_ratio": -0.09647280722856522, + "loss": 0.2453, + "rejected_geometric_mean": -11.27649211883545, + "step": 6890 + }, + { + "chosen_geometric_mean": -0.9917762279510498, + "epoch": 1.71, + "grad_norm": 4.625, + "learning_rate": 2.6232153851672213e-07, + "log_odds": 3.3814125061035156, + "log_odds_ratio": -0.2576350271701813, + "loss": 0.3193, + "rejected_geometric_mean": -4.0613274574279785, + "step": 6891 + }, + { + "chosen_geometric_mean": -0.8362206816673279, + "epoch": 1.71, + "grad_norm": 2.984375, + "learning_rate": 2.6188761420384843e-07, + "log_odds": 6.925961494445801, + "log_odds_ratio": -0.05880284309387207, + "loss": 0.261, + "rejected_geometric_mean": -7.198856353759766, + "step": 6892 + }, + { + "chosen_geometric_mean": -0.8716353178024292, + "epoch": 1.71, + "grad_norm": 2.828125, + "learning_rate": 2.6145402924253603e-07, + "log_odds": 10.20180606842041, + "log_odds_ratio": -0.14422619342803955, + "loss": 0.2157, + "rejected_geometric_mean": -10.650242805480957, + "step": 6893 + }, + { + "chosen_geometric_mean": -0.8349302411079407, + "epoch": 1.71, + "grad_norm": 3.828125, + "learning_rate": 2.610207836985265e-07, + "log_odds": 7.798666954040527, + "log_odds_ratio": -0.03083675354719162, + "loss": 0.2498, + "rejected_geometric_mean": -8.07458782196045, + "step": 6894 + }, + { + "chosen_geometric_mean": -0.9186904430389404, + "epoch": 1.71, + "grad_norm": 14.0, + "learning_rate": 2.605878776375101e-07, + "log_odds": 5.001084327697754, + "log_odds_ratio": -0.1324108988046646, + "loss": 0.2642, + "rejected_geometric_mean": -5.4808878898620605, + "step": 6895 + }, + { + "chosen_geometric_mean": -1.2042254209518433, + "epoch": 1.71, + "grad_norm": 8.375, + "learning_rate": 2.601553111251268e-07, + "log_odds": 3.0159308910369873, + "log_odds_ratio": -0.12910380959510803, + "loss": 0.2114, + "rejected_geometric_mean": -3.886646270751953, + "step": 6896 + }, + { + "chosen_geometric_mean": -0.7392101883888245, + "epoch": 1.71, + "grad_norm": 3.78125, + "learning_rate": 2.5972308422696297e-07, + "log_odds": 5.666357040405273, + "log_odds_ratio": -0.11114867031574249, + "loss": 0.2236, + "rejected_geometric_mean": -5.842740535736084, + "step": 6897 + }, + { + "chosen_geometric_mean": -1.1835651397705078, + "epoch": 1.71, + "grad_norm": 2.046875, + "learning_rate": 2.5929119700855545e-07, + "log_odds": 13.247087478637695, + "log_odds_ratio": -0.023578155785799026, + "loss": 0.2507, + "rejected_geometric_mean": -14.06284236907959, + "step": 6898 + }, + { + "chosen_geometric_mean": -0.9881526231765747, + "epoch": 1.71, + "grad_norm": 6.3125, + "learning_rate": 2.5885964953538817e-07, + "log_odds": 2.163377046585083, + "log_odds_ratio": -0.28518837690353394, + "loss": 0.325, + "rejected_geometric_mean": -2.8871030807495117, + "step": 6899 + }, + { + "chosen_geometric_mean": -0.9287277460098267, + "epoch": 1.71, + "grad_norm": 5.15625, + "learning_rate": 2.584284418728936e-07, + "log_odds": 13.516290664672852, + "log_odds_ratio": -2.37832482525846e-05, + "loss": 0.2256, + "rejected_geometric_mean": -13.928899765014648, + "step": 6900 + }, + { + "chosen_geometric_mean": -1.0299878120422363, + "epoch": 1.71, + "grad_norm": 25.75, + "learning_rate": 2.5799757408645374e-07, + "log_odds": 9.712054252624512, + "log_odds_ratio": -0.004657987039536238, + "loss": 0.2788, + "rejected_geometric_mean": -10.297621726989746, + "step": 6901 + }, + { + "chosen_geometric_mean": -1.1620389223098755, + "epoch": 1.71, + "grad_norm": 16.375, + "learning_rate": 2.575670462413993e-07, + "log_odds": 10.053067207336426, + "log_odds_ratio": -0.0007086042314767838, + "loss": 0.2551, + "rejected_geometric_mean": -10.794410705566406, + "step": 6902 + }, + { + "chosen_geometric_mean": -0.7859119772911072, + "epoch": 1.71, + "grad_norm": 2.03125, + "learning_rate": 2.571368584030071e-07, + "log_odds": 8.968573570251465, + "log_odds_ratio": -0.0029758934397250414, + "loss": 0.2112, + "rejected_geometric_mean": -9.131757736206055, + "step": 6903 + }, + { + "chosen_geometric_mean": -1.1920114755630493, + "epoch": 1.71, + "grad_norm": 4.5625, + "learning_rate": 2.5670701063650495e-07, + "log_odds": 9.032317161560059, + "log_odds_ratio": -0.020245261490345, + "loss": 0.2378, + "rejected_geometric_mean": -9.871233940124512, + "step": 6904 + }, + { + "chosen_geometric_mean": -0.8423330783843994, + "epoch": 1.71, + "grad_norm": 19.125, + "learning_rate": 2.5627750300706797e-07, + "log_odds": 10.671121597290039, + "log_odds_ratio": -0.04652858152985573, + "loss": 0.3668, + "rejected_geometric_mean": -10.97842025756836, + "step": 6905 + }, + { + "chosen_geometric_mean": -0.9800238609313965, + "epoch": 1.71, + "grad_norm": 6.21875, + "learning_rate": 2.558483355798189e-07, + "log_odds": 10.937091827392578, + "log_odds_ratio": -0.17704707384109497, + "loss": 0.2152, + "rejected_geometric_mean": -11.558740615844727, + "step": 6906 + }, + { + "chosen_geometric_mean": -0.8071609735488892, + "epoch": 1.71, + "grad_norm": 9.0, + "learning_rate": 2.554195084198313e-07, + "log_odds": 12.745184898376465, + "log_odds_ratio": -0.01166731957346201, + "loss": 0.3156, + "rejected_geometric_mean": -12.953715324401855, + "step": 6907 + }, + { + "chosen_geometric_mean": -0.9299249649047852, + "epoch": 1.71, + "grad_norm": 21.125, + "learning_rate": 2.549910215921253e-07, + "log_odds": 4.690080165863037, + "log_odds_ratio": -0.25807878375053406, + "loss": 0.2867, + "rejected_geometric_mean": -5.353387832641602, + "step": 6908 + }, + { + "chosen_geometric_mean": -1.1904546022415161, + "epoch": 1.71, + "grad_norm": 77.5, + "learning_rate": 2.5456287516166854e-07, + "log_odds": 6.073531150817871, + "log_odds_ratio": -0.1352090835571289, + "loss": 0.2796, + "rejected_geometric_mean": -6.956770896911621, + "step": 6909 + }, + { + "chosen_geometric_mean": -1.0820682048797607, + "epoch": 1.71, + "grad_norm": 2.78125, + "learning_rate": 2.5413506919338e-07, + "log_odds": 1.8919894695281982, + "log_odds_ratio": -0.14884385466575623, + "loss": 0.2569, + "rejected_geometric_mean": -2.6311535835266113, + "step": 6910 + }, + { + "chosen_geometric_mean": -1.1248972415924072, + "epoch": 1.71, + "grad_norm": 2.125, + "learning_rate": 2.537076037521238e-07, + "log_odds": 7.240652561187744, + "log_odds_ratio": -0.07440498471260071, + "loss": 0.2726, + "rejected_geometric_mean": -7.977193832397461, + "step": 6911 + }, + { + "chosen_geometric_mean": -1.1343986988067627, + "epoch": 1.71, + "grad_norm": 37.25, + "learning_rate": 2.532804789027146e-07, + "log_odds": 6.847529888153076, + "log_odds_ratio": -0.23146718740463257, + "loss": 0.2797, + "rejected_geometric_mean": -7.713617324829102, + "step": 6912 + }, + { + "chosen_geometric_mean": -0.7842168807983398, + "epoch": 1.71, + "grad_norm": 18.75, + "learning_rate": 2.5285369470991557e-07, + "log_odds": 10.711257934570312, + "log_odds_ratio": -0.21390216052532196, + "loss": 0.2946, + "rejected_geometric_mean": -11.06478214263916, + "step": 6913 + }, + { + "chosen_geometric_mean": -1.1258668899536133, + "epoch": 1.71, + "grad_norm": 50.75, + "learning_rate": 2.5242725123843683e-07, + "log_odds": 5.256808757781982, + "log_odds_ratio": -0.5572360754013062, + "loss": 0.3503, + "rejected_geometric_mean": -6.03904914855957, + "step": 6914 + }, + { + "chosen_geometric_mean": -1.1619513034820557, + "epoch": 1.71, + "grad_norm": 2.375, + "learning_rate": 2.5200114855293666e-07, + "log_odds": 7.016163349151611, + "log_odds_ratio": -0.053200963884592056, + "loss": 0.231, + "rejected_geometric_mean": -7.788562297821045, + "step": 6915 + }, + { + "chosen_geometric_mean": -0.9890799522399902, + "epoch": 1.71, + "grad_norm": 2.546875, + "learning_rate": 2.515753867180237e-07, + "log_odds": 5.729820251464844, + "log_odds_ratio": -0.048998139798641205, + "loss": 0.2422, + "rejected_geometric_mean": -6.275692939758301, + "step": 6916 + }, + { + "chosen_geometric_mean": -0.9829235076904297, + "epoch": 1.71, + "grad_norm": 9.5, + "learning_rate": 2.5114996579825274e-07, + "log_odds": 5.595013618469238, + "log_odds_ratio": -0.08878307789564133, + "loss": 0.2594, + "rejected_geometric_mean": -6.123542785644531, + "step": 6917 + }, + { + "chosen_geometric_mean": -0.9799259305000305, + "epoch": 1.71, + "grad_norm": 3.03125, + "learning_rate": 2.507248858581279e-07, + "log_odds": 11.275032043457031, + "log_odds_ratio": -0.010202066972851753, + "loss": 0.2414, + "rejected_geometric_mean": -11.780679702758789, + "step": 6918 + }, + { + "chosen_geometric_mean": -1.116365671157837, + "epoch": 1.71, + "grad_norm": 17.125, + "learning_rate": 2.503001469621025e-07, + "log_odds": 8.761441230773926, + "log_odds_ratio": -0.015013385564088821, + "loss": 0.2572, + "rejected_geometric_mean": -9.422776222229004, + "step": 6919 + }, + { + "chosen_geometric_mean": -0.9230701923370361, + "epoch": 1.71, + "grad_norm": 3.859375, + "learning_rate": 2.49875749174576e-07, + "log_odds": 11.081965446472168, + "log_odds_ratio": -0.0037217256613075733, + "loss": 0.2273, + "rejected_geometric_mean": -11.497596740722656, + "step": 6920 + }, + { + "chosen_geometric_mean": -0.9429887533187866, + "epoch": 1.71, + "grad_norm": 2.4375, + "learning_rate": 2.4945169255989804e-07, + "log_odds": 8.392574310302734, + "log_odds_ratio": -0.05202505737543106, + "loss": 0.243, + "rejected_geometric_mean": -8.86693000793457, + "step": 6921 + }, + { + "chosen_geometric_mean": -0.8872700929641724, + "epoch": 1.71, + "grad_norm": 3.296875, + "learning_rate": 2.4902797718236505e-07, + "log_odds": 5.657567024230957, + "log_odds_ratio": -0.22055365145206451, + "loss": 0.2703, + "rejected_geometric_mean": -6.218869209289551, + "step": 6922 + }, + { + "chosen_geometric_mean": -1.0243431329727173, + "epoch": 1.71, + "grad_norm": 3.859375, + "learning_rate": 2.486046031062228e-07, + "log_odds": 10.073732376098633, + "log_odds_ratio": -0.11662714928388596, + "loss": 0.2504, + "rejected_geometric_mean": -10.724072456359863, + "step": 6923 + }, + { + "chosen_geometric_mean": -1.0159634351730347, + "epoch": 1.71, + "grad_norm": 11.0625, + "learning_rate": 2.481815703956655e-07, + "log_odds": 2.0353164672851562, + "log_odds_ratio": -0.260493665933609, + "loss": 0.2424, + "rejected_geometric_mean": -2.682446002960205, + "step": 6924 + }, + { + "chosen_geometric_mean": -1.282629132270813, + "epoch": 1.71, + "grad_norm": 2.0, + "learning_rate": 2.477588791148347e-07, + "log_odds": 4.600430011749268, + "log_odds_ratio": -0.16011391580104828, + "loss": 0.2565, + "rejected_geometric_mean": -5.610092639923096, + "step": 6925 + }, + { + "chosen_geometric_mean": -1.1209423542022705, + "epoch": 1.71, + "grad_norm": 8.75, + "learning_rate": 2.4733652932781985e-07, + "log_odds": 4.6726837158203125, + "log_odds_ratio": -0.26635897159576416, + "loss": 0.2827, + "rejected_geometric_mean": -5.503063201904297, + "step": 6926 + }, + { + "chosen_geometric_mean": -1.347781777381897, + "epoch": 1.72, + "grad_norm": 41.5, + "learning_rate": 2.4691452109866067e-07, + "log_odds": 8.213104248046875, + "log_odds_ratio": -0.1686708778142929, + "loss": 0.2645, + "rejected_geometric_mean": -9.336980819702148, + "step": 6927 + }, + { + "chosen_geometric_mean": -0.9712257981300354, + "epoch": 1.72, + "grad_norm": 3.140625, + "learning_rate": 2.4649285449134196e-07, + "log_odds": 6.707517147064209, + "log_odds_ratio": -0.010611535981297493, + "loss": 0.2606, + "rejected_geometric_mean": -7.19317626953125, + "step": 6928 + }, + { + "chosen_geometric_mean": -1.0953857898712158, + "epoch": 1.72, + "grad_norm": 1.8984375, + "learning_rate": 2.4607152956979993e-07, + "log_odds": 5.360972881317139, + "log_odds_ratio": -0.27989551424980164, + "loss": 0.2557, + "rejected_geometric_mean": -6.210048675537109, + "step": 6929 + }, + { + "chosen_geometric_mean": -0.9456924200057983, + "epoch": 1.72, + "grad_norm": 5.4375, + "learning_rate": 2.456505463979175e-07, + "log_odds": 9.101298332214355, + "log_odds_ratio": -0.09965251386165619, + "loss": 0.2988, + "rejected_geometric_mean": -9.601524353027344, + "step": 6930 + }, + { + "chosen_geometric_mean": -1.2331840991973877, + "epoch": 1.72, + "grad_norm": 1.875, + "learning_rate": 2.452299050395254e-07, + "log_odds": 10.412334442138672, + "log_odds_ratio": -0.09527818858623505, + "loss": 0.2444, + "rejected_geometric_mean": -11.33145809173584, + "step": 6931 + }, + { + "chosen_geometric_mean": -0.8458013534545898, + "epoch": 1.72, + "grad_norm": 6.6875, + "learning_rate": 2.448096055584026e-07, + "log_odds": 5.983198165893555, + "log_odds_ratio": -0.19693440198898315, + "loss": 0.2729, + "rejected_geometric_mean": -6.41735315322876, + "step": 6932 + }, + { + "chosen_geometric_mean": -1.5261987447738647, + "epoch": 1.72, + "grad_norm": 35.75, + "learning_rate": 2.44389648018277e-07, + "log_odds": 2.5236740112304688, + "log_odds_ratio": -0.22331419587135315, + "loss": 0.3055, + "rejected_geometric_mean": -3.8234856128692627, + "step": 6933 + }, + { + "chosen_geometric_mean": -0.891700804233551, + "epoch": 1.72, + "grad_norm": 18.125, + "learning_rate": 2.439700324828248e-07, + "log_odds": 14.569984436035156, + "log_odds_ratio": -0.0002044529828708619, + "loss": 0.1944, + "rejected_geometric_mean": -14.915712356567383, + "step": 6934 + }, + { + "chosen_geometric_mean": -0.7570756673812866, + "epoch": 1.72, + "grad_norm": 20.25, + "learning_rate": 2.4355075901566856e-07, + "log_odds": 6.269917964935303, + "log_odds_ratio": -0.09139843285083771, + "loss": 0.2667, + "rejected_geometric_mean": -6.468649387359619, + "step": 6935 + }, + { + "chosen_geometric_mean": -0.8769280910491943, + "epoch": 1.72, + "grad_norm": 1.8203125, + "learning_rate": 2.4313182768038153e-07, + "log_odds": 11.910260200500488, + "log_odds_ratio": -0.014268056489527225, + "loss": 0.2256, + "rejected_geometric_mean": -12.244165420532227, + "step": 6936 + }, + { + "chosen_geometric_mean": -1.2040945291519165, + "epoch": 1.72, + "grad_norm": 2.953125, + "learning_rate": 2.4271323854048233e-07, + "log_odds": 3.884225845336914, + "log_odds_ratio": -0.21898488700389862, + "loss": 0.2492, + "rejected_geometric_mean": -4.788618087768555, + "step": 6937 + }, + { + "chosen_geometric_mean": -1.062805414199829, + "epoch": 1.72, + "grad_norm": 7.625, + "learning_rate": 2.422949916594408e-07, + "log_odds": 4.938725471496582, + "log_odds_ratio": -0.193625807762146, + "loss": 0.2683, + "rejected_geometric_mean": -5.6612982749938965, + "step": 6938 + }, + { + "chosen_geometric_mean": -1.0316252708435059, + "epoch": 1.72, + "grad_norm": 7.59375, + "learning_rate": 2.4187708710067147e-07, + "log_odds": 8.872708320617676, + "log_odds_ratio": -0.04392712563276291, + "loss": 0.2296, + "rejected_geometric_mean": -9.450201988220215, + "step": 6939 + }, + { + "chosen_geometric_mean": -0.8555506467819214, + "epoch": 1.72, + "grad_norm": 8.125, + "learning_rate": 2.4145952492754005e-07, + "log_odds": 8.007817268371582, + "log_odds_ratio": -0.22670599818229675, + "loss": 0.229, + "rejected_geometric_mean": -8.507566452026367, + "step": 6940 + }, + { + "chosen_geometric_mean": -0.9474488496780396, + "epoch": 1.72, + "grad_norm": 3.71875, + "learning_rate": 2.410423052033581e-07, + "log_odds": 7.385004043579102, + "log_odds_ratio": -0.009114498272538185, + "loss": 0.2433, + "rejected_geometric_mean": -7.788264274597168, + "step": 6941 + }, + { + "chosen_geometric_mean": -0.8505293130874634, + "epoch": 1.72, + "grad_norm": 2.28125, + "learning_rate": 2.406254279913872e-07, + "log_odds": 11.110212326049805, + "log_odds_ratio": -0.11192674934864044, + "loss": 0.2733, + "rejected_geometric_mean": -11.495802879333496, + "step": 6942 + }, + { + "chosen_geometric_mean": -1.0798369646072388, + "epoch": 1.72, + "grad_norm": 9.0, + "learning_rate": 2.402088933548344e-07, + "log_odds": 7.492859363555908, + "log_odds_ratio": -0.1859642118215561, + "loss": 0.2626, + "rejected_geometric_mean": -8.231602668762207, + "step": 6943 + }, + { + "chosen_geometric_mean": -0.9348484873771667, + "epoch": 1.72, + "grad_norm": 51.75, + "learning_rate": 2.3979270135685737e-07, + "log_odds": 2.2864575386047363, + "log_odds_ratio": -0.24357597529888153, + "loss": 0.3129, + "rejected_geometric_mean": -2.873901128768921, + "step": 6944 + }, + { + "chosen_geometric_mean": -1.0892804861068726, + "epoch": 1.72, + "grad_norm": 2.09375, + "learning_rate": 2.3937685206056104e-07, + "log_odds": 9.856206893920898, + "log_odds_ratio": -0.032914575189352036, + "loss": 0.2309, + "rejected_geometric_mean": -10.547870635986328, + "step": 6945 + }, + { + "chosen_geometric_mean": -1.195368766784668, + "epoch": 1.72, + "grad_norm": 4.40625, + "learning_rate": 2.389613455289974e-07, + "log_odds": 1.9431307315826416, + "log_odds_ratio": -0.3633398413658142, + "loss": 0.3036, + "rejected_geometric_mean": -2.972306489944458, + "step": 6946 + }, + { + "chosen_geometric_mean": -1.1246737241744995, + "epoch": 1.72, + "grad_norm": 1.875, + "learning_rate": 2.3854618182516805e-07, + "log_odds": 5.999425888061523, + "log_odds_ratio": -0.1463252604007721, + "loss": 0.2325, + "rejected_geometric_mean": -6.802953243255615, + "step": 6947 + }, + { + "chosen_geometric_mean": -0.9047386646270752, + "epoch": 1.72, + "grad_norm": 11.875, + "learning_rate": 2.3813136101202116e-07, + "log_odds": 5.656530380249023, + "log_odds_ratio": -0.2861754298210144, + "loss": 0.2985, + "rejected_geometric_mean": -6.209559440612793, + "step": 6948 + }, + { + "chosen_geometric_mean": -1.1668498516082764, + "epoch": 1.72, + "grad_norm": 3.484375, + "learning_rate": 2.3771688315245318e-07, + "log_odds": 6.786548614501953, + "log_odds_ratio": -0.17229725420475006, + "loss": 0.2653, + "rejected_geometric_mean": -7.661905288696289, + "step": 6949 + }, + { + "chosen_geometric_mean": -0.891586422920227, + "epoch": 1.72, + "grad_norm": 2.734375, + "learning_rate": 2.373027483093096e-07, + "log_odds": 6.601168632507324, + "log_odds_ratio": -0.04774047061800957, + "loss": 0.2722, + "rejected_geometric_mean": -6.972142696380615, + "step": 6950 + }, + { + "chosen_geometric_mean": -0.9502018690109253, + "epoch": 1.72, + "grad_norm": 3.390625, + "learning_rate": 2.368889565453833e-07, + "log_odds": 10.217825889587402, + "log_odds_ratio": -5.7134278904413804e-05, + "loss": 0.2157, + "rejected_geometric_mean": -10.665209770202637, + "step": 6951 + }, + { + "chosen_geometric_mean": -0.8525580167770386, + "epoch": 1.72, + "grad_norm": 3.9375, + "learning_rate": 2.3647550792341406e-07, + "log_odds": 7.067623138427734, + "log_odds_ratio": -0.18344804644584656, + "loss": 0.2729, + "rejected_geometric_mean": -7.527037620544434, + "step": 6952 + }, + { + "chosen_geometric_mean": -1.1740171909332275, + "epoch": 1.72, + "grad_norm": 3.046875, + "learning_rate": 2.3606240250609204e-07, + "log_odds": 8.853165626525879, + "log_odds_ratio": -0.23677434027194977, + "loss": 0.2693, + "rejected_geometric_mean": -9.736734390258789, + "step": 6953 + }, + { + "chosen_geometric_mean": -1.1169158220291138, + "epoch": 1.72, + "grad_norm": 3.53125, + "learning_rate": 2.3564964035605293e-07, + "log_odds": 7.2593841552734375, + "log_odds_ratio": -0.003241571830585599, + "loss": 0.2916, + "rejected_geometric_mean": -7.97864294052124, + "step": 6954 + }, + { + "chosen_geometric_mean": -0.8843287229537964, + "epoch": 1.72, + "grad_norm": 16.0, + "learning_rate": 2.3523722153588086e-07, + "log_odds": 5.030528545379639, + "log_odds_ratio": -0.21135401725769043, + "loss": 0.2948, + "rejected_geometric_mean": -5.466558456420898, + "step": 6955 + }, + { + "chosen_geometric_mean": -0.8769978284835815, + "epoch": 1.72, + "grad_norm": 14.75, + "learning_rate": 2.3482514610810998e-07, + "log_odds": 13.527039527893066, + "log_odds_ratio": -0.02873755618929863, + "loss": 0.2839, + "rejected_geometric_mean": -13.864827156066895, + "step": 6956 + }, + { + "chosen_geometric_mean": -0.8298678398132324, + "epoch": 1.72, + "grad_norm": 17.5, + "learning_rate": 2.3441341413522028e-07, + "log_odds": 4.209383964538574, + "log_odds_ratio": -0.0664629265666008, + "loss": 0.2775, + "rejected_geometric_mean": -4.505467414855957, + "step": 6957 + }, + { + "chosen_geometric_mean": -0.8256532549858093, + "epoch": 1.72, + "grad_norm": 35.0, + "learning_rate": 2.3400202567963908e-07, + "log_odds": 5.018413543701172, + "log_odds_ratio": -0.030658483505249023, + "loss": 0.298, + "rejected_geometric_mean": -5.292139053344727, + "step": 6958 + }, + { + "chosen_geometric_mean": -1.3946741819381714, + "epoch": 1.72, + "grad_norm": 35.0, + "learning_rate": 2.3359098080374453e-07, + "log_odds": 6.847317218780518, + "log_odds_ratio": -0.10188025236129761, + "loss": 0.3055, + "rejected_geometric_mean": -7.968112945556641, + "step": 6959 + }, + { + "chosen_geometric_mean": -1.0081740617752075, + "epoch": 1.72, + "grad_norm": 2.1875, + "learning_rate": 2.331802795698593e-07, + "log_odds": 9.136974334716797, + "log_odds_ratio": -0.08534366637468338, + "loss": 0.2519, + "rejected_geometric_mean": -9.735169410705566, + "step": 6960 + }, + { + "chosen_geometric_mean": -1.1043846607208252, + "epoch": 1.72, + "grad_norm": 2.203125, + "learning_rate": 2.32769922040256e-07, + "log_odds": 6.82904052734375, + "log_odds_ratio": -0.03010154515504837, + "loss": 0.2507, + "rejected_geometric_mean": -7.5396270751953125, + "step": 6961 + }, + { + "chosen_geometric_mean": -0.8700761795043945, + "epoch": 1.72, + "grad_norm": 10.625, + "learning_rate": 2.3235990827715575e-07, + "log_odds": 7.881041049957275, + "log_odds_ratio": -0.2725277543067932, + "loss": 0.257, + "rejected_geometric_mean": -8.411603927612305, + "step": 6962 + }, + { + "chosen_geometric_mean": -1.1205586194992065, + "epoch": 1.72, + "grad_norm": 2.796875, + "learning_rate": 2.3195023834272552e-07, + "log_odds": 1.4977011680603027, + "log_odds_ratio": -0.3972235321998596, + "loss": 0.2574, + "rejected_geometric_mean": -2.4214744567871094, + "step": 6963 + }, + { + "chosen_geometric_mean": -1.1780650615692139, + "epoch": 1.72, + "grad_norm": 11.8125, + "learning_rate": 2.3154091229908082e-07, + "log_odds": 9.848616600036621, + "log_odds_ratio": -0.008458727970719337, + "loss": 0.2565, + "rejected_geometric_mean": -10.539886474609375, + "step": 6964 + }, + { + "chosen_geometric_mean": -0.9596996307373047, + "epoch": 1.72, + "grad_norm": 32.75, + "learning_rate": 2.3113193020828595e-07, + "log_odds": 8.395344734191895, + "log_odds_ratio": -0.17397961020469666, + "loss": 0.2734, + "rejected_geometric_mean": -8.970401763916016, + "step": 6965 + }, + { + "chosen_geometric_mean": -0.9142194986343384, + "epoch": 1.72, + "grad_norm": 4.59375, + "learning_rate": 2.3072329213235212e-07, + "log_odds": 12.994256019592285, + "log_odds_ratio": -0.00011228324729017913, + "loss": 0.3087, + "rejected_geometric_mean": -13.382640838623047, + "step": 6966 + }, + { + "chosen_geometric_mean": -0.9319498538970947, + "epoch": 1.72, + "grad_norm": 4.90625, + "learning_rate": 2.303149981332384e-07, + "log_odds": 9.809771537780762, + "log_odds_ratio": -0.0490395687520504, + "loss": 0.2446, + "rejected_geometric_mean": -10.265597343444824, + "step": 6967 + }, + { + "chosen_geometric_mean": -0.9722111225128174, + "epoch": 1.73, + "grad_norm": 1.8828125, + "learning_rate": 2.29907048272853e-07, + "log_odds": 10.094063758850098, + "log_odds_ratio": -0.2464030534029007, + "loss": 0.2225, + "rejected_geometric_mean": -10.746479034423828, + "step": 6968 + }, + { + "chosen_geometric_mean": -1.0508670806884766, + "epoch": 1.73, + "grad_norm": 45.75, + "learning_rate": 2.2949944261304952e-07, + "log_odds": 11.368339538574219, + "log_odds_ratio": -0.10990367084741592, + "loss": 0.2814, + "rejected_geometric_mean": -12.033027648925781, + "step": 6969 + }, + { + "chosen_geometric_mean": -0.9259517192840576, + "epoch": 1.73, + "grad_norm": 6.78125, + "learning_rate": 2.2909218121563216e-07, + "log_odds": 9.969365119934082, + "log_odds_ratio": -0.13770952820777893, + "loss": 0.2811, + "rejected_geometric_mean": -10.482872009277344, + "step": 6970 + }, + { + "chosen_geometric_mean": -1.0455490350723267, + "epoch": 1.73, + "grad_norm": 1.890625, + "learning_rate": 2.2868526414235065e-07, + "log_odds": 10.01470947265625, + "log_odds_ratio": -0.0005121517460793257, + "loss": 0.2486, + "rejected_geometric_mean": -10.624671936035156, + "step": 6971 + }, + { + "chosen_geometric_mean": -0.938493013381958, + "epoch": 1.73, + "grad_norm": 14.125, + "learning_rate": 2.282786914549026e-07, + "log_odds": 6.2783966064453125, + "log_odds_ratio": -0.26771995425224304, + "loss": 0.3017, + "rejected_geometric_mean": -6.897871971130371, + "step": 6972 + }, + { + "chosen_geometric_mean": -1.2212482690811157, + "epoch": 1.73, + "grad_norm": 28.25, + "learning_rate": 2.2787246321493594e-07, + "log_odds": 5.5249152183532715, + "log_odds_ratio": -0.2975921630859375, + "loss": 0.3226, + "rejected_geometric_mean": -6.576523780822754, + "step": 6973 + }, + { + "chosen_geometric_mean": -1.7359864711761475, + "epoch": 1.73, + "grad_norm": 48.75, + "learning_rate": 2.2746657948404417e-07, + "log_odds": 3.145205497741699, + "log_odds_ratio": -0.11009781062602997, + "loss": 0.3301, + "rejected_geometric_mean": -4.628973960876465, + "step": 6974 + }, + { + "chosen_geometric_mean": -1.0041967630386353, + "epoch": 1.73, + "grad_norm": 3.875, + "learning_rate": 2.2706104032376803e-07, + "log_odds": 7.373509407043457, + "log_odds_ratio": -0.026731440797448158, + "loss": 0.2591, + "rejected_geometric_mean": -7.92283821105957, + "step": 6975 + }, + { + "chosen_geometric_mean": -0.8507168292999268, + "epoch": 1.73, + "grad_norm": 2.28125, + "learning_rate": 2.266558457955978e-07, + "log_odds": 8.968669891357422, + "log_odds_ratio": -0.09783175587654114, + "loss": 0.243, + "rejected_geometric_mean": -9.326942443847656, + "step": 6976 + }, + { + "chosen_geometric_mean": -1.1088082790374756, + "epoch": 1.73, + "grad_norm": 20.25, + "learning_rate": 2.2625099596097043e-07, + "log_odds": 7.589406967163086, + "log_odds_ratio": -0.04262031987309456, + "loss": 0.267, + "rejected_geometric_mean": -8.250335693359375, + "step": 6977 + }, + { + "chosen_geometric_mean": -1.1598848104476929, + "epoch": 1.73, + "grad_norm": 44.25, + "learning_rate": 2.2584649088127076e-07, + "log_odds": 5.381945610046387, + "log_odds_ratio": -0.04633568972349167, + "loss": 0.3117, + "rejected_geometric_mean": -6.166792392730713, + "step": 6978 + }, + { + "chosen_geometric_mean": -0.866274356842041, + "epoch": 1.73, + "grad_norm": 2.75, + "learning_rate": 2.2544233061783215e-07, + "log_odds": 4.073802947998047, + "log_odds_ratio": -0.12008447200059891, + "loss": 0.2201, + "rejected_geometric_mean": -4.475845813751221, + "step": 6979 + }, + { + "chosen_geometric_mean": -0.896513044834137, + "epoch": 1.73, + "grad_norm": 2.203125, + "learning_rate": 2.2503851523193453e-07, + "log_odds": 12.838872909545898, + "log_odds_ratio": -0.011949445120990276, + "loss": 0.2186, + "rejected_geometric_mean": -13.19526481628418, + "step": 6980 + }, + { + "chosen_geometric_mean": -1.670846939086914, + "epoch": 1.73, + "grad_norm": 16.125, + "learning_rate": 2.2463504478480557e-07, + "log_odds": 8.924030303955078, + "log_odds_ratio": -0.18397419154644012, + "loss": 0.2541, + "rejected_geometric_mean": -10.323750495910645, + "step": 6981 + }, + { + "chosen_geometric_mean": -1.1541203260421753, + "epoch": 1.73, + "grad_norm": 2.265625, + "learning_rate": 2.2423191933762216e-07, + "log_odds": 8.920202255249023, + "log_odds_ratio": -0.190528005361557, + "loss": 0.2719, + "rejected_geometric_mean": -9.791472434997559, + "step": 6982 + }, + { + "chosen_geometric_mean": -1.1276947259902954, + "epoch": 1.73, + "grad_norm": 3.421875, + "learning_rate": 2.2382913895150627e-07, + "log_odds": 7.414840221405029, + "log_odds_ratio": -0.00517425499856472, + "loss": 0.2697, + "rejected_geometric_mean": -8.125702857971191, + "step": 6983 + }, + { + "chosen_geometric_mean": -0.8856455087661743, + "epoch": 1.73, + "grad_norm": 3.703125, + "learning_rate": 2.2342670368753012e-07, + "log_odds": 0.9221162796020508, + "log_odds_ratio": -0.43834930658340454, + "loss": 0.2504, + "rejected_geometric_mean": -1.601287841796875, + "step": 6984 + }, + { + "chosen_geometric_mean": -1.1542290449142456, + "epoch": 1.73, + "grad_norm": 9.0625, + "learning_rate": 2.2302461360671297e-07, + "log_odds": 10.067237854003906, + "log_odds_ratio": -0.12935525178909302, + "loss": 0.2512, + "rejected_geometric_mean": -10.881555557250977, + "step": 6985 + }, + { + "chosen_geometric_mean": -1.0119831562042236, + "epoch": 1.73, + "grad_norm": 15.0, + "learning_rate": 2.2262286877001994e-07, + "log_odds": 9.555436134338379, + "log_odds_ratio": -0.08586583286523819, + "loss": 0.2805, + "rejected_geometric_mean": -10.175224304199219, + "step": 6986 + }, + { + "chosen_geometric_mean": -0.9600294232368469, + "epoch": 1.73, + "grad_norm": 3.359375, + "learning_rate": 2.2222146923836674e-07, + "log_odds": 6.344093322753906, + "log_odds_ratio": -0.24165485799312592, + "loss": 0.2332, + "rejected_geometric_mean": -6.943418502807617, + "step": 6987 + }, + { + "chosen_geometric_mean": -0.8786359429359436, + "epoch": 1.73, + "grad_norm": 4.15625, + "learning_rate": 2.2182041507261387e-07, + "log_odds": 7.383177757263184, + "log_odds_ratio": -0.21405808627605438, + "loss": 0.2427, + "rejected_geometric_mean": -7.861496925354004, + "step": 6988 + }, + { + "chosen_geometric_mean": -1.0883127450942993, + "epoch": 1.73, + "grad_norm": 16.625, + "learning_rate": 2.2141970633357158e-07, + "log_odds": 9.199827194213867, + "log_odds_ratio": -0.01833951100707054, + "loss": 0.2797, + "rejected_geometric_mean": -9.839323043823242, + "step": 6989 + }, + { + "chosen_geometric_mean": -0.8771567940711975, + "epoch": 1.73, + "grad_norm": 16.875, + "learning_rate": 2.2101934308199652e-07, + "log_odds": 7.691989898681641, + "log_odds_ratio": -0.2432754933834076, + "loss": 0.29, + "rejected_geometric_mean": -8.250384330749512, + "step": 6990 + }, + { + "chosen_geometric_mean": -0.9497884511947632, + "epoch": 1.73, + "grad_norm": 2.40625, + "learning_rate": 2.2061932537859375e-07, + "log_odds": 6.145902156829834, + "log_odds_ratio": -0.12117888033390045, + "loss": 0.2775, + "rejected_geometric_mean": -6.616497039794922, + "step": 6991 + }, + { + "chosen_geometric_mean": -1.0779573917388916, + "epoch": 1.73, + "grad_norm": 6.0, + "learning_rate": 2.202196532840148e-07, + "log_odds": 7.108393669128418, + "log_odds_ratio": -0.19429989159107208, + "loss": 0.2467, + "rejected_geometric_mean": -7.855348587036133, + "step": 6992 + }, + { + "chosen_geometric_mean": -1.044506311416626, + "epoch": 1.73, + "grad_norm": 13.9375, + "learning_rate": 2.198203268588603e-07, + "log_odds": 3.0968518257141113, + "log_odds_ratio": -0.2480938881635666, + "loss": 0.287, + "rejected_geometric_mean": -3.884793758392334, + "step": 6993 + }, + { + "chosen_geometric_mean": -0.9277691841125488, + "epoch": 1.73, + "grad_norm": 25.75, + "learning_rate": 2.1942134616367766e-07, + "log_odds": 3.5951008796691895, + "log_odds_ratio": -0.20060518383979797, + "loss": 0.2324, + "rejected_geometric_mean": -4.102888107299805, + "step": 6994 + }, + { + "chosen_geometric_mean": -0.9890430569648743, + "epoch": 1.73, + "grad_norm": 20.375, + "learning_rate": 2.1902271125896158e-07, + "log_odds": 5.826406002044678, + "log_odds_ratio": -0.14474712312221527, + "loss": 0.2795, + "rejected_geometric_mean": -6.396734237670898, + "step": 6995 + }, + { + "chosen_geometric_mean": -1.0514682531356812, + "epoch": 1.73, + "grad_norm": 39.25, + "learning_rate": 2.1862442220515533e-07, + "log_odds": 8.878242492675781, + "log_odds_ratio": -0.14721350371837616, + "loss": 0.2626, + "rejected_geometric_mean": -9.602375030517578, + "step": 6996 + }, + { + "chosen_geometric_mean": -1.08188796043396, + "epoch": 1.73, + "grad_norm": 6.875, + "learning_rate": 2.1822647906264844e-07, + "log_odds": 6.869777679443359, + "log_odds_ratio": -0.08023195713758469, + "loss": 0.2839, + "rejected_geometric_mean": -7.562122821807861, + "step": 6997 + }, + { + "chosen_geometric_mean": -0.7892014384269714, + "epoch": 1.73, + "grad_norm": 2.8125, + "learning_rate": 2.1782888189177842e-07, + "log_odds": 2.819303512573242, + "log_odds_ratio": -0.2358672022819519, + "loss": 0.2243, + "rejected_geometric_mean": -3.2049193382263184, + "step": 6998 + }, + { + "chosen_geometric_mean": -1.3075969219207764, + "epoch": 1.73, + "grad_norm": 7.8125, + "learning_rate": 2.1743163075283096e-07, + "log_odds": 9.258642196655273, + "log_odds_ratio": -0.03897920995950699, + "loss": 0.2512, + "rejected_geometric_mean": -10.20218276977539, + "step": 6999 + }, + { + "chosen_geometric_mean": -1.2009069919586182, + "epoch": 1.73, + "grad_norm": 4.625, + "learning_rate": 2.1703472570603928e-07, + "log_odds": 6.768876552581787, + "log_odds_ratio": -0.11685439199209213, + "loss": 0.3033, + "rejected_geometric_mean": -7.675832748413086, + "step": 7000 + }, + { + "chosen_geometric_mean": -1.0396965742111206, + "epoch": 1.73, + "grad_norm": 3.625, + "learning_rate": 2.16638166811583e-07, + "log_odds": 2.552389621734619, + "log_odds_ratio": -0.30055373907089233, + "loss": 0.2444, + "rejected_geometric_mean": -3.330965995788574, + "step": 7001 + }, + { + "chosen_geometric_mean": -0.8129051923751831, + "epoch": 1.73, + "grad_norm": 19.875, + "learning_rate": 2.162419541295907e-07, + "log_odds": 2.8167223930358887, + "log_odds_ratio": -0.23761530220508575, + "loss": 0.2758, + "rejected_geometric_mean": -3.2417051792144775, + "step": 7002 + }, + { + "chosen_geometric_mean": -0.8464821577072144, + "epoch": 1.73, + "grad_norm": 9.25, + "learning_rate": 2.158460877201371e-07, + "log_odds": 5.283115386962891, + "log_odds_ratio": -0.24577529728412628, + "loss": 0.25, + "rejected_geometric_mean": -5.79742956161499, + "step": 7003 + }, + { + "chosen_geometric_mean": -0.9069527983665466, + "epoch": 1.73, + "grad_norm": 22.125, + "learning_rate": 2.1545056764324445e-07, + "log_odds": 10.469069480895996, + "log_odds_ratio": -0.11217105388641357, + "loss": 0.2335, + "rejected_geometric_mean": -10.868289947509766, + "step": 7004 + }, + { + "chosen_geometric_mean": -1.1925128698349, + "epoch": 1.73, + "grad_norm": 3.09375, + "learning_rate": 2.150553939588848e-07, + "log_odds": 1.9498977661132812, + "log_odds_ratio": -0.20319931209087372, + "loss": 0.2747, + "rejected_geometric_mean": -2.8519246578216553, + "step": 7005 + }, + { + "chosen_geometric_mean": -1.3061761856079102, + "epoch": 1.73, + "grad_norm": 30.875, + "learning_rate": 2.1466056672697493e-07, + "log_odds": 2.6366026401519775, + "log_odds_ratio": -0.10611865669488907, + "loss": 0.2059, + "rejected_geometric_mean": -3.6652724742889404, + "step": 7006 + }, + { + "chosen_geometric_mean": -1.2630407810211182, + "epoch": 1.73, + "grad_norm": 11.3125, + "learning_rate": 2.1426608600738003e-07, + "log_odds": 5.083500385284424, + "log_odds_ratio": -0.2293315976858139, + "loss": 0.2524, + "rejected_geometric_mean": -6.059749603271484, + "step": 7007 + }, + { + "chosen_geometric_mean": -1.0620592832565308, + "epoch": 1.74, + "grad_norm": 23.75, + "learning_rate": 2.1387195185991333e-07, + "log_odds": 8.31043815612793, + "log_odds_ratio": -0.08419015258550644, + "loss": 0.2962, + "rejected_geometric_mean": -8.954325675964355, + "step": 7008 + }, + { + "chosen_geometric_mean": -0.9713870286941528, + "epoch": 1.74, + "grad_norm": 12.75, + "learning_rate": 2.1347816434433426e-07, + "log_odds": 3.185120105743408, + "log_odds_ratio": -0.3024650812149048, + "loss": 0.285, + "rejected_geometric_mean": -3.8610033988952637, + "step": 7009 + }, + { + "chosen_geometric_mean": -0.875450611114502, + "epoch": 1.74, + "grad_norm": 2.109375, + "learning_rate": 2.130847235203512e-07, + "log_odds": 8.250280380249023, + "log_odds_ratio": -0.13802209496498108, + "loss": 0.2368, + "rejected_geometric_mean": -8.687026977539062, + "step": 7010 + }, + { + "chosen_geometric_mean": -0.8665216565132141, + "epoch": 1.74, + "grad_norm": 11.625, + "learning_rate": 2.1269162944761918e-07, + "log_odds": 2.660273551940918, + "log_odds_ratio": -0.17541825771331787, + "loss": 0.2712, + "rejected_geometric_mean": -3.0701217651367188, + "step": 7011 + }, + { + "chosen_geometric_mean": -1.0004523992538452, + "epoch": 1.74, + "grad_norm": 18.375, + "learning_rate": 2.1229888218574025e-07, + "log_odds": 6.369026184082031, + "log_odds_ratio": -0.12197646498680115, + "loss": 0.2269, + "rejected_geometric_mean": -6.939253330230713, + "step": 7012 + }, + { + "chosen_geometric_mean": -1.0571030378341675, + "epoch": 1.74, + "grad_norm": 13.0625, + "learning_rate": 2.1190648179426482e-07, + "log_odds": 5.5992326736450195, + "log_odds_ratio": -0.10482281446456909, + "loss": 0.2518, + "rejected_geometric_mean": -6.27757453918457, + "step": 7013 + }, + { + "chosen_geometric_mean": -1.1058540344238281, + "epoch": 1.74, + "grad_norm": 1.8984375, + "learning_rate": 2.1151442833269005e-07, + "log_odds": 8.702472686767578, + "log_odds_ratio": -0.08673588186502457, + "loss": 0.2542, + "rejected_geometric_mean": -9.430593490600586, + "step": 7014 + }, + { + "chosen_geometric_mean": -0.833617091178894, + "epoch": 1.74, + "grad_norm": 84.5, + "learning_rate": 2.1112272186046e-07, + "log_odds": 5.479269981384277, + "log_odds_ratio": -0.2879105806350708, + "loss": 0.3359, + "rejected_geometric_mean": -5.9225993156433105, + "step": 7015 + }, + { + "chosen_geometric_mean": -0.9351651668548584, + "epoch": 1.74, + "grad_norm": 2.578125, + "learning_rate": 2.1073136243696751e-07, + "log_odds": 5.698136329650879, + "log_odds_ratio": -0.18111790716648102, + "loss": 0.2392, + "rejected_geometric_mean": -6.28654670715332, + "step": 7016 + }, + { + "chosen_geometric_mean": -1.0616192817687988, + "epoch": 1.74, + "grad_norm": 4.25, + "learning_rate": 2.103403501215523e-07, + "log_odds": 6.9095988273620605, + "log_odds_ratio": -0.1151072084903717, + "loss": 0.2956, + "rejected_geometric_mean": -7.631674766540527, + "step": 7017 + }, + { + "chosen_geometric_mean": -1.0155004262924194, + "epoch": 1.74, + "grad_norm": 1.8671875, + "learning_rate": 2.0994968497350028e-07, + "log_odds": 4.340970993041992, + "log_odds_ratio": -0.18484555184841156, + "loss": 0.2172, + "rejected_geometric_mean": -4.961330890655518, + "step": 7018 + }, + { + "chosen_geometric_mean": -1.1007882356643677, + "epoch": 1.74, + "grad_norm": 3.3125, + "learning_rate": 2.0955936705204682e-07, + "log_odds": 4.240048408508301, + "log_odds_ratio": -0.1239163726568222, + "loss": 0.2693, + "rejected_geometric_mean": -4.994854927062988, + "step": 7019 + }, + { + "chosen_geometric_mean": -0.8387354612350464, + "epoch": 1.74, + "grad_norm": 14.0, + "learning_rate": 2.091693964163727e-07, + "log_odds": 9.556331634521484, + "log_odds_ratio": -0.0895303413271904, + "loss": 0.2348, + "rejected_geometric_mean": -9.899883270263672, + "step": 7020 + }, + { + "chosen_geometric_mean": -0.9761675000190735, + "epoch": 1.74, + "grad_norm": 3.484375, + "learning_rate": 2.087797731256061e-07, + "log_odds": 4.114849090576172, + "log_odds_ratio": -0.13854017853736877, + "loss": 0.2453, + "rejected_geometric_mean": -4.675288200378418, + "step": 7021 + }, + { + "chosen_geometric_mean": -1.0079792737960815, + "epoch": 1.74, + "grad_norm": 3.59375, + "learning_rate": 2.083904972388251e-07, + "log_odds": 6.982821464538574, + "log_odds_ratio": -0.20922693610191345, + "loss": 0.2522, + "rejected_geometric_mean": -7.576213836669922, + "step": 7022 + }, + { + "chosen_geometric_mean": -0.9077757000923157, + "epoch": 1.74, + "grad_norm": 2.171875, + "learning_rate": 2.0800156881505246e-07, + "log_odds": 2.888031244277954, + "log_odds_ratio": -0.29403963685035706, + "loss": 0.2741, + "rejected_geometric_mean": -3.4735584259033203, + "step": 7023 + }, + { + "chosen_geometric_mean": -1.090641975402832, + "epoch": 1.74, + "grad_norm": 45.0, + "learning_rate": 2.0761298791325824e-07, + "log_odds": 14.874984741210938, + "log_odds_ratio": -0.021575769409537315, + "loss": 0.2974, + "rejected_geometric_mean": -15.47677993774414, + "step": 7024 + }, + { + "chosen_geometric_mean": -0.9969355463981628, + "epoch": 1.74, + "grad_norm": 7.625, + "learning_rate": 2.0722475459236197e-07, + "log_odds": 4.883578777313232, + "log_odds_ratio": -0.02169816568493843, + "loss": 0.266, + "rejected_geometric_mean": -5.428638458251953, + "step": 7025 + }, + { + "chosen_geometric_mean": -1.1646885871887207, + "epoch": 1.74, + "grad_norm": 26.875, + "learning_rate": 2.0683686891122795e-07, + "log_odds": 9.087043762207031, + "log_odds_ratio": -0.01996198296546936, + "loss": 0.3041, + "rejected_geometric_mean": -9.762486457824707, + "step": 7026 + }, + { + "chosen_geometric_mean": -0.973035454750061, + "epoch": 1.74, + "grad_norm": 6.28125, + "learning_rate": 2.0644933092866943e-07, + "log_odds": 4.458396911621094, + "log_odds_ratio": -0.22661066055297852, + "loss": 0.2414, + "rejected_geometric_mean": -5.082874298095703, + "step": 7027 + }, + { + "chosen_geometric_mean": -0.8230137228965759, + "epoch": 1.74, + "grad_norm": 4.15625, + "learning_rate": 2.0606214070344717e-07, + "log_odds": 9.881635665893555, + "log_odds_ratio": -0.014074795879423618, + "loss": 0.2113, + "rejected_geometric_mean": -10.116842269897461, + "step": 7028 + }, + { + "chosen_geometric_mean": -1.1794719696044922, + "epoch": 1.74, + "grad_norm": 27.625, + "learning_rate": 2.0567529829426752e-07, + "log_odds": 6.9178595542907715, + "log_odds_ratio": -0.30797138810157776, + "loss": 0.2954, + "rejected_geometric_mean": -7.893858909606934, + "step": 7029 + }, + { + "chosen_geometric_mean": -0.8963397145271301, + "epoch": 1.74, + "grad_norm": 84.0, + "learning_rate": 2.0528880375978528e-07, + "log_odds": 8.574671745300293, + "log_odds_ratio": -0.14167897403240204, + "loss": 0.1929, + "rejected_geometric_mean": -9.069944381713867, + "step": 7030 + }, + { + "chosen_geometric_mean": -1.1138737201690674, + "epoch": 1.74, + "grad_norm": 2.0, + "learning_rate": 2.0490265715860297e-07, + "log_odds": 8.782660484313965, + "log_odds_ratio": -0.08169454336166382, + "loss": 0.2593, + "rejected_geometric_mean": -9.532782554626465, + "step": 7031 + }, + { + "chosen_geometric_mean": -0.8330238461494446, + "epoch": 1.74, + "grad_norm": 10.0625, + "learning_rate": 2.045168585492685e-07, + "log_odds": 11.410560607910156, + "log_odds_ratio": -0.11952283978462219, + "loss": 0.273, + "rejected_geometric_mean": -11.712041854858398, + "step": 7032 + }, + { + "chosen_geometric_mean": -1.1332252025604248, + "epoch": 1.74, + "grad_norm": 2.21875, + "learning_rate": 2.0413140799027896e-07, + "log_odds": 9.63341999053955, + "log_odds_ratio": -0.09990113973617554, + "loss": 0.2697, + "rejected_geometric_mean": -10.400572776794434, + "step": 7033 + }, + { + "chosen_geometric_mean": -1.0642344951629639, + "epoch": 1.74, + "grad_norm": 3.34375, + "learning_rate": 2.0374630554007818e-07, + "log_odds": 10.521589279174805, + "log_odds_ratio": -0.1452305018901825, + "loss": 0.2229, + "rejected_geometric_mean": -11.262457847595215, + "step": 7034 + }, + { + "chosen_geometric_mean": -1.3161407709121704, + "epoch": 1.74, + "grad_norm": 25.25, + "learning_rate": 2.0336155125705637e-07, + "log_odds": 2.590545415878296, + "log_odds_ratio": -0.3141329288482666, + "loss": 0.2646, + "rejected_geometric_mean": -3.654226779937744, + "step": 7035 + }, + { + "chosen_geometric_mean": -0.9801419973373413, + "epoch": 1.74, + "grad_norm": 9.75, + "learning_rate": 2.0297714519955186e-07, + "log_odds": 5.763567924499512, + "log_odds_ratio": -0.07084880024194717, + "loss": 0.3001, + "rejected_geometric_mean": -6.311923027038574, + "step": 7036 + }, + { + "chosen_geometric_mean": -0.8784436583518982, + "epoch": 1.74, + "grad_norm": 5.03125, + "learning_rate": 2.0259308742584943e-07, + "log_odds": 14.446538925170898, + "log_odds_ratio": -0.016913503408432007, + "loss": 0.2431, + "rejected_geometric_mean": -14.798134803771973, + "step": 7037 + }, + { + "chosen_geometric_mean": -0.9227516651153564, + "epoch": 1.74, + "grad_norm": 2.78125, + "learning_rate": 2.0220937799418194e-07, + "log_odds": 18.99021339416504, + "log_odds_ratio": -8.940698137394065e-08, + "loss": 0.2097, + "rejected_geometric_mean": -19.401691436767578, + "step": 7038 + }, + { + "chosen_geometric_mean": -0.8145797252655029, + "epoch": 1.74, + "grad_norm": 5.03125, + "learning_rate": 2.018260169627284e-07, + "log_odds": 3.8023693561553955, + "log_odds_ratio": -0.2610182464122772, + "loss": 0.2304, + "rejected_geometric_mean": -4.257874488830566, + "step": 7039 + }, + { + "chosen_geometric_mean": -1.0784748792648315, + "epoch": 1.74, + "grad_norm": 10.625, + "learning_rate": 2.014430043896165e-07, + "log_odds": 15.318597793579102, + "log_odds_ratio": -0.0007451989222317934, + "loss": 0.2643, + "rejected_geometric_mean": -15.973424911499023, + "step": 7040 + }, + { + "chosen_geometric_mean": -0.9466655254364014, + "epoch": 1.74, + "grad_norm": 4.0625, + "learning_rate": 2.0106034033291893e-07, + "log_odds": 7.307432651519775, + "log_odds_ratio": -0.19852042198181152, + "loss": 0.2459, + "rejected_geometric_mean": -7.832855224609375, + "step": 7041 + }, + { + "chosen_geometric_mean": -1.0674654245376587, + "epoch": 1.74, + "grad_norm": 12.0, + "learning_rate": 2.0067802485065764e-07, + "log_odds": 3.5211021900177, + "log_odds_ratio": -0.10234305262565613, + "loss": 0.2445, + "rejected_geometric_mean": -4.22307014465332, + "step": 7042 + }, + { + "chosen_geometric_mean": -1.0938066244125366, + "epoch": 1.74, + "grad_norm": 5.125, + "learning_rate": 2.002960580008001e-07, + "log_odds": 3.8335251808166504, + "log_odds_ratio": -0.1500982791185379, + "loss": 0.2574, + "rejected_geometric_mean": -4.534713268280029, + "step": 7043 + }, + { + "chosen_geometric_mean": -0.964988648891449, + "epoch": 1.74, + "grad_norm": 9.25, + "learning_rate": 1.9991443984126225e-07, + "log_odds": 6.349361419677734, + "log_odds_ratio": -0.12564945220947266, + "loss": 0.2692, + "rejected_geometric_mean": -6.910518169403076, + "step": 7044 + }, + { + "chosen_geometric_mean": -1.1159635782241821, + "epoch": 1.74, + "grad_norm": 16.625, + "learning_rate": 1.9953317042990667e-07, + "log_odds": 6.9492058753967285, + "log_odds_ratio": -0.006732046138495207, + "loss": 0.2819, + "rejected_geometric_mean": -7.584117412567139, + "step": 7045 + }, + { + "chosen_geometric_mean": -1.0362447500228882, + "epoch": 1.74, + "grad_norm": 7.9375, + "learning_rate": 1.9915224982454267e-07, + "log_odds": 9.675135612487793, + "log_odds_ratio": -0.0393502376973629, + "loss": 0.2342, + "rejected_geometric_mean": -10.265091896057129, + "step": 7046 + }, + { + "chosen_geometric_mean": -1.0596672296524048, + "epoch": 1.74, + "grad_norm": 2.203125, + "learning_rate": 1.987716780829266e-07, + "log_odds": 5.668393135070801, + "log_odds_ratio": -0.18550291657447815, + "loss": 0.2144, + "rejected_geometric_mean": -6.437856674194336, + "step": 7047 + }, + { + "chosen_geometric_mean": -1.006986379623413, + "epoch": 1.74, + "grad_norm": 28.625, + "learning_rate": 1.983914552627625e-07, + "log_odds": 15.002464294433594, + "log_odds_ratio": -1.1772061043302529e-05, + "loss": 0.2577, + "rejected_geometric_mean": -15.521881103515625, + "step": 7048 + }, + { + "chosen_geometric_mean": -1.0691074132919312, + "epoch": 1.75, + "grad_norm": 12.3125, + "learning_rate": 1.9801158142170208e-07, + "log_odds": 9.616958618164062, + "log_odds_ratio": -0.03804987668991089, + "loss": 0.2784, + "rejected_geometric_mean": -10.285736083984375, + "step": 7049 + }, + { + "chosen_geometric_mean": -1.1135954856872559, + "epoch": 1.75, + "grad_norm": 4.46875, + "learning_rate": 1.9763205661734201e-07, + "log_odds": 8.263127326965332, + "log_odds_ratio": -0.22995658218860626, + "loss": 0.2776, + "rejected_geometric_mean": -9.122391700744629, + "step": 7050 + }, + { + "chosen_geometric_mean": -1.0476477146148682, + "epoch": 1.75, + "grad_norm": 12.25, + "learning_rate": 1.9725288090722855e-07, + "log_odds": 3.108900547027588, + "log_odds_ratio": -0.20367704331874847, + "loss": 0.2411, + "rejected_geometric_mean": -3.81191349029541, + "step": 7051 + }, + { + "chosen_geometric_mean": -1.036512017250061, + "epoch": 1.75, + "grad_norm": 5.46875, + "learning_rate": 1.9687405434885314e-07, + "log_odds": 6.597110748291016, + "log_odds_ratio": -0.0668284147977829, + "loss": 0.3223, + "rejected_geometric_mean": -7.23291015625, + "step": 7052 + }, + { + "chosen_geometric_mean": -0.8809261322021484, + "epoch": 1.75, + "grad_norm": 40.75, + "learning_rate": 1.9649557699965488e-07, + "log_odds": 2.4839115142822266, + "log_odds_ratio": -0.3465043008327484, + "loss": 0.2971, + "rejected_geometric_mean": -3.093686580657959, + "step": 7053 + }, + { + "chosen_geometric_mean": -1.3444263935089111, + "epoch": 1.75, + "grad_norm": 16.25, + "learning_rate": 1.9611744891702007e-07, + "log_odds": 6.597511291503906, + "log_odds_ratio": -0.15263541042804718, + "loss": 0.2825, + "rejected_geometric_mean": -7.623239517211914, + "step": 7054 + }, + { + "chosen_geometric_mean": -0.8549145460128784, + "epoch": 1.75, + "grad_norm": 6.59375, + "learning_rate": 1.9573967015828282e-07, + "log_odds": 4.646069526672363, + "log_odds_ratio": -0.22024628520011902, + "loss": 0.277, + "rejected_geometric_mean": -5.084150791168213, + "step": 7055 + }, + { + "chosen_geometric_mean": -0.8916133642196655, + "epoch": 1.75, + "grad_norm": 1.9609375, + "learning_rate": 1.9536224078072236e-07, + "log_odds": 1.9204401969909668, + "log_odds_ratio": -0.2295760065317154, + "loss": 0.251, + "rejected_geometric_mean": -2.406231641769409, + "step": 7056 + }, + { + "chosen_geometric_mean": -0.9404937028884888, + "epoch": 1.75, + "grad_norm": 23.0, + "learning_rate": 1.9498516084156704e-07, + "log_odds": 4.615703105926514, + "log_odds_ratio": -0.14984357357025146, + "loss": 0.2237, + "rejected_geometric_mean": -5.168982982635498, + "step": 7057 + }, + { + "chosen_geometric_mean": -0.9237536787986755, + "epoch": 1.75, + "grad_norm": 2.34375, + "learning_rate": 1.9460843039799033e-07, + "log_odds": 11.801413536071777, + "log_odds_ratio": -0.010382656008005142, + "loss": 0.2894, + "rejected_geometric_mean": -12.177576065063477, + "step": 7058 + }, + { + "chosen_geometric_mean": -1.1759235858917236, + "epoch": 1.75, + "grad_norm": 6.9375, + "learning_rate": 1.942320495071137e-07, + "log_odds": 8.307230949401855, + "log_odds_ratio": -0.15811294317245483, + "loss": 0.2549, + "rejected_geometric_mean": -9.193556785583496, + "step": 7059 + }, + { + "chosen_geometric_mean": -0.8780690431594849, + "epoch": 1.75, + "grad_norm": 3.0, + "learning_rate": 1.9385601822600657e-07, + "log_odds": 5.860954284667969, + "log_odds_ratio": -0.006619021762162447, + "loss": 0.2535, + "rejected_geometric_mean": -6.194963455200195, + "step": 7060 + }, + { + "chosen_geometric_mean": -0.9763404130935669, + "epoch": 1.75, + "grad_norm": 32.75, + "learning_rate": 1.9348033661168303e-07, + "log_odds": 6.809357643127441, + "log_odds_ratio": -0.1318080723285675, + "loss": 0.2626, + "rejected_geometric_mean": -7.418801307678223, + "step": 7061 + }, + { + "chosen_geometric_mean": -1.0183045864105225, + "epoch": 1.75, + "grad_norm": 1.9609375, + "learning_rate": 1.9310500472110638e-07, + "log_odds": 6.7099456787109375, + "log_odds_ratio": -0.11007355153560638, + "loss": 0.2393, + "rejected_geometric_mean": -7.340818405151367, + "step": 7062 + }, + { + "chosen_geometric_mean": -1.0015450716018677, + "epoch": 1.75, + "grad_norm": 33.75, + "learning_rate": 1.9273002261118558e-07, + "log_odds": 1.4049042463302612, + "log_odds_ratio": -0.2864447236061096, + "loss": 0.2492, + "rejected_geometric_mean": -2.133679151535034, + "step": 7063 + }, + { + "chosen_geometric_mean": -0.9386343955993652, + "epoch": 1.75, + "grad_norm": 37.75, + "learning_rate": 1.9235539033877648e-07, + "log_odds": 5.441122531890869, + "log_odds_ratio": -0.2821214497089386, + "loss": 0.3002, + "rejected_geometric_mean": -6.0704026222229, + "step": 7064 + }, + { + "chosen_geometric_mean": -0.7926698923110962, + "epoch": 1.75, + "grad_norm": 5.21875, + "learning_rate": 1.9198110796068258e-07, + "log_odds": 7.245537281036377, + "log_odds_ratio": -0.008465217426419258, + "loss": 0.2057, + "rejected_geometric_mean": -7.4387712478637695, + "step": 7065 + }, + { + "chosen_geometric_mean": -1.0664615631103516, + "epoch": 1.75, + "grad_norm": 7.21875, + "learning_rate": 1.916071755336546e-07, + "log_odds": 8.677597999572754, + "log_odds_ratio": -0.1520218402147293, + "loss": 0.2693, + "rejected_geometric_mean": -9.410527229309082, + "step": 7066 + }, + { + "chosen_geometric_mean": -1.1598773002624512, + "epoch": 1.75, + "grad_norm": 24.5, + "learning_rate": 1.912335931143891e-07, + "log_odds": 12.904048919677734, + "log_odds_ratio": -0.1595790833234787, + "loss": 0.2443, + "rejected_geometric_mean": -13.734214782714844, + "step": 7067 + }, + { + "chosen_geometric_mean": -0.7826499342918396, + "epoch": 1.75, + "grad_norm": 5.625, + "learning_rate": 1.908603607595305e-07, + "log_odds": 13.131769180297852, + "log_odds_ratio": -0.022090796381235123, + "loss": 0.2207, + "rejected_geometric_mean": -13.306594848632812, + "step": 7068 + }, + { + "chosen_geometric_mean": -0.9025633335113525, + "epoch": 1.75, + "grad_norm": 3.734375, + "learning_rate": 1.9048747852566967e-07, + "log_odds": 13.638882637023926, + "log_odds_ratio": -0.0019774024840444326, + "loss": 0.2313, + "rejected_geometric_mean": -14.006078720092773, + "step": 7069 + }, + { + "chosen_geometric_mean": -1.2457011938095093, + "epoch": 1.75, + "grad_norm": 58.25, + "learning_rate": 1.901149464693436e-07, + "log_odds": 2.5445597171783447, + "log_odds_ratio": -0.21736544370651245, + "loss": 0.3076, + "rejected_geometric_mean": -3.5681557655334473, + "step": 7070 + }, + { + "chosen_geometric_mean": -0.8114320039749146, + "epoch": 1.75, + "grad_norm": 5.25, + "learning_rate": 1.89742764647039e-07, + "log_odds": 6.0103302001953125, + "log_odds_ratio": -0.052831750363111496, + "loss": 0.2324, + "rejected_geometric_mean": -6.264919281005859, + "step": 7071 + }, + { + "chosen_geometric_mean": -0.964867353439331, + "epoch": 1.75, + "grad_norm": 10.25, + "learning_rate": 1.8937093311518635e-07, + "log_odds": 5.071210861206055, + "log_odds_ratio": -0.09259893000125885, + "loss": 0.2988, + "rejected_geometric_mean": -5.605185031890869, + "step": 7072 + }, + { + "chosen_geometric_mean": -0.9814713001251221, + "epoch": 1.75, + "grad_norm": 5.15625, + "learning_rate": 1.8899945193016384e-07, + "log_odds": 4.692354202270508, + "log_odds_ratio": -0.05761570483446121, + "loss": 0.3159, + "rejected_geometric_mean": -5.2280378341674805, + "step": 7073 + }, + { + "chosen_geometric_mean": -1.031909704208374, + "epoch": 1.75, + "grad_norm": 3.0, + "learning_rate": 1.886283211482981e-07, + "log_odds": 6.56688928604126, + "log_odds_ratio": -0.33475950360298157, + "loss": 0.2507, + "rejected_geometric_mean": -7.291954040527344, + "step": 7074 + }, + { + "chosen_geometric_mean": -0.9593726992607117, + "epoch": 1.75, + "grad_norm": 3.359375, + "learning_rate": 1.8825754082586024e-07, + "log_odds": 4.170812606811523, + "log_odds_ratio": -0.14265957474708557, + "loss": 0.2531, + "rejected_geometric_mean": -4.710150241851807, + "step": 7075 + }, + { + "chosen_geometric_mean": -1.0834200382232666, + "epoch": 1.75, + "grad_norm": 44.25, + "learning_rate": 1.8788711101907026e-07, + "log_odds": 11.047759056091309, + "log_odds_ratio": -0.10985519737005234, + "loss": 0.2597, + "rejected_geometric_mean": -11.7551851272583, + "step": 7076 + }, + { + "chosen_geometric_mean": -0.8804324269294739, + "epoch": 1.75, + "grad_norm": 8.125, + "learning_rate": 1.8751703178409435e-07, + "log_odds": 15.217559814453125, + "log_odds_ratio": -0.00018077170534525067, + "loss": 0.2458, + "rejected_geometric_mean": -15.534920692443848, + "step": 7077 + }, + { + "chosen_geometric_mean": -1.0026211738586426, + "epoch": 1.75, + "grad_norm": 4.1875, + "learning_rate": 1.8714730317704488e-07, + "log_odds": 5.816870212554932, + "log_odds_ratio": -0.27250200510025024, + "loss": 0.3039, + "rejected_geometric_mean": -6.49303674697876, + "step": 7078 + }, + { + "chosen_geometric_mean": -0.83646559715271, + "epoch": 1.75, + "grad_norm": 1.9375, + "learning_rate": 1.867779252539817e-07, + "log_odds": 11.318127632141113, + "log_odds_ratio": -0.00867816898971796, + "loss": 0.2618, + "rejected_geometric_mean": -11.586407661437988, + "step": 7079 + }, + { + "chosen_geometric_mean": -0.8593711853027344, + "epoch": 1.75, + "grad_norm": 5.96875, + "learning_rate": 1.8640889807091168e-07, + "log_odds": 14.172988891601562, + "log_odds_ratio": -8.527767931809649e-05, + "loss": 0.2982, + "rejected_geometric_mean": -14.45706558227539, + "step": 7080 + }, + { + "chosen_geometric_mean": -0.9550838470458984, + "epoch": 1.75, + "grad_norm": 2.890625, + "learning_rate": 1.8604022168378754e-07, + "log_odds": 9.969138145446777, + "log_odds_ratio": -0.16663943231105804, + "loss": 0.2352, + "rejected_geometric_mean": -10.529297828674316, + "step": 7081 + }, + { + "chosen_geometric_mean": -0.8792059421539307, + "epoch": 1.75, + "grad_norm": 16.625, + "learning_rate": 1.8567189614850988e-07, + "log_odds": 3.8091464042663574, + "log_odds_ratio": -0.34748104214668274, + "loss": 0.2286, + "rejected_geometric_mean": -4.41935396194458, + "step": 7082 + }, + { + "chosen_geometric_mean": -0.976448655128479, + "epoch": 1.75, + "grad_norm": 1.78125, + "learning_rate": 1.8530392152092625e-07, + "log_odds": 12.385017395019531, + "log_odds_ratio": -0.015263481996953487, + "loss": 0.2247, + "rejected_geometric_mean": -12.88334846496582, + "step": 7083 + }, + { + "chosen_geometric_mean": -0.8042292594909668, + "epoch": 1.75, + "grad_norm": 4.03125, + "learning_rate": 1.849362978568292e-07, + "log_odds": 5.534246921539307, + "log_odds_ratio": -0.09541261941194534, + "loss": 0.2664, + "rejected_geometric_mean": -5.818527698516846, + "step": 7084 + }, + { + "chosen_geometric_mean": -1.1781173944473267, + "epoch": 1.75, + "grad_norm": 6.40625, + "learning_rate": 1.8456902521196057e-07, + "log_odds": 2.5878798961639404, + "log_odds_ratio": -0.36677154898643494, + "loss": 0.2378, + "rejected_geometric_mean": -3.575235366821289, + "step": 7085 + }, + { + "chosen_geometric_mean": -0.9680103659629822, + "epoch": 1.75, + "grad_norm": 3.625, + "learning_rate": 1.842021036420069e-07, + "log_odds": 5.795578479766846, + "log_odds_ratio": -0.20484225451946259, + "loss": 0.2358, + "rejected_geometric_mean": -6.364189624786377, + "step": 7086 + }, + { + "chosen_geometric_mean": -0.7770195603370667, + "epoch": 1.75, + "grad_norm": 1.90625, + "learning_rate": 1.8383553320260235e-07, + "log_odds": 10.510790824890137, + "log_odds_ratio": -0.0019089471315965056, + "loss": 0.2458, + "rejected_geometric_mean": -10.629179954528809, + "step": 7087 + }, + { + "chosen_geometric_mean": -1.0680972337722778, + "epoch": 1.75, + "grad_norm": 8.1875, + "learning_rate": 1.834693139493285e-07, + "log_odds": 6.47995138168335, + "log_odds_ratio": -0.23626631498336792, + "loss": 0.2681, + "rejected_geometric_mean": -7.250323295593262, + "step": 7088 + }, + { + "chosen_geometric_mean": -0.951901912689209, + "epoch": 1.76, + "grad_norm": 8.1875, + "learning_rate": 1.8310344593771235e-07, + "log_odds": 3.583512544631958, + "log_odds_ratio": -0.0726369246840477, + "loss": 0.2532, + "rejected_geometric_mean": -4.089632987976074, + "step": 7089 + }, + { + "chosen_geometric_mean": -1.021972417831421, + "epoch": 1.76, + "grad_norm": 18.375, + "learning_rate": 1.8273792922322787e-07, + "log_odds": 4.20370626449585, + "log_odds_ratio": -0.3534952998161316, + "loss": 0.2626, + "rejected_geometric_mean": -4.96580696105957, + "step": 7090 + }, + { + "chosen_geometric_mean": -1.1352771520614624, + "epoch": 1.76, + "grad_norm": 3.234375, + "learning_rate": 1.8237276386129738e-07, + "log_odds": 12.64475154876709, + "log_odds_ratio": -0.00015669793356209993, + "loss": 0.2302, + "rejected_geometric_mean": -13.386241912841797, + "step": 7091 + }, + { + "chosen_geometric_mean": -0.9092242121696472, + "epoch": 1.76, + "grad_norm": 17.5, + "learning_rate": 1.8200794990728743e-07, + "log_odds": 12.12123966217041, + "log_odds_ratio": -0.21767136454582214, + "loss": 0.2442, + "rejected_geometric_mean": -12.653985977172852, + "step": 7092 + }, + { + "chosen_geometric_mean": -0.9208985567092896, + "epoch": 1.76, + "grad_norm": 2.9375, + "learning_rate": 1.8164348741651323e-07, + "log_odds": 15.15225887298584, + "log_odds_ratio": -0.0006668064743280411, + "loss": 0.2327, + "rejected_geometric_mean": -15.557558059692383, + "step": 7093 + }, + { + "chosen_geometric_mean": -0.9307752847671509, + "epoch": 1.76, + "grad_norm": 12.0625, + "learning_rate": 1.812793764442364e-07, + "log_odds": 8.247289657592773, + "log_odds_ratio": -0.11012064665555954, + "loss": 0.252, + "rejected_geometric_mean": -8.715267181396484, + "step": 7094 + }, + { + "chosen_geometric_mean": -0.7250785231590271, + "epoch": 1.76, + "grad_norm": 3.25, + "learning_rate": 1.809156170456644e-07, + "log_odds": 11.842594146728516, + "log_odds_ratio": -1.6600281014689244e-05, + "loss": 0.2298, + "rejected_geometric_mean": -11.835416793823242, + "step": 7095 + }, + { + "chosen_geometric_mean": -1.0156927108764648, + "epoch": 1.76, + "grad_norm": 3.125, + "learning_rate": 1.805522092759515e-07, + "log_odds": 3.0565271377563477, + "log_odds_ratio": -0.1459217667579651, + "loss": 0.2457, + "rejected_geometric_mean": -3.7319743633270264, + "step": 7096 + }, + { + "chosen_geometric_mean": -1.4956094026565552, + "epoch": 1.76, + "grad_norm": 93.5, + "learning_rate": 1.8018915319019974e-07, + "log_odds": 6.495708465576172, + "log_odds_ratio": -0.010868728160858154, + "loss": 0.3194, + "rejected_geometric_mean": -7.6582722663879395, + "step": 7097 + }, + { + "chosen_geometric_mean": -1.068591833114624, + "epoch": 1.76, + "grad_norm": 16.0, + "learning_rate": 1.79826448843457e-07, + "log_odds": 12.019169807434082, + "log_odds_ratio": -0.020792733877897263, + "loss": 0.231, + "rejected_geometric_mean": -12.670843124389648, + "step": 7098 + }, + { + "chosen_geometric_mean": -0.836794912815094, + "epoch": 1.76, + "grad_norm": 16.75, + "learning_rate": 1.7946409629071794e-07, + "log_odds": 10.05703353881836, + "log_odds_ratio": -0.07607962936162949, + "loss": 0.211, + "rejected_geometric_mean": -10.375869750976562, + "step": 7099 + }, + { + "chosen_geometric_mean": -0.8881077766418457, + "epoch": 1.76, + "grad_norm": 2.21875, + "learning_rate": 1.7910209558692383e-07, + "log_odds": 6.678447723388672, + "log_odds_ratio": -0.1406591236591339, + "loss": 0.221, + "rejected_geometric_mean": -7.133820056915283, + "step": 7100 + }, + { + "chosen_geometric_mean": -0.8756215572357178, + "epoch": 1.76, + "grad_norm": 6.40625, + "learning_rate": 1.7874044678696246e-07, + "log_odds": 6.908755302429199, + "log_odds_ratio": -0.13682499527931213, + "loss": 0.2227, + "rejected_geometric_mean": -7.2841033935546875, + "step": 7101 + }, + { + "chosen_geometric_mean": -1.1019086837768555, + "epoch": 1.76, + "grad_norm": 21.125, + "learning_rate": 1.7837914994566885e-07, + "log_odds": 9.116914749145508, + "log_odds_ratio": -0.13147518038749695, + "loss": 0.2687, + "rejected_geometric_mean": -9.887441635131836, + "step": 7102 + }, + { + "chosen_geometric_mean": -0.8529601097106934, + "epoch": 1.76, + "grad_norm": 45.75, + "learning_rate": 1.7801820511782393e-07, + "log_odds": 3.5496253967285156, + "log_odds_ratio": -0.11590021848678589, + "loss": 0.2746, + "rejected_geometric_mean": -3.8785223960876465, + "step": 7103 + }, + { + "chosen_geometric_mean": -1.023175597190857, + "epoch": 1.76, + "grad_norm": 7.75, + "learning_rate": 1.7765761235815609e-07, + "log_odds": 3.81754732131958, + "log_odds_ratio": -0.16892299056053162, + "loss": 0.2623, + "rejected_geometric_mean": -4.505239486694336, + "step": 7104 + }, + { + "chosen_geometric_mean": -1.0334137678146362, + "epoch": 1.76, + "grad_norm": 4.34375, + "learning_rate": 1.7729737172133916e-07, + "log_odds": 2.878303289413452, + "log_odds_ratio": -0.30324792861938477, + "loss": 0.2444, + "rejected_geometric_mean": -3.6465001106262207, + "step": 7105 + }, + { + "chosen_geometric_mean": -1.1504299640655518, + "epoch": 1.76, + "grad_norm": 9.0625, + "learning_rate": 1.7693748326199494e-07, + "log_odds": 6.292505264282227, + "log_odds_ratio": -0.00886606052517891, + "loss": 0.2047, + "rejected_geometric_mean": -7.0373454093933105, + "step": 7106 + }, + { + "chosen_geometric_mean": -1.010728120803833, + "epoch": 1.76, + "grad_norm": 27.375, + "learning_rate": 1.7657794703469066e-07, + "log_odds": 9.545150756835938, + "log_odds_ratio": -0.0026247091591358185, + "loss": 0.3514, + "rejected_geometric_mean": -10.098888397216797, + "step": 7107 + }, + { + "chosen_geometric_mean": -1.0383329391479492, + "epoch": 1.76, + "grad_norm": 6.625, + "learning_rate": 1.7621876309394076e-07, + "log_odds": 6.418476104736328, + "log_odds_ratio": -0.0021967755164951086, + "loss": 0.266, + "rejected_geometric_mean": -6.996681213378906, + "step": 7108 + }, + { + "chosen_geometric_mean": -0.9279683828353882, + "epoch": 1.76, + "grad_norm": 5.46875, + "learning_rate": 1.7585993149420667e-07, + "log_odds": 4.205674171447754, + "log_odds_ratio": -0.13150517642498016, + "loss": 0.3004, + "rejected_geometric_mean": -4.702664375305176, + "step": 7109 + }, + { + "chosen_geometric_mean": -1.1510827541351318, + "epoch": 1.76, + "grad_norm": 3.4375, + "learning_rate": 1.7550145228989486e-07, + "log_odds": 9.247652053833008, + "log_odds_ratio": -0.005252180155366659, + "loss": 0.2213, + "rejected_geometric_mean": -9.995171546936035, + "step": 7110 + }, + { + "chosen_geometric_mean": -1.1610828638076782, + "epoch": 1.76, + "grad_norm": 2.90625, + "learning_rate": 1.7514332553536046e-07, + "log_odds": 7.945562839508057, + "log_odds_ratio": -0.2406952977180481, + "loss": 0.2775, + "rejected_geometric_mean": -8.824182510375977, + "step": 7111 + }, + { + "chosen_geometric_mean": -0.9359369874000549, + "epoch": 1.76, + "grad_norm": 2.3125, + "learning_rate": 1.7478555128490366e-07, + "log_odds": 3.0887277126312256, + "log_odds_ratio": -0.1991134136915207, + "loss": 0.2326, + "rejected_geometric_mean": -3.6864242553710938, + "step": 7112 + }, + { + "chosen_geometric_mean": -0.8354011178016663, + "epoch": 1.76, + "grad_norm": 2.96875, + "learning_rate": 1.7442812959277104e-07, + "log_odds": 9.370864868164062, + "log_odds_ratio": -0.2821214199066162, + "loss": 0.2932, + "rejected_geometric_mean": -9.803702354431152, + "step": 7113 + }, + { + "chosen_geometric_mean": -1.0751742124557495, + "epoch": 1.76, + "grad_norm": 9.0, + "learning_rate": 1.740710605131568e-07, + "log_odds": 3.1624441146850586, + "log_odds_ratio": -0.36387988924980164, + "loss": 0.2558, + "rejected_geometric_mean": -3.9969332218170166, + "step": 7114 + }, + { + "chosen_geometric_mean": -0.9911364316940308, + "epoch": 1.76, + "grad_norm": 45.0, + "learning_rate": 1.737143441002015e-07, + "log_odds": 5.575812816619873, + "log_odds_ratio": -0.21990323066711426, + "loss": 0.3192, + "rejected_geometric_mean": -6.25992488861084, + "step": 7115 + }, + { + "chosen_geometric_mean": -0.7501726150512695, + "epoch": 1.76, + "grad_norm": 2.0, + "learning_rate": 1.7335798040799128e-07, + "log_odds": 3.27935791015625, + "log_odds_ratio": -0.08730677515268326, + "loss": 0.2546, + "rejected_geometric_mean": -3.4799981117248535, + "step": 7116 + }, + { + "chosen_geometric_mean": -0.9345991015434265, + "epoch": 1.76, + "grad_norm": 5.03125, + "learning_rate": 1.7300196949056019e-07, + "log_odds": 8.088815689086914, + "log_odds_ratio": -0.07439704239368439, + "loss": 0.2658, + "rejected_geometric_mean": -8.51756763458252, + "step": 7117 + }, + { + "chosen_geometric_mean": -0.809456467628479, + "epoch": 1.76, + "grad_norm": 4.40625, + "learning_rate": 1.7264631140188782e-07, + "log_odds": 5.7706379890441895, + "log_odds_ratio": -0.09684786945581436, + "loss": 0.2344, + "rejected_geometric_mean": -6.014195442199707, + "step": 7118 + }, + { + "chosen_geometric_mean": -0.7898216843605042, + "epoch": 1.76, + "grad_norm": 2.90625, + "learning_rate": 1.7229100619589907e-07, + "log_odds": 8.102970123291016, + "log_odds_ratio": -0.26784300804138184, + "loss": 0.2391, + "rejected_geometric_mean": -8.536314010620117, + "step": 7119 + }, + { + "chosen_geometric_mean": -0.7880526781082153, + "epoch": 1.76, + "grad_norm": 8.0625, + "learning_rate": 1.7193605392646888e-07, + "log_odds": 6.24990177154541, + "log_odds_ratio": -0.2638137936592102, + "loss": 0.274, + "rejected_geometric_mean": -6.6521453857421875, + "step": 7120 + }, + { + "chosen_geometric_mean": -0.9600281119346619, + "epoch": 1.76, + "grad_norm": 13.75, + "learning_rate": 1.7158145464741567e-07, + "log_odds": 10.807097434997559, + "log_odds_ratio": -0.11924787610769272, + "loss": 0.2413, + "rejected_geometric_mean": -11.332934379577637, + "step": 7121 + }, + { + "chosen_geometric_mean": -0.8190209865570068, + "epoch": 1.76, + "grad_norm": 2.4375, + "learning_rate": 1.712272084125044e-07, + "log_odds": 6.83425760269165, + "log_odds_ratio": -0.12661844491958618, + "loss": 0.2299, + "rejected_geometric_mean": -7.170158386230469, + "step": 7122 + }, + { + "chosen_geometric_mean": -0.8316090703010559, + "epoch": 1.76, + "grad_norm": 24.25, + "learning_rate": 1.7087331527544882e-07, + "log_odds": 10.172252655029297, + "log_odds_ratio": -0.07625207304954529, + "loss": 0.272, + "rejected_geometric_mean": -10.448568344116211, + "step": 7123 + }, + { + "chosen_geometric_mean": -1.1259136199951172, + "epoch": 1.76, + "grad_norm": 21.375, + "learning_rate": 1.70519775289906e-07, + "log_odds": 4.980494499206543, + "log_odds_ratio": -0.13370594382286072, + "loss": 0.2425, + "rejected_geometric_mean": -5.755947113037109, + "step": 7124 + }, + { + "chosen_geometric_mean": -1.0748003721237183, + "epoch": 1.76, + "grad_norm": 17.375, + "learning_rate": 1.701665885094822e-07, + "log_odds": 11.630334854125977, + "log_odds_ratio": -0.043001554906368256, + "loss": 0.2609, + "rejected_geometric_mean": -12.304194450378418, + "step": 7125 + }, + { + "chosen_geometric_mean": -0.8704912662506104, + "epoch": 1.76, + "grad_norm": 28.875, + "learning_rate": 1.6981375498772906e-07, + "log_odds": 9.763758659362793, + "log_odds_ratio": -0.10681955516338348, + "loss": 0.2775, + "rejected_geometric_mean": -10.081769943237305, + "step": 7126 + }, + { + "chosen_geometric_mean": -0.7089064121246338, + "epoch": 1.76, + "grad_norm": 6.9375, + "learning_rate": 1.6946127477814405e-07, + "log_odds": 6.039501190185547, + "log_odds_ratio": -0.12293897569179535, + "loss": 0.2459, + "rejected_geometric_mean": -6.1688714027404785, + "step": 7127 + }, + { + "chosen_geometric_mean": -0.9679427146911621, + "epoch": 1.76, + "grad_norm": 13.75, + "learning_rate": 1.6910914793417166e-07, + "log_odds": 11.031250953674316, + "log_odds_ratio": -0.02454999089241028, + "loss": 0.2656, + "rejected_geometric_mean": -11.501916885375977, + "step": 7128 + }, + { + "chosen_geometric_mean": -0.938391923904419, + "epoch": 1.77, + "grad_norm": 1.890625, + "learning_rate": 1.6875737450920332e-07, + "log_odds": 12.62993335723877, + "log_odds_ratio": -0.000308402901282534, + "loss": 0.2355, + "rejected_geometric_mean": -13.045865058898926, + "step": 7129 + }, + { + "chosen_geometric_mean": -1.0425431728363037, + "epoch": 1.77, + "grad_norm": 3.484375, + "learning_rate": 1.6840595455657555e-07, + "log_odds": 3.111628532409668, + "log_odds_ratio": -0.2127123773097992, + "loss": 0.2296, + "rejected_geometric_mean": -3.8311784267425537, + "step": 7130 + }, + { + "chosen_geometric_mean": -0.8027043342590332, + "epoch": 1.77, + "grad_norm": 2.65625, + "learning_rate": 1.6805488812957237e-07, + "log_odds": 7.448295593261719, + "log_odds_ratio": -0.06324032694101334, + "loss": 0.2291, + "rejected_geometric_mean": -7.698402404785156, + "step": 7131 + }, + { + "chosen_geometric_mean": -1.2350791692733765, + "epoch": 1.77, + "grad_norm": 2.3125, + "learning_rate": 1.677041752814243e-07, + "log_odds": 3.5108039379119873, + "log_odds_ratio": -0.24126189947128296, + "loss": 0.27, + "rejected_geometric_mean": -4.446353912353516, + "step": 7132 + }, + { + "chosen_geometric_mean": -1.120240569114685, + "epoch": 1.77, + "grad_norm": 15.5, + "learning_rate": 1.6735381606530738e-07, + "log_odds": 14.217828750610352, + "log_odds_ratio": -0.0678606927394867, + "loss": 0.2434, + "rejected_geometric_mean": -14.96525764465332, + "step": 7133 + }, + { + "chosen_geometric_mean": -0.805138349533081, + "epoch": 1.77, + "grad_norm": 19.875, + "learning_rate": 1.6700381053434494e-07, + "log_odds": 7.1874589920043945, + "log_odds_ratio": -0.12993985414505005, + "loss": 0.2867, + "rejected_geometric_mean": -7.469160079956055, + "step": 7134 + }, + { + "chosen_geometric_mean": -0.9327646493911743, + "epoch": 1.77, + "grad_norm": 2.15625, + "learning_rate": 1.6665415874160566e-07, + "log_odds": 14.234719276428223, + "log_odds_ratio": -0.0017232507234439254, + "loss": 0.2506, + "rejected_geometric_mean": -14.604534149169922, + "step": 7135 + }, + { + "chosen_geometric_mean": -1.376592755317688, + "epoch": 1.77, + "grad_norm": 18.75, + "learning_rate": 1.6630486074010465e-07, + "log_odds": 6.764575958251953, + "log_odds_ratio": -0.4686037302017212, + "loss": 0.3216, + "rejected_geometric_mean": -7.946794033050537, + "step": 7136 + }, + { + "chosen_geometric_mean": -1.0779592990875244, + "epoch": 1.77, + "grad_norm": 2.5625, + "learning_rate": 1.6595591658280535e-07, + "log_odds": 6.954334735870361, + "log_odds_ratio": -0.01097163837403059, + "loss": 0.2207, + "rejected_geometric_mean": -7.592148780822754, + "step": 7137 + }, + { + "chosen_geometric_mean": -0.9808726906776428, + "epoch": 1.77, + "grad_norm": 9.875, + "learning_rate": 1.6560732632261545e-07, + "log_odds": 6.931413173675537, + "log_odds_ratio": -0.16341149806976318, + "loss": 0.2531, + "rejected_geometric_mean": -7.538050651550293, + "step": 7138 + }, + { + "chosen_geometric_mean": -0.9639002084732056, + "epoch": 1.77, + "grad_norm": 1.9921875, + "learning_rate": 1.6525909001238878e-07, + "log_odds": 5.05622673034668, + "log_odds_ratio": -0.09183064103126526, + "loss": 0.2202, + "rejected_geometric_mean": -5.575293064117432, + "step": 7139 + }, + { + "chosen_geometric_mean": -0.8971757888793945, + "epoch": 1.77, + "grad_norm": 12.3125, + "learning_rate": 1.649112077049278e-07, + "log_odds": 4.81130313873291, + "log_odds_ratio": -0.25390422344207764, + "loss": 0.2445, + "rejected_geometric_mean": -5.365382671356201, + "step": 7140 + }, + { + "chosen_geometric_mean": -0.8349450826644897, + "epoch": 1.77, + "grad_norm": 29.375, + "learning_rate": 1.6456367945297812e-07, + "log_odds": 4.845099925994873, + "log_odds_ratio": -0.3117695152759552, + "loss": 0.2903, + "rejected_geometric_mean": -5.298003196716309, + "step": 7141 + }, + { + "chosen_geometric_mean": -0.8595981597900391, + "epoch": 1.77, + "grad_norm": 2.703125, + "learning_rate": 1.6421650530923455e-07, + "log_odds": 9.627986907958984, + "log_odds_ratio": -0.032691873610019684, + "loss": 0.2541, + "rejected_geometric_mean": -9.960439682006836, + "step": 7142 + }, + { + "chosen_geometric_mean": -0.9260504245758057, + "epoch": 1.77, + "grad_norm": 1.75, + "learning_rate": 1.6386968532633717e-07, + "log_odds": 12.791433334350586, + "log_odds_ratio": -0.05193984508514404, + "loss": 0.2086, + "rejected_geometric_mean": -13.233963012695312, + "step": 7143 + }, + { + "chosen_geometric_mean": -1.341223955154419, + "epoch": 1.77, + "grad_norm": 15.0, + "learning_rate": 1.6352321955687173e-07, + "log_odds": 10.752545356750488, + "log_odds_ratio": -0.0008717080927453935, + "loss": 0.2979, + "rejected_geometric_mean": -11.749231338500977, + "step": 7144 + }, + { + "chosen_geometric_mean": -0.8403481841087341, + "epoch": 1.77, + "grad_norm": 1.9921875, + "learning_rate": 1.6317710805337033e-07, + "log_odds": 10.209946632385254, + "log_odds_ratio": -0.034221287816762924, + "loss": 0.2371, + "rejected_geometric_mean": -10.507732391357422, + "step": 7145 + }, + { + "chosen_geometric_mean": -1.3466784954071045, + "epoch": 1.77, + "grad_norm": 2.171875, + "learning_rate": 1.628313508683127e-07, + "log_odds": 3.5314993858337402, + "log_odds_ratio": -0.20278193056583405, + "loss": 0.2375, + "rejected_geometric_mean": -4.655726432800293, + "step": 7146 + }, + { + "chosen_geometric_mean": -1.1786222457885742, + "epoch": 1.77, + "grad_norm": 2.90625, + "learning_rate": 1.6248594805412298e-07, + "log_odds": 4.892643928527832, + "log_odds_ratio": -0.2977294623851776, + "loss": 0.2991, + "rejected_geometric_mean": -5.831270694732666, + "step": 7147 + }, + { + "chosen_geometric_mean": -1.05208158493042, + "epoch": 1.77, + "grad_norm": 2.484375, + "learning_rate": 1.621408996631732e-07, + "log_odds": 6.239467620849609, + "log_odds_ratio": -0.2092784196138382, + "loss": 0.2714, + "rejected_geometric_mean": -6.9872236251831055, + "step": 7148 + }, + { + "chosen_geometric_mean": -1.0961073637008667, + "epoch": 1.77, + "grad_norm": 9.875, + "learning_rate": 1.6179620574778094e-07, + "log_odds": 9.384897232055664, + "log_odds_ratio": -0.08904192596673965, + "loss": 0.2596, + "rejected_geometric_mean": -10.118047714233398, + "step": 7149 + }, + { + "chosen_geometric_mean": -0.8406939506530762, + "epoch": 1.77, + "grad_norm": 6.84375, + "learning_rate": 1.6145186636020992e-07, + "log_odds": 3.4294400215148926, + "log_odds_ratio": -0.3570133447647095, + "loss": 0.2649, + "rejected_geometric_mean": -3.9583449363708496, + "step": 7150 + }, + { + "chosen_geometric_mean": -0.9556252956390381, + "epoch": 1.77, + "grad_norm": 11.6875, + "learning_rate": 1.6110788155267037e-07, + "log_odds": 12.145593643188477, + "log_odds_ratio": -0.004781949799507856, + "loss": 0.2668, + "rejected_geometric_mean": -12.599502563476562, + "step": 7151 + }, + { + "chosen_geometric_mean": -1.0069503784179688, + "epoch": 1.77, + "grad_norm": 12.8125, + "learning_rate": 1.6076425137731832e-07, + "log_odds": 11.712200164794922, + "log_odds_ratio": -0.021211057901382446, + "loss": 0.2653, + "rejected_geometric_mean": -12.238276481628418, + "step": 7152 + }, + { + "chosen_geometric_mean": -1.0027275085449219, + "epoch": 1.77, + "grad_norm": 2.859375, + "learning_rate": 1.604209758862568e-07, + "log_odds": 7.806344509124756, + "log_odds_ratio": -0.13147075474262238, + "loss": 0.27, + "rejected_geometric_mean": -8.425261497497559, + "step": 7153 + }, + { + "chosen_geometric_mean": -0.9179859757423401, + "epoch": 1.77, + "grad_norm": 19.25, + "learning_rate": 1.6007805513153424e-07, + "log_odds": 8.397884368896484, + "log_odds_ratio": -0.11194733530282974, + "loss": 0.2496, + "rejected_geometric_mean": -8.779190063476562, + "step": 7154 + }, + { + "chosen_geometric_mean": -0.7359697818756104, + "epoch": 1.77, + "grad_norm": 7.9375, + "learning_rate": 1.5973548916514593e-07, + "log_odds": 11.588581085205078, + "log_odds_ratio": -0.07293670624494553, + "loss": 0.2507, + "rejected_geometric_mean": -11.722789764404297, + "step": 7155 + }, + { + "chosen_geometric_mean": -1.1661473512649536, + "epoch": 1.77, + "grad_norm": 15.625, + "learning_rate": 1.5939327803903282e-07, + "log_odds": 6.524740219116211, + "log_odds_ratio": -0.16363851726055145, + "loss": 0.3044, + "rejected_geometric_mean": -7.391569137573242, + "step": 7156 + }, + { + "chosen_geometric_mean": -0.9598585963249207, + "epoch": 1.77, + "grad_norm": 3.40625, + "learning_rate": 1.5905142180508259e-07, + "log_odds": 17.23680305480957, + "log_odds_ratio": -0.007805355824530125, + "loss": 0.2534, + "rejected_geometric_mean": -17.691505432128906, + "step": 7157 + }, + { + "chosen_geometric_mean": -1.0939334630966187, + "epoch": 1.77, + "grad_norm": 4.625, + "learning_rate": 1.587099205151288e-07, + "log_odds": 4.719909191131592, + "log_odds_ratio": -0.0823834091424942, + "loss": 0.2666, + "rejected_geometric_mean": -5.442853927612305, + "step": 7158 + }, + { + "chosen_geometric_mean": -1.0031001567840576, + "epoch": 1.77, + "grad_norm": 15.3125, + "learning_rate": 1.583687742209511e-07, + "log_odds": 0.7491397261619568, + "log_odds_ratio": -0.4684901535511017, + "loss": 0.2239, + "rejected_geometric_mean": -1.616683840751648, + "step": 7159 + }, + { + "chosen_geometric_mean": -1.125589370727539, + "epoch": 1.77, + "grad_norm": 6.15625, + "learning_rate": 1.5802798297427562e-07, + "log_odds": 5.795365333557129, + "log_odds_ratio": -0.18638184666633606, + "loss": 0.2391, + "rejected_geometric_mean": -6.59777307510376, + "step": 7160 + }, + { + "chosen_geometric_mean": -1.0420441627502441, + "epoch": 1.77, + "grad_norm": 6.4375, + "learning_rate": 1.5768754682677467e-07, + "log_odds": 7.668168544769287, + "log_odds_ratio": -0.024641957134008408, + "loss": 0.2094, + "rejected_geometric_mean": -8.274927139282227, + "step": 7161 + }, + { + "chosen_geometric_mean": -1.102360486984253, + "epoch": 1.77, + "grad_norm": 24.875, + "learning_rate": 1.5734746583006582e-07, + "log_odds": 4.986260414123535, + "log_odds_ratio": -0.10156621783971786, + "loss": 0.3184, + "rejected_geometric_mean": -5.720745086669922, + "step": 7162 + }, + { + "chosen_geometric_mean": -1.0049585103988647, + "epoch": 1.77, + "grad_norm": 29.0, + "learning_rate": 1.5700774003571423e-07, + "log_odds": 4.4954423904418945, + "log_odds_ratio": -0.054309312254190445, + "loss": 0.2953, + "rejected_geometric_mean": -5.039368629455566, + "step": 7163 + }, + { + "chosen_geometric_mean": -0.9546381831169128, + "epoch": 1.77, + "grad_norm": 4.84375, + "learning_rate": 1.5666836949523033e-07, + "log_odds": 8.56281566619873, + "log_odds_ratio": -0.03699111193418503, + "loss": 0.2354, + "rejected_geometric_mean": -9.011054039001465, + "step": 7164 + }, + { + "chosen_geometric_mean": -0.9385031461715698, + "epoch": 1.77, + "grad_norm": 4.0, + "learning_rate": 1.5632935426007052e-07, + "log_odds": 2.6177144050598145, + "log_odds_ratio": -0.43450072407722473, + "loss": 0.2942, + "rejected_geometric_mean": -3.338562488555908, + "step": 7165 + }, + { + "chosen_geometric_mean": -0.7734591960906982, + "epoch": 1.77, + "grad_norm": 19.875, + "learning_rate": 1.5599069438163806e-07, + "log_odds": 12.164313316345215, + "log_odds_ratio": -0.019039519131183624, + "loss": 0.245, + "rejected_geometric_mean": -12.32513427734375, + "step": 7166 + }, + { + "chosen_geometric_mean": -0.912295937538147, + "epoch": 1.77, + "grad_norm": 4.84375, + "learning_rate": 1.556523899112819e-07, + "log_odds": 8.247358322143555, + "log_odds_ratio": -0.033555518835783005, + "loss": 0.2843, + "rejected_geometric_mean": -8.656081199645996, + "step": 7167 + }, + { + "chosen_geometric_mean": -1.1900880336761475, + "epoch": 1.77, + "grad_norm": 2.0, + "learning_rate": 1.553144409002963e-07, + "log_odds": 13.238744735717773, + "log_odds_ratio": -0.004986133426427841, + "loss": 0.2365, + "rejected_geometric_mean": -14.05171012878418, + "step": 7168 + }, + { + "chosen_geometric_mean": -1.1982121467590332, + "epoch": 1.77, + "grad_norm": 19.5, + "learning_rate": 1.549768473999236e-07, + "log_odds": 4.100582122802734, + "log_odds_ratio": -0.24113114178180695, + "loss": 0.2981, + "rejected_geometric_mean": -5.057552337646484, + "step": 7169 + }, + { + "chosen_geometric_mean": -1.7886792421340942, + "epoch": 1.78, + "grad_norm": 48.25, + "learning_rate": 1.5463960946135087e-07, + "log_odds": 2.439396858215332, + "log_odds_ratio": -0.6989394426345825, + "loss": 0.3944, + "rejected_geometric_mean": -4.039670467376709, + "step": 7170 + }, + { + "chosen_geometric_mean": -0.9718657732009888, + "epoch": 1.78, + "grad_norm": 3.953125, + "learning_rate": 1.543027271357106e-07, + "log_odds": 7.506059646606445, + "log_odds_ratio": -0.19229747354984283, + "loss": 0.2659, + "rejected_geometric_mean": -8.138200759887695, + "step": 7171 + }, + { + "chosen_geometric_mean": -0.9080848693847656, + "epoch": 1.78, + "grad_norm": 4.28125, + "learning_rate": 1.5396620047408328e-07, + "log_odds": 7.788768291473389, + "log_odds_ratio": -0.09081734716892242, + "loss": 0.2294, + "rejected_geometric_mean": -8.209182739257812, + "step": 7172 + }, + { + "chosen_geometric_mean": -1.076728105545044, + "epoch": 1.78, + "grad_norm": 21.75, + "learning_rate": 1.536300295274934e-07, + "log_odds": 8.611360549926758, + "log_odds_ratio": -0.04294619336724281, + "loss": 0.2592, + "rejected_geometric_mean": -9.292834281921387, + "step": 7173 + }, + { + "chosen_geometric_mean": -0.9468832612037659, + "epoch": 1.78, + "grad_norm": 20.5, + "learning_rate": 1.532942143469135e-07, + "log_odds": 17.941356658935547, + "log_odds_ratio": -1.0728848565122462e-06, + "loss": 0.2779, + "rejected_geometric_mean": -18.394084930419922, + "step": 7174 + }, + { + "chosen_geometric_mean": -0.7663687467575073, + "epoch": 1.78, + "grad_norm": 7.3125, + "learning_rate": 1.529587549832609e-07, + "log_odds": 7.649787425994873, + "log_odds_ratio": -0.12912286818027496, + "loss": 0.2778, + "rejected_geometric_mean": -7.901310443878174, + "step": 7175 + }, + { + "chosen_geometric_mean": -0.8790243864059448, + "epoch": 1.78, + "grad_norm": 2.234375, + "learning_rate": 1.5262365148739883e-07, + "log_odds": 10.22339153289795, + "log_odds_ratio": -0.006647022906690836, + "loss": 0.2443, + "rejected_geometric_mean": -10.55134391784668, + "step": 7176 + }, + { + "chosen_geometric_mean": -1.0988470315933228, + "epoch": 1.78, + "grad_norm": 3.78125, + "learning_rate": 1.52288903910138e-07, + "log_odds": 4.228470802307129, + "log_odds_ratio": -0.16978439688682556, + "loss": 0.2572, + "rejected_geometric_mean": -5.027884483337402, + "step": 7177 + }, + { + "chosen_geometric_mean": -1.352708101272583, + "epoch": 1.78, + "grad_norm": 19.875, + "learning_rate": 1.5195451230223342e-07, + "log_odds": 8.972161293029785, + "log_odds_ratio": -0.010481254197657108, + "loss": 0.2935, + "rejected_geometric_mean": -9.963460922241211, + "step": 7178 + }, + { + "chosen_geometric_mean": -0.9392810463905334, + "epoch": 1.78, + "grad_norm": 25.125, + "learning_rate": 1.5162047671438696e-07, + "log_odds": 4.746611595153809, + "log_odds_ratio": -0.07059221714735031, + "loss": 0.2974, + "rejected_geometric_mean": -5.237732410430908, + "step": 7179 + }, + { + "chosen_geometric_mean": -1.0531249046325684, + "epoch": 1.78, + "grad_norm": 29.125, + "learning_rate": 1.512867971972462e-07, + "log_odds": 5.160709381103516, + "log_odds_ratio": -0.2896517813205719, + "loss": 0.2778, + "rejected_geometric_mean": -5.965317726135254, + "step": 7180 + }, + { + "chosen_geometric_mean": -0.9522826671600342, + "epoch": 1.78, + "grad_norm": 3.765625, + "learning_rate": 1.5095347380140597e-07, + "log_odds": 13.372068405151367, + "log_odds_ratio": -0.00017148102051578462, + "loss": 0.2491, + "rejected_geometric_mean": -13.819147109985352, + "step": 7181 + }, + { + "chosen_geometric_mean": -1.013368844985962, + "epoch": 1.78, + "grad_norm": 13.125, + "learning_rate": 1.5062050657740496e-07, + "log_odds": 3.047499656677246, + "log_odds_ratio": -0.09444575011730194, + "loss": 0.2414, + "rejected_geometric_mean": -3.6400046348571777, + "step": 7182 + }, + { + "chosen_geometric_mean": -0.899794340133667, + "epoch": 1.78, + "grad_norm": 12.0, + "learning_rate": 1.5028789557572977e-07, + "log_odds": 4.111579895019531, + "log_odds_ratio": -0.3574243187904358, + "loss": 0.2978, + "rejected_geometric_mean": -4.697897434234619, + "step": 7183 + }, + { + "chosen_geometric_mean": -1.0851399898529053, + "epoch": 1.78, + "grad_norm": 1.9609375, + "learning_rate": 1.4995564084681196e-07, + "log_odds": 5.922462463378906, + "log_odds_ratio": -0.1050347238779068, + "loss": 0.2433, + "rejected_geometric_mean": -6.613129615783691, + "step": 7184 + }, + { + "chosen_geometric_mean": -1.026414155960083, + "epoch": 1.78, + "grad_norm": 1.8828125, + "learning_rate": 1.4962374244102852e-07, + "log_odds": 12.294000625610352, + "log_odds_ratio": -0.0016158809885382652, + "loss": 0.2487, + "rejected_geometric_mean": -12.875868797302246, + "step": 7185 + }, + { + "chosen_geometric_mean": -1.1281893253326416, + "epoch": 1.78, + "grad_norm": 3.1875, + "learning_rate": 1.4929220040870467e-07, + "log_odds": 3.6869544982910156, + "log_odds_ratio": -0.16588132083415985, + "loss": 0.2327, + "rejected_geometric_mean": -4.4492387771606445, + "step": 7186 + }, + { + "chosen_geometric_mean": -0.9912641048431396, + "epoch": 1.78, + "grad_norm": 4.09375, + "learning_rate": 1.4896101480010944e-07, + "log_odds": 13.265663146972656, + "log_odds_ratio": -2.2352654923452064e-05, + "loss": 0.2415, + "rejected_geometric_mean": -13.779457092285156, + "step": 7187 + }, + { + "chosen_geometric_mean": -0.9052464962005615, + "epoch": 1.78, + "grad_norm": 10.625, + "learning_rate": 1.4863018566545816e-07, + "log_odds": 3.2159934043884277, + "log_odds_ratio": -0.06821259111166, + "loss": 0.2701, + "rejected_geometric_mean": -3.6401891708374023, + "step": 7188 + }, + { + "chosen_geometric_mean": -0.9234296083450317, + "epoch": 1.78, + "grad_norm": 2.5, + "learning_rate": 1.4829971305491324e-07, + "log_odds": 1.5058711767196655, + "log_odds_ratio": -0.38550466299057007, + "loss": 0.2168, + "rejected_geometric_mean": -2.1292953491210938, + "step": 7189 + }, + { + "chosen_geometric_mean": -0.7434998750686646, + "epoch": 1.78, + "grad_norm": 5.78125, + "learning_rate": 1.4796959701858127e-07, + "log_odds": 8.767353057861328, + "log_odds_ratio": -0.09582892805337906, + "loss": 0.2472, + "rejected_geometric_mean": -8.96761417388916, + "step": 7190 + }, + { + "chosen_geometric_mean": -0.9648471474647522, + "epoch": 1.78, + "grad_norm": 27.25, + "learning_rate": 1.4763983760651663e-07, + "log_odds": 7.339555740356445, + "log_odds_ratio": -0.14392498135566711, + "loss": 0.3571, + "rejected_geometric_mean": -7.908818244934082, + "step": 7191 + }, + { + "chosen_geometric_mean": -0.9077979326248169, + "epoch": 1.78, + "grad_norm": 56.25, + "learning_rate": 1.4731043486871876e-07, + "log_odds": 2.888260841369629, + "log_odds_ratio": -0.2626407742500305, + "loss": 0.3304, + "rejected_geometric_mean": -3.4565181732177734, + "step": 7192 + }, + { + "chosen_geometric_mean": -1.103621482849121, + "epoch": 1.78, + "grad_norm": 25.125, + "learning_rate": 1.4698138885513248e-07, + "log_odds": 3.018190860748291, + "log_odds_ratio": -0.21420565247535706, + "loss": 0.2736, + "rejected_geometric_mean": -3.8168904781341553, + "step": 7193 + }, + { + "chosen_geometric_mean": -1.0537238121032715, + "epoch": 1.78, + "grad_norm": 2.8125, + "learning_rate": 1.466526996156492e-07, + "log_odds": 6.048429489135742, + "log_odds_ratio": -0.060913555324077606, + "loss": 0.2626, + "rejected_geometric_mean": -6.707350730895996, + "step": 7194 + }, + { + "chosen_geometric_mean": -0.9917201995849609, + "epoch": 1.78, + "grad_norm": 2.828125, + "learning_rate": 1.4632436720010663e-07, + "log_odds": 9.732129096984863, + "log_odds_ratio": -0.14961746335029602, + "loss": 0.2384, + "rejected_geometric_mean": -10.375433921813965, + "step": 7195 + }, + { + "chosen_geometric_mean": -0.8555129766464233, + "epoch": 1.78, + "grad_norm": 7.65625, + "learning_rate": 1.4599639165828682e-07, + "log_odds": 8.063131332397461, + "log_odds_ratio": -0.1363794356584549, + "loss": 0.2383, + "rejected_geometric_mean": -8.49509048461914, + "step": 7196 + }, + { + "chosen_geometric_mean": -1.0343319177627563, + "epoch": 1.78, + "grad_norm": 3.546875, + "learning_rate": 1.456687730399195e-07, + "log_odds": 4.15789270401001, + "log_odds_ratio": -0.1795724332332611, + "loss": 0.2816, + "rejected_geometric_mean": -4.872164249420166, + "step": 7197 + }, + { + "chosen_geometric_mean": -1.1307494640350342, + "epoch": 1.78, + "grad_norm": 4.1875, + "learning_rate": 1.453415113946796e-07, + "log_odds": 6.513427734375, + "log_odds_ratio": -0.022163810208439827, + "loss": 0.2627, + "rejected_geometric_mean": -7.25524377822876, + "step": 7198 + }, + { + "chosen_geometric_mean": -1.0157487392425537, + "epoch": 1.78, + "grad_norm": 4.21875, + "learning_rate": 1.4501460677218748e-07, + "log_odds": 5.373187065124512, + "log_odds_ratio": -0.15208500623703003, + "loss": 0.2512, + "rejected_geometric_mean": -5.971040725708008, + "step": 7199 + }, + { + "chosen_geometric_mean": -0.9924537539482117, + "epoch": 1.78, + "grad_norm": 18.0, + "learning_rate": 1.4468805922200986e-07, + "log_odds": 3.5533623695373535, + "log_odds_ratio": -0.06046763062477112, + "loss": 0.2574, + "rejected_geometric_mean": -4.109716415405273, + "step": 7200 + }, + { + "chosen_geometric_mean": -0.9974381327629089, + "epoch": 1.78, + "grad_norm": 7.03125, + "learning_rate": 1.4436186879365905e-07, + "log_odds": 6.35650634765625, + "log_odds_ratio": -0.06367183476686478, + "loss": 0.2344, + "rejected_geometric_mean": -6.919512748718262, + "step": 7201 + }, + { + "chosen_geometric_mean": -0.9727357625961304, + "epoch": 1.78, + "grad_norm": 2.171875, + "learning_rate": 1.4403603553659384e-07, + "log_odds": 6.728297710418701, + "log_odds_ratio": -0.12804505228996277, + "loss": 0.2639, + "rejected_geometric_mean": -7.339507102966309, + "step": 7202 + }, + { + "chosen_geometric_mean": -0.9094055891036987, + "epoch": 1.78, + "grad_norm": 21.125, + "learning_rate": 1.437105595002175e-07, + "log_odds": 4.352905750274658, + "log_odds_ratio": -0.07250571250915527, + "loss": 0.2338, + "rejected_geometric_mean": -4.781064033508301, + "step": 7203 + }, + { + "chosen_geometric_mean": -1.0030089616775513, + "epoch": 1.78, + "grad_norm": 25.875, + "learning_rate": 1.4338544073388112e-07, + "log_odds": 6.2436699867248535, + "log_odds_ratio": -0.08753013610839844, + "loss": 0.2629, + "rejected_geometric_mean": -6.816743850708008, + "step": 7204 + }, + { + "chosen_geometric_mean": -0.8973734378814697, + "epoch": 1.78, + "grad_norm": 11.625, + "learning_rate": 1.4306067928687938e-07, + "log_odds": 18.281064987182617, + "log_odds_ratio": -2.5034075861185556e-06, + "loss": 0.2182, + "rejected_geometric_mean": -18.633081436157227, + "step": 7205 + }, + { + "chosen_geometric_mean": -0.9076420664787292, + "epoch": 1.78, + "grad_norm": 27.125, + "learning_rate": 1.427362752084549e-07, + "log_odds": 5.932693958282471, + "log_odds_ratio": -0.1375289410352707, + "loss": 0.2752, + "rejected_geometric_mean": -6.422926902770996, + "step": 7206 + }, + { + "chosen_geometric_mean": -1.0532604455947876, + "epoch": 1.78, + "grad_norm": 2.21875, + "learning_rate": 1.4241222854779413e-07, + "log_odds": 4.4378767013549805, + "log_odds_ratio": -0.2411571443080902, + "loss": 0.2493, + "rejected_geometric_mean": -5.196375846862793, + "step": 7207 + }, + { + "chosen_geometric_mean": -1.1335992813110352, + "epoch": 1.78, + "grad_norm": 2.25, + "learning_rate": 1.420885393540311e-07, + "log_odds": 4.397364139556885, + "log_odds_ratio": -0.060661498457193375, + "loss": 0.2567, + "rejected_geometric_mean": -5.166338920593262, + "step": 7208 + }, + { + "chosen_geometric_mean": -1.0454109907150269, + "epoch": 1.78, + "grad_norm": 12.25, + "learning_rate": 1.4176520767624464e-07, + "log_odds": 8.095129013061523, + "log_odds_ratio": -0.1490708589553833, + "loss": 0.2555, + "rejected_geometric_mean": -8.736611366271973, + "step": 7209 + }, + { + "chosen_geometric_mean": -0.9552635550498962, + "epoch": 1.79, + "grad_norm": 38.75, + "learning_rate": 1.4144223356345992e-07, + "log_odds": 9.793636322021484, + "log_odds_ratio": -0.002669353736564517, + "loss": 0.2664, + "rejected_geometric_mean": -10.261415481567383, + "step": 7210 + }, + { + "chosen_geometric_mean": -1.2280933856964111, + "epoch": 1.79, + "grad_norm": 7.34375, + "learning_rate": 1.4111961706464672e-07, + "log_odds": 2.3890891075134277, + "log_odds_ratio": -0.16954416036605835, + "loss": 0.2729, + "rejected_geometric_mean": -3.3375210762023926, + "step": 7211 + }, + { + "chosen_geometric_mean": -0.8408482670783997, + "epoch": 1.79, + "grad_norm": 1.9140625, + "learning_rate": 1.40797358228722e-07, + "log_odds": 7.3744964599609375, + "log_odds_ratio": -0.1294766217470169, + "loss": 0.2447, + "rejected_geometric_mean": -7.768564224243164, + "step": 7212 + }, + { + "chosen_geometric_mean": -0.8949013948440552, + "epoch": 1.79, + "grad_norm": 3.296875, + "learning_rate": 1.4047545710454802e-07, + "log_odds": 5.47849702835083, + "log_odds_ratio": -0.2606412172317505, + "loss": 0.3022, + "rejected_geometric_mean": -6.071311950683594, + "step": 7213 + }, + { + "chosen_geometric_mean": -0.8408411741256714, + "epoch": 1.79, + "grad_norm": 17.5, + "learning_rate": 1.4015391374093246e-07, + "log_odds": 7.687772750854492, + "log_odds_ratio": -0.16073839366436005, + "loss": 0.3152, + "rejected_geometric_mean": -8.00668716430664, + "step": 7214 + }, + { + "chosen_geometric_mean": -0.8958786725997925, + "epoch": 1.79, + "grad_norm": 14.125, + "learning_rate": 1.3983272818662936e-07, + "log_odds": 3.49749493598938, + "log_odds_ratio": -0.23435300588607788, + "loss": 0.2568, + "rejected_geometric_mean": -4.06337833404541, + "step": 7215 + }, + { + "chosen_geometric_mean": -0.8732907772064209, + "epoch": 1.79, + "grad_norm": 3.125, + "learning_rate": 1.395119004903378e-07, + "log_odds": 4.7762675285339355, + "log_odds_ratio": -0.15858592092990875, + "loss": 0.2775, + "rejected_geometric_mean": -5.190735340118408, + "step": 7216 + }, + { + "chosen_geometric_mean": -0.9491993188858032, + "epoch": 1.79, + "grad_norm": 2.109375, + "learning_rate": 1.3919143070070307e-07, + "log_odds": 8.299386978149414, + "log_odds_ratio": -0.03756881132721901, + "loss": 0.2448, + "rejected_geometric_mean": -8.778051376342773, + "step": 7217 + }, + { + "chosen_geometric_mean": -1.1232317686080933, + "epoch": 1.79, + "grad_norm": 3.59375, + "learning_rate": 1.3887131886631572e-07, + "log_odds": 8.144499778747559, + "log_odds_ratio": -0.16274890303611755, + "loss": 0.2459, + "rejected_geometric_mean": -8.935815811157227, + "step": 7218 + }, + { + "chosen_geometric_mean": -1.1745996475219727, + "epoch": 1.79, + "grad_norm": 5.59375, + "learning_rate": 1.385515650357133e-07, + "log_odds": 10.393467903137207, + "log_odds_ratio": -0.001491091912612319, + "loss": 0.3024, + "rejected_geometric_mean": -11.193761825561523, + "step": 7219 + }, + { + "chosen_geometric_mean": -1.0440293550491333, + "epoch": 1.79, + "grad_norm": 2.453125, + "learning_rate": 1.3823216925737736e-07, + "log_odds": 6.322844505310059, + "log_odds_ratio": -0.16711527109146118, + "loss": 0.2534, + "rejected_geometric_mean": -7.024266242980957, + "step": 7220 + }, + { + "chosen_geometric_mean": -0.88174968957901, + "epoch": 1.79, + "grad_norm": 2.03125, + "learning_rate": 1.3791313157973664e-07, + "log_odds": 9.89218521118164, + "log_odds_ratio": -0.0002116594259859994, + "loss": 0.2474, + "rejected_geometric_mean": -10.216322898864746, + "step": 7221 + }, + { + "chosen_geometric_mean": -1.1620500087738037, + "epoch": 1.79, + "grad_norm": 2.15625, + "learning_rate": 1.3759445205116405e-07, + "log_odds": 3.5605697631835938, + "log_odds_ratio": -0.4431530237197876, + "loss": 0.2575, + "rejected_geometric_mean": -4.559288024902344, + "step": 7222 + }, + { + "chosen_geometric_mean": -1.0224484205245972, + "epoch": 1.79, + "grad_norm": 18.5, + "learning_rate": 1.3727613071997937e-07, + "log_odds": 7.254692554473877, + "log_odds_ratio": -0.17995822429656982, + "loss": 0.2687, + "rejected_geometric_mean": -7.94620943069458, + "step": 7223 + }, + { + "chosen_geometric_mean": -1.0032415390014648, + "epoch": 1.79, + "grad_norm": 23.625, + "learning_rate": 1.3695816763444835e-07, + "log_odds": 8.172346115112305, + "log_odds_ratio": -0.17643961310386658, + "loss": 0.3323, + "rejected_geometric_mean": -8.800653457641602, + "step": 7224 + }, + { + "chosen_geometric_mean": -0.9652838110923767, + "epoch": 1.79, + "grad_norm": 2.09375, + "learning_rate": 1.366405628427811e-07, + "log_odds": 5.464848518371582, + "log_odds_ratio": -0.4027317464351654, + "loss": 0.2485, + "rejected_geometric_mean": -6.204944133758545, + "step": 7225 + }, + { + "chosen_geometric_mean": -0.9452574849128723, + "epoch": 1.79, + "grad_norm": 2.09375, + "learning_rate": 1.363233163931349e-07, + "log_odds": 10.907962799072266, + "log_odds_ratio": -0.1560158133506775, + "loss": 0.2676, + "rejected_geometric_mean": -11.455083847045898, + "step": 7226 + }, + { + "chosen_geometric_mean": -1.096740961074829, + "epoch": 1.79, + "grad_norm": 1.796875, + "learning_rate": 1.3600642833361128e-07, + "log_odds": 3.705728054046631, + "log_odds_ratio": -0.17882540822029114, + "loss": 0.2241, + "rejected_geometric_mean": -4.51124382019043, + "step": 7227 + }, + { + "chosen_geometric_mean": -1.0142532587051392, + "epoch": 1.79, + "grad_norm": 6.53125, + "learning_rate": 1.356898987122579e-07, + "log_odds": 7.1700029373168945, + "log_odds_ratio": -0.23503997921943665, + "loss": 0.231, + "rejected_geometric_mean": -7.908323287963867, + "step": 7228 + }, + { + "chosen_geometric_mean": -1.5382850170135498, + "epoch": 1.79, + "grad_norm": 28.75, + "learning_rate": 1.3537372757706835e-07, + "log_odds": 7.425448417663574, + "log_odds_ratio": -0.3960316777229309, + "loss": 0.299, + "rejected_geometric_mean": -8.871675491333008, + "step": 7229 + }, + { + "chosen_geometric_mean": -1.0620830059051514, + "epoch": 1.79, + "grad_norm": 6.25, + "learning_rate": 1.350579149759826e-07, + "log_odds": 9.829254150390625, + "log_odds_ratio": -0.002490594517439604, + "loss": 0.2648, + "rejected_geometric_mean": -10.434249877929688, + "step": 7230 + }, + { + "chosen_geometric_mean": -1.3533345460891724, + "epoch": 1.79, + "grad_norm": 4.75, + "learning_rate": 1.3474246095688455e-07, + "log_odds": 9.023628234863281, + "log_odds_ratio": -0.01703912764787674, + "loss": 0.2302, + "rejected_geometric_mean": -10.069293975830078, + "step": 7231 + }, + { + "chosen_geometric_mean": -1.2335972785949707, + "epoch": 1.79, + "grad_norm": 28.5, + "learning_rate": 1.344273655676051e-07, + "log_odds": 10.753620147705078, + "log_odds_ratio": -0.08846522867679596, + "loss": 0.3135, + "rejected_geometric_mean": -11.690431594848633, + "step": 7232 + }, + { + "chosen_geometric_mean": -1.0184834003448486, + "epoch": 1.79, + "grad_norm": 2.953125, + "learning_rate": 1.3411262885591997e-07, + "log_odds": 7.166924953460693, + "log_odds_ratio": -0.046483203768730164, + "loss": 0.256, + "rejected_geometric_mean": -7.7166056632995605, + "step": 7233 + }, + { + "chosen_geometric_mean": -1.0329302549362183, + "epoch": 1.79, + "grad_norm": 2.71875, + "learning_rate": 1.3379825086955013e-07, + "log_odds": 7.028263092041016, + "log_odds_ratio": -0.29946285486221313, + "loss": 0.2605, + "rejected_geometric_mean": -7.731534481048584, + "step": 7234 + }, + { + "chosen_geometric_mean": -0.8721558451652527, + "epoch": 1.79, + "grad_norm": 9.3125, + "learning_rate": 1.3348423165616437e-07, + "log_odds": 8.154993057250977, + "log_odds_ratio": -0.0007429798133671284, + "loss": 0.2498, + "rejected_geometric_mean": -8.48338508605957, + "step": 7235 + }, + { + "chosen_geometric_mean": -0.9800829291343689, + "epoch": 1.79, + "grad_norm": 14.625, + "learning_rate": 1.3317057126337463e-07, + "log_odds": 2.163189172744751, + "log_odds_ratio": -0.30557000637054443, + "loss": 0.2622, + "rejected_geometric_mean": -2.8509902954101562, + "step": 7236 + }, + { + "chosen_geometric_mean": -0.8488301038742065, + "epoch": 1.79, + "grad_norm": 19.75, + "learning_rate": 1.3285726973873924e-07, + "log_odds": 1.2167580127716064, + "log_odds_ratio": -0.33621788024902344, + "loss": 0.3079, + "rejected_geometric_mean": -1.760796308517456, + "step": 7237 + }, + { + "chosen_geometric_mean": -1.1220132112503052, + "epoch": 1.79, + "grad_norm": 12.5625, + "learning_rate": 1.3254432712976274e-07, + "log_odds": 7.0804266929626465, + "log_odds_ratio": -0.008607677184045315, + "loss": 0.3005, + "rejected_geometric_mean": -7.770976543426514, + "step": 7238 + }, + { + "chosen_geometric_mean": -0.8422836065292358, + "epoch": 1.79, + "grad_norm": 43.25, + "learning_rate": 1.3223174348389434e-07, + "log_odds": 7.181616306304932, + "log_odds_ratio": -0.23497983813285828, + "loss": 0.2599, + "rejected_geometric_mean": -7.688444137573242, + "step": 7239 + }, + { + "chosen_geometric_mean": -0.8455645442008972, + "epoch": 1.79, + "grad_norm": 2.859375, + "learning_rate": 1.3191951884852894e-07, + "log_odds": 4.493409633636475, + "log_odds_ratio": -0.19817784428596497, + "loss": 0.257, + "rejected_geometric_mean": -4.901254653930664, + "step": 7240 + }, + { + "chosen_geometric_mean": -1.654099464416504, + "epoch": 1.79, + "grad_norm": 26.75, + "learning_rate": 1.3160765327100838e-07, + "log_odds": 4.356529235839844, + "log_odds_ratio": -0.22554074227809906, + "loss": 0.3404, + "rejected_geometric_mean": -5.838129997253418, + "step": 7241 + }, + { + "chosen_geometric_mean": -0.7424138188362122, + "epoch": 1.79, + "grad_norm": 18.125, + "learning_rate": 1.3129614679861817e-07, + "log_odds": 4.935230255126953, + "log_odds_ratio": -0.0837659016251564, + "loss": 0.2918, + "rejected_geometric_mean": -5.090036869049072, + "step": 7242 + }, + { + "chosen_geometric_mean": -1.0802441835403442, + "epoch": 1.79, + "grad_norm": 17.5, + "learning_rate": 1.3098499947859e-07, + "log_odds": 8.388919830322266, + "log_odds_ratio": -0.14564010500907898, + "loss": 0.2899, + "rejected_geometric_mean": -9.137682914733887, + "step": 7243 + }, + { + "chosen_geometric_mean": -0.9925278425216675, + "epoch": 1.79, + "grad_norm": 17.875, + "learning_rate": 1.3067421135810165e-07, + "log_odds": 9.426820755004883, + "log_odds_ratio": -0.060229625552892685, + "loss": 0.2457, + "rejected_geometric_mean": -9.988300323486328, + "step": 7244 + }, + { + "chosen_geometric_mean": -0.8650997281074524, + "epoch": 1.79, + "grad_norm": 2.203125, + "learning_rate": 1.3036378248427573e-07, + "log_odds": 3.1439123153686523, + "log_odds_ratio": -0.27057453989982605, + "loss": 0.2707, + "rejected_geometric_mean": -3.653066396713257, + "step": 7245 + }, + { + "chosen_geometric_mean": -0.8857588768005371, + "epoch": 1.79, + "grad_norm": 4.6875, + "learning_rate": 1.3005371290418096e-07, + "log_odds": 10.258780479431152, + "log_odds_ratio": -0.013073975220322609, + "loss": 0.2433, + "rejected_geometric_mean": -10.619368553161621, + "step": 7246 + }, + { + "chosen_geometric_mean": -0.9577805399894714, + "epoch": 1.79, + "grad_norm": 17.875, + "learning_rate": 1.2974400266483167e-07, + "log_odds": 4.9150214195251465, + "log_odds_ratio": -0.1158595159649849, + "loss": 0.2898, + "rejected_geometric_mean": -5.448240280151367, + "step": 7247 + }, + { + "chosen_geometric_mean": -0.9611167907714844, + "epoch": 1.79, + "grad_norm": 3.609375, + "learning_rate": 1.294346518131867e-07, + "log_odds": 7.187285423278809, + "log_odds_ratio": -0.0021182456985116005, + "loss": 0.2606, + "rejected_geometric_mean": -7.650346755981445, + "step": 7248 + }, + { + "chosen_geometric_mean": -0.8468617796897888, + "epoch": 1.79, + "grad_norm": 6.5625, + "learning_rate": 1.2912566039615188e-07, + "log_odds": 8.563697814941406, + "log_odds_ratio": -0.01690712757408619, + "loss": 0.2683, + "rejected_geometric_mean": -8.858112335205078, + "step": 7249 + }, + { + "chosen_geometric_mean": -0.8705015182495117, + "epoch": 1.79, + "grad_norm": 9.0625, + "learning_rate": 1.2881702846057663e-07, + "log_odds": 6.320645332336426, + "log_odds_ratio": -0.03190356492996216, + "loss": 0.2254, + "rejected_geometric_mean": -6.649878025054932, + "step": 7250 + }, + { + "chosen_geometric_mean": -1.0722841024398804, + "epoch": 1.8, + "grad_norm": 1.7265625, + "learning_rate": 1.2850875605325748e-07, + "log_odds": 12.636798858642578, + "log_odds_ratio": -0.001314201159402728, + "loss": 0.2216, + "rejected_geometric_mean": -13.23758602142334, + "step": 7251 + }, + { + "chosen_geometric_mean": -1.1826179027557373, + "epoch": 1.8, + "grad_norm": 2.828125, + "learning_rate": 1.282008432209364e-07, + "log_odds": 2.1449222564697266, + "log_odds_ratio": -0.2071123868227005, + "loss": 0.2407, + "rejected_geometric_mean": -3.052210807800293, + "step": 7252 + }, + { + "chosen_geometric_mean": -1.0567407608032227, + "epoch": 1.8, + "grad_norm": 4.40625, + "learning_rate": 1.2789329001029999e-07, + "log_odds": 5.409642219543457, + "log_odds_ratio": -0.13131040334701538, + "loss": 0.2928, + "rejected_geometric_mean": -6.122533798217773, + "step": 7253 + }, + { + "chosen_geometric_mean": -0.7550753355026245, + "epoch": 1.8, + "grad_norm": 2.546875, + "learning_rate": 1.2758609646798038e-07, + "log_odds": 2.9150564670562744, + "log_odds_ratio": -0.25166818499565125, + "loss": 0.2679, + "rejected_geometric_mean": -3.141146421432495, + "step": 7254 + }, + { + "chosen_geometric_mean": -0.825237512588501, + "epoch": 1.8, + "grad_norm": 20.375, + "learning_rate": 1.2727926264055613e-07, + "log_odds": 8.615970611572266, + "log_odds_ratio": -0.0010874553117901087, + "loss": 0.2618, + "rejected_geometric_mean": -8.83257007598877, + "step": 7255 + }, + { + "chosen_geometric_mean": -0.8386095762252808, + "epoch": 1.8, + "grad_norm": 1.8984375, + "learning_rate": 1.2697278857454976e-07, + "log_odds": 6.990495681762695, + "log_odds_ratio": -0.15975673496723175, + "loss": 0.2428, + "rejected_geometric_mean": -7.334804058074951, + "step": 7256 + }, + { + "chosen_geometric_mean": -0.967893123626709, + "epoch": 1.8, + "grad_norm": 7.0, + "learning_rate": 1.2666667431643077e-07, + "log_odds": 1.0555529594421387, + "log_odds_ratio": -0.3354949951171875, + "loss": 0.2876, + "rejected_geometric_mean": -1.7249176502227783, + "step": 7257 + }, + { + "chosen_geometric_mean": -0.9995470643043518, + "epoch": 1.8, + "grad_norm": 13.0, + "learning_rate": 1.263609199126134e-07, + "log_odds": 7.543499946594238, + "log_odds_ratio": -0.13348251581192017, + "loss": 0.2284, + "rejected_geometric_mean": -8.158150672912598, + "step": 7258 + }, + { + "chosen_geometric_mean": -0.96200031042099, + "epoch": 1.8, + "grad_norm": 4.21875, + "learning_rate": 1.2605552540945752e-07, + "log_odds": 6.649132251739502, + "log_odds_ratio": -0.00867305975407362, + "loss": 0.2533, + "rejected_geometric_mean": -7.1256279945373535, + "step": 7259 + }, + { + "chosen_geometric_mean": -1.1181514263153076, + "epoch": 1.8, + "grad_norm": 28.125, + "learning_rate": 1.2575049085326724e-07, + "log_odds": 7.077396392822266, + "log_odds_ratio": -0.038532815873622894, + "loss": 0.2954, + "rejected_geometric_mean": -7.813427925109863, + "step": 7260 + }, + { + "chosen_geometric_mean": -1.178255558013916, + "epoch": 1.8, + "grad_norm": 43.25, + "learning_rate": 1.2544581629029412e-07, + "log_odds": 9.938620567321777, + "log_odds_ratio": -8.57917038956657e-05, + "loss": 0.2579, + "rejected_geometric_mean": -10.740239143371582, + "step": 7261 + }, + { + "chosen_geometric_mean": -1.2744815349578857, + "epoch": 1.8, + "grad_norm": 4.0, + "learning_rate": 1.25141501766734e-07, + "log_odds": 7.565871238708496, + "log_odds_ratio": -0.09871247410774231, + "loss": 0.255, + "rejected_geometric_mean": -8.54905891418457, + "step": 7262 + }, + { + "chosen_geometric_mean": -1.0879452228546143, + "epoch": 1.8, + "grad_norm": 17.75, + "learning_rate": 1.2483754732872776e-07, + "log_odds": 5.2103071212768555, + "log_odds_ratio": -0.06479105353355408, + "loss": 0.2693, + "rejected_geometric_mean": -5.915156841278076, + "step": 7263 + }, + { + "chosen_geometric_mean": -1.376391887664795, + "epoch": 1.8, + "grad_norm": 9.4375, + "learning_rate": 1.2453395302236327e-07, + "log_odds": 7.803066253662109, + "log_odds_ratio": -0.03509388118982315, + "loss": 0.3368, + "rejected_geometric_mean": -8.834234237670898, + "step": 7264 + }, + { + "chosen_geometric_mean": -1.1049631834030151, + "epoch": 1.8, + "grad_norm": 28.25, + "learning_rate": 1.2423071889367117e-07, + "log_odds": 3.9869370460510254, + "log_odds_ratio": -0.3277240991592407, + "loss": 0.2881, + "rejected_geometric_mean": -4.789999008178711, + "step": 7265 + }, + { + "chosen_geometric_mean": -0.900641918182373, + "epoch": 1.8, + "grad_norm": 2.328125, + "learning_rate": 1.2392784498863053e-07, + "log_odds": 6.784268379211426, + "log_odds_ratio": -0.2407299280166626, + "loss": 0.2558, + "rejected_geometric_mean": -7.30556583404541, + "step": 7266 + }, + { + "chosen_geometric_mean": -1.109915852546692, + "epoch": 1.8, + "grad_norm": 3.796875, + "learning_rate": 1.2362533135316296e-07, + "log_odds": 5.93065071105957, + "log_odds_ratio": -0.22122196853160858, + "loss": 0.2975, + "rejected_geometric_mean": -6.741994857788086, + "step": 7267 + }, + { + "chosen_geometric_mean": -0.9117895364761353, + "epoch": 1.8, + "grad_norm": 2.8125, + "learning_rate": 1.2332317803313813e-07, + "log_odds": 9.567754745483398, + "log_odds_ratio": -0.1396467089653015, + "loss": 0.2776, + "rejected_geometric_mean": -10.023250579833984, + "step": 7268 + }, + { + "chosen_geometric_mean": -0.9108085036277771, + "epoch": 1.8, + "grad_norm": 4.53125, + "learning_rate": 1.230213850743686e-07, + "log_odds": 10.592565536499023, + "log_odds_ratio": -0.07796984910964966, + "loss": 0.23, + "rejected_geometric_mean": -11.04504108428955, + "step": 7269 + }, + { + "chosen_geometric_mean": -1.1143560409545898, + "epoch": 1.8, + "grad_norm": 17.125, + "learning_rate": 1.2271995252261415e-07, + "log_odds": 2.9063549041748047, + "log_odds_ratio": -0.49049973487854004, + "loss": 0.3016, + "rejected_geometric_mean": -3.8401577472686768, + "step": 7270 + }, + { + "chosen_geometric_mean": -0.9356908202171326, + "epoch": 1.8, + "grad_norm": 7.875, + "learning_rate": 1.2241888042357875e-07, + "log_odds": 7.281678676605225, + "log_odds_ratio": -0.13216765224933624, + "loss": 0.2661, + "rejected_geometric_mean": -7.769399642944336, + "step": 7271 + }, + { + "chosen_geometric_mean": -0.9976454377174377, + "epoch": 1.8, + "grad_norm": 2.640625, + "learning_rate": 1.2211816882291226e-07, + "log_odds": 1.414086103439331, + "log_odds_ratio": -0.2803226411342621, + "loss": 0.2945, + "rejected_geometric_mean": -2.0636301040649414, + "step": 7272 + }, + { + "chosen_geometric_mean": -0.9369074702262878, + "epoch": 1.8, + "grad_norm": 59.25, + "learning_rate": 1.218178177662102e-07, + "log_odds": 8.488462448120117, + "log_odds_ratio": -0.015747427940368652, + "loss": 0.2978, + "rejected_geometric_mean": -8.93422794342041, + "step": 7273 + }, + { + "chosen_geometric_mean": -1.0261818170547485, + "epoch": 1.8, + "grad_norm": 2.515625, + "learning_rate": 1.2151782729901224e-07, + "log_odds": 7.631470680236816, + "log_odds_ratio": -0.13151071965694427, + "loss": 0.2823, + "rejected_geometric_mean": -8.286331176757812, + "step": 7274 + }, + { + "chosen_geometric_mean": -1.052459478378296, + "epoch": 1.8, + "grad_norm": 2.625, + "learning_rate": 1.21218197466805e-07, + "log_odds": 8.115684509277344, + "log_odds_ratio": -0.13971790671348572, + "loss": 0.2664, + "rejected_geometric_mean": -8.778932571411133, + "step": 7275 + }, + { + "chosen_geometric_mean": -0.9801517724990845, + "epoch": 1.8, + "grad_norm": 15.0, + "learning_rate": 1.2091892831501918e-07, + "log_odds": 8.22630500793457, + "log_odds_ratio": -0.15528297424316406, + "loss": 0.2477, + "rejected_geometric_mean": -8.846092224121094, + "step": 7276 + }, + { + "chosen_geometric_mean": -0.955934464931488, + "epoch": 1.8, + "grad_norm": 33.25, + "learning_rate": 1.206200198890306e-07, + "log_odds": 10.35396957397461, + "log_odds_ratio": -0.06556646525859833, + "loss": 0.2974, + "rejected_geometric_mean": -10.82854175567627, + "step": 7277 + }, + { + "chosen_geometric_mean": -0.9463297128677368, + "epoch": 1.8, + "grad_norm": 23.0, + "learning_rate": 1.203214722341614e-07, + "log_odds": 8.962207794189453, + "log_odds_ratio": -0.10075901448726654, + "loss": 0.2561, + "rejected_geometric_mean": -9.467116355895996, + "step": 7278 + }, + { + "chosen_geometric_mean": -0.9457559585571289, + "epoch": 1.8, + "grad_norm": 3.15625, + "learning_rate": 1.20023285395679e-07, + "log_odds": 8.972672462463379, + "log_odds_ratio": -0.23921170830726624, + "loss": 0.2546, + "rejected_geometric_mean": -9.55942440032959, + "step": 7279 + }, + { + "chosen_geometric_mean": -0.9003749489784241, + "epoch": 1.8, + "grad_norm": 28.625, + "learning_rate": 1.197254594187952e-07, + "log_odds": 13.348562240600586, + "log_odds_ratio": -0.00026776149752549827, + "loss": 0.2734, + "rejected_geometric_mean": -13.716022491455078, + "step": 7280 + }, + { + "chosen_geometric_mean": -1.1246143579483032, + "epoch": 1.8, + "grad_norm": 2.828125, + "learning_rate": 1.194279943486676e-07, + "log_odds": 10.49700927734375, + "log_odds_ratio": -0.0009433605009689927, + "loss": 0.2459, + "rejected_geometric_mean": -11.225370407104492, + "step": 7281 + }, + { + "chosen_geometric_mean": -0.9970477819442749, + "epoch": 1.8, + "grad_norm": 17.375, + "learning_rate": 1.1913089023039948e-07, + "log_odds": 4.793610095977783, + "log_odds_ratio": -0.10544182360172272, + "loss": 0.287, + "rejected_geometric_mean": -5.349493980407715, + "step": 7282 + }, + { + "chosen_geometric_mean": -0.9125207662582397, + "epoch": 1.8, + "grad_norm": 1.765625, + "learning_rate": 1.1883414710903762e-07, + "log_odds": 9.80523681640625, + "log_odds_ratio": -0.005423488095402718, + "loss": 0.2344, + "rejected_geometric_mean": -10.175419807434082, + "step": 7283 + }, + { + "chosen_geometric_mean": -0.9538439512252808, + "epoch": 1.8, + "grad_norm": 15.25, + "learning_rate": 1.1853776502957714e-07, + "log_odds": 4.675229072570801, + "log_odds_ratio": -0.20980535447597504, + "loss": 0.2841, + "rejected_geometric_mean": -5.245289325714111, + "step": 7284 + }, + { + "chosen_geometric_mean": -0.9235036373138428, + "epoch": 1.8, + "grad_norm": 8.4375, + "learning_rate": 1.1824174403695599e-07, + "log_odds": 9.292485237121582, + "log_odds_ratio": -0.01135309413075447, + "loss": 0.2603, + "rejected_geometric_mean": -9.670971870422363, + "step": 7285 + }, + { + "chosen_geometric_mean": -0.9604121446609497, + "epoch": 1.8, + "grad_norm": 5.21875, + "learning_rate": 1.1794608417605768e-07, + "log_odds": 2.926831007003784, + "log_odds_ratio": -0.3711593449115753, + "loss": 0.2668, + "rejected_geometric_mean": -3.638188600540161, + "step": 7286 + }, + { + "chosen_geometric_mean": -1.0725570917129517, + "epoch": 1.8, + "grad_norm": 16.25, + "learning_rate": 1.1765078549171194e-07, + "log_odds": 5.985025882720947, + "log_odds_ratio": -0.3090916574001312, + "loss": 0.2639, + "rejected_geometric_mean": -6.769338607788086, + "step": 7287 + }, + { + "chosen_geometric_mean": -1.082235336303711, + "epoch": 1.8, + "grad_norm": 11.25, + "learning_rate": 1.1735584802869238e-07, + "log_odds": 12.18972396850586, + "log_odds_ratio": -0.04671391472220421, + "loss": 0.2774, + "rejected_geometric_mean": -12.831573486328125, + "step": 7288 + }, + { + "chosen_geometric_mean": -0.9754820466041565, + "epoch": 1.8, + "grad_norm": 2.125, + "learning_rate": 1.1706127183171906e-07, + "log_odds": 7.774313926696777, + "log_odds_ratio": -0.2365737110376358, + "loss": 0.2599, + "rejected_geometric_mean": -8.403724670410156, + "step": 7289 + }, + { + "chosen_geometric_mean": -0.8888997435569763, + "epoch": 1.8, + "grad_norm": 33.25, + "learning_rate": 1.167670569454571e-07, + "log_odds": 4.779141902923584, + "log_odds_ratio": -0.06565196067094803, + "loss": 0.2856, + "rejected_geometric_mean": -5.189239501953125, + "step": 7290 + }, + { + "chosen_geometric_mean": -1.0845973491668701, + "epoch": 1.81, + "grad_norm": 23.375, + "learning_rate": 1.1647320341451635e-07, + "log_odds": 11.186731338500977, + "log_odds_ratio": -0.0005312613793648779, + "loss": 0.2422, + "rejected_geometric_mean": -11.82957649230957, + "step": 7291 + }, + { + "chosen_geometric_mean": -1.2678205966949463, + "epoch": 1.81, + "grad_norm": 10.9375, + "learning_rate": 1.1617971128345146e-07, + "log_odds": 6.135129451751709, + "log_odds_ratio": -0.019811907783150673, + "loss": 0.273, + "rejected_geometric_mean": -7.0272216796875, + "step": 7292 + }, + { + "chosen_geometric_mean": -0.8936970233917236, + "epoch": 1.81, + "grad_norm": 3.90625, + "learning_rate": 1.1588658059676405e-07, + "log_odds": 7.16292667388916, + "log_odds_ratio": -0.15080055594444275, + "loss": 0.277, + "rejected_geometric_mean": -7.6479172706604, + "step": 7293 + }, + { + "chosen_geometric_mean": -0.8073567152023315, + "epoch": 1.81, + "grad_norm": 3.15625, + "learning_rate": 1.1559381139889831e-07, + "log_odds": 11.265146255493164, + "log_odds_ratio": -0.044470030814409256, + "loss": 0.2425, + "rejected_geometric_mean": -11.50113582611084, + "step": 7294 + }, + { + "chosen_geometric_mean": -0.9362443685531616, + "epoch": 1.81, + "grad_norm": 2.703125, + "learning_rate": 1.153014037342462e-07, + "log_odds": 5.414396286010742, + "log_odds_ratio": -0.1405169665813446, + "loss": 0.2505, + "rejected_geometric_mean": -5.943691253662109, + "step": 7295 + }, + { + "chosen_geometric_mean": -0.88965904712677, + "epoch": 1.81, + "grad_norm": 3.71875, + "learning_rate": 1.1500935764714338e-07, + "log_odds": 14.48520278930664, + "log_odds_ratio": -0.00012192022404633462, + "loss": 0.2283, + "rejected_geometric_mean": -14.832613945007324, + "step": 7296 + }, + { + "chosen_geometric_mean": -1.0585724115371704, + "epoch": 1.81, + "grad_norm": 3.484375, + "learning_rate": 1.147176731818711e-07, + "log_odds": 7.764387130737305, + "log_odds_ratio": -0.25834402441978455, + "loss": 0.297, + "rejected_geometric_mean": -8.475210189819336, + "step": 7297 + }, + { + "chosen_geometric_mean": -0.9097321033477783, + "epoch": 1.81, + "grad_norm": 12.9375, + "learning_rate": 1.1442635038265565e-07, + "log_odds": 5.822387218475342, + "log_odds_ratio": -0.16735278069972992, + "loss": 0.212, + "rejected_geometric_mean": -6.312708854675293, + "step": 7298 + }, + { + "chosen_geometric_mean": -1.0134223699569702, + "epoch": 1.81, + "grad_norm": 5.28125, + "learning_rate": 1.1413538929366891e-07, + "log_odds": 11.90555191040039, + "log_odds_ratio": -0.005379353649914265, + "loss": 0.2503, + "rejected_geometric_mean": -12.451438903808594, + "step": 7299 + }, + { + "chosen_geometric_mean": -0.8328539729118347, + "epoch": 1.81, + "grad_norm": 9.25, + "learning_rate": 1.1384478995902643e-07, + "log_odds": 6.157896518707275, + "log_odds_ratio": -0.241721972823143, + "loss": 0.2261, + "rejected_geometric_mean": -6.58689022064209, + "step": 7300 + }, + { + "chosen_geometric_mean": -1.0055460929870605, + "epoch": 1.81, + "grad_norm": 24.5, + "learning_rate": 1.1355455242279156e-07, + "log_odds": 9.926812171936035, + "log_odds_ratio": -0.008679752238094807, + "loss": 0.2968, + "rejected_geometric_mean": -10.447433471679688, + "step": 7301 + }, + { + "chosen_geometric_mean": -0.9539234042167664, + "epoch": 1.81, + "grad_norm": 3.8125, + "learning_rate": 1.1326467672897051e-07, + "log_odds": 1.4465982913970947, + "log_odds_ratio": -0.2695208191871643, + "loss": 0.2403, + "rejected_geometric_mean": -2.0784308910369873, + "step": 7302 + }, + { + "chosen_geometric_mean": -0.8817657232284546, + "epoch": 1.81, + "grad_norm": 7.03125, + "learning_rate": 1.1297516292151533e-07, + "log_odds": 3.2772622108459473, + "log_odds_ratio": -0.18241947889328003, + "loss": 0.2523, + "rejected_geometric_mean": -3.7735631465911865, + "step": 7303 + }, + { + "chosen_geometric_mean": -1.109208583831787, + "epoch": 1.81, + "grad_norm": 2.34375, + "learning_rate": 1.1268601104432369e-07, + "log_odds": 9.20346450805664, + "log_odds_ratio": -0.14764082431793213, + "loss": 0.2215, + "rejected_geometric_mean": -10.024361610412598, + "step": 7304 + }, + { + "chosen_geometric_mean": -0.7858060598373413, + "epoch": 1.81, + "grad_norm": 11.3125, + "learning_rate": 1.1239722114123719e-07, + "log_odds": 7.3443450927734375, + "log_odds_ratio": -0.21144725382328033, + "loss": 0.2668, + "rejected_geometric_mean": -7.688571453094482, + "step": 7305 + }, + { + "chosen_geometric_mean": -1.0549677610397339, + "epoch": 1.81, + "grad_norm": 2.9375, + "learning_rate": 1.1210879325604385e-07, + "log_odds": 3.3064091205596924, + "log_odds_ratio": -0.16228386759757996, + "loss": 0.2662, + "rejected_geometric_mean": -4.012545585632324, + "step": 7306 + }, + { + "chosen_geometric_mean": -0.8407118320465088, + "epoch": 1.81, + "grad_norm": 15.625, + "learning_rate": 1.1182072743247646e-07, + "log_odds": 15.713861465454102, + "log_odds_ratio": -0.006617861799895763, + "loss": 0.2885, + "rejected_geometric_mean": -15.977319717407227, + "step": 7307 + }, + { + "chosen_geometric_mean": -0.9151878952980042, + "epoch": 1.81, + "grad_norm": 6.5625, + "learning_rate": 1.115330237142126e-07, + "log_odds": 5.139654159545898, + "log_odds_ratio": -0.17229144275188446, + "loss": 0.2775, + "rejected_geometric_mean": -5.699132919311523, + "step": 7308 + }, + { + "chosen_geometric_mean": -0.9415138363838196, + "epoch": 1.81, + "grad_norm": 29.125, + "learning_rate": 1.112456821448743e-07, + "log_odds": 12.448162078857422, + "log_odds_ratio": -0.0009824661538004875, + "loss": 0.2954, + "rejected_geometric_mean": -12.888750076293945, + "step": 7309 + }, + { + "chosen_geometric_mean": -0.9075007438659668, + "epoch": 1.81, + "grad_norm": 2.90625, + "learning_rate": 1.109587027680306e-07, + "log_odds": 14.541007995605469, + "log_odds_ratio": -0.0024276738986372948, + "loss": 0.2334, + "rejected_geometric_mean": -14.91873550415039, + "step": 7310 + }, + { + "chosen_geometric_mean": -1.0157772302627563, + "epoch": 1.81, + "grad_norm": 3.28125, + "learning_rate": 1.1067208562719368e-07, + "log_odds": 10.767980575561523, + "log_odds_ratio": -0.09928061068058014, + "loss": 0.2791, + "rejected_geometric_mean": -11.402358055114746, + "step": 7311 + }, + { + "chosen_geometric_mean": -0.9764159917831421, + "epoch": 1.81, + "grad_norm": 8.3125, + "learning_rate": 1.1038583076582177e-07, + "log_odds": 3.9349985122680664, + "log_odds_ratio": -0.24071873724460602, + "loss": 0.2872, + "rejected_geometric_mean": -4.527690887451172, + "step": 7312 + }, + { + "chosen_geometric_mean": -1.0215193033218384, + "epoch": 1.81, + "grad_norm": 2.578125, + "learning_rate": 1.1009993822731823e-07, + "log_odds": 3.277125835418701, + "log_odds_ratio": -0.27012899518013, + "loss": 0.3096, + "rejected_geometric_mean": -4.0059638023376465, + "step": 7313 + }, + { + "chosen_geometric_mean": -1.0310815572738647, + "epoch": 1.81, + "grad_norm": 1.8984375, + "learning_rate": 1.0981440805503064e-07, + "log_odds": 13.572303771972656, + "log_odds_ratio": -0.11576603353023529, + "loss": 0.2524, + "rejected_geometric_mean": -14.204413414001465, + "step": 7314 + }, + { + "chosen_geometric_mean": -1.0244747400283813, + "epoch": 1.81, + "grad_norm": 9.0, + "learning_rate": 1.0952924029225293e-07, + "log_odds": 9.206808090209961, + "log_odds_ratio": -0.015549704432487488, + "loss": 0.2801, + "rejected_geometric_mean": -9.783536911010742, + "step": 7315 + }, + { + "chosen_geometric_mean": -0.8402954339981079, + "epoch": 1.81, + "grad_norm": 7.0625, + "learning_rate": 1.0924443498222276e-07, + "log_odds": 7.841105937957764, + "log_odds_ratio": -0.1658574342727661, + "loss": 0.2607, + "rejected_geometric_mean": -8.26740550994873, + "step": 7316 + }, + { + "chosen_geometric_mean": -0.9673877954483032, + "epoch": 1.81, + "grad_norm": 14.625, + "learning_rate": 1.089599921681242e-07, + "log_odds": 9.102675437927246, + "log_odds_ratio": -0.18502548336982727, + "loss": 0.2718, + "rejected_geometric_mean": -9.70970344543457, + "step": 7317 + }, + { + "chosen_geometric_mean": -1.0595355033874512, + "epoch": 1.81, + "grad_norm": 5.09375, + "learning_rate": 1.0867591189308468e-07, + "log_odds": 13.946722030639648, + "log_odds_ratio": -0.004673467483371496, + "loss": 0.2852, + "rejected_geometric_mean": -14.535589218139648, + "step": 7318 + }, + { + "chosen_geometric_mean": -0.8151413798332214, + "epoch": 1.81, + "grad_norm": 3.65625, + "learning_rate": 1.0839219420017866e-07, + "log_odds": 9.678937911987305, + "log_odds_ratio": -0.116724394261837, + "loss": 0.2613, + "rejected_geometric_mean": -9.939093589782715, + "step": 7319 + }, + { + "chosen_geometric_mean": -0.9953992962837219, + "epoch": 1.81, + "grad_norm": 5.03125, + "learning_rate": 1.0810883913242337e-07, + "log_odds": 9.750951766967773, + "log_odds_ratio": -0.16065791249275208, + "loss": 0.2481, + "rejected_geometric_mean": -10.41335391998291, + "step": 7320 + }, + { + "chosen_geometric_mean": -1.3098344802856445, + "epoch": 1.81, + "grad_norm": 6.40625, + "learning_rate": 1.0782584673278279e-07, + "log_odds": 3.252420663833618, + "log_odds_ratio": -0.16337332129478455, + "loss": 0.2578, + "rejected_geometric_mean": -4.305151462554932, + "step": 7321 + }, + { + "chosen_geometric_mean": -0.991940975189209, + "epoch": 1.81, + "grad_norm": 2.1875, + "learning_rate": 1.0754321704416593e-07, + "log_odds": 6.494959831237793, + "log_odds_ratio": -0.2827617824077606, + "loss": 0.2749, + "rejected_geometric_mean": -7.1411237716674805, + "step": 7322 + }, + { + "chosen_geometric_mean": -0.8024918437004089, + "epoch": 1.81, + "grad_norm": 2.796875, + "learning_rate": 1.0726095010942517e-07, + "log_odds": 0.4627458453178406, + "log_odds_ratio": -0.49525541067123413, + "loss": 0.2431, + "rejected_geometric_mean": -1.1039354801177979, + "step": 7323 + }, + { + "chosen_geometric_mean": -1.3842663764953613, + "epoch": 1.81, + "grad_norm": 19.375, + "learning_rate": 1.0697904597135989e-07, + "log_odds": 11.635010719299316, + "log_odds_ratio": -0.0016761955339461565, + "loss": 0.2625, + "rejected_geometric_mean": -12.677084922790527, + "step": 7324 + }, + { + "chosen_geometric_mean": -0.9908422231674194, + "epoch": 1.81, + "grad_norm": 59.5, + "learning_rate": 1.0669750467271311e-07, + "log_odds": 5.4258270263671875, + "log_odds_ratio": -0.29078564047813416, + "loss": 0.3486, + "rejected_geometric_mean": -6.132107257843018, + "step": 7325 + }, + { + "chosen_geometric_mean": -0.9286215901374817, + "epoch": 1.81, + "grad_norm": 5.75, + "learning_rate": 1.0641632625617293e-07, + "log_odds": 13.135648727416992, + "log_odds_ratio": -0.04364864155650139, + "loss": 0.2059, + "rejected_geometric_mean": -13.582440376281738, + "step": 7326 + }, + { + "chosen_geometric_mean": -1.0527713298797607, + "epoch": 1.81, + "grad_norm": 14.625, + "learning_rate": 1.0613551076437328e-07, + "log_odds": 3.9412178993225098, + "log_odds_ratio": -0.3048657774925232, + "loss": 0.3525, + "rejected_geometric_mean": -4.733529090881348, + "step": 7327 + }, + { + "chosen_geometric_mean": -0.902999758720398, + "epoch": 1.81, + "grad_norm": 11.1875, + "learning_rate": 1.0585505823989233e-07, + "log_odds": 10.224749565124512, + "log_odds_ratio": -0.007779110688716173, + "loss": 0.2741, + "rejected_geometric_mean": -10.603004455566406, + "step": 7328 + }, + { + "chosen_geometric_mean": -1.052897572517395, + "epoch": 1.81, + "grad_norm": 2.109375, + "learning_rate": 1.0557496872525331e-07, + "log_odds": 13.164627075195312, + "log_odds_ratio": -0.0322754792869091, + "loss": 0.2512, + "rejected_geometric_mean": -13.80323600769043, + "step": 7329 + }, + { + "chosen_geometric_mean": -0.8142076730728149, + "epoch": 1.81, + "grad_norm": 11.4375, + "learning_rate": 1.0529524226292498e-07, + "log_odds": 11.844212532043457, + "log_odds_ratio": -6.224859680514783e-05, + "loss": 0.2231, + "rejected_geometric_mean": -12.068352699279785, + "step": 7330 + }, + { + "chosen_geometric_mean": -1.0010144710540771, + "epoch": 1.82, + "grad_norm": 3.28125, + "learning_rate": 1.0501587889531983e-07, + "log_odds": 11.859563827514648, + "log_odds_ratio": -0.11396843194961548, + "loss": 0.2364, + "rejected_geometric_mean": -12.44139575958252, + "step": 7331 + }, + { + "chosen_geometric_mean": -1.1096205711364746, + "epoch": 1.82, + "grad_norm": 4.21875, + "learning_rate": 1.047368786647962e-07, + "log_odds": 4.061995506286621, + "log_odds_ratio": -0.1540863960981369, + "loss": 0.2546, + "rejected_geometric_mean": -4.842801094055176, + "step": 7332 + }, + { + "chosen_geometric_mean": -1.0824916362762451, + "epoch": 1.82, + "grad_norm": 1.9140625, + "learning_rate": 1.0445824161365803e-07, + "log_odds": 11.545716285705566, + "log_odds_ratio": -0.0022940635681152344, + "loss": 0.2461, + "rejected_geometric_mean": -12.207649230957031, + "step": 7333 + }, + { + "chosen_geometric_mean": -1.101048231124878, + "epoch": 1.82, + "grad_norm": 3.34375, + "learning_rate": 1.0417996778415262e-07, + "log_odds": 2.3630025386810303, + "log_odds_ratio": -0.18978512287139893, + "loss": 0.2491, + "rejected_geometric_mean": -3.1425676345825195, + "step": 7334 + }, + { + "chosen_geometric_mean": -1.100273847579956, + "epoch": 1.82, + "grad_norm": 4.21875, + "learning_rate": 1.0390205721847291e-07, + "log_odds": 5.292459011077881, + "log_odds_ratio": -0.07641015946865082, + "loss": 0.2516, + "rejected_geometric_mean": -6.009579658508301, + "step": 7335 + }, + { + "chosen_geometric_mean": -1.2566521167755127, + "epoch": 1.82, + "grad_norm": 7.28125, + "learning_rate": 1.0362450995875767e-07, + "log_odds": 3.9173479080200195, + "log_odds_ratio": -0.03230292722582817, + "loss": 0.2445, + "rejected_geometric_mean": -4.829434394836426, + "step": 7336 + }, + { + "chosen_geometric_mean": -0.9995535612106323, + "epoch": 1.82, + "grad_norm": 3.875, + "learning_rate": 1.0334732604708853e-07, + "log_odds": 5.805857181549072, + "log_odds_ratio": -0.06104734539985657, + "loss": 0.2802, + "rejected_geometric_mean": -6.356969833374023, + "step": 7337 + }, + { + "chosen_geometric_mean": -1.3193025588989258, + "epoch": 1.82, + "grad_norm": 55.0, + "learning_rate": 1.030705055254938e-07, + "log_odds": 9.380620956420898, + "log_odds_ratio": -0.01570439152419567, + "loss": 0.336, + "rejected_geometric_mean": -10.313392639160156, + "step": 7338 + }, + { + "chosen_geometric_mean": -1.0750218629837036, + "epoch": 1.82, + "grad_norm": 9.625, + "learning_rate": 1.0279404843594659e-07, + "log_odds": 4.419544696807861, + "log_odds_ratio": -0.12532958388328552, + "loss": 0.2331, + "rejected_geometric_mean": -5.1057329177856445, + "step": 7339 + }, + { + "chosen_geometric_mean": -0.9341118335723877, + "epoch": 1.82, + "grad_norm": 28.25, + "learning_rate": 1.0251795482036336e-07, + "log_odds": 5.396562576293945, + "log_odds_ratio": -0.17815154790878296, + "loss": 0.2505, + "rejected_geometric_mean": -5.953456878662109, + "step": 7340 + }, + { + "chosen_geometric_mean": -1.1325201988220215, + "epoch": 1.82, + "grad_norm": 3.71875, + "learning_rate": 1.0224222472060786e-07, + "log_odds": 9.063078880310059, + "log_odds_ratio": -0.12585483491420746, + "loss": 0.2802, + "rejected_geometric_mean": -9.807100296020508, + "step": 7341 + }, + { + "chosen_geometric_mean": -0.9408215284347534, + "epoch": 1.82, + "grad_norm": 2.671875, + "learning_rate": 1.0196685817848634e-07, + "log_odds": 7.24533748626709, + "log_odds_ratio": -0.12936648726463318, + "loss": 0.2815, + "rejected_geometric_mean": -7.796125888824463, + "step": 7342 + }, + { + "chosen_geometric_mean": -0.7732672691345215, + "epoch": 1.82, + "grad_norm": 5.15625, + "learning_rate": 1.0169185523575126e-07, + "log_odds": 11.44853401184082, + "log_odds_ratio": -0.009690014645457268, + "loss": 0.2533, + "rejected_geometric_mean": -11.607114791870117, + "step": 7343 + }, + { + "chosen_geometric_mean": -0.816426694393158, + "epoch": 1.82, + "grad_norm": 14.6875, + "learning_rate": 1.0141721593409953e-07, + "log_odds": 2.6406328678131104, + "log_odds_ratio": -0.26581522822380066, + "loss": 0.3034, + "rejected_geometric_mean": -3.1244075298309326, + "step": 7344 + }, + { + "chosen_geometric_mean": -1.1576963663101196, + "epoch": 1.82, + "grad_norm": 31.0, + "learning_rate": 1.0114294031517368e-07, + "log_odds": 4.771842956542969, + "log_odds_ratio": -0.315986692905426, + "loss": 0.3545, + "rejected_geometric_mean": -5.6771955490112305, + "step": 7345 + }, + { + "chosen_geometric_mean": -0.8916181921958923, + "epoch": 1.82, + "grad_norm": 9.8125, + "learning_rate": 1.0086902842055962e-07, + "log_odds": 11.381054878234863, + "log_odds_ratio": -0.02158322185277939, + "loss": 0.2233, + "rejected_geometric_mean": -11.744861602783203, + "step": 7346 + }, + { + "chosen_geometric_mean": -0.8803703188896179, + "epoch": 1.82, + "grad_norm": 16.25, + "learning_rate": 1.0059548029178967e-07, + "log_odds": 5.169665813446045, + "log_odds_ratio": -0.15284857153892517, + "loss": 0.2479, + "rejected_geometric_mean": -5.545799255371094, + "step": 7347 + }, + { + "chosen_geometric_mean": -1.0041576623916626, + "epoch": 1.82, + "grad_norm": 5.09375, + "learning_rate": 1.003222959703401e-07, + "log_odds": 9.096491813659668, + "log_odds_ratio": -0.04382292181253433, + "loss": 0.2359, + "rejected_geometric_mean": -9.664253234863281, + "step": 7348 + }, + { + "chosen_geometric_mean": -1.1295288801193237, + "epoch": 1.82, + "grad_norm": 1.859375, + "learning_rate": 1.0004947549763166e-07, + "log_odds": 8.201562881469727, + "log_odds_ratio": -0.006774626672267914, + "loss": 0.2298, + "rejected_geometric_mean": -8.920347213745117, + "step": 7349 + }, + { + "chosen_geometric_mean": -1.4011269807815552, + "epoch": 1.82, + "grad_norm": 8.875, + "learning_rate": 9.977701891503128e-08, + "log_odds": 6.363974571228027, + "log_odds_ratio": -0.30671364068984985, + "loss": 0.2594, + "rejected_geometric_mean": -7.602209091186523, + "step": 7350 + }, + { + "chosen_geometric_mean": -0.9782318472862244, + "epoch": 1.82, + "grad_norm": 23.0, + "learning_rate": 9.950492626384978e-08, + "log_odds": 3.471666097640991, + "log_odds_ratio": -0.169115349650383, + "loss": 0.3358, + "rejected_geometric_mean": -4.07302713394165, + "step": 7351 + }, + { + "chosen_geometric_mean": -1.025950312614441, + "epoch": 1.82, + "grad_norm": 10.0625, + "learning_rate": 9.92331975853425e-08, + "log_odds": 8.99456787109375, + "log_odds_ratio": -0.17478257417678833, + "loss": 0.2592, + "rejected_geometric_mean": -9.667547225952148, + "step": 7352 + }, + { + "chosen_geometric_mean": -0.9999397993087769, + "epoch": 1.82, + "grad_norm": 7.09375, + "learning_rate": 9.896183292071037e-08, + "log_odds": 10.930146217346191, + "log_odds_ratio": -0.015325067564845085, + "loss": 0.2357, + "rejected_geometric_mean": -11.45827865600586, + "step": 7353 + }, + { + "chosen_geometric_mean": -1.2510323524475098, + "epoch": 1.82, + "grad_norm": 12.125, + "learning_rate": 9.869083231109854e-08, + "log_odds": 4.328917503356934, + "log_odds_ratio": -0.17356160283088684, + "loss": 0.2986, + "rejected_geometric_mean": -5.283573150634766, + "step": 7354 + }, + { + "chosen_geometric_mean": -0.9707023501396179, + "epoch": 1.82, + "grad_norm": 2.484375, + "learning_rate": 9.84201957975972e-08, + "log_odds": 6.95314884185791, + "log_odds_ratio": -0.05730205029249191, + "loss": 0.2281, + "rejected_geometric_mean": -7.471268653869629, + "step": 7355 + }, + { + "chosen_geometric_mean": -1.1879472732543945, + "epoch": 1.82, + "grad_norm": 3.3125, + "learning_rate": 9.814992342124214e-08, + "log_odds": 14.43830680847168, + "log_odds_ratio": -0.0004502068622969091, + "loss": 0.2752, + "rejected_geometric_mean": -15.25739860534668, + "step": 7356 + }, + { + "chosen_geometric_mean": -1.0051275491714478, + "epoch": 1.82, + "grad_norm": 2.46875, + "learning_rate": 9.788001522301221e-08, + "log_odds": 7.90826416015625, + "log_odds_ratio": -0.13572335243225098, + "loss": 0.2509, + "rejected_geometric_mean": -8.53258228302002, + "step": 7357 + }, + { + "chosen_geometric_mean": -1.2233901023864746, + "epoch": 1.82, + "grad_norm": 4.53125, + "learning_rate": 9.761047124383193e-08, + "log_odds": 3.585482120513916, + "log_odds_ratio": -0.16046491265296936, + "loss": 0.2924, + "rejected_geometric_mean": -4.522059917449951, + "step": 7358 + }, + { + "chosen_geometric_mean": -0.9407493472099304, + "epoch": 1.82, + "grad_norm": 41.5, + "learning_rate": 9.734129152457139e-08, + "log_odds": 6.417130470275879, + "log_odds_ratio": -0.06995832920074463, + "loss": 0.2423, + "rejected_geometric_mean": -6.839208602905273, + "step": 7359 + }, + { + "chosen_geometric_mean": -0.9575390815734863, + "epoch": 1.82, + "grad_norm": 6.28125, + "learning_rate": 9.707247610604403e-08, + "log_odds": 8.29413890838623, + "log_odds_ratio": -0.12370810657739639, + "loss": 0.24, + "rejected_geometric_mean": -8.80879020690918, + "step": 7360 + }, + { + "chosen_geometric_mean": -0.9090902209281921, + "epoch": 1.82, + "grad_norm": 41.25, + "learning_rate": 9.680402502900893e-08, + "log_odds": 13.834734916687012, + "log_odds_ratio": -0.19704465568065643, + "loss": 0.3179, + "rejected_geometric_mean": -14.339755058288574, + "step": 7361 + }, + { + "chosen_geometric_mean": -0.763363242149353, + "epoch": 1.82, + "grad_norm": 23.375, + "learning_rate": 9.653593833416991e-08, + "log_odds": 9.22597885131836, + "log_odds_ratio": -0.08690404146909714, + "loss": 0.2085, + "rejected_geometric_mean": -9.433483123779297, + "step": 7362 + }, + { + "chosen_geometric_mean": -0.745952844619751, + "epoch": 1.82, + "grad_norm": 3.65625, + "learning_rate": 9.626821606217501e-08, + "log_odds": 8.37809944152832, + "log_odds_ratio": -0.17922496795654297, + "loss": 0.2458, + "rejected_geometric_mean": -8.580395698547363, + "step": 7363 + }, + { + "chosen_geometric_mean": -0.7458902597427368, + "epoch": 1.82, + "grad_norm": 8.375, + "learning_rate": 9.600085825361788e-08, + "log_odds": 8.207833290100098, + "log_odds_ratio": -0.05427606776356697, + "loss": 0.2423, + "rejected_geometric_mean": -8.3204984664917, + "step": 7364 + }, + { + "chosen_geometric_mean": -1.0349674224853516, + "epoch": 1.82, + "grad_norm": 24.125, + "learning_rate": 9.573386494903553e-08, + "log_odds": 9.051908493041992, + "log_odds_ratio": -0.002256787847727537, + "loss": 0.2514, + "rejected_geometric_mean": -9.62304973602295, + "step": 7365 + }, + { + "chosen_geometric_mean": -0.852218747138977, + "epoch": 1.82, + "grad_norm": 1.7109375, + "learning_rate": 9.54672361889114e-08, + "log_odds": 12.948627471923828, + "log_odds_ratio": -0.10547799617052078, + "loss": 0.2003, + "rejected_geometric_mean": -13.322209358215332, + "step": 7366 + }, + { + "chosen_geometric_mean": -0.9669423699378967, + "epoch": 1.82, + "grad_norm": 1.984375, + "learning_rate": 9.520097201367234e-08, + "log_odds": 5.855334281921387, + "log_odds_ratio": -0.07517364621162415, + "loss": 0.2622, + "rejected_geometric_mean": -6.378047943115234, + "step": 7367 + }, + { + "chosen_geometric_mean": -1.226254940032959, + "epoch": 1.82, + "grad_norm": 14.8125, + "learning_rate": 9.493507246369078e-08, + "log_odds": 4.660521507263184, + "log_odds_ratio": -0.22822561860084534, + "loss": 0.2689, + "rejected_geometric_mean": -5.556877136230469, + "step": 7368 + }, + { + "chosen_geometric_mean": -1.155285120010376, + "epoch": 1.82, + "grad_norm": 2.1875, + "learning_rate": 9.466953757928276e-08, + "log_odds": 9.530580520629883, + "log_odds_ratio": -0.22581887245178223, + "loss": 0.2658, + "rejected_geometric_mean": -10.426417350769043, + "step": 7369 + }, + { + "chosen_geometric_mean": -0.8628514409065247, + "epoch": 1.82, + "grad_norm": 3.40625, + "learning_rate": 9.440436740071085e-08, + "log_odds": 7.11391544342041, + "log_odds_ratio": -0.16943681240081787, + "loss": 0.2609, + "rejected_geometric_mean": -7.572495937347412, + "step": 7370 + }, + { + "chosen_geometric_mean": -0.8574990630149841, + "epoch": 1.82, + "grad_norm": 13.4375, + "learning_rate": 9.413956196818008e-08, + "log_odds": 3.697108030319214, + "log_odds_ratio": -0.18794019520282745, + "loss": 0.2792, + "rejected_geometric_mean": -4.088531017303467, + "step": 7371 + }, + { + "chosen_geometric_mean": -0.7946792244911194, + "epoch": 1.83, + "grad_norm": 18.125, + "learning_rate": 9.387512132184168e-08, + "log_odds": 11.006370544433594, + "log_odds_ratio": -0.006388213485479355, + "loss": 0.2643, + "rejected_geometric_mean": -11.192352294921875, + "step": 7372 + }, + { + "chosen_geometric_mean": -0.8099485635757446, + "epoch": 1.83, + "grad_norm": 8.5625, + "learning_rate": 9.361104550179162e-08, + "log_odds": 12.494900703430176, + "log_odds_ratio": -0.08531004190444946, + "loss": 0.257, + "rejected_geometric_mean": -12.729816436767578, + "step": 7373 + }, + { + "chosen_geometric_mean": -1.0220177173614502, + "epoch": 1.83, + "grad_norm": 3.921875, + "learning_rate": 9.334733454806983e-08, + "log_odds": 7.617718696594238, + "log_odds_ratio": -0.015073777176439762, + "loss": 0.2616, + "rejected_geometric_mean": -8.188908576965332, + "step": 7374 + }, + { + "chosen_geometric_mean": -1.0078785419464111, + "epoch": 1.83, + "grad_norm": 38.5, + "learning_rate": 9.308398850066098e-08, + "log_odds": 11.118717193603516, + "log_odds_ratio": -0.0014918146189302206, + "loss": 0.2791, + "rejected_geometric_mean": -11.644481658935547, + "step": 7375 + }, + { + "chosen_geometric_mean": -1.0364794731140137, + "epoch": 1.83, + "grad_norm": 2.4375, + "learning_rate": 9.282100739949478e-08, + "log_odds": 3.07226300239563, + "log_odds_ratio": -0.20522144436836243, + "loss": 0.2285, + "rejected_geometric_mean": -3.776451826095581, + "step": 7376 + }, + { + "chosen_geometric_mean": -1.0132458209991455, + "epoch": 1.83, + "grad_norm": 2.671875, + "learning_rate": 9.255839128444604e-08, + "log_odds": 5.609463214874268, + "log_odds_ratio": -0.017438219860196114, + "loss": 0.2338, + "rejected_geometric_mean": -6.177160739898682, + "step": 7377 + }, + { + "chosen_geometric_mean": -1.124274492263794, + "epoch": 1.83, + "grad_norm": 28.125, + "learning_rate": 9.229614019533289e-08, + "log_odds": 5.994840145111084, + "log_odds_ratio": -0.04409258812665939, + "loss": 0.2966, + "rejected_geometric_mean": -6.719171047210693, + "step": 7378 + }, + { + "chosen_geometric_mean": -1.0728342533111572, + "epoch": 1.83, + "grad_norm": 22.0, + "learning_rate": 9.203425417191936e-08, + "log_odds": 3.4977028369903564, + "log_odds_ratio": -0.11600492894649506, + "loss": 0.2641, + "rejected_geometric_mean": -4.212928771972656, + "step": 7379 + }, + { + "chosen_geometric_mean": -0.9600151777267456, + "epoch": 1.83, + "grad_norm": 11.5625, + "learning_rate": 9.177273325391367e-08, + "log_odds": 9.995705604553223, + "log_odds_ratio": -0.0019508072873577476, + "loss": 0.2701, + "rejected_geometric_mean": -10.431171417236328, + "step": 7380 + }, + { + "chosen_geometric_mean": -0.8673790693283081, + "epoch": 1.83, + "grad_norm": 2.3125, + "learning_rate": 9.151157748096829e-08, + "log_odds": 11.817002296447754, + "log_odds_ratio": -0.006956508848816156, + "loss": 0.2451, + "rejected_geometric_mean": -12.105390548706055, + "step": 7381 + }, + { + "chosen_geometric_mean": -0.9977666139602661, + "epoch": 1.83, + "grad_norm": 10.0, + "learning_rate": 9.125078689268068e-08, + "log_odds": 6.581740379333496, + "log_odds_ratio": -0.13976702094078064, + "loss": 0.2727, + "rejected_geometric_mean": -7.214475154876709, + "step": 7382 + }, + { + "chosen_geometric_mean": -0.9991456270217896, + "epoch": 1.83, + "grad_norm": 8.375, + "learning_rate": 9.09903615285937e-08, + "log_odds": 3.938119411468506, + "log_odds_ratio": -0.10480228811502457, + "loss": 0.2491, + "rejected_geometric_mean": -4.513044834136963, + "step": 7383 + }, + { + "chosen_geometric_mean": -1.0391626358032227, + "epoch": 1.83, + "grad_norm": 7.0625, + "learning_rate": 9.073030142819322e-08, + "log_odds": 4.914555549621582, + "log_odds_ratio": -0.16443438827991486, + "loss": 0.2429, + "rejected_geometric_mean": -5.617038249969482, + "step": 7384 + }, + { + "chosen_geometric_mean": -1.117605209350586, + "epoch": 1.83, + "grad_norm": 2.265625, + "learning_rate": 9.047060663091106e-08, + "log_odds": 1.8772146701812744, + "log_odds_ratio": -0.36396265029907227, + "loss": 0.2838, + "rejected_geometric_mean": -2.714538335800171, + "step": 7385 + }, + { + "chosen_geometric_mean": -1.112860083580017, + "epoch": 1.83, + "grad_norm": 26.125, + "learning_rate": 9.021127717612294e-08, + "log_odds": 6.985664367675781, + "log_odds_ratio": -0.050190944224596024, + "loss": 0.2426, + "rejected_geometric_mean": -7.7157816886901855, + "step": 7386 + }, + { + "chosen_geometric_mean": -0.8769963383674622, + "epoch": 1.83, + "grad_norm": 5.8125, + "learning_rate": 8.995231310314962e-08, + "log_odds": 3.111328363418579, + "log_odds_ratio": -0.11366600543260574, + "loss": 0.2341, + "rejected_geometric_mean": -3.4976999759674072, + "step": 7387 + }, + { + "chosen_geometric_mean": -0.9680052399635315, + "epoch": 1.83, + "grad_norm": 14.9375, + "learning_rate": 8.969371445125636e-08, + "log_odds": 7.312770843505859, + "log_odds_ratio": -0.044245678931474686, + "loss": 0.2953, + "rejected_geometric_mean": -7.751620292663574, + "step": 7388 + }, + { + "chosen_geometric_mean": -1.8964992761611938, + "epoch": 1.83, + "grad_norm": 45.25, + "learning_rate": 8.943548125965262e-08, + "log_odds": 3.4359848499298096, + "log_odds_ratio": -0.21757884323596954, + "loss": 0.3842, + "rejected_geometric_mean": -5.131199836730957, + "step": 7389 + }, + { + "chosen_geometric_mean": -0.8731873035430908, + "epoch": 1.83, + "grad_norm": 7.59375, + "learning_rate": 8.917761356749322e-08, + "log_odds": 8.136662483215332, + "log_odds_ratio": -0.15103435516357422, + "loss": 0.285, + "rejected_geometric_mean": -8.556401252746582, + "step": 7390 + }, + { + "chosen_geometric_mean": -1.344868779182434, + "epoch": 1.83, + "grad_norm": 18.625, + "learning_rate": 8.892011141387685e-08, + "log_odds": 6.906096458435059, + "log_odds_ratio": -0.0071897427551448345, + "loss": 0.3279, + "rejected_geometric_mean": -7.914419174194336, + "step": 7391 + }, + { + "chosen_geometric_mean": -1.0345726013183594, + "epoch": 1.83, + "grad_norm": 12.625, + "learning_rate": 8.866297483784647e-08, + "log_odds": 2.2858896255493164, + "log_odds_ratio": -0.2969508767127991, + "loss": 0.2619, + "rejected_geometric_mean": -3.0424976348876953, + "step": 7392 + }, + { + "chosen_geometric_mean": -1.008684754371643, + "epoch": 1.83, + "grad_norm": 13.4375, + "learning_rate": 8.840620387839089e-08, + "log_odds": 12.027217864990234, + "log_odds_ratio": -0.016252955421805382, + "loss": 0.2824, + "rejected_geometric_mean": -12.534971237182617, + "step": 7393 + }, + { + "chosen_geometric_mean": -0.9240323901176453, + "epoch": 1.83, + "grad_norm": 30.875, + "learning_rate": 8.814979857444255e-08, + "log_odds": 7.15421199798584, + "log_odds_ratio": -0.23952317237854004, + "loss": 0.2559, + "rejected_geometric_mean": -7.749232769012451, + "step": 7394 + }, + { + "chosen_geometric_mean": -0.9227814078330994, + "epoch": 1.83, + "grad_norm": 9.3125, + "learning_rate": 8.789375896487845e-08, + "log_odds": 5.6146087646484375, + "log_odds_ratio": -0.10827936232089996, + "loss": 0.2039, + "rejected_geometric_mean": -6.086447715759277, + "step": 7395 + }, + { + "chosen_geometric_mean": -0.9200615286827087, + "epoch": 1.83, + "grad_norm": 4.875, + "learning_rate": 8.763808508852057e-08, + "log_odds": 12.138399124145508, + "log_odds_ratio": -0.01629767194390297, + "loss": 0.2651, + "rejected_geometric_mean": -12.536888122558594, + "step": 7396 + }, + { + "chosen_geometric_mean": -0.7579417824745178, + "epoch": 1.83, + "grad_norm": 3.4375, + "learning_rate": 8.738277698413538e-08, + "log_odds": 12.032862663269043, + "log_odds_ratio": -0.04568895697593689, + "loss": 0.2548, + "rejected_geometric_mean": -12.192906379699707, + "step": 7397 + }, + { + "chosen_geometric_mean": -0.9301517605781555, + "epoch": 1.83, + "grad_norm": 2.234375, + "learning_rate": 8.712783469043279e-08, + "log_odds": 3.1958796977996826, + "log_odds_ratio": -0.3634551167488098, + "loss": 0.2632, + "rejected_geometric_mean": -3.8055131435394287, + "step": 7398 + }, + { + "chosen_geometric_mean": -1.2481435537338257, + "epoch": 1.83, + "grad_norm": 8.1875, + "learning_rate": 8.687325824606935e-08, + "log_odds": 7.124289035797119, + "log_odds_ratio": -0.045710183680057526, + "loss": 0.2659, + "rejected_geometric_mean": -7.952670574188232, + "step": 7399 + }, + { + "chosen_geometric_mean": -0.925931453704834, + "epoch": 1.83, + "grad_norm": 2.046875, + "learning_rate": 8.661904768964446e-08, + "log_odds": 9.69063949584961, + "log_odds_ratio": -0.15306740999221802, + "loss": 0.2512, + "rejected_geometric_mean": -10.180603981018066, + "step": 7400 + }, + { + "chosen_geometric_mean": -0.984944760799408, + "epoch": 1.83, + "grad_norm": 8.4375, + "learning_rate": 8.636520305970231e-08, + "log_odds": 5.440447807312012, + "log_odds_ratio": -0.1104007288813591, + "loss": 0.2547, + "rejected_geometric_mean": -6.022417068481445, + "step": 7401 + }, + { + "chosen_geometric_mean": -1.072791576385498, + "epoch": 1.83, + "grad_norm": 2.921875, + "learning_rate": 8.611172439473236e-08, + "log_odds": 5.2077717781066895, + "log_odds_ratio": -0.06540797650814056, + "loss": 0.2842, + "rejected_geometric_mean": -5.8870463371276855, + "step": 7402 + }, + { + "chosen_geometric_mean": -0.9351367950439453, + "epoch": 1.83, + "grad_norm": 41.75, + "learning_rate": 8.58586117331675e-08, + "log_odds": 7.270294189453125, + "log_odds_ratio": -0.020136192440986633, + "loss": 0.2493, + "rejected_geometric_mean": -7.698277950286865, + "step": 7403 + }, + { + "chosen_geometric_mean": -1.1550683975219727, + "epoch": 1.83, + "grad_norm": 30.25, + "learning_rate": 8.560586511338593e-08, + "log_odds": 9.906261444091797, + "log_odds_ratio": -0.08382052928209305, + "loss": 0.316, + "rejected_geometric_mean": -10.687250137329102, + "step": 7404 + }, + { + "chosen_geometric_mean": -0.8624644875526428, + "epoch": 1.83, + "grad_norm": 4.875, + "learning_rate": 8.535348457371028e-08, + "log_odds": 4.724715232849121, + "log_odds_ratio": -0.09855322539806366, + "loss": 0.2489, + "rejected_geometric_mean": -5.107177734375, + "step": 7405 + }, + { + "chosen_geometric_mean": -0.9565903544425964, + "epoch": 1.83, + "grad_norm": 58.5, + "learning_rate": 8.51014701524075e-08, + "log_odds": 10.589323043823242, + "log_odds_ratio": -0.11690760403871536, + "loss": 0.3614, + "rejected_geometric_mean": -11.11931037902832, + "step": 7406 + }, + { + "chosen_geometric_mean": -1.109655737876892, + "epoch": 1.83, + "grad_norm": 16.625, + "learning_rate": 8.484982188768864e-08, + "log_odds": 5.770557880401611, + "log_odds_ratio": -0.10270366072654724, + "loss": 0.2517, + "rejected_geometric_mean": -6.5392937660217285, + "step": 7407 + }, + { + "chosen_geometric_mean": -0.8657440543174744, + "epoch": 1.83, + "grad_norm": 2.234375, + "learning_rate": 8.459853981771016e-08, + "log_odds": 5.060905933380127, + "log_odds_ratio": -0.2652309834957123, + "loss": 0.2623, + "rejected_geometric_mean": -5.594280242919922, + "step": 7408 + }, + { + "chosen_geometric_mean": -0.8860397934913635, + "epoch": 1.83, + "grad_norm": 3.59375, + "learning_rate": 8.434762398057156e-08, + "log_odds": 6.028539180755615, + "log_odds_ratio": -0.13972093164920807, + "loss": 0.2551, + "rejected_geometric_mean": -6.493843078613281, + "step": 7409 + }, + { + "chosen_geometric_mean": -0.9730688333511353, + "epoch": 1.83, + "grad_norm": 2.296875, + "learning_rate": 8.409707441431852e-08, + "log_odds": 9.158265113830566, + "log_odds_ratio": -0.09330262243747711, + "loss": 0.2131, + "rejected_geometric_mean": -9.606926918029785, + "step": 7410 + }, + { + "chosen_geometric_mean": -0.7848078608512878, + "epoch": 1.83, + "grad_norm": 7.53125, + "learning_rate": 8.384689115694039e-08, + "log_odds": 16.03374671936035, + "log_odds_ratio": -1.6391309145546984e-06, + "loss": 0.2462, + "rejected_geometric_mean": -16.20138168334961, + "step": 7411 + }, + { + "chosen_geometric_mean": -0.9199564456939697, + "epoch": 1.84, + "grad_norm": 17.375, + "learning_rate": 8.359707424637042e-08, + "log_odds": 3.7905001640319824, + "log_odds_ratio": -0.3027247190475464, + "loss": 0.2803, + "rejected_geometric_mean": -4.419932842254639, + "step": 7412 + }, + { + "chosen_geometric_mean": -0.9904739856719971, + "epoch": 1.84, + "grad_norm": 11.9375, + "learning_rate": 8.334762372048749e-08, + "log_odds": 12.604591369628906, + "log_odds_ratio": -1.1265434295637533e-05, + "loss": 0.276, + "rejected_geometric_mean": -13.094795227050781, + "step": 7413 + }, + { + "chosen_geometric_mean": -0.9287043213844299, + "epoch": 1.84, + "grad_norm": 24.0, + "learning_rate": 8.309853961711356e-08, + "log_odds": 9.542899131774902, + "log_odds_ratio": -0.18361757695674896, + "loss": 0.2757, + "rejected_geometric_mean": -10.082906723022461, + "step": 7414 + }, + { + "chosen_geometric_mean": -0.9423361420631409, + "epoch": 1.84, + "grad_norm": 1.90625, + "learning_rate": 8.284982197401592e-08, + "log_odds": 6.884910583496094, + "log_odds_ratio": -0.13561804592609406, + "loss": 0.2316, + "rejected_geometric_mean": -7.427697658538818, + "step": 7415 + }, + { + "chosen_geometric_mean": -0.8964800238609314, + "epoch": 1.84, + "grad_norm": 1.8125, + "learning_rate": 8.26014708289069e-08, + "log_odds": 8.156517028808594, + "log_odds_ratio": -0.21269626915454865, + "loss": 0.1981, + "rejected_geometric_mean": -8.644161224365234, + "step": 7416 + }, + { + "chosen_geometric_mean": -0.9616408348083496, + "epoch": 1.84, + "grad_norm": 34.5, + "learning_rate": 8.235348621944195e-08, + "log_odds": 10.297306060791016, + "log_odds_ratio": -0.029459090903401375, + "loss": 0.3104, + "rejected_geometric_mean": -10.781662940979004, + "step": 7417 + }, + { + "chosen_geometric_mean": -0.903444230556488, + "epoch": 1.84, + "grad_norm": 1.8671875, + "learning_rate": 8.210586818322097e-08, + "log_odds": 8.011244773864746, + "log_odds_ratio": -0.0049991244450211525, + "loss": 0.2009, + "rejected_geometric_mean": -8.343791007995605, + "step": 7418 + }, + { + "chosen_geometric_mean": -0.9249045848846436, + "epoch": 1.84, + "grad_norm": 6.5625, + "learning_rate": 8.185861675778978e-08, + "log_odds": 8.023629188537598, + "log_odds_ratio": -0.3161924183368683, + "loss": 0.268, + "rejected_geometric_mean": -8.710369110107422, + "step": 7419 + }, + { + "chosen_geometric_mean": -0.9292373657226562, + "epoch": 1.84, + "grad_norm": 1.8203125, + "learning_rate": 8.16117319806367e-08, + "log_odds": 8.928627014160156, + "log_odds_ratio": -0.05225583165884018, + "loss": 0.2104, + "rejected_geometric_mean": -9.353719711303711, + "step": 7420 + }, + { + "chosen_geometric_mean": -1.1492314338684082, + "epoch": 1.84, + "grad_norm": 9.3125, + "learning_rate": 8.136521388919599e-08, + "log_odds": 14.637826919555664, + "log_odds_ratio": -0.1127619743347168, + "loss": 0.2296, + "rejected_geometric_mean": -15.404302597045898, + "step": 7421 + }, + { + "chosen_geometric_mean": -1.0157854557037354, + "epoch": 1.84, + "grad_norm": 4.46875, + "learning_rate": 8.111906252084579e-08, + "log_odds": 3.1929266452789307, + "log_odds_ratio": -0.35628318786621094, + "loss": 0.2546, + "rejected_geometric_mean": -3.958341121673584, + "step": 7422 + }, + { + "chosen_geometric_mean": -1.0454421043395996, + "epoch": 1.84, + "grad_norm": 17.375, + "learning_rate": 8.087327791290844e-08, + "log_odds": 6.20911979675293, + "log_odds_ratio": -0.009911952540278435, + "loss": 0.2303, + "rejected_geometric_mean": -6.816390037536621, + "step": 7423 + }, + { + "chosen_geometric_mean": -0.9236699342727661, + "epoch": 1.84, + "grad_norm": 4.53125, + "learning_rate": 8.062786010265028e-08, + "log_odds": 3.9078521728515625, + "log_odds_ratio": -0.06682376563549042, + "loss": 0.2032, + "rejected_geometric_mean": -4.366988658905029, + "step": 7424 + }, + { + "chosen_geometric_mean": -1.0568604469299316, + "epoch": 1.84, + "grad_norm": 47.75, + "learning_rate": 8.038280912728291e-08, + "log_odds": 6.967355728149414, + "log_odds_ratio": -0.1265091598033905, + "loss": 0.259, + "rejected_geometric_mean": -7.665300369262695, + "step": 7425 + }, + { + "chosen_geometric_mean": -0.9237403869628906, + "epoch": 1.84, + "grad_norm": 2.984375, + "learning_rate": 8.013812502396246e-08, + "log_odds": 3.1018948554992676, + "log_odds_ratio": -0.2848256230354309, + "loss": 0.2534, + "rejected_geometric_mean": -3.655855178833008, + "step": 7426 + }, + { + "chosen_geometric_mean": -0.9411095976829529, + "epoch": 1.84, + "grad_norm": 4.90625, + "learning_rate": 7.989380782978783e-08, + "log_odds": 8.11726188659668, + "log_odds_ratio": -0.14156673848628998, + "loss": 0.322, + "rejected_geometric_mean": -8.666595458984375, + "step": 7427 + }, + { + "chosen_geometric_mean": -0.9839215874671936, + "epoch": 1.84, + "grad_norm": 7.34375, + "learning_rate": 7.964985758180443e-08, + "log_odds": 14.616426467895508, + "log_odds_ratio": -0.0018335258355364203, + "loss": 0.2106, + "rejected_geometric_mean": -15.12672233581543, + "step": 7428 + }, + { + "chosen_geometric_mean": -0.9492552280426025, + "epoch": 1.84, + "grad_norm": 6.90625, + "learning_rate": 7.940627431700015e-08, + "log_odds": 11.90097427368164, + "log_odds_ratio": -0.1105649471282959, + "loss": 0.2604, + "rejected_geometric_mean": -12.422419548034668, + "step": 7429 + }, + { + "chosen_geometric_mean": -0.8182786703109741, + "epoch": 1.84, + "grad_norm": 7.59375, + "learning_rate": 7.916305807230878e-08, + "log_odds": 7.730767250061035, + "log_odds_ratio": -0.11513254046440125, + "loss": 0.2389, + "rejected_geometric_mean": -8.078201293945312, + "step": 7430 + }, + { + "chosen_geometric_mean": -0.7785196900367737, + "epoch": 1.84, + "grad_norm": 1.9765625, + "learning_rate": 7.892020888460722e-08, + "log_odds": 5.131006240844727, + "log_odds_ratio": -0.08737698197364807, + "loss": 0.2428, + "rejected_geometric_mean": -5.361644268035889, + "step": 7431 + }, + { + "chosen_geometric_mean": -1.1635348796844482, + "epoch": 1.84, + "grad_norm": 2.90625, + "learning_rate": 7.867772679071739e-08, + "log_odds": 12.224889755249023, + "log_odds_ratio": -0.047609467059373856, + "loss": 0.2634, + "rejected_geometric_mean": -13.031320571899414, + "step": 7432 + }, + { + "chosen_geometric_mean": -0.958060622215271, + "epoch": 1.84, + "grad_norm": 37.0, + "learning_rate": 7.843561182740516e-08, + "log_odds": 3.926220178604126, + "log_odds_ratio": -0.11513005942106247, + "loss": 0.2795, + "rejected_geometric_mean": -4.438558101654053, + "step": 7433 + }, + { + "chosen_geometric_mean": -0.7411688566207886, + "epoch": 1.84, + "grad_norm": 3.546875, + "learning_rate": 7.819386403138174e-08, + "log_odds": 4.07934045791626, + "log_odds_ratio": -0.23597551882266998, + "loss": 0.267, + "rejected_geometric_mean": -4.341656684875488, + "step": 7434 + }, + { + "chosen_geometric_mean": -0.93004310131073, + "epoch": 1.84, + "grad_norm": 2.0625, + "learning_rate": 7.795248343930084e-08, + "log_odds": 9.636665344238281, + "log_odds_ratio": -0.0014016213826835155, + "loss": 0.2894, + "rejected_geometric_mean": -10.052066802978516, + "step": 7435 + }, + { + "chosen_geometric_mean": -1.1286100149154663, + "epoch": 1.84, + "grad_norm": 12.6875, + "learning_rate": 7.771147008776209e-08, + "log_odds": 7.387807846069336, + "log_odds_ratio": -0.16946472227573395, + "loss": 0.2547, + "rejected_geometric_mean": -8.233403205871582, + "step": 7436 + }, + { + "chosen_geometric_mean": -1.0019158124923706, + "epoch": 1.84, + "grad_norm": 12.875, + "learning_rate": 7.74708240133093e-08, + "log_odds": 1.3634896278381348, + "log_odds_ratio": -0.3753373920917511, + "loss": 0.2776, + "rejected_geometric_mean": -2.1250979900360107, + "step": 7437 + }, + { + "chosen_geometric_mean": -0.8480899333953857, + "epoch": 1.84, + "grad_norm": 19.75, + "learning_rate": 7.72305452524294e-08, + "log_odds": 9.736883163452148, + "log_odds_ratio": -0.08334630727767944, + "loss": 0.2807, + "rejected_geometric_mean": -10.037585258483887, + "step": 7438 + }, + { + "chosen_geometric_mean": -0.8501764535903931, + "epoch": 1.84, + "grad_norm": 3.328125, + "learning_rate": 7.69906338415552e-08, + "log_odds": 5.48132848739624, + "log_odds_ratio": -0.20248378813266754, + "loss": 0.2433, + "rejected_geometric_mean": -5.899729251861572, + "step": 7439 + }, + { + "chosen_geometric_mean": -0.8567000031471252, + "epoch": 1.84, + "grad_norm": 9.25, + "learning_rate": 7.675108981706258e-08, + "log_odds": 9.681026458740234, + "log_odds_ratio": -0.035141706466674805, + "loss": 0.2585, + "rejected_geometric_mean": -10.003812789916992, + "step": 7440 + }, + { + "chosen_geometric_mean": -1.0113990306854248, + "epoch": 1.84, + "grad_norm": 2.515625, + "learning_rate": 7.651191321527168e-08, + "log_odds": 9.13896369934082, + "log_odds_ratio": -0.0024646297097206116, + "loss": 0.2632, + "rejected_geometric_mean": -9.649103164672852, + "step": 7441 + }, + { + "chosen_geometric_mean": -1.085532546043396, + "epoch": 1.84, + "grad_norm": 9.625, + "learning_rate": 7.62731040724482e-08, + "log_odds": 9.608407974243164, + "log_odds_ratio": -0.03945520520210266, + "loss": 0.2253, + "rejected_geometric_mean": -10.292603492736816, + "step": 7442 + }, + { + "chosen_geometric_mean": -1.037011981010437, + "epoch": 1.84, + "grad_norm": 2.21875, + "learning_rate": 7.603466242480151e-08, + "log_odds": 10.826688766479492, + "log_odds_ratio": -0.004114336799830198, + "loss": 0.2667, + "rejected_geometric_mean": -11.405078887939453, + "step": 7443 + }, + { + "chosen_geometric_mean": -0.8729941844940186, + "epoch": 1.84, + "grad_norm": 10.5625, + "learning_rate": 7.579658830848408e-08, + "log_odds": 14.600410461425781, + "log_odds_ratio": -0.10470720380544662, + "loss": 0.2511, + "rejected_geometric_mean": -14.962493896484375, + "step": 7444 + }, + { + "chosen_geometric_mean": -0.8920737504959106, + "epoch": 1.84, + "grad_norm": 10.4375, + "learning_rate": 7.555888175959453e-08, + "log_odds": 8.279993057250977, + "log_odds_ratio": -0.0023431864101439714, + "loss": 0.2524, + "rejected_geometric_mean": -8.623018264770508, + "step": 7445 + }, + { + "chosen_geometric_mean": -0.9643880128860474, + "epoch": 1.84, + "grad_norm": 13.25, + "learning_rate": 7.532154281417487e-08, + "log_odds": 6.5868706703186035, + "log_odds_ratio": -0.15543711185455322, + "loss": 0.3375, + "rejected_geometric_mean": -7.100647926330566, + "step": 7446 + }, + { + "chosen_geometric_mean": -0.9078658819198608, + "epoch": 1.84, + "grad_norm": 22.75, + "learning_rate": 7.508457150821019e-08, + "log_odds": 9.108211517333984, + "log_odds_ratio": -0.1968175768852234, + "loss": 0.2464, + "rejected_geometric_mean": -9.557796478271484, + "step": 7447 + }, + { + "chosen_geometric_mean": -0.9720194339752197, + "epoch": 1.84, + "grad_norm": 8.1875, + "learning_rate": 7.484796787763288e-08, + "log_odds": 10.718795776367188, + "log_odds_ratio": -0.103453628718853, + "loss": 0.2599, + "rejected_geometric_mean": -11.250490188598633, + "step": 7448 + }, + { + "chosen_geometric_mean": -1.0254734754562378, + "epoch": 1.84, + "grad_norm": 2.8125, + "learning_rate": 7.461173195831645e-08, + "log_odds": 3.2616801261901855, + "log_odds_ratio": -0.26229578256607056, + "loss": 0.2982, + "rejected_geometric_mean": -3.989290475845337, + "step": 7449 + }, + { + "chosen_geometric_mean": -1.053761601448059, + "epoch": 1.84, + "grad_norm": 3.40625, + "learning_rate": 7.437586378608003e-08, + "log_odds": 1.482069730758667, + "log_odds_ratio": -0.33542513847351074, + "loss": 0.2407, + "rejected_geometric_mean": -2.3216609954833984, + "step": 7450 + }, + { + "chosen_geometric_mean": -0.7033246159553528, + "epoch": 1.84, + "grad_norm": 6.21875, + "learning_rate": 7.41403633966875e-08, + "log_odds": 4.783651351928711, + "log_odds_ratio": -0.07504037767648697, + "loss": 0.2731, + "rejected_geometric_mean": -4.873959541320801, + "step": 7451 + }, + { + "chosen_geometric_mean": -1.0241501331329346, + "epoch": 1.85, + "grad_norm": 3.015625, + "learning_rate": 7.390523082584589e-08, + "log_odds": 8.317008972167969, + "log_odds_ratio": -0.10782041400671005, + "loss": 0.29, + "rejected_geometric_mean": -8.973984718322754, + "step": 7452 + }, + { + "chosen_geometric_mean": -1.017731785774231, + "epoch": 1.85, + "grad_norm": 2.109375, + "learning_rate": 7.367046610920692e-08, + "log_odds": 3.0657618045806885, + "log_odds_ratio": -0.25411349534988403, + "loss": 0.2706, + "rejected_geometric_mean": -3.7873117923736572, + "step": 7453 + }, + { + "chosen_geometric_mean": -1.1039080619812012, + "epoch": 1.85, + "grad_norm": 33.0, + "learning_rate": 7.343606928236685e-08, + "log_odds": 3.0352630615234375, + "log_odds_ratio": -0.17372959852218628, + "loss": 0.2852, + "rejected_geometric_mean": -3.8330981731414795, + "step": 7454 + }, + { + "chosen_geometric_mean": -0.8817843198776245, + "epoch": 1.85, + "grad_norm": 1.9921875, + "learning_rate": 7.320204038086587e-08, + "log_odds": 5.653972625732422, + "log_odds_ratio": -0.323565810918808, + "loss": 0.2246, + "rejected_geometric_mean": -6.285074234008789, + "step": 7455 + }, + { + "chosen_geometric_mean": -0.693561851978302, + "epoch": 1.85, + "grad_norm": 3.625, + "learning_rate": 7.296837944018809e-08, + "log_odds": 8.969903945922852, + "log_odds_ratio": -0.0319383330643177, + "loss": 0.2765, + "rejected_geometric_mean": -8.987568855285645, + "step": 7456 + }, + { + "chosen_geometric_mean": -0.9650037288665771, + "epoch": 1.85, + "grad_norm": 3.265625, + "learning_rate": 7.273508649576239e-08, + "log_odds": 13.282642364501953, + "log_odds_ratio": -0.14072471857070923, + "loss": 0.2629, + "rejected_geometric_mean": -13.866305351257324, + "step": 7457 + }, + { + "chosen_geometric_mean": -1.9815795421600342, + "epoch": 1.85, + "grad_norm": 43.75, + "learning_rate": 7.250216158296103e-08, + "log_odds": 8.139890670776367, + "log_odds_ratio": -0.07427907735109329, + "loss": 0.3535, + "rejected_geometric_mean": -9.821837425231934, + "step": 7458 + }, + { + "chosen_geometric_mean": -1.0621479749679565, + "epoch": 1.85, + "grad_norm": 4.40625, + "learning_rate": 7.22696047371016e-08, + "log_odds": 8.730942726135254, + "log_odds_ratio": -0.048754990100860596, + "loss": 0.2465, + "rejected_geometric_mean": -9.391032218933105, + "step": 7459 + }, + { + "chosen_geometric_mean": -1.0436831712722778, + "epoch": 1.85, + "grad_norm": 14.75, + "learning_rate": 7.203741599344533e-08, + "log_odds": 1.4337265491485596, + "log_odds_ratio": -0.4267064034938812, + "loss": 0.3415, + "rejected_geometric_mean": -2.339726209640503, + "step": 7460 + }, + { + "chosen_geometric_mean": -0.8604608178138733, + "epoch": 1.85, + "grad_norm": 2.53125, + "learning_rate": 7.180559538719712e-08, + "log_odds": 7.332714080810547, + "log_odds_ratio": -0.059492673724889755, + "loss": 0.2625, + "rejected_geometric_mean": -7.6542158126831055, + "step": 7461 + }, + { + "chosen_geometric_mean": -0.8389832377433777, + "epoch": 1.85, + "grad_norm": 3.125, + "learning_rate": 7.157414295350717e-08, + "log_odds": 9.194880485534668, + "log_odds_ratio": -0.021441161632537842, + "loss": 0.2335, + "rejected_geometric_mean": -9.456558227539062, + "step": 7462 + }, + { + "chosen_geometric_mean": -1.0813302993774414, + "epoch": 1.85, + "grad_norm": 2.15625, + "learning_rate": 7.134305872746882e-08, + "log_odds": 5.915003299713135, + "log_odds_ratio": -0.09313122928142548, + "loss": 0.2216, + "rejected_geometric_mean": -6.608726978302002, + "step": 7463 + }, + { + "chosen_geometric_mean": -1.1355775594711304, + "epoch": 1.85, + "grad_norm": 18.375, + "learning_rate": 7.11123427441196e-08, + "log_odds": 3.507129669189453, + "log_odds_ratio": -0.04375407099723816, + "loss": 0.2705, + "rejected_geometric_mean": -4.23798131942749, + "step": 7464 + }, + { + "chosen_geometric_mean": -0.9437336921691895, + "epoch": 1.85, + "grad_norm": 24.0, + "learning_rate": 7.088199503844262e-08, + "log_odds": 7.472733974456787, + "log_odds_ratio": -0.028706025332212448, + "loss": 0.257, + "rejected_geometric_mean": -7.8858842849731445, + "step": 7465 + }, + { + "chosen_geometric_mean": -1.0033859014511108, + "epoch": 1.85, + "grad_norm": 5.90625, + "learning_rate": 7.065201564536355e-08, + "log_odds": 11.458757400512695, + "log_odds_ratio": -0.0015317726647481322, + "loss": 0.2533, + "rejected_geometric_mean": -12.00401782989502, + "step": 7466 + }, + { + "chosen_geometric_mean": -0.9670085310935974, + "epoch": 1.85, + "grad_norm": 2.78125, + "learning_rate": 7.042240459975231e-08, + "log_odds": 11.172208786010742, + "log_odds_ratio": -0.0019002514891326427, + "loss": 0.2902, + "rejected_geometric_mean": -11.653728485107422, + "step": 7467 + }, + { + "chosen_geometric_mean": -1.2783564329147339, + "epoch": 1.85, + "grad_norm": 17.25, + "learning_rate": 7.019316193642433e-08, + "log_odds": 3.3905029296875, + "log_odds_ratio": -0.11004937440156937, + "loss": 0.2812, + "rejected_geometric_mean": -4.374419689178467, + "step": 7468 + }, + { + "chosen_geometric_mean": -1.0767183303833008, + "epoch": 1.85, + "grad_norm": 1.7421875, + "learning_rate": 6.996428769013742e-08, + "log_odds": 4.442831039428711, + "log_odds_ratio": -0.03588258847594261, + "loss": 0.2094, + "rejected_geometric_mean": -5.104808807373047, + "step": 7469 + }, + { + "chosen_geometric_mean": -0.9872777462005615, + "epoch": 1.85, + "grad_norm": 5.34375, + "learning_rate": 6.973578189559516e-08, + "log_odds": 4.375401496887207, + "log_odds_ratio": -0.1475118100643158, + "loss": 0.268, + "rejected_geometric_mean": -4.9963297843933105, + "step": 7470 + }, + { + "chosen_geometric_mean": -0.9498382806777954, + "epoch": 1.85, + "grad_norm": 2.265625, + "learning_rate": 6.95076445874443e-08, + "log_odds": 6.034520626068115, + "log_odds_ratio": -0.12304073572158813, + "loss": 0.2662, + "rejected_geometric_mean": -6.511030197143555, + "step": 7471 + }, + { + "chosen_geometric_mean": -0.8769257068634033, + "epoch": 1.85, + "grad_norm": 3.015625, + "learning_rate": 6.927987580027578e-08, + "log_odds": 5.254164695739746, + "log_odds_ratio": -0.26808232069015503, + "loss": 0.2422, + "rejected_geometric_mean": -5.814719200134277, + "step": 7472 + }, + { + "chosen_geometric_mean": -0.9578088521957397, + "epoch": 1.85, + "grad_norm": 2.71875, + "learning_rate": 6.905247556862476e-08, + "log_odds": 8.41500473022461, + "log_odds_ratio": -0.022878997027873993, + "loss": 0.2733, + "rejected_geometric_mean": -8.8956880569458, + "step": 7473 + }, + { + "chosen_geometric_mean": -0.9849125146865845, + "epoch": 1.85, + "grad_norm": 4.84375, + "learning_rate": 6.882544392697088e-08, + "log_odds": 9.3591947555542, + "log_odds_ratio": -0.02819712460041046, + "loss": 0.2582, + "rejected_geometric_mean": -9.889028549194336, + "step": 7474 + }, + { + "chosen_geometric_mean": -1.0143852233886719, + "epoch": 1.85, + "grad_norm": 2.59375, + "learning_rate": 6.859878090973743e-08, + "log_odds": 11.129758834838867, + "log_odds_ratio": -0.01429259404540062, + "loss": 0.2678, + "rejected_geometric_mean": -11.688446998596191, + "step": 7475 + }, + { + "chosen_geometric_mean": -0.856202244758606, + "epoch": 1.85, + "grad_norm": 22.375, + "learning_rate": 6.837248655129191e-08, + "log_odds": 10.591102600097656, + "log_odds_ratio": -0.0008876369101926684, + "loss": 0.2504, + "rejected_geometric_mean": -10.886007308959961, + "step": 7476 + }, + { + "chosen_geometric_mean": -0.9216631650924683, + "epoch": 1.85, + "grad_norm": 3.015625, + "learning_rate": 6.814656088594606e-08, + "log_odds": 5.4551262855529785, + "log_odds_ratio": -0.2477475106716156, + "loss": 0.2531, + "rejected_geometric_mean": -6.099678993225098, + "step": 7477 + }, + { + "chosen_geometric_mean": -0.8612391948699951, + "epoch": 1.85, + "grad_norm": 5.9375, + "learning_rate": 6.792100394795526e-08, + "log_odds": 1.9005286693572998, + "log_odds_ratio": -0.2627579867839813, + "loss": 0.2808, + "rejected_geometric_mean": -2.4413905143737793, + "step": 7478 + }, + { + "chosen_geometric_mean": -1.079089879989624, + "epoch": 1.85, + "grad_norm": 9.875, + "learning_rate": 6.769581577152018e-08, + "log_odds": 8.304156303405762, + "log_odds_ratio": -0.0811331644654274, + "loss": 0.2648, + "rejected_geometric_mean": -9.013754844665527, + "step": 7479 + }, + { + "chosen_geometric_mean": -0.9662408232688904, + "epoch": 1.85, + "grad_norm": 3.21875, + "learning_rate": 6.74709963907838e-08, + "log_odds": 9.974900245666504, + "log_odds_ratio": -0.0004728647181764245, + "loss": 0.25, + "rejected_geometric_mean": -10.451918601989746, + "step": 7480 + }, + { + "chosen_geometric_mean": -0.7832702994346619, + "epoch": 1.85, + "grad_norm": 7.15625, + "learning_rate": 6.724654583983498e-08, + "log_odds": 3.8577165603637695, + "log_odds_ratio": -0.0957489013671875, + "loss": 0.2308, + "rejected_geometric_mean": -4.099729061126709, + "step": 7481 + }, + { + "chosen_geometric_mean": -1.0682756900787354, + "epoch": 1.85, + "grad_norm": 5.125, + "learning_rate": 6.702246415270508e-08, + "log_odds": 4.650030612945557, + "log_odds_ratio": -0.14342591166496277, + "loss": 0.2546, + "rejected_geometric_mean": -5.3487935066223145, + "step": 7482 + }, + { + "chosen_geometric_mean": -0.9097472429275513, + "epoch": 1.85, + "grad_norm": 12.0, + "learning_rate": 6.679875136337111e-08, + "log_odds": 5.298966407775879, + "log_odds_ratio": -0.18076956272125244, + "loss": 0.269, + "rejected_geometric_mean": -5.803548812866211, + "step": 7483 + }, + { + "chosen_geometric_mean": -1.121164083480835, + "epoch": 1.85, + "grad_norm": 13.625, + "learning_rate": 6.657540750575231e-08, + "log_odds": 8.907232284545898, + "log_odds_ratio": -0.08973236382007599, + "loss": 0.3063, + "rejected_geometric_mean": -9.645172119140625, + "step": 7484 + }, + { + "chosen_geometric_mean": -1.0634765625, + "epoch": 1.85, + "grad_norm": 3.71875, + "learning_rate": 6.635243261371354e-08, + "log_odds": 4.4532670974731445, + "log_odds_ratio": -0.07220858335494995, + "loss": 0.258, + "rejected_geometric_mean": -5.122382164001465, + "step": 7485 + }, + { + "chosen_geometric_mean": -1.0362954139709473, + "epoch": 1.85, + "grad_norm": 5.375, + "learning_rate": 6.612982672106333e-08, + "log_odds": 3.493523120880127, + "log_odds_ratio": -0.13864515721797943, + "loss": 0.2435, + "rejected_geometric_mean": -4.194704532623291, + "step": 7486 + }, + { + "chosen_geometric_mean": -1.0771266222000122, + "epoch": 1.85, + "grad_norm": 14.5625, + "learning_rate": 6.590758986155326e-08, + "log_odds": 4.926846504211426, + "log_odds_ratio": -0.21815666556358337, + "loss": 0.2588, + "rejected_geometric_mean": -5.701191425323486, + "step": 7487 + }, + { + "chosen_geometric_mean": -1.0522890090942383, + "epoch": 1.85, + "grad_norm": 1.9453125, + "learning_rate": 6.568572206888086e-08, + "log_odds": 8.5370454788208, + "log_odds_ratio": -0.03985564783215523, + "loss": 0.2266, + "rejected_geometric_mean": -9.165696144104004, + "step": 7488 + }, + { + "chosen_geometric_mean": -0.9698129296302795, + "epoch": 1.85, + "grad_norm": 7.34375, + "learning_rate": 6.546422337668612e-08, + "log_odds": 12.844868659973145, + "log_odds_ratio": -0.04047036170959473, + "loss": 0.2682, + "rejected_geometric_mean": -13.344467163085938, + "step": 7489 + }, + { + "chosen_geometric_mean": -1.043804407119751, + "epoch": 1.85, + "grad_norm": 1.9921875, + "learning_rate": 6.524309381855304e-08, + "log_odds": 4.727969169616699, + "log_odds_ratio": -0.08309537172317505, + "loss": 0.2624, + "rejected_geometric_mean": -5.372204780578613, + "step": 7490 + }, + { + "chosen_geometric_mean": -0.8570436239242554, + "epoch": 1.85, + "grad_norm": 17.375, + "learning_rate": 6.50223334280109e-08, + "log_odds": 12.466655731201172, + "log_odds_ratio": -0.017449278384447098, + "loss": 0.2494, + "rejected_geometric_mean": -12.769909858703613, + "step": 7491 + }, + { + "chosen_geometric_mean": -1.0189958810806274, + "epoch": 1.85, + "grad_norm": 2.515625, + "learning_rate": 6.480194223853209e-08, + "log_odds": 6.166977882385254, + "log_odds_ratio": -0.3802047371864319, + "loss": 0.314, + "rejected_geometric_mean": -6.9334397315979, + "step": 7492 + }, + { + "chosen_geometric_mean": -0.964820384979248, + "epoch": 1.86, + "grad_norm": 19.375, + "learning_rate": 6.458192028353294e-08, + "log_odds": 8.217649459838867, + "log_odds_ratio": -0.15139813721179962, + "loss": 0.2612, + "rejected_geometric_mean": -8.762020111083984, + "step": 7493 + }, + { + "chosen_geometric_mean": -0.8583041429519653, + "epoch": 1.86, + "grad_norm": 3.28125, + "learning_rate": 6.436226759637454e-08, + "log_odds": 4.4732866287231445, + "log_odds_ratio": -0.18462355434894562, + "loss": 0.2566, + "rejected_geometric_mean": -4.8590474128723145, + "step": 7494 + }, + { + "chosen_geometric_mean": -1.1722779273986816, + "epoch": 1.86, + "grad_norm": 2.71875, + "learning_rate": 6.414298421036108e-08, + "log_odds": 3.4284541606903076, + "log_odds_ratio": -0.23045751452445984, + "loss": 0.2562, + "rejected_geometric_mean": -4.308298587799072, + "step": 7495 + }, + { + "chosen_geometric_mean": -1.202907681465149, + "epoch": 1.86, + "grad_norm": 2.0, + "learning_rate": 6.392407015874098e-08, + "log_odds": 13.178401947021484, + "log_odds_ratio": -0.006954899989068508, + "loss": 0.2573, + "rejected_geometric_mean": -14.010967254638672, + "step": 7496 + }, + { + "chosen_geometric_mean": -0.9427080154418945, + "epoch": 1.86, + "grad_norm": 12.375, + "learning_rate": 6.370552547470765e-08, + "log_odds": 6.9427690505981445, + "log_odds_ratio": -0.09806746244430542, + "loss": 0.2983, + "rejected_geometric_mean": -7.4410600662231445, + "step": 7497 + }, + { + "chosen_geometric_mean": -1.0428016185760498, + "epoch": 1.86, + "grad_norm": 2.484375, + "learning_rate": 6.348735019139712e-08, + "log_odds": 6.812654495239258, + "log_odds_ratio": -0.2116532027721405, + "loss": 0.2668, + "rejected_geometric_mean": -7.51597261428833, + "step": 7498 + }, + { + "chosen_geometric_mean": -0.9421135187149048, + "epoch": 1.86, + "grad_norm": 2.15625, + "learning_rate": 6.326954434189014e-08, + "log_odds": 9.179146766662598, + "log_odds_ratio": -0.0712464302778244, + "loss": 0.2465, + "rejected_geometric_mean": -9.587231636047363, + "step": 7499 + }, + { + "chosen_geometric_mean": -1.1503159999847412, + "epoch": 1.86, + "grad_norm": 10.9375, + "learning_rate": 6.30521079592114e-08, + "log_odds": 6.998824119567871, + "log_odds_ratio": -0.11787933111190796, + "loss": 0.2849, + "rejected_geometric_mean": -7.829596519470215, + "step": 7500 + }, + { + "chosen_geometric_mean": -1.3609387874603271, + "epoch": 1.86, + "grad_norm": 1.8046875, + "learning_rate": 6.283504107632926e-08, + "log_odds": 9.17929458618164, + "log_odds_ratio": -0.025868549942970276, + "loss": 0.2442, + "rejected_geometric_mean": -10.189247131347656, + "step": 7501 + }, + { + "chosen_geometric_mean": -1.0071706771850586, + "epoch": 1.86, + "grad_norm": 120.5, + "learning_rate": 6.261834372615628e-08, + "log_odds": 9.7147798538208, + "log_odds_ratio": -0.04462837800383568, + "loss": 0.2733, + "rejected_geometric_mean": -10.296623229980469, + "step": 7502 + }, + { + "chosen_geometric_mean": -0.9260671138763428, + "epoch": 1.86, + "grad_norm": 4.4375, + "learning_rate": 6.240201594154948e-08, + "log_odds": 8.876212120056152, + "log_odds_ratio": -0.12592017650604248, + "loss": 0.2642, + "rejected_geometric_mean": -9.357573509216309, + "step": 7503 + }, + { + "chosen_geometric_mean": -1.106178879737854, + "epoch": 1.86, + "grad_norm": 1.84375, + "learning_rate": 6.218605775530851e-08, + "log_odds": 7.350826263427734, + "log_odds_ratio": -0.015989316627383232, + "loss": 0.2335, + "rejected_geometric_mean": -8.042901992797852, + "step": 7504 + }, + { + "chosen_geometric_mean": -1.0885666608810425, + "epoch": 1.86, + "grad_norm": 2.9375, + "learning_rate": 6.197046920017879e-08, + "log_odds": 4.059699535369873, + "log_odds_ratio": -0.04507888853549957, + "loss": 0.2553, + "rejected_geometric_mean": -4.757508277893066, + "step": 7505 + }, + { + "chosen_geometric_mean": -0.8297504186630249, + "epoch": 1.86, + "grad_norm": 2.71875, + "learning_rate": 6.17552503088481e-08, + "log_odds": 9.847625732421875, + "log_odds_ratio": -0.10731177777051926, + "loss": 0.2102, + "rejected_geometric_mean": -10.215105056762695, + "step": 7506 + }, + { + "chosen_geometric_mean": -1.107944130897522, + "epoch": 1.86, + "grad_norm": 2.171875, + "learning_rate": 6.154040111394866e-08, + "log_odds": 4.951446533203125, + "log_odds_ratio": -0.052588656544685364, + "loss": 0.2478, + "rejected_geometric_mean": -5.687640190124512, + "step": 7507 + }, + { + "chosen_geometric_mean": -0.9166411757469177, + "epoch": 1.86, + "grad_norm": 1.921875, + "learning_rate": 6.13259216480569e-08, + "log_odds": 8.595349311828613, + "log_odds_ratio": -0.018549872562289238, + "loss": 0.2441, + "rejected_geometric_mean": -8.979711532592773, + "step": 7508 + }, + { + "chosen_geometric_mean": -0.8498927354812622, + "epoch": 1.86, + "grad_norm": 2.78125, + "learning_rate": 6.111181194369348e-08, + "log_odds": 7.3133087158203125, + "log_odds_ratio": -0.03321146219968796, + "loss": 0.2332, + "rejected_geometric_mean": -7.621915817260742, + "step": 7509 + }, + { + "chosen_geometric_mean": -1.078505277633667, + "epoch": 1.86, + "grad_norm": 56.25, + "learning_rate": 6.089807203332187e-08, + "log_odds": 4.21431303024292, + "log_odds_ratio": -0.5863096117973328, + "loss": 0.3065, + "rejected_geometric_mean": -5.101090431213379, + "step": 7510 + }, + { + "chosen_geometric_mean": -1.134597897529602, + "epoch": 1.86, + "grad_norm": 2.796875, + "learning_rate": 6.068470194935089e-08, + "log_odds": 5.801762104034424, + "log_odds_ratio": -0.1446351259946823, + "loss": 0.3058, + "rejected_geometric_mean": -6.5662055015563965, + "step": 7511 + }, + { + "chosen_geometric_mean": -0.9527562260627747, + "epoch": 1.86, + "grad_norm": 2.921875, + "learning_rate": 6.047170172413213e-08, + "log_odds": 2.661153793334961, + "log_odds_ratio": -0.16821828484535217, + "loss": 0.2585, + "rejected_geometric_mean": -3.254781723022461, + "step": 7512 + }, + { + "chosen_geometric_mean": -1.084852695465088, + "epoch": 1.86, + "grad_norm": 35.0, + "learning_rate": 6.025907138996117e-08, + "log_odds": 6.98306131362915, + "log_odds_ratio": -0.07845276594161987, + "loss": 0.2946, + "rejected_geometric_mean": -7.6949381828308105, + "step": 7513 + }, + { + "chosen_geometric_mean": -1.0588903427124023, + "epoch": 1.86, + "grad_norm": 1.875, + "learning_rate": 6.004681097907888e-08, + "log_odds": 8.995126724243164, + "log_odds_ratio": -0.015811093151569366, + "loss": 0.2156, + "rejected_geometric_mean": -9.59854507446289, + "step": 7514 + }, + { + "chosen_geometric_mean": -0.9102454781532288, + "epoch": 1.86, + "grad_norm": 4.09375, + "learning_rate": 5.98349205236684e-08, + "log_odds": 5.154048919677734, + "log_odds_ratio": -0.059707656502723694, + "loss": 0.2598, + "rejected_geometric_mean": -5.542856693267822, + "step": 7515 + }, + { + "chosen_geometric_mean": -1.064698576927185, + "epoch": 1.86, + "grad_norm": 31.0, + "learning_rate": 5.962340005585738e-08, + "log_odds": 1.094038724899292, + "log_odds_ratio": -0.3698376715183258, + "loss": 0.2845, + "rejected_geometric_mean": -2.003859519958496, + "step": 7516 + }, + { + "chosen_geometric_mean": -0.8255066871643066, + "epoch": 1.86, + "grad_norm": 49.5, + "learning_rate": 5.941224960771763e-08, + "log_odds": 5.316426753997803, + "log_odds_ratio": -0.08005936443805695, + "loss": 0.3296, + "rejected_geometric_mean": -5.611435413360596, + "step": 7517 + }, + { + "chosen_geometric_mean": -1.3170158863067627, + "epoch": 1.86, + "grad_norm": 3.4375, + "learning_rate": 5.9201469211264406e-08, + "log_odds": 1.07338547706604, + "log_odds_ratio": -0.3751514256000519, + "loss": 0.2723, + "rejected_geometric_mean": -2.258028984069824, + "step": 7518 + }, + { + "chosen_geometric_mean": -0.9623509049415588, + "epoch": 1.86, + "grad_norm": 2.25, + "learning_rate": 5.8991058898456854e-08, + "log_odds": 4.531497955322266, + "log_odds_ratio": -0.2886835038661957, + "loss": 0.2712, + "rejected_geometric_mean": -5.195590972900391, + "step": 7519 + }, + { + "chosen_geometric_mean": -1.0853002071380615, + "epoch": 1.86, + "grad_norm": 34.5, + "learning_rate": 5.878101870119918e-08, + "log_odds": 6.102141380310059, + "log_odds_ratio": -0.060415834188461304, + "loss": 0.2542, + "rejected_geometric_mean": -6.7926411628723145, + "step": 7520 + }, + { + "chosen_geometric_mean": -1.0339900255203247, + "epoch": 1.86, + "grad_norm": 9.875, + "learning_rate": 5.8571348651337566e-08, + "log_odds": 6.491805076599121, + "log_odds_ratio": -0.2258659154176712, + "loss": 0.2313, + "rejected_geometric_mean": -7.206738471984863, + "step": 7521 + }, + { + "chosen_geometric_mean": -0.8568437099456787, + "epoch": 1.86, + "grad_norm": 12.9375, + "learning_rate": 5.836204878066326e-08, + "log_odds": 17.684213638305664, + "log_odds_ratio": -5.006837909604656e-06, + "loss": 0.2936, + "rejected_geometric_mean": -17.958480834960938, + "step": 7522 + }, + { + "chosen_geometric_mean": -0.9649630784988403, + "epoch": 1.86, + "grad_norm": 19.875, + "learning_rate": 5.8153119120911425e-08, + "log_odds": 4.562532424926758, + "log_odds_ratio": -0.16296708583831787, + "loss": 0.2491, + "rejected_geometric_mean": -5.107416152954102, + "step": 7523 + }, + { + "chosen_geometric_mean": -0.8699858784675598, + "epoch": 1.86, + "grad_norm": 8.75, + "learning_rate": 5.7944559703760615e-08, + "log_odds": 1.3777011632919312, + "log_odds_ratio": -0.2304372787475586, + "loss": 0.2505, + "rejected_geometric_mean": -1.866126298904419, + "step": 7524 + }, + { + "chosen_geometric_mean": -1.044778823852539, + "epoch": 1.86, + "grad_norm": 3.359375, + "learning_rate": 5.773637056083331e-08, + "log_odds": 4.0904927253723145, + "log_odds_ratio": -0.19044309854507446, + "loss": 0.2767, + "rejected_geometric_mean": -4.70171594619751, + "step": 7525 + }, + { + "chosen_geometric_mean": -0.9455933570861816, + "epoch": 1.86, + "grad_norm": 2.53125, + "learning_rate": 5.75285517236962e-08, + "log_odds": 6.574906349182129, + "log_odds_ratio": -0.025212669745087624, + "loss": 0.2516, + "rejected_geometric_mean": -7.036348342895508, + "step": 7526 + }, + { + "chosen_geometric_mean": -0.9280314445495605, + "epoch": 1.86, + "grad_norm": 27.375, + "learning_rate": 5.7321103223859364e-08, + "log_odds": 7.504452228546143, + "log_odds_ratio": -0.10846596211194992, + "loss": 0.2795, + "rejected_geometric_mean": -7.962733268737793, + "step": 7527 + }, + { + "chosen_geometric_mean": -1.1441928148269653, + "epoch": 1.86, + "grad_norm": 4.15625, + "learning_rate": 5.711402509277764e-08, + "log_odds": 9.61426067352295, + "log_odds_ratio": -0.10324014723300934, + "loss": 0.2995, + "rejected_geometric_mean": -10.420637130737305, + "step": 7528 + }, + { + "chosen_geometric_mean": -1.1022894382476807, + "epoch": 1.86, + "grad_norm": 26.375, + "learning_rate": 5.690731736184813e-08, + "log_odds": 5.354197978973389, + "log_odds_ratio": -0.132344588637352, + "loss": 0.2529, + "rejected_geometric_mean": -6.126727104187012, + "step": 7529 + }, + { + "chosen_geometric_mean": -1.0764071941375732, + "epoch": 1.86, + "grad_norm": 5.5625, + "learning_rate": 5.670098006241326e-08, + "log_odds": 8.17403793334961, + "log_odds_ratio": -0.13305456936359406, + "loss": 0.2889, + "rejected_geometric_mean": -8.88166618347168, + "step": 7530 + }, + { + "chosen_geometric_mean": -0.741867184638977, + "epoch": 1.86, + "grad_norm": 2.046875, + "learning_rate": 5.649501322575829e-08, + "log_odds": 6.271383285522461, + "log_odds_ratio": -0.13950088620185852, + "loss": 0.2353, + "rejected_geometric_mean": -6.449006080627441, + "step": 7531 + }, + { + "chosen_geometric_mean": -0.9626538157463074, + "epoch": 1.86, + "grad_norm": 4.8125, + "learning_rate": 5.628941688311324e-08, + "log_odds": 8.896381378173828, + "log_odds_ratio": -0.1087467297911644, + "loss": 0.271, + "rejected_geometric_mean": -9.419013023376465, + "step": 7532 + }, + { + "chosen_geometric_mean": -0.9893263578414917, + "epoch": 1.87, + "grad_norm": 34.75, + "learning_rate": 5.608419106565122e-08, + "log_odds": 4.267629623413086, + "log_odds_ratio": -0.06086330860853195, + "loss": 0.2499, + "rejected_geometric_mean": -4.782281398773193, + "step": 7533 + }, + { + "chosen_geometric_mean": -0.8421576023101807, + "epoch": 1.87, + "grad_norm": 6.125, + "learning_rate": 5.5879335804489564e-08, + "log_odds": 11.578604698181152, + "log_odds_ratio": -0.02511310577392578, + "loss": 0.2556, + "rejected_geometric_mean": -11.871201515197754, + "step": 7534 + }, + { + "chosen_geometric_mean": -1.0437225103378296, + "epoch": 1.87, + "grad_norm": 3.625, + "learning_rate": 5.567485113068871e-08, + "log_odds": 2.539548873901367, + "log_odds_ratio": -0.09799125790596008, + "loss": 0.2611, + "rejected_geometric_mean": -3.184905529022217, + "step": 7535 + }, + { + "chosen_geometric_mean": -1.1814215183258057, + "epoch": 1.87, + "grad_norm": 30.625, + "learning_rate": 5.5470737075253854e-08, + "log_odds": 12.652976989746094, + "log_odds_ratio": -0.08134721219539642, + "loss": 0.3568, + "rejected_geometric_mean": -13.474468231201172, + "step": 7536 + }, + { + "chosen_geometric_mean": -1.093321442604065, + "epoch": 1.87, + "grad_norm": 37.25, + "learning_rate": 5.526699366913357e-08, + "log_odds": 11.132583618164062, + "log_odds_ratio": -0.06623975932598114, + "loss": 0.327, + "rejected_geometric_mean": -11.81998062133789, + "step": 7537 + }, + { + "chosen_geometric_mean": -1.0511455535888672, + "epoch": 1.87, + "grad_norm": 2.171875, + "learning_rate": 5.5063620943220375e-08, + "log_odds": 5.3191423416137695, + "log_odds_ratio": -0.2401035577058792, + "loss": 0.2614, + "rejected_geometric_mean": -6.062273979187012, + "step": 7538 + }, + { + "chosen_geometric_mean": -1.0201375484466553, + "epoch": 1.87, + "grad_norm": 45.75, + "learning_rate": 5.486061892835015e-08, + "log_odds": 9.215084075927734, + "log_odds_ratio": -0.02127167396247387, + "loss": 0.2698, + "rejected_geometric_mean": -9.750662803649902, + "step": 7539 + }, + { + "chosen_geometric_mean": -0.9959726333618164, + "epoch": 1.87, + "grad_norm": 13.25, + "learning_rate": 5.4657987655303e-08, + "log_odds": 6.496464729309082, + "log_odds_ratio": -0.23720042407512665, + "loss": 0.3409, + "rejected_geometric_mean": -7.185710430145264, + "step": 7540 + }, + { + "chosen_geometric_mean": -0.901051938533783, + "epoch": 1.87, + "grad_norm": 9.4375, + "learning_rate": 5.4455727154802964e-08, + "log_odds": 8.603326797485352, + "log_odds_ratio": -0.08328012377023697, + "loss": 0.26, + "rejected_geometric_mean": -9.024678230285645, + "step": 7541 + }, + { + "chosen_geometric_mean": -0.9997640252113342, + "epoch": 1.87, + "grad_norm": 7.78125, + "learning_rate": 5.425383745751689e-08, + "log_odds": 10.767765045166016, + "log_odds_ratio": -0.12351862341165543, + "loss": 0.2717, + "rejected_geometric_mean": -11.34910774230957, + "step": 7542 + }, + { + "chosen_geometric_mean": -0.7261881232261658, + "epoch": 1.87, + "grad_norm": 9.1875, + "learning_rate": 5.4052318594056964e-08, + "log_odds": 8.227492332458496, + "log_odds_ratio": -0.1364717185497284, + "loss": 0.2635, + "rejected_geometric_mean": -8.361180305480957, + "step": 7543 + }, + { + "chosen_geometric_mean": -0.8811954259872437, + "epoch": 1.87, + "grad_norm": 3.5625, + "learning_rate": 5.385117059497791e-08, + "log_odds": 2.366572141647339, + "log_odds_ratio": -0.3359520435333252, + "loss": 0.3, + "rejected_geometric_mean": -2.898149251937866, + "step": 7544 + }, + { + "chosen_geometric_mean": -0.9863991737365723, + "epoch": 1.87, + "grad_norm": 6.46875, + "learning_rate": 5.3650393490778106e-08, + "log_odds": 8.608455657958984, + "log_odds_ratio": -0.01589687541127205, + "loss": 0.2259, + "rejected_geometric_mean": -9.106459617614746, + "step": 7545 + }, + { + "chosen_geometric_mean": -0.9522886276245117, + "epoch": 1.87, + "grad_norm": 56.0, + "learning_rate": 5.344998731190099e-08, + "log_odds": 7.020383358001709, + "log_odds_ratio": -0.13203109800815582, + "loss": 0.2803, + "rejected_geometric_mean": -7.525078296661377, + "step": 7546 + }, + { + "chosen_geometric_mean": -0.9135845303535461, + "epoch": 1.87, + "grad_norm": 2.234375, + "learning_rate": 5.324995208873251e-08, + "log_odds": 10.133259773254395, + "log_odds_ratio": -0.046162791550159454, + "loss": 0.2607, + "rejected_geometric_mean": -10.566579818725586, + "step": 7547 + }, + { + "chosen_geometric_mean": -0.7289351224899292, + "epoch": 1.87, + "grad_norm": 63.5, + "learning_rate": 5.30502878516026e-08, + "log_odds": 7.214095115661621, + "log_odds_ratio": -0.025421367958188057, + "loss": 0.2952, + "rejected_geometric_mean": -7.299412727355957, + "step": 7548 + }, + { + "chosen_geometric_mean": -0.824051558971405, + "epoch": 1.87, + "grad_norm": 10.0, + "learning_rate": 5.285099463078591e-08, + "log_odds": 3.540151596069336, + "log_odds_ratio": -0.27529194951057434, + "loss": 0.2349, + "rejected_geometric_mean": -4.0109543800354, + "step": 7549 + }, + { + "chosen_geometric_mean": -1.1220486164093018, + "epoch": 1.87, + "grad_norm": 34.5, + "learning_rate": 5.265207245649911e-08, + "log_odds": 6.083709716796875, + "log_odds_ratio": -0.20449453592300415, + "loss": 0.2819, + "rejected_geometric_mean": -6.91734504699707, + "step": 7550 + }, + { + "chosen_geometric_mean": -1.4610354900360107, + "epoch": 1.87, + "grad_norm": 46.75, + "learning_rate": 5.245352135890419e-08, + "log_odds": 7.805027961730957, + "log_odds_ratio": -0.05880121886730194, + "loss": 0.2477, + "rejected_geometric_mean": -8.96934986114502, + "step": 7551 + }, + { + "chosen_geometric_mean": -1.0594933032989502, + "epoch": 1.87, + "grad_norm": 7.46875, + "learning_rate": 5.22553413681065e-08, + "log_odds": 9.420415878295898, + "log_odds_ratio": -0.031836990267038345, + "loss": 0.2624, + "rejected_geometric_mean": -10.066465377807617, + "step": 7552 + }, + { + "chosen_geometric_mean": -1.4058218002319336, + "epoch": 1.87, + "grad_norm": 25.5, + "learning_rate": 5.205753251415424e-08, + "log_odds": 5.135100841522217, + "log_odds_ratio": -0.20052656531333923, + "loss": 0.3243, + "rejected_geometric_mean": -6.346575736999512, + "step": 7553 + }, + { + "chosen_geometric_mean": -0.9837476015090942, + "epoch": 1.87, + "grad_norm": 3.734375, + "learning_rate": 5.1860094827040355e-08, + "log_odds": 9.174128532409668, + "log_odds_ratio": -0.0031539916526526213, + "loss": 0.2653, + "rejected_geometric_mean": -9.673887252807617, + "step": 7554 + }, + { + "chosen_geometric_mean": -1.1468422412872314, + "epoch": 1.87, + "grad_norm": 35.0, + "learning_rate": 5.166302833670089e-08, + "log_odds": 5.354355812072754, + "log_odds_ratio": -0.2432575523853302, + "loss": 0.2524, + "rejected_geometric_mean": -6.271699905395508, + "step": 7555 + }, + { + "chosen_geometric_mean": -0.9727790355682373, + "epoch": 1.87, + "grad_norm": 17.75, + "learning_rate": 5.146633307301613e-08, + "log_odds": 9.123726844787598, + "log_odds_ratio": -0.017522737383842468, + "loss": 0.2645, + "rejected_geometric_mean": -9.60810661315918, + "step": 7556 + }, + { + "chosen_geometric_mean": -1.1822563409805298, + "epoch": 1.87, + "grad_norm": 11.1875, + "learning_rate": 5.1270009065809426e-08, + "log_odds": 6.062778472900391, + "log_odds_ratio": -0.1876811683177948, + "loss": 0.2565, + "rejected_geometric_mean": -6.96380615234375, + "step": 7557 + }, + { + "chosen_geometric_mean": -0.9064545631408691, + "epoch": 1.87, + "grad_norm": 5.03125, + "learning_rate": 5.107405634484891e-08, + "log_odds": 11.741207122802734, + "log_odds_ratio": -0.004339838400483131, + "loss": 0.2524, + "rejected_geometric_mean": -12.110490798950195, + "step": 7558 + }, + { + "chosen_geometric_mean": -1.0179634094238281, + "epoch": 1.87, + "grad_norm": 2.28125, + "learning_rate": 5.0878474939844994e-08, + "log_odds": 3.774387836456299, + "log_odds_ratio": -0.11001502722501755, + "loss": 0.212, + "rejected_geometric_mean": -4.401948928833008, + "step": 7559 + }, + { + "chosen_geometric_mean": -1.1741821765899658, + "epoch": 1.87, + "grad_norm": 20.125, + "learning_rate": 5.0683264880452833e-08, + "log_odds": 9.710836410522461, + "log_odds_ratio": -0.009584147483110428, + "loss": 0.2554, + "rejected_geometric_mean": -10.503274917602539, + "step": 7560 + }, + { + "chosen_geometric_mean": -1.0163654088974, + "epoch": 1.87, + "grad_norm": 2.0, + "learning_rate": 5.048842619627098e-08, + "log_odds": 12.000301361083984, + "log_odds_ratio": -0.10502661764621735, + "loss": 0.2287, + "rejected_geometric_mean": -12.58004093170166, + "step": 7561 + }, + { + "chosen_geometric_mean": -0.7344990968704224, + "epoch": 1.87, + "grad_norm": 23.625, + "learning_rate": 5.029395891684136e-08, + "log_odds": 12.02210521697998, + "log_odds_ratio": -0.10720089823007584, + "loss": 0.258, + "rejected_geometric_mean": -12.214859008789062, + "step": 7562 + }, + { + "chosen_geometric_mean": -1.209602952003479, + "epoch": 1.87, + "grad_norm": 22.375, + "learning_rate": 5.0099863071650376e-08, + "log_odds": 9.154705047607422, + "log_odds_ratio": -0.142939493060112, + "loss": 0.3566, + "rejected_geometric_mean": -10.0741548538208, + "step": 7563 + }, + { + "chosen_geometric_mean": -1.1458978652954102, + "epoch": 1.87, + "grad_norm": 59.0, + "learning_rate": 4.990613869012728e-08, + "log_odds": 10.156342506408691, + "log_odds_ratio": -0.003865680191665888, + "loss": 0.2514, + "rejected_geometric_mean": -10.889183044433594, + "step": 7564 + }, + { + "chosen_geometric_mean": -1.0830411911010742, + "epoch": 1.87, + "grad_norm": 3.484375, + "learning_rate": 4.9712785801645234e-08, + "log_odds": 5.024090766906738, + "log_odds_ratio": -0.08726230263710022, + "loss": 0.2631, + "rejected_geometric_mean": -5.750500679016113, + "step": 7565 + }, + { + "chosen_geometric_mean": -0.7358051538467407, + "epoch": 1.87, + "grad_norm": 6.71875, + "learning_rate": 4.9519804435521624e-08, + "log_odds": 8.79655933380127, + "log_odds_ratio": -0.02122577279806137, + "loss": 0.2251, + "rejected_geometric_mean": -8.89912223815918, + "step": 7566 + }, + { + "chosen_geometric_mean": -0.9533535242080688, + "epoch": 1.87, + "grad_norm": 59.75, + "learning_rate": 4.932719462101665e-08, + "log_odds": 5.2551727294921875, + "log_odds_ratio": -0.12064597010612488, + "loss": 0.3076, + "rejected_geometric_mean": -5.790078163146973, + "step": 7567 + }, + { + "chosen_geometric_mean": -0.8934257626533508, + "epoch": 1.87, + "grad_norm": 2.25, + "learning_rate": 4.913495638733445e-08, + "log_odds": 12.994552612304688, + "log_odds_ratio": -0.0031217201612889767, + "loss": 0.266, + "rejected_geometric_mean": -13.344210624694824, + "step": 7568 + }, + { + "chosen_geometric_mean": -1.04606032371521, + "epoch": 1.87, + "grad_norm": 17.625, + "learning_rate": 4.894308976362366e-08, + "log_odds": 7.521966934204102, + "log_odds_ratio": -0.2426515370607376, + "loss": 0.2984, + "rejected_geometric_mean": -8.285004615783691, + "step": 7569 + }, + { + "chosen_geometric_mean": -0.7693573236465454, + "epoch": 1.87, + "grad_norm": 1.640625, + "learning_rate": 4.875159477897545e-08, + "log_odds": 6.935296058654785, + "log_odds_ratio": -0.13196733593940735, + "loss": 0.1977, + "rejected_geometric_mean": -7.187562942504883, + "step": 7570 + }, + { + "chosen_geometric_mean": -0.9306740760803223, + "epoch": 1.87, + "grad_norm": 72.0, + "learning_rate": 4.8560471462424655e-08, + "log_odds": 9.640299797058105, + "log_odds_ratio": -0.1793801188468933, + "loss": 0.2437, + "rejected_geometric_mean": -10.172547340393066, + "step": 7571 + }, + { + "chosen_geometric_mean": -0.9444773197174072, + "epoch": 1.87, + "grad_norm": 10.25, + "learning_rate": 4.836971984295086e-08, + "log_odds": 6.615431308746338, + "log_odds_ratio": -0.18065696954727173, + "loss": 0.2669, + "rejected_geometric_mean": -7.209045886993408, + "step": 7572 + }, + { + "chosen_geometric_mean": -0.8422295451164246, + "epoch": 1.87, + "grad_norm": 4.5625, + "learning_rate": 4.817933994947621e-08, + "log_odds": 11.711130142211914, + "log_odds_ratio": -0.043673500418663025, + "loss": 0.2723, + "rejected_geometric_mean": -11.99649715423584, + "step": 7573 + }, + { + "chosen_geometric_mean": -0.9263152480125427, + "epoch": 1.88, + "grad_norm": 2.875, + "learning_rate": 4.798933181086651e-08, + "log_odds": 4.002444744110107, + "log_odds_ratio": -0.14010635018348694, + "loss": 0.2435, + "rejected_geometric_mean": -4.5006914138793945, + "step": 7574 + }, + { + "chosen_geometric_mean": -0.861592173576355, + "epoch": 1.88, + "grad_norm": 15.9375, + "learning_rate": 4.7799695455932606e-08, + "log_odds": 8.360727310180664, + "log_odds_ratio": -0.11143748462200165, + "loss": 0.278, + "rejected_geometric_mean": -8.794866561889648, + "step": 7575 + }, + { + "chosen_geometric_mean": -1.5008180141448975, + "epoch": 1.88, + "grad_norm": 22.375, + "learning_rate": 4.7610430913426763e-08, + "log_odds": 6.464774131774902, + "log_odds_ratio": -0.108091801404953, + "loss": 0.2719, + "rejected_geometric_mean": -7.594432830810547, + "step": 7576 + }, + { + "chosen_geometric_mean": -1.0626283884048462, + "epoch": 1.88, + "grad_norm": 20.625, + "learning_rate": 4.742153821204687e-08, + "log_odds": 4.144500255584717, + "log_odds_ratio": -0.12519067525863647, + "loss": 0.3527, + "rejected_geometric_mean": -4.846151351928711, + "step": 7577 + }, + { + "chosen_geometric_mean": -0.940054178237915, + "epoch": 1.88, + "grad_norm": 22.0, + "learning_rate": 4.7233017380433346e-08, + "log_odds": 9.736336708068848, + "log_odds_ratio": -0.10853559523820877, + "loss": 0.2761, + "rejected_geometric_mean": -10.258957862854004, + "step": 7578 + }, + { + "chosen_geometric_mean": -0.9264436364173889, + "epoch": 1.88, + "grad_norm": 5.21875, + "learning_rate": 4.704486844716999e-08, + "log_odds": 8.698211669921875, + "log_odds_ratio": -0.31944558024406433, + "loss": 0.2558, + "rejected_geometric_mean": -9.39205551147461, + "step": 7579 + }, + { + "chosen_geometric_mean": -1.2332746982574463, + "epoch": 1.88, + "grad_norm": 4.25, + "learning_rate": 4.685709144078565e-08, + "log_odds": 6.740330696105957, + "log_odds_ratio": -0.13328871130943298, + "loss": 0.303, + "rejected_geometric_mean": -7.645493984222412, + "step": 7580 + }, + { + "chosen_geometric_mean": -1.209578514099121, + "epoch": 1.88, + "grad_norm": 16.0, + "learning_rate": 4.666968638975089e-08, + "log_odds": 8.817829132080078, + "log_odds_ratio": -0.0336284413933754, + "loss": 0.2906, + "rejected_geometric_mean": -9.669343948364258, + "step": 7581 + }, + { + "chosen_geometric_mean": -1.3205660581588745, + "epoch": 1.88, + "grad_norm": 4.96875, + "learning_rate": 4.648265332248131e-08, + "log_odds": 9.079570770263672, + "log_odds_ratio": -0.05307072773575783, + "loss": 0.2532, + "rejected_geometric_mean": -10.099263191223145, + "step": 7582 + }, + { + "chosen_geometric_mean": -1.2282657623291016, + "epoch": 1.88, + "grad_norm": 6.1875, + "learning_rate": 4.629599226733561e-08, + "log_odds": 7.2660298347473145, + "log_odds_ratio": -0.11421898007392883, + "loss": 0.3024, + "rejected_geometric_mean": -8.1390962600708, + "step": 7583 + }, + { + "chosen_geometric_mean": -0.9127357602119446, + "epoch": 1.88, + "grad_norm": 2.46875, + "learning_rate": 4.610970325261588e-08, + "log_odds": 5.869287014007568, + "log_odds_ratio": -0.1594814956188202, + "loss": 0.2574, + "rejected_geometric_mean": -6.369872570037842, + "step": 7584 + }, + { + "chosen_geometric_mean": -0.7973852157592773, + "epoch": 1.88, + "grad_norm": 26.75, + "learning_rate": 4.592378630656813e-08, + "log_odds": 7.6087493896484375, + "log_odds_ratio": -0.14306975901126862, + "loss": 0.241, + "rejected_geometric_mean": -7.911287307739258, + "step": 7585 + }, + { + "chosen_geometric_mean": -0.9798672199249268, + "epoch": 1.88, + "grad_norm": 59.0, + "learning_rate": 4.573824145738176e-08, + "log_odds": 7.4646077156066895, + "log_odds_ratio": -0.005084404721856117, + "loss": 0.3303, + "rejected_geometric_mean": -7.93962287902832, + "step": 7586 + }, + { + "chosen_geometric_mean": -0.9304403066635132, + "epoch": 1.88, + "grad_norm": 5.21875, + "learning_rate": 4.55530687331901e-08, + "log_odds": 4.903213977813721, + "log_odds_ratio": -0.08758128434419632, + "loss": 0.2894, + "rejected_geometric_mean": -5.368236064910889, + "step": 7587 + }, + { + "chosen_geometric_mean": -0.9593489170074463, + "epoch": 1.88, + "grad_norm": 31.625, + "learning_rate": 4.53682681620693e-08, + "log_odds": 7.873891830444336, + "log_odds_ratio": -0.04734551161527634, + "loss": 0.2569, + "rejected_geometric_mean": -8.311586380004883, + "step": 7588 + }, + { + "chosen_geometric_mean": -0.8063094615936279, + "epoch": 1.88, + "grad_norm": 17.375, + "learning_rate": 4.518383977203999e-08, + "log_odds": 10.82394027709961, + "log_odds_ratio": -0.0029182909056544304, + "loss": 0.2765, + "rejected_geometric_mean": -11.032963752746582, + "step": 7589 + }, + { + "chosen_geometric_mean": -0.7803709506988525, + "epoch": 1.88, + "grad_norm": 4.8125, + "learning_rate": 4.4999783591065936e-08, + "log_odds": 5.216182231903076, + "log_odds_ratio": -0.041443999856710434, + "loss": 0.2482, + "rejected_geometric_mean": -5.395148754119873, + "step": 7590 + }, + { + "chosen_geometric_mean": -1.1361464262008667, + "epoch": 1.88, + "grad_norm": 2.140625, + "learning_rate": 4.4816099647053965e-08, + "log_odds": 14.877246856689453, + "log_odds_ratio": -0.006293578539043665, + "loss": 0.2371, + "rejected_geometric_mean": -15.627016067504883, + "step": 7591 + }, + { + "chosen_geometric_mean": -1.161753535270691, + "epoch": 1.88, + "grad_norm": 2.265625, + "learning_rate": 4.4632787967855416e-08, + "log_odds": 10.880617141723633, + "log_odds_ratio": -0.0028608778957277536, + "loss": 0.2574, + "rejected_geometric_mean": -11.66020393371582, + "step": 7592 + }, + { + "chosen_geometric_mean": -1.8321478366851807, + "epoch": 1.88, + "grad_norm": 29.875, + "learning_rate": 4.4449848581265e-08, + "log_odds": 8.87702751159668, + "log_odds_ratio": -0.05258835852146149, + "loss": 0.2461, + "rejected_geometric_mean": -10.436391830444336, + "step": 7593 + }, + { + "chosen_geometric_mean": -0.9910799860954285, + "epoch": 1.88, + "grad_norm": 2.5, + "learning_rate": 4.426728151501997e-08, + "log_odds": 7.62282657623291, + "log_odds_ratio": -0.03186151757836342, + "loss": 0.2503, + "rejected_geometric_mean": -8.155147552490234, + "step": 7594 + }, + { + "chosen_geometric_mean": -0.9132986664772034, + "epoch": 1.88, + "grad_norm": 4.875, + "learning_rate": 4.408508679680207e-08, + "log_odds": 5.878812789916992, + "log_odds_ratio": -0.269386351108551, + "loss": 0.2685, + "rejected_geometric_mean": -6.483306407928467, + "step": 7595 + }, + { + "chosen_geometric_mean": -0.9375624656677246, + "epoch": 1.88, + "grad_norm": 4.46875, + "learning_rate": 4.390326445423698e-08, + "log_odds": 5.587258338928223, + "log_odds_ratio": -0.21514564752578735, + "loss": 0.2248, + "rejected_geometric_mean": -6.157099723815918, + "step": 7596 + }, + { + "chosen_geometric_mean": -1.0586597919464111, + "epoch": 1.88, + "grad_norm": 5.0625, + "learning_rate": 4.372181451489294e-08, + "log_odds": 2.5979156494140625, + "log_odds_ratio": -0.10155795514583588, + "loss": 0.2596, + "rejected_geometric_mean": -3.238229274749756, + "step": 7597 + }, + { + "chosen_geometric_mean": -1.1343668699264526, + "epoch": 1.88, + "grad_norm": 2.21875, + "learning_rate": 4.3540737006282084e-08, + "log_odds": 8.703603744506836, + "log_odds_ratio": -0.010307016782462597, + "loss": 0.2819, + "rejected_geometric_mean": -9.44128131866455, + "step": 7598 + }, + { + "chosen_geometric_mean": -0.9861891865730286, + "epoch": 1.88, + "grad_norm": 2.28125, + "learning_rate": 4.336003195585997e-08, + "log_odds": 9.238774299621582, + "log_odds_ratio": -0.1633683145046234, + "loss": 0.2409, + "rejected_geometric_mean": -9.82434368133545, + "step": 7599 + }, + { + "chosen_geometric_mean": -0.9328838586807251, + "epoch": 1.88, + "grad_norm": 7.03125, + "learning_rate": 4.3179699391026063e-08, + "log_odds": 12.699448585510254, + "log_odds_ratio": -0.001540860626846552, + "loss": 0.2538, + "rejected_geometric_mean": -13.102299690246582, + "step": 7600 + }, + { + "chosen_geometric_mean": -0.9556534290313721, + "epoch": 1.88, + "grad_norm": 1.84375, + "learning_rate": 4.2999739339122936e-08, + "log_odds": 7.791890621185303, + "log_odds_ratio": -0.08935358375310898, + "loss": 0.2117, + "rejected_geometric_mean": -8.278635025024414, + "step": 7601 + }, + { + "chosen_geometric_mean": -2.0357613563537598, + "epoch": 1.88, + "grad_norm": 29.5, + "learning_rate": 4.282015182743709e-08, + "log_odds": 8.100214004516602, + "log_odds_ratio": -0.5292476415634155, + "loss": 0.3001, + "rejected_geometric_mean": -9.886634826660156, + "step": 7602 + }, + { + "chosen_geometric_mean": -0.8244830369949341, + "epoch": 1.88, + "grad_norm": 3.40625, + "learning_rate": 4.2640936883198426e-08, + "log_odds": 7.133505344390869, + "log_odds_ratio": -0.037124767899513245, + "loss": 0.2816, + "rejected_geometric_mean": -7.397724628448486, + "step": 7603 + }, + { + "chosen_geometric_mean": -0.9609875679016113, + "epoch": 1.88, + "grad_norm": 2.796875, + "learning_rate": 4.2462094533579634e-08, + "log_odds": 9.87938117980957, + "log_odds_ratio": -0.11877305805683136, + "loss": 0.3156, + "rejected_geometric_mean": -10.446836471557617, + "step": 7604 + }, + { + "chosen_geometric_mean": -1.042065143585205, + "epoch": 1.88, + "grad_norm": 4.71875, + "learning_rate": 4.228362480569792e-08, + "log_odds": 6.840512752532959, + "log_odds_ratio": -0.21420910954475403, + "loss": 0.26, + "rejected_geometric_mean": -7.568828105926514, + "step": 7605 + }, + { + "chosen_geometric_mean": -0.8243756294250488, + "epoch": 1.88, + "grad_norm": 5.25, + "learning_rate": 4.210552772661358e-08, + "log_odds": 4.050894737243652, + "log_odds_ratio": -0.06451402604579926, + "loss": 0.3017, + "rejected_geometric_mean": -4.328331470489502, + "step": 7606 + }, + { + "chosen_geometric_mean": -0.885076105594635, + "epoch": 1.88, + "grad_norm": 33.0, + "learning_rate": 4.1927803323330293e-08, + "log_odds": 7.240818023681641, + "log_odds_ratio": -0.30715441703796387, + "loss": 0.3083, + "rejected_geometric_mean": -7.83875036239624, + "step": 7607 + }, + { + "chosen_geometric_mean": -1.0919443368911743, + "epoch": 1.88, + "grad_norm": 38.25, + "learning_rate": 4.175045162279512e-08, + "log_odds": 7.1235432624816895, + "log_odds_ratio": -0.1729554682970047, + "loss": 0.2515, + "rejected_geometric_mean": -7.878562927246094, + "step": 7608 + }, + { + "chosen_geometric_mean": -1.2600562572479248, + "epoch": 1.88, + "grad_norm": 23.0, + "learning_rate": 4.1573472651899594e-08, + "log_odds": 2.177143096923828, + "log_odds_ratio": -0.13838455080986023, + "loss": 0.2964, + "rejected_geometric_mean": -3.1529734134674072, + "step": 7609 + }, + { + "chosen_geometric_mean": -0.8054646849632263, + "epoch": 1.88, + "grad_norm": 12.75, + "learning_rate": 4.139686643747726e-08, + "log_odds": 7.805846691131592, + "log_odds_ratio": -0.009144855663180351, + "loss": 0.2761, + "rejected_geometric_mean": -8.015665054321289, + "step": 7610 + }, + { + "chosen_geometric_mean": -0.9024070501327515, + "epoch": 1.88, + "grad_norm": 1.84375, + "learning_rate": 4.122063300630586e-08, + "log_odds": 15.19929313659668, + "log_odds_ratio": -4.970018198946491e-05, + "loss": 0.223, + "rejected_geometric_mean": -15.566773414611816, + "step": 7611 + }, + { + "chosen_geometric_mean": -0.8240041732788086, + "epoch": 1.88, + "grad_norm": 2.125, + "learning_rate": 4.1044772385107065e-08, + "log_odds": 3.1866836547851562, + "log_odds_ratio": -0.21176691353321075, + "loss": 0.2251, + "rejected_geometric_mean": -3.555061101913452, + "step": 7612 + }, + { + "chosen_geometric_mean": -1.1463651657104492, + "epoch": 1.88, + "grad_norm": 3.015625, + "learning_rate": 4.08692846005454e-08, + "log_odds": 1.3826406002044678, + "log_odds_ratio": -0.293698251247406, + "loss": 0.2453, + "rejected_geometric_mean": -2.2981984615325928, + "step": 7613 + }, + { + "chosen_geometric_mean": -0.9499087333679199, + "epoch": 1.89, + "grad_norm": 34.0, + "learning_rate": 4.069416967922874e-08, + "log_odds": 4.0358123779296875, + "log_odds_ratio": -0.11056231707334518, + "loss": 0.2816, + "rejected_geometric_mean": -4.574616432189941, + "step": 7614 + }, + { + "chosen_geometric_mean": -0.9458516836166382, + "epoch": 1.89, + "grad_norm": 46.75, + "learning_rate": 4.051942764770916e-08, + "log_odds": 6.503805160522461, + "log_odds_ratio": -0.22777581214904785, + "loss": 0.3199, + "rejected_geometric_mean": -7.099201202392578, + "step": 7615 + }, + { + "chosen_geometric_mean": -0.8940101861953735, + "epoch": 1.89, + "grad_norm": 3.015625, + "learning_rate": 4.0345058532481327e-08, + "log_odds": 7.129900932312012, + "log_odds_ratio": -0.12109265476465225, + "loss": 0.222, + "rejected_geometric_mean": -7.550737380981445, + "step": 7616 + }, + { + "chosen_geometric_mean": -0.9806315898895264, + "epoch": 1.89, + "grad_norm": 6.5625, + "learning_rate": 4.017106235998408e-08, + "log_odds": 6.390453338623047, + "log_odds_ratio": -0.1028546690940857, + "loss": 0.2736, + "rejected_geometric_mean": -6.9296064376831055, + "step": 7617 + }, + { + "chosen_geometric_mean": -0.9813694357872009, + "epoch": 1.89, + "grad_norm": 4.09375, + "learning_rate": 3.9997439156599374e-08, + "log_odds": 14.755804061889648, + "log_odds_ratio": -0.0909380093216896, + "loss": 0.2626, + "rejected_geometric_mean": -15.310426712036133, + "step": 7618 + }, + { + "chosen_geometric_mean": -1.0393297672271729, + "epoch": 1.89, + "grad_norm": 22.875, + "learning_rate": 3.982418894865253e-08, + "log_odds": 6.376103401184082, + "log_odds_ratio": -0.05970422923564911, + "loss": 0.2616, + "rejected_geometric_mean": -7.00229024887085, + "step": 7619 + }, + { + "chosen_geometric_mean": -0.8793004751205444, + "epoch": 1.89, + "grad_norm": 11.875, + "learning_rate": 3.965131176241255e-08, + "log_odds": 3.495328664779663, + "log_odds_ratio": -0.15032103657722473, + "loss": 0.2445, + "rejected_geometric_mean": -3.9141101837158203, + "step": 7620 + }, + { + "chosen_geometric_mean": -1.0768765211105347, + "epoch": 1.89, + "grad_norm": 3.40625, + "learning_rate": 3.9478807624091785e-08, + "log_odds": 14.78860855102539, + "log_odds_ratio": -7.480438398488332e-06, + "loss": 0.235, + "rejected_geometric_mean": -15.447586059570312, + "step": 7621 + }, + { + "chosen_geometric_mean": -0.8279657959938049, + "epoch": 1.89, + "grad_norm": 14.5625, + "learning_rate": 3.930667655984571e-08, + "log_odds": 6.9715776443481445, + "log_odds_ratio": -0.05560971796512604, + "loss": 0.3008, + "rejected_geometric_mean": -7.257948875427246, + "step": 7622 + }, + { + "chosen_geometric_mean": -0.9960646033287048, + "epoch": 1.89, + "grad_norm": 4.375, + "learning_rate": 3.913491859577373e-08, + "log_odds": 10.873090744018555, + "log_odds_ratio": -0.0014621771406382322, + "loss": 0.3241, + "rejected_geometric_mean": -11.40353012084961, + "step": 7623 + }, + { + "chosen_geometric_mean": -0.9371862411499023, + "epoch": 1.89, + "grad_norm": 7.03125, + "learning_rate": 3.8963533757918613e-08, + "log_odds": 6.104455947875977, + "log_odds_ratio": -0.22315345704555511, + "loss": 0.2838, + "rejected_geometric_mean": -6.664522171020508, + "step": 7624 + }, + { + "chosen_geometric_mean": -0.989007830619812, + "epoch": 1.89, + "grad_norm": 29.125, + "learning_rate": 3.8792522072266246e-08, + "log_odds": 1.706165075302124, + "log_odds_ratio": -0.40650516748428345, + "loss": 0.2449, + "rejected_geometric_mean": -2.5265052318573, + "step": 7625 + }, + { + "chosen_geometric_mean": -0.8381443023681641, + "epoch": 1.89, + "grad_norm": 3.34375, + "learning_rate": 3.862188356474617e-08, + "log_odds": 7.034610271453857, + "log_odds_ratio": -0.04324866831302643, + "loss": 0.2117, + "rejected_geometric_mean": -7.3198747634887695, + "step": 7626 + }, + { + "chosen_geometric_mean": -0.9274042844772339, + "epoch": 1.89, + "grad_norm": 23.375, + "learning_rate": 3.845161826123156e-08, + "log_odds": 3.945626735687256, + "log_odds_ratio": -0.12375323474407196, + "loss": 0.343, + "rejected_geometric_mean": -4.448444843292236, + "step": 7627 + }, + { + "chosen_geometric_mean": -0.8278944492340088, + "epoch": 1.89, + "grad_norm": 15.0, + "learning_rate": 3.828172618753762e-08, + "log_odds": 7.821681976318359, + "log_odds_ratio": -0.07208050787448883, + "loss": 0.2463, + "rejected_geometric_mean": -8.138626098632812, + "step": 7628 + }, + { + "chosen_geometric_mean": -0.9689071774482727, + "epoch": 1.89, + "grad_norm": 8.75, + "learning_rate": 3.811220736942567e-08, + "log_odds": 9.98845100402832, + "log_odds_ratio": -0.04053238406777382, + "loss": 0.2789, + "rejected_geometric_mean": -10.504743576049805, + "step": 7629 + }, + { + "chosen_geometric_mean": -0.8970673680305481, + "epoch": 1.89, + "grad_norm": 51.5, + "learning_rate": 3.794306183259794e-08, + "log_odds": 3.5966262817382812, + "log_odds_ratio": -0.20235544443130493, + "loss": 0.2458, + "rejected_geometric_mean": -4.104638576507568, + "step": 7630 + }, + { + "chosen_geometric_mean": -1.1596877574920654, + "epoch": 1.89, + "grad_norm": 23.375, + "learning_rate": 3.777428960270058e-08, + "log_odds": 6.6164069175720215, + "log_odds_ratio": -0.21841108798980713, + "loss": 0.2419, + "rejected_geometric_mean": -7.504138946533203, + "step": 7631 + }, + { + "chosen_geometric_mean": -0.9675509333610535, + "epoch": 1.89, + "grad_norm": 28.75, + "learning_rate": 3.76058907053245e-08, + "log_odds": 8.795974731445312, + "log_odds_ratio": -0.00312037393450737, + "loss": 0.2881, + "rejected_geometric_mean": -9.281621932983398, + "step": 7632 + }, + { + "chosen_geometric_mean": -1.067855954170227, + "epoch": 1.89, + "grad_norm": 2.546875, + "learning_rate": 3.743786516600206e-08, + "log_odds": 10.910163879394531, + "log_odds_ratio": -0.02278338186442852, + "loss": 0.2929, + "rejected_geometric_mean": -11.555649757385254, + "step": 7633 + }, + { + "chosen_geometric_mean": -1.0757219791412354, + "epoch": 1.89, + "grad_norm": 2.9375, + "learning_rate": 3.7270213010210665e-08, + "log_odds": 8.495050430297852, + "log_odds_ratio": -0.07647649943828583, + "loss": 0.2927, + "rejected_geometric_mean": -9.183236122131348, + "step": 7634 + }, + { + "chosen_geometric_mean": -1.0961112976074219, + "epoch": 1.89, + "grad_norm": 6.5625, + "learning_rate": 3.710293426336997e-08, + "log_odds": 11.570575714111328, + "log_odds_ratio": -0.0013466794043779373, + "loss": 0.2227, + "rejected_geometric_mean": -12.250809669494629, + "step": 7635 + }, + { + "chosen_geometric_mean": -0.8277690410614014, + "epoch": 1.89, + "grad_norm": 15.6875, + "learning_rate": 3.693602895084386e-08, + "log_odds": 8.60986042022705, + "log_odds_ratio": -0.002471890766173601, + "loss": 0.2786, + "rejected_geometric_mean": -8.85214614868164, + "step": 7636 + }, + { + "chosen_geometric_mean": -0.969948410987854, + "epoch": 1.89, + "grad_norm": 10.4375, + "learning_rate": 3.676949709793875e-08, + "log_odds": 14.514427185058594, + "log_odds_ratio": -0.00029072631150484085, + "loss": 0.2391, + "rejected_geometric_mean": -14.981405258178711, + "step": 7637 + }, + { + "chosen_geometric_mean": -0.8727210760116577, + "epoch": 1.89, + "grad_norm": 1.8359375, + "learning_rate": 3.6603338729905015e-08, + "log_odds": 13.594966888427734, + "log_odds_ratio": -2.00572467292659e-05, + "loss": 0.2322, + "rejected_geometric_mean": -13.917930603027344, + "step": 7638 + }, + { + "chosen_geometric_mean": -0.9454659223556519, + "epoch": 1.89, + "grad_norm": 12.75, + "learning_rate": 3.643755387193665e-08, + "log_odds": 3.4107561111450195, + "log_odds_ratio": -0.29820603132247925, + "loss": 0.2603, + "rejected_geometric_mean": -4.112330436706543, + "step": 7639 + }, + { + "chosen_geometric_mean": -0.9005036354064941, + "epoch": 1.89, + "grad_norm": 29.375, + "learning_rate": 3.627214254916994e-08, + "log_odds": 9.889591217041016, + "log_odds_ratio": -0.0012773204362019897, + "loss": 0.2954, + "rejected_geometric_mean": -10.179407119750977, + "step": 7640 + }, + { + "chosen_geometric_mean": -0.960742712020874, + "epoch": 1.89, + "grad_norm": 2.1875, + "learning_rate": 3.610710478668566e-08, + "log_odds": 6.699966907501221, + "log_odds_ratio": -0.33105722069740295, + "loss": 0.282, + "rejected_geometric_mean": -7.40263557434082, + "step": 7641 + }, + { + "chosen_geometric_mean": -0.912007212638855, + "epoch": 1.89, + "grad_norm": 2.234375, + "learning_rate": 3.5942440609507134e-08, + "log_odds": 3.6132147312164307, + "log_odds_ratio": -0.2635907530784607, + "loss": 0.2159, + "rejected_geometric_mean": -4.1575751304626465, + "step": 7642 + }, + { + "chosen_geometric_mean": -1.024817943572998, + "epoch": 1.89, + "grad_norm": 26.625, + "learning_rate": 3.5778150042602157e-08, + "log_odds": 14.862834930419922, + "log_odds_ratio": -0.0018617920577526093, + "loss": 0.2745, + "rejected_geometric_mean": -15.42898178100586, + "step": 7643 + }, + { + "chosen_geometric_mean": -0.9264371395111084, + "epoch": 1.89, + "grad_norm": 59.5, + "learning_rate": 3.561423311088025e-08, + "log_odds": 13.32361888885498, + "log_odds_ratio": -0.011871225200593472, + "loss": 0.2836, + "rejected_geometric_mean": -13.72365951538086, + "step": 7644 + }, + { + "chosen_geometric_mean": -1.0741758346557617, + "epoch": 1.89, + "grad_norm": 8.875, + "learning_rate": 3.545068983919542e-08, + "log_odds": 9.84985637664795, + "log_odds_ratio": -0.007262742146849632, + "loss": 0.2941, + "rejected_geometric_mean": -10.484460830688477, + "step": 7645 + }, + { + "chosen_geometric_mean": -1.0148299932479858, + "epoch": 1.89, + "grad_norm": 29.75, + "learning_rate": 3.5287520252345055e-08, + "log_odds": 6.358733654022217, + "log_odds_ratio": -0.2884186804294586, + "loss": 0.2953, + "rejected_geometric_mean": -7.124396324157715, + "step": 7646 + }, + { + "chosen_geometric_mean": -1.203055739402771, + "epoch": 1.89, + "grad_norm": 3.796875, + "learning_rate": 3.512472437506936e-08, + "log_odds": 4.306647777557373, + "log_odds_ratio": -0.18827258050441742, + "loss": 0.2158, + "rejected_geometric_mean": -5.158293724060059, + "step": 7647 + }, + { + "chosen_geometric_mean": -1.2294195890426636, + "epoch": 1.89, + "grad_norm": 4.875, + "learning_rate": 3.496230223205194e-08, + "log_odds": 5.999637603759766, + "log_odds_ratio": -0.26262572407722473, + "loss": 0.2782, + "rejected_geometric_mean": -6.9712934494018555, + "step": 7648 + }, + { + "chosen_geometric_mean": -1.0342116355895996, + "epoch": 1.89, + "grad_norm": 11.9375, + "learning_rate": 3.48002538479203e-08, + "log_odds": 2.27352237701416, + "log_odds_ratio": -0.23712009191513062, + "loss": 0.2949, + "rejected_geometric_mean": -3.0433623790740967, + "step": 7649 + }, + { + "chosen_geometric_mean": -0.9441230297088623, + "epoch": 1.89, + "grad_norm": 2.390625, + "learning_rate": 3.463857924724451e-08, + "log_odds": 5.254803657531738, + "log_odds_ratio": -0.10303860902786255, + "loss": 0.2454, + "rejected_geometric_mean": -5.784076690673828, + "step": 7650 + }, + { + "chosen_geometric_mean": -0.9329273700714111, + "epoch": 1.89, + "grad_norm": 2.640625, + "learning_rate": 3.44772784545383e-08, + "log_odds": 9.606273651123047, + "log_odds_ratio": -0.007224613334983587, + "loss": 0.2212, + "rejected_geometric_mean": -10.011205673217773, + "step": 7651 + }, + { + "chosen_geometric_mean": -1.2004122734069824, + "epoch": 1.89, + "grad_norm": 11.0, + "learning_rate": 3.4316351494259326e-08, + "log_odds": 8.5662202835083, + "log_odds_ratio": -0.15579909086227417, + "loss": 0.2753, + "rejected_geometric_mean": -9.45509147644043, + "step": 7652 + }, + { + "chosen_geometric_mean": -0.7466846108436584, + "epoch": 1.89, + "grad_norm": 98.5, + "learning_rate": 3.415579839080724e-08, + "log_odds": 10.202797889709473, + "log_odds_ratio": -0.0018301099771633744, + "loss": 0.2854, + "rejected_geometric_mean": -10.269782066345215, + "step": 7653 + }, + { + "chosen_geometric_mean": -1.1311815977096558, + "epoch": 1.9, + "grad_norm": 11.625, + "learning_rate": 3.3995619168525897e-08, + "log_odds": 5.37888240814209, + "log_odds_ratio": -0.2702224850654602, + "loss": 0.3345, + "rejected_geometric_mean": -6.182187557220459, + "step": 7654 + }, + { + "chosen_geometric_mean": -1.123500108718872, + "epoch": 1.9, + "grad_norm": 3.140625, + "learning_rate": 3.3835813851702537e-08, + "log_odds": 9.066524505615234, + "log_odds_ratio": -0.1469677835702896, + "loss": 0.267, + "rejected_geometric_mean": -9.877663612365723, + "step": 7655 + }, + { + "chosen_geometric_mean": -1.0218937397003174, + "epoch": 1.9, + "grad_norm": 17.75, + "learning_rate": 3.36763824645675e-08, + "log_odds": 8.983268737792969, + "log_odds_ratio": -0.3126169741153717, + "loss": 0.261, + "rejected_geometric_mean": -9.777443885803223, + "step": 7656 + }, + { + "chosen_geometric_mean": -0.9654238224029541, + "epoch": 1.9, + "grad_norm": 27.0, + "learning_rate": 3.351732503129424e-08, + "log_odds": 6.547942161560059, + "log_odds_ratio": -0.1368386298418045, + "loss": 0.2646, + "rejected_geometric_mean": -7.105782508850098, + "step": 7657 + }, + { + "chosen_geometric_mean": -1.1253618001937866, + "epoch": 1.9, + "grad_norm": 49.5, + "learning_rate": 3.335864157599983e-08, + "log_odds": 9.590989112854004, + "log_odds_ratio": -0.1392427235841751, + "loss": 0.286, + "rejected_geometric_mean": -10.382379531860352, + "step": 7658 + }, + { + "chosen_geometric_mean": -0.8578602075576782, + "epoch": 1.9, + "grad_norm": 9.6875, + "learning_rate": 3.3200332122744484e-08, + "log_odds": 8.238526344299316, + "log_odds_ratio": -0.03153829276561737, + "loss": 0.2465, + "rejected_geometric_mean": -8.551247596740723, + "step": 7659 + }, + { + "chosen_geometric_mean": -0.8549585938453674, + "epoch": 1.9, + "grad_norm": 22.0, + "learning_rate": 3.304239669553122e-08, + "log_odds": 5.539257526397705, + "log_odds_ratio": -0.28544533252716064, + "loss": 0.2581, + "rejected_geometric_mean": -6.059976577758789, + "step": 7660 + }, + { + "chosen_geometric_mean": -0.9747865200042725, + "epoch": 1.9, + "grad_norm": 3.125, + "learning_rate": 3.288483531830783e-08, + "log_odds": 5.632477760314941, + "log_odds_ratio": -0.32857999205589294, + "loss": 0.2729, + "rejected_geometric_mean": -6.303345203399658, + "step": 7661 + }, + { + "chosen_geometric_mean": -0.7778584957122803, + "epoch": 1.9, + "grad_norm": 2.40625, + "learning_rate": 3.272764801496353e-08, + "log_odds": 7.672782897949219, + "log_odds_ratio": -0.04762888327240944, + "loss": 0.2719, + "rejected_geometric_mean": -7.843236923217773, + "step": 7662 + }, + { + "chosen_geometric_mean": -0.9804388284683228, + "epoch": 1.9, + "grad_norm": 3.3125, + "learning_rate": 3.257083480933204e-08, + "log_odds": 0.3762191832065582, + "log_odds_ratio": -0.527944803237915, + "loss": 0.2829, + "rejected_geometric_mean": -1.223610281944275, + "step": 7663 + }, + { + "chosen_geometric_mean": -1.0349129438400269, + "epoch": 1.9, + "grad_norm": 3.328125, + "learning_rate": 3.2414395725190175e-08, + "log_odds": 3.027859687805176, + "log_odds_ratio": -0.2705059349536896, + "loss": 0.2928, + "rejected_geometric_mean": -3.7637133598327637, + "step": 7664 + }, + { + "chosen_geometric_mean": -0.9759836196899414, + "epoch": 1.9, + "grad_norm": 19.5, + "learning_rate": 3.2258330786257284e-08, + "log_odds": 7.0122270584106445, + "log_odds_ratio": -0.12337200343608856, + "loss": 0.2656, + "rejected_geometric_mean": -7.628846168518066, + "step": 7665 + }, + { + "chosen_geometric_mean": -1.0896685123443604, + "epoch": 1.9, + "grad_norm": 13.875, + "learning_rate": 3.210264001619695e-08, + "log_odds": 0.9357746243476868, + "log_odds_ratio": -0.4328959584236145, + "loss": 0.2995, + "rejected_geometric_mean": -1.8643059730529785, + "step": 7666 + }, + { + "chosen_geometric_mean": -1.1946382522583008, + "epoch": 1.9, + "grad_norm": 39.25, + "learning_rate": 3.1947323438615844e-08, + "log_odds": 6.796868324279785, + "log_odds_ratio": -0.03502961993217468, + "loss": 0.3391, + "rejected_geometric_mean": -7.642840385437012, + "step": 7667 + }, + { + "chosen_geometric_mean": -1.003556728363037, + "epoch": 1.9, + "grad_norm": 2.234375, + "learning_rate": 3.1792381077063173e-08, + "log_odds": 9.107545852661133, + "log_odds_ratio": -0.15625469386577606, + "loss": 0.2289, + "rejected_geometric_mean": -9.659374237060547, + "step": 7668 + }, + { + "chosen_geometric_mean": -1.0136152505874634, + "epoch": 1.9, + "grad_norm": 26.875, + "learning_rate": 3.1637812955032376e-08, + "log_odds": 0.8156747817993164, + "log_odds_ratio": -0.36730876564979553, + "loss": 0.2675, + "rejected_geometric_mean": -1.5907779932022095, + "step": 7669 + }, + { + "chosen_geometric_mean": -0.919985294342041, + "epoch": 1.9, + "grad_norm": 7.0625, + "learning_rate": 3.148361909595943e-08, + "log_odds": 9.36665153503418, + "log_odds_ratio": -0.0014211626257747412, + "loss": 0.26, + "rejected_geometric_mean": -9.778284072875977, + "step": 7670 + }, + { + "chosen_geometric_mean": -0.9389262795448303, + "epoch": 1.9, + "grad_norm": 19.125, + "learning_rate": 3.132979952322396e-08, + "log_odds": 8.799714088439941, + "log_odds_ratio": -0.1111903265118599, + "loss": 0.2731, + "rejected_geometric_mean": -9.302205085754395, + "step": 7671 + }, + { + "chosen_geometric_mean": -0.9931930899620056, + "epoch": 1.9, + "grad_norm": 3.0, + "learning_rate": 3.117635426014842e-08, + "log_odds": 12.419157028198242, + "log_odds_ratio": -0.07640465348958969, + "loss": 0.2307, + "rejected_geometric_mean": -12.955735206604004, + "step": 7672 + }, + { + "chosen_geometric_mean": -1.3234224319458008, + "epoch": 1.9, + "grad_norm": 7.40625, + "learning_rate": 3.102328332999921e-08, + "log_odds": 12.354597091674805, + "log_odds_ratio": -0.04615960270166397, + "loss": 0.3627, + "rejected_geometric_mean": -13.35947036743164, + "step": 7673 + }, + { + "chosen_geometric_mean": -1.680243968963623, + "epoch": 1.9, + "grad_norm": 16.25, + "learning_rate": 3.0870586755985256e-08, + "log_odds": 14.881941795349121, + "log_odds_ratio": -0.0003046890487894416, + "loss": 0.2849, + "rejected_geometric_mean": -16.195871353149414, + "step": 7674 + }, + { + "chosen_geometric_mean": -0.9419079422950745, + "epoch": 1.9, + "grad_norm": 2.765625, + "learning_rate": 3.071826456125915e-08, + "log_odds": 10.788408279418945, + "log_odds_ratio": -0.09148653596639633, + "loss": 0.2499, + "rejected_geometric_mean": -11.30581283569336, + "step": 7675 + }, + { + "chosen_geometric_mean": -0.8880877494812012, + "epoch": 1.9, + "grad_norm": 11.5, + "learning_rate": 3.056631676891686e-08, + "log_odds": 15.917017936706543, + "log_odds_ratio": -0.010201483964920044, + "loss": 0.2526, + "rejected_geometric_mean": -16.274879455566406, + "step": 7676 + }, + { + "chosen_geometric_mean": -1.1991424560546875, + "epoch": 1.9, + "grad_norm": 4.34375, + "learning_rate": 3.041474340199635e-08, + "log_odds": 9.037425994873047, + "log_odds_ratio": -0.09412752836942673, + "loss": 0.2627, + "rejected_geometric_mean": -9.859993934631348, + "step": 7677 + }, + { + "chosen_geometric_mean": -1.1746814250946045, + "epoch": 1.9, + "grad_norm": 12.75, + "learning_rate": 3.026354448348118e-08, + "log_odds": 4.822516441345215, + "log_odds_ratio": -0.17304858565330505, + "loss": 0.2234, + "rejected_geometric_mean": -5.738590240478516, + "step": 7678 + }, + { + "chosen_geometric_mean": -0.9639583826065063, + "epoch": 1.9, + "grad_norm": 1.9921875, + "learning_rate": 3.011272003629578e-08, + "log_odds": 8.423434257507324, + "log_odds_ratio": -0.13913962244987488, + "loss": 0.2451, + "rejected_geometric_mean": -8.986400604248047, + "step": 7679 + }, + { + "chosen_geometric_mean": -1.158539056777954, + "epoch": 1.9, + "grad_norm": 9.0, + "learning_rate": 2.996227008330882e-08, + "log_odds": 7.773647785186768, + "log_odds_ratio": -0.20502863824367523, + "loss": 0.2692, + "rejected_geometric_mean": -8.670884132385254, + "step": 7680 + }, + { + "chosen_geometric_mean": -1.002150058746338, + "epoch": 1.9, + "grad_norm": 103.0, + "learning_rate": 2.981219464733259e-08, + "log_odds": 2.8798670768737793, + "log_odds_ratio": -0.34713104367256165, + "loss": 0.325, + "rejected_geometric_mean": -3.6962428092956543, + "step": 7681 + }, + { + "chosen_geometric_mean": -1.062983512878418, + "epoch": 1.9, + "grad_norm": 3.953125, + "learning_rate": 2.96624937511214e-08, + "log_odds": 10.09915542602539, + "log_odds_ratio": -0.04588010907173157, + "loss": 0.2773, + "rejected_geometric_mean": -10.751504898071289, + "step": 7682 + }, + { + "chosen_geometric_mean": -0.7727863192558289, + "epoch": 1.9, + "grad_norm": 22.5, + "learning_rate": 2.951316741737431e-08, + "log_odds": 6.0895586013793945, + "log_odds_ratio": -0.01097550056874752, + "loss": 0.2537, + "rejected_geometric_mean": -6.235315322875977, + "step": 7683 + }, + { + "chosen_geometric_mean": -1.027126669883728, + "epoch": 1.9, + "grad_norm": 3.375, + "learning_rate": 2.9364215668732376e-08, + "log_odds": 3.632911443710327, + "log_odds_ratio": -0.2887378931045532, + "loss": 0.3423, + "rejected_geometric_mean": -4.382772445678711, + "step": 7684 + }, + { + "chosen_geometric_mean": -0.9017575979232788, + "epoch": 1.9, + "grad_norm": 2.84375, + "learning_rate": 2.921563852778031e-08, + "log_odds": 4.793879985809326, + "log_odds_ratio": -0.16808795928955078, + "loss": 0.2509, + "rejected_geometric_mean": -5.301851272583008, + "step": 7685 + }, + { + "chosen_geometric_mean": -0.9733047485351562, + "epoch": 1.9, + "grad_norm": 7.28125, + "learning_rate": 2.9067436017045646e-08, + "log_odds": 6.347854137420654, + "log_odds_ratio": -0.14184796810150146, + "loss": 0.3053, + "rejected_geometric_mean": -6.93480920791626, + "step": 7686 + }, + { + "chosen_geometric_mean": -0.8937833905220032, + "epoch": 1.9, + "grad_norm": 5.34375, + "learning_rate": 2.8919608159000136e-08, + "log_odds": 10.9765043258667, + "log_odds_ratio": -0.08209069073200226, + "loss": 0.2473, + "rejected_geometric_mean": -11.34096908569336, + "step": 7687 + }, + { + "chosen_geometric_mean": -1.098932147026062, + "epoch": 1.9, + "grad_norm": 21.125, + "learning_rate": 2.877215497605723e-08, + "log_odds": 6.322832107543945, + "log_odds_ratio": -0.01592124253511429, + "loss": 0.2661, + "rejected_geometric_mean": -7.022114276885986, + "step": 7688 + }, + { + "chosen_geometric_mean": -0.8172380924224854, + "epoch": 1.9, + "grad_norm": 14.9375, + "learning_rate": 2.862507649057489e-08, + "log_odds": 12.760626792907715, + "log_odds_ratio": -0.005684310104697943, + "loss": 0.2197, + "rejected_geometric_mean": -12.97654914855957, + "step": 7689 + }, + { + "chosen_geometric_mean": -0.8052214980125427, + "epoch": 1.9, + "grad_norm": 2.015625, + "learning_rate": 2.8478372724853876e-08, + "log_odds": 6.746197700500488, + "log_odds_ratio": -0.19779375195503235, + "loss": 0.2516, + "rejected_geometric_mean": -7.0960822105407715, + "step": 7690 + }, + { + "chosen_geometric_mean": -0.9998136758804321, + "epoch": 1.9, + "grad_norm": 2.109375, + "learning_rate": 2.8332043701137513e-08, + "log_odds": 4.383484363555908, + "log_odds_ratio": -0.09829814732074738, + "loss": 0.2385, + "rejected_geometric_mean": -4.98591947555542, + "step": 7691 + }, + { + "chosen_geometric_mean": -1.0106110572814941, + "epoch": 1.9, + "grad_norm": 47.75, + "learning_rate": 2.818608944161305e-08, + "log_odds": 7.636839389801025, + "log_odds_ratio": -0.1285335123538971, + "loss": 0.3824, + "rejected_geometric_mean": -8.274032592773438, + "step": 7692 + }, + { + "chosen_geometric_mean": -1.0300045013427734, + "epoch": 1.9, + "grad_norm": 4.09375, + "learning_rate": 2.8040509968410845e-08, + "log_odds": 4.533932685852051, + "log_odds_ratio": -0.2545784115791321, + "loss": 0.3183, + "rejected_geometric_mean": -5.303676128387451, + "step": 7693 + }, + { + "chosen_geometric_mean": -1.2694450616836548, + "epoch": 1.9, + "grad_norm": 2.546875, + "learning_rate": 2.789530530360407e-08, + "log_odds": 7.357518196105957, + "log_odds_ratio": -0.006661484949290752, + "loss": 0.2913, + "rejected_geometric_mean": -8.27706241607666, + "step": 7694 + }, + { + "chosen_geometric_mean": -1.3317896127700806, + "epoch": 1.91, + "grad_norm": 31.0, + "learning_rate": 2.7750475469209004e-08, + "log_odds": 9.82975959777832, + "log_odds_ratio": -0.15864811837673187, + "loss": 0.332, + "rejected_geometric_mean": -10.921708106994629, + "step": 7695 + }, + { + "chosen_geometric_mean": -0.7480112314224243, + "epoch": 1.91, + "grad_norm": 2.03125, + "learning_rate": 2.760602048718558e-08, + "log_odds": 11.22500228881836, + "log_odds_ratio": -0.024972448125481606, + "loss": 0.2446, + "rejected_geometric_mean": -11.347684860229492, + "step": 7696 + }, + { + "chosen_geometric_mean": -0.9131985902786255, + "epoch": 1.91, + "grad_norm": 5.0625, + "learning_rate": 2.7461940379436557e-08, + "log_odds": 7.625718593597412, + "log_odds_ratio": -0.045920226722955704, + "loss": 0.3336, + "rejected_geometric_mean": -8.025764465332031, + "step": 7697 + }, + { + "chosen_geometric_mean": -1.118236780166626, + "epoch": 1.91, + "grad_norm": 38.0, + "learning_rate": 2.7318235167808072e-08, + "log_odds": 2.6834189891815186, + "log_odds_ratio": -0.3470355272293091, + "loss": 0.3033, + "rejected_geometric_mean": -3.543914794921875, + "step": 7698 + }, + { + "chosen_geometric_mean": -1.437331199645996, + "epoch": 1.91, + "grad_norm": 7.625, + "learning_rate": 2.717490487408908e-08, + "log_odds": 5.22666597366333, + "log_odds_ratio": -0.15520724654197693, + "loss": 0.2763, + "rejected_geometric_mean": -6.348228931427002, + "step": 7699 + }, + { + "chosen_geometric_mean": -1.3123250007629395, + "epoch": 1.91, + "grad_norm": 2.71875, + "learning_rate": 2.7031949520011923e-08, + "log_odds": 6.214601516723633, + "log_odds_ratio": -0.11513668298721313, + "loss": 0.3057, + "rejected_geometric_mean": -7.214427471160889, + "step": 7700 + }, + { + "chosen_geometric_mean": -0.9944193363189697, + "epoch": 1.91, + "grad_norm": 55.0, + "learning_rate": 2.688936912725232e-08, + "log_odds": 8.661099433898926, + "log_odds_ratio": -0.04557228088378906, + "loss": 0.2785, + "rejected_geometric_mean": -9.207723617553711, + "step": 7701 + }, + { + "chosen_geometric_mean": -0.9974213242530823, + "epoch": 1.91, + "grad_norm": 3.828125, + "learning_rate": 2.6747163717428803e-08, + "log_odds": 10.934593200683594, + "log_odds_ratio": -0.175737664103508, + "loss": 0.235, + "rejected_geometric_mean": -11.590514183044434, + "step": 7702 + }, + { + "chosen_geometric_mean": -1.0067107677459717, + "epoch": 1.91, + "grad_norm": 9.0625, + "learning_rate": 2.6605333312102742e-08, + "log_odds": 9.28269100189209, + "log_odds_ratio": -0.0171365849673748, + "loss": 0.2752, + "rejected_geometric_mean": -9.835744857788086, + "step": 7703 + }, + { + "chosen_geometric_mean": -1.109857439994812, + "epoch": 1.91, + "grad_norm": 8.125, + "learning_rate": 2.6463877932779438e-08, + "log_odds": 6.622889041900635, + "log_odds_ratio": -0.1816641092300415, + "loss": 0.2629, + "rejected_geometric_mean": -7.395622253417969, + "step": 7704 + }, + { + "chosen_geometric_mean": -0.9941810965538025, + "epoch": 1.91, + "grad_norm": 3.359375, + "learning_rate": 2.6322797600906724e-08, + "log_odds": 4.0618062019348145, + "log_odds_ratio": -0.10456283390522003, + "loss": 0.2252, + "rejected_geometric_mean": -4.630007743835449, + "step": 7705 + }, + { + "chosen_geometric_mean": -1.2243403196334839, + "epoch": 1.91, + "grad_norm": 16.875, + "learning_rate": 2.618209233787583e-08, + "log_odds": 3.7436254024505615, + "log_odds_ratio": -0.16399380564689636, + "loss": 0.2774, + "rejected_geometric_mean": -4.612054824829102, + "step": 7706 + }, + { + "chosen_geometric_mean": -1.0230493545532227, + "epoch": 1.91, + "grad_norm": 27.0, + "learning_rate": 2.6041762165021357e-08, + "log_odds": 12.608749389648438, + "log_odds_ratio": -0.0018468782072886825, + "loss": 0.2641, + "rejected_geometric_mean": -13.166452407836914, + "step": 7707 + }, + { + "chosen_geometric_mean": -1.4111244678497314, + "epoch": 1.91, + "grad_norm": 42.25, + "learning_rate": 2.5901807103620168e-08, + "log_odds": 10.079849243164062, + "log_odds_ratio": -0.02072984352707863, + "loss": 0.2783, + "rejected_geometric_mean": -11.19670581817627, + "step": 7708 + }, + { + "chosen_geometric_mean": -1.0514520406723022, + "epoch": 1.91, + "grad_norm": 14.5, + "learning_rate": 2.5762227174893072e-08, + "log_odds": 2.8144755363464355, + "log_odds_ratio": -0.34712275862693787, + "loss": 0.2632, + "rejected_geometric_mean": -3.5809223651885986, + "step": 7709 + }, + { + "chosen_geometric_mean": -1.053820013999939, + "epoch": 1.91, + "grad_norm": 54.0, + "learning_rate": 2.5623022400003694e-08, + "log_odds": 8.193657875061035, + "log_odds_ratio": -0.294065922498703, + "loss": 0.3279, + "rejected_geometric_mean": -8.931046485900879, + "step": 7710 + }, + { + "chosen_geometric_mean": -1.1488951444625854, + "epoch": 1.91, + "grad_norm": 2.6875, + "learning_rate": 2.5484192800059038e-08, + "log_odds": 8.057318687438965, + "log_odds_ratio": -0.1578814834356308, + "loss": 0.2567, + "rejected_geometric_mean": -8.805302619934082, + "step": 7711 + }, + { + "chosen_geometric_mean": -1.1593505144119263, + "epoch": 1.91, + "grad_norm": 13.3125, + "learning_rate": 2.5345738396108655e-08, + "log_odds": 5.903652667999268, + "log_odds_ratio": -0.03871582821011543, + "loss": 0.2467, + "rejected_geometric_mean": -6.687268257141113, + "step": 7712 + }, + { + "chosen_geometric_mean": -0.9255831241607666, + "epoch": 1.91, + "grad_norm": 30.0, + "learning_rate": 2.520765920914603e-08, + "log_odds": 7.128475189208984, + "log_odds_ratio": -0.004567908588796854, + "loss": 0.2911, + "rejected_geometric_mean": -7.535885810852051, + "step": 7713 + }, + { + "chosen_geometric_mean": -0.8870270252227783, + "epoch": 1.91, + "grad_norm": 7.6875, + "learning_rate": 2.5069955260106637e-08, + "log_odds": 2.7789554595947266, + "log_odds_ratio": -0.22530238330364227, + "loss": 0.2868, + "rejected_geometric_mean": -3.2269115447998047, + "step": 7714 + }, + { + "chosen_geometric_mean": -0.9830034375190735, + "epoch": 1.91, + "grad_norm": 6.53125, + "learning_rate": 2.4932626569870167e-08, + "log_odds": 7.2058000564575195, + "log_odds_ratio": -0.22614699602127075, + "loss": 0.2619, + "rejected_geometric_mean": -7.865397930145264, + "step": 7715 + }, + { + "chosen_geometric_mean": -1.2239242792129517, + "epoch": 1.91, + "grad_norm": 48.25, + "learning_rate": 2.4795673159258848e-08, + "log_odds": 11.416714668273926, + "log_odds_ratio": -0.00023067615984473377, + "loss": 0.2427, + "rejected_geometric_mean": -12.196179389953613, + "step": 7716 + }, + { + "chosen_geometric_mean": -0.959557056427002, + "epoch": 1.91, + "grad_norm": 2.71875, + "learning_rate": 2.4659095049038016e-08, + "log_odds": 3.108170986175537, + "log_odds_ratio": -0.18950867652893066, + "loss": 0.2752, + "rejected_geometric_mean": -3.66630482673645, + "step": 7717 + }, + { + "chosen_geometric_mean": -1.038648009300232, + "epoch": 1.91, + "grad_norm": 2.625, + "learning_rate": 2.4522892259916387e-08, + "log_odds": 8.521419525146484, + "log_odds_ratio": -0.047195740044116974, + "loss": 0.2837, + "rejected_geometric_mean": -9.13582706451416, + "step": 7718 + }, + { + "chosen_geometric_mean": -0.8068625926971436, + "epoch": 1.91, + "grad_norm": 16.0, + "learning_rate": 2.4387064812545212e-08, + "log_odds": 8.592161178588867, + "log_odds_ratio": -0.12974637746810913, + "loss": 0.2427, + "rejected_geometric_mean": -8.886502265930176, + "step": 7719 + }, + { + "chosen_geometric_mean": -1.0343401432037354, + "epoch": 1.91, + "grad_norm": 3.234375, + "learning_rate": 2.4251612727519692e-08, + "log_odds": 4.103682994842529, + "log_odds_ratio": -0.17321576178073883, + "loss": 0.242, + "rejected_geometric_mean": -4.8108320236206055, + "step": 7720 + }, + { + "chosen_geometric_mean": -0.8706644773483276, + "epoch": 1.91, + "grad_norm": 6.78125, + "learning_rate": 2.4116536025377002e-08, + "log_odds": 8.690874099731445, + "log_odds_ratio": -0.13650456070899963, + "loss": 0.2368, + "rejected_geometric_mean": -9.093992233276367, + "step": 7721 + }, + { + "chosen_geometric_mean": -0.8565634489059448, + "epoch": 1.91, + "grad_norm": 13.125, + "learning_rate": 2.398183472659854e-08, + "log_odds": 7.116845607757568, + "log_odds_ratio": -0.09509221464395523, + "loss": 0.2484, + "rejected_geometric_mean": -7.502624988555908, + "step": 7722 + }, + { + "chosen_geometric_mean": -1.0944159030914307, + "epoch": 1.91, + "grad_norm": 6.5, + "learning_rate": 2.3847508851608247e-08, + "log_odds": 4.544233322143555, + "log_odds_ratio": -0.12250259518623352, + "loss": 0.2558, + "rejected_geometric_mean": -5.301011085510254, + "step": 7723 + }, + { + "chosen_geometric_mean": -0.9310368299484253, + "epoch": 1.91, + "grad_norm": 5.8125, + "learning_rate": 2.3713558420772885e-08, + "log_odds": 6.2535905838012695, + "log_odds_ratio": -0.093346506357193, + "loss": 0.2812, + "rejected_geometric_mean": -6.716055870056152, + "step": 7724 + }, + { + "chosen_geometric_mean": -0.8903161883354187, + "epoch": 1.91, + "grad_norm": 2.296875, + "learning_rate": 2.3579983454402598e-08, + "log_odds": 5.613363265991211, + "log_odds_ratio": -0.1904379278421402, + "loss": 0.2978, + "rejected_geometric_mean": -6.112606525421143, + "step": 7725 + }, + { + "chosen_geometric_mean": -0.9385271072387695, + "epoch": 1.91, + "grad_norm": 8.1875, + "learning_rate": 2.3446783972750353e-08, + "log_odds": 7.932223320007324, + "log_odds_ratio": -0.14175456762313843, + "loss": 0.2568, + "rejected_geometric_mean": -8.446495056152344, + "step": 7726 + }, + { + "chosen_geometric_mean": -0.9441437721252441, + "epoch": 1.91, + "grad_norm": 2.671875, + "learning_rate": 2.3313959996012768e-08, + "log_odds": 7.815896034240723, + "log_odds_ratio": -0.15200552344322205, + "loss": 0.3005, + "rejected_geometric_mean": -8.372089385986328, + "step": 7727 + }, + { + "chosen_geometric_mean": -0.9764019250869751, + "epoch": 1.91, + "grad_norm": 6.3125, + "learning_rate": 2.3181511544329294e-08, + "log_odds": 10.15709400177002, + "log_odds_ratio": -0.004578654654324055, + "loss": 0.2231, + "rejected_geometric_mean": -10.582948684692383, + "step": 7728 + }, + { + "chosen_geometric_mean": -0.6075074672698975, + "epoch": 1.91, + "grad_norm": 4.09375, + "learning_rate": 2.304943863778164e-08, + "log_odds": 10.948822021484375, + "log_odds_ratio": -0.10789129883050919, + "loss": 0.3402, + "rejected_geometric_mean": -10.886295318603516, + "step": 7729 + }, + { + "chosen_geometric_mean": -1.0214409828186035, + "epoch": 1.91, + "grad_norm": 1.921875, + "learning_rate": 2.2917741296396013e-08, + "log_odds": 6.697053909301758, + "log_odds_ratio": -0.07225558161735535, + "loss": 0.2349, + "rejected_geometric_mean": -7.293939590454102, + "step": 7730 + }, + { + "chosen_geometric_mean": -0.7620667219161987, + "epoch": 1.91, + "grad_norm": 3.578125, + "learning_rate": 2.2786419540140047e-08, + "log_odds": 9.053590774536133, + "log_odds_ratio": -0.019819136708974838, + "loss": 0.2929, + "rejected_geometric_mean": -9.189349174499512, + "step": 7731 + }, + { + "chosen_geometric_mean": -1.3606261014938354, + "epoch": 1.91, + "grad_norm": 51.25, + "learning_rate": 2.2655473388925873e-08, + "log_odds": 3.346560478210449, + "log_odds_ratio": -0.20536339282989502, + "loss": 0.2707, + "rejected_geometric_mean": -4.441510200500488, + "step": 7732 + }, + { + "chosen_geometric_mean": -0.9953579306602478, + "epoch": 1.91, + "grad_norm": 2.375, + "learning_rate": 2.2524902862608166e-08, + "log_odds": 6.938990592956543, + "log_odds_ratio": -0.012277898378670216, + "loss": 0.2725, + "rejected_geometric_mean": -7.469743251800537, + "step": 7733 + }, + { + "chosen_geometric_mean": -1.176788330078125, + "epoch": 1.91, + "grad_norm": 80.0, + "learning_rate": 2.2394707980984143e-08, + "log_odds": 6.138405799865723, + "log_odds_ratio": -0.03467436507344246, + "loss": 0.2836, + "rejected_geometric_mean": -6.919872283935547, + "step": 7734 + }, + { + "chosen_geometric_mean": -0.9734513163566589, + "epoch": 1.92, + "grad_norm": 11.75, + "learning_rate": 2.2264888763794403e-08, + "log_odds": 10.722197532653809, + "log_odds_ratio": -0.0644238218665123, + "loss": 0.2517, + "rejected_geometric_mean": -11.202157020568848, + "step": 7735 + }, + { + "chosen_geometric_mean": -1.126094102859497, + "epoch": 1.92, + "grad_norm": 30.5, + "learning_rate": 2.2135445230723198e-08, + "log_odds": 1.5388292074203491, + "log_odds_ratio": -0.2748374342918396, + "loss": 0.2783, + "rejected_geometric_mean": -2.4237401485443115, + "step": 7736 + }, + { + "chosen_geometric_mean": -0.9811735153198242, + "epoch": 1.92, + "grad_norm": 1.9453125, + "learning_rate": 2.2006377401396774e-08, + "log_odds": 13.65088939666748, + "log_odds_ratio": -0.01382686197757721, + "loss": 0.2551, + "rejected_geometric_mean": -14.117024421691895, + "step": 7737 + }, + { + "chosen_geometric_mean": -1.0757886171340942, + "epoch": 1.92, + "grad_norm": 14.0, + "learning_rate": 2.187768529538503e-08, + "log_odds": 9.190735816955566, + "log_odds_ratio": -0.12462663650512695, + "loss": 0.2268, + "rejected_geometric_mean": -9.915399551391602, + "step": 7738 + }, + { + "chosen_geometric_mean": -0.9471989870071411, + "epoch": 1.92, + "grad_norm": 10.0625, + "learning_rate": 2.1749368932200975e-08, + "log_odds": 4.503255367279053, + "log_odds_ratio": -0.16498707234859467, + "loss": 0.2869, + "rejected_geometric_mean": -5.107385635375977, + "step": 7739 + }, + { + "chosen_geometric_mean": -1.0284595489501953, + "epoch": 1.92, + "grad_norm": 8.75, + "learning_rate": 2.1621428331300155e-08, + "log_odds": 4.122073173522949, + "log_odds_ratio": -0.12487499415874481, + "loss": 0.2026, + "rejected_geometric_mean": -4.770164966583252, + "step": 7740 + }, + { + "chosen_geometric_mean": -0.9935003519058228, + "epoch": 1.92, + "grad_norm": 7.375, + "learning_rate": 2.149386351208177e-08, + "log_odds": 8.024029731750488, + "log_odds_ratio": -0.037613242864608765, + "loss": 0.2878, + "rejected_geometric_mean": -8.57465648651123, + "step": 7741 + }, + { + "chosen_geometric_mean": -0.7891080379486084, + "epoch": 1.92, + "grad_norm": 7.46875, + "learning_rate": 2.1366674493887297e-08, + "log_odds": 2.156545639038086, + "log_odds_ratio": -0.26143619418144226, + "loss": 0.248, + "rejected_geometric_mean": -2.515852928161621, + "step": 7742 + }, + { + "chosen_geometric_mean": -0.9845101237297058, + "epoch": 1.92, + "grad_norm": 3.40625, + "learning_rate": 2.123986129600214e-08, + "log_odds": 6.869755268096924, + "log_odds_ratio": -0.02868509478867054, + "loss": 0.2471, + "rejected_geometric_mean": -7.363530158996582, + "step": 7743 + }, + { + "chosen_geometric_mean": -1.0365227460861206, + "epoch": 1.92, + "grad_norm": 2.03125, + "learning_rate": 2.1113423937653976e-08, + "log_odds": 4.792730808258057, + "log_odds_ratio": -0.12278617918491364, + "loss": 0.2477, + "rejected_geometric_mean": -5.460049152374268, + "step": 7744 + }, + { + "chosen_geometric_mean": -1.0798346996307373, + "epoch": 1.92, + "grad_norm": 4.21875, + "learning_rate": 2.0987362438013582e-08, + "log_odds": 7.35985803604126, + "log_odds_ratio": -0.0013936202740296721, + "loss": 0.253, + "rejected_geometric_mean": -8.021214485168457, + "step": 7745 + }, + { + "chosen_geometric_mean": -0.9424368143081665, + "epoch": 1.92, + "grad_norm": 1.9140625, + "learning_rate": 2.086167681619511e-08, + "log_odds": 3.4322032928466797, + "log_odds_ratio": -0.2783491611480713, + "loss": 0.2561, + "rejected_geometric_mean": -4.116913318634033, + "step": 7746 + }, + { + "chosen_geometric_mean": -1.1249592304229736, + "epoch": 1.92, + "grad_norm": 6.9375, + "learning_rate": 2.0736367091255537e-08, + "log_odds": 8.150142669677734, + "log_odds_ratio": -0.1698794811964035, + "loss": 0.2262, + "rejected_geometric_mean": -8.954459190368652, + "step": 7747 + }, + { + "chosen_geometric_mean": -1.160999059677124, + "epoch": 1.92, + "grad_norm": 7.0625, + "learning_rate": 2.0611433282194672e-08, + "log_odds": 3.0361623764038086, + "log_odds_ratio": -0.28819218277931213, + "loss": 0.2744, + "rejected_geometric_mean": -3.9796290397644043, + "step": 7748 + }, + { + "chosen_geometric_mean": -0.7623724937438965, + "epoch": 1.92, + "grad_norm": 30.75, + "learning_rate": 2.048687540795541e-08, + "log_odds": 9.887153625488281, + "log_odds_ratio": -0.13076384365558624, + "loss": 0.2471, + "rejected_geometric_mean": -10.06501579284668, + "step": 7749 + }, + { + "chosen_geometric_mean": -0.9108811020851135, + "epoch": 1.92, + "grad_norm": 18.875, + "learning_rate": 2.0362693487424034e-08, + "log_odds": 1.8257631063461304, + "log_odds_ratio": -0.23866385221481323, + "loss": 0.2805, + "rejected_geometric_mean": -2.3811917304992676, + "step": 7750 + }, + { + "chosen_geometric_mean": -1.0511265993118286, + "epoch": 1.92, + "grad_norm": 3.453125, + "learning_rate": 2.0238887539429376e-08, + "log_odds": 3.714965343475342, + "log_odds_ratio": -0.45114681124687195, + "loss": 0.3145, + "rejected_geometric_mean": -4.645514011383057, + "step": 7751 + }, + { + "chosen_geometric_mean": -1.294608473777771, + "epoch": 1.92, + "grad_norm": 9.25, + "learning_rate": 2.0115457582743357e-08, + "log_odds": 4.951227188110352, + "log_odds_ratio": -0.08813533931970596, + "loss": 0.2676, + "rejected_geometric_mean": -5.880800247192383, + "step": 7752 + }, + { + "chosen_geometric_mean": -0.8489193916320801, + "epoch": 1.92, + "grad_norm": 25.875, + "learning_rate": 1.9992403636080736e-08, + "log_odds": 6.500113487243652, + "log_odds_ratio": -0.22760558128356934, + "loss": 0.2593, + "rejected_geometric_mean": -6.974969387054443, + "step": 7753 + }, + { + "chosen_geometric_mean": -0.896208643913269, + "epoch": 1.92, + "grad_norm": 14.625, + "learning_rate": 1.986972571809964e-08, + "log_odds": 5.764976501464844, + "log_odds_ratio": -0.19472138583660126, + "loss": 0.2387, + "rejected_geometric_mean": -6.2589030265808105, + "step": 7754 + }, + { + "chosen_geometric_mean": -0.9573072195053101, + "epoch": 1.92, + "grad_norm": 46.0, + "learning_rate": 1.9747423847401027e-08, + "log_odds": 4.762997150421143, + "log_odds_ratio": -0.02989070490002632, + "loss": 0.3248, + "rejected_geometric_mean": -5.2364821434021, + "step": 7755 + }, + { + "chosen_geometric_mean": -0.9539562463760376, + "epoch": 1.92, + "grad_norm": 25.375, + "learning_rate": 1.9625498042528944e-08, + "log_odds": 7.5625319480896, + "log_odds_ratio": -0.22227652370929718, + "loss": 0.3124, + "rejected_geometric_mean": -8.199738502502441, + "step": 7756 + }, + { + "chosen_geometric_mean": -1.0944997072219849, + "epoch": 1.92, + "grad_norm": 5.21875, + "learning_rate": 1.9503948321970002e-08, + "log_odds": 12.269558906555176, + "log_odds_ratio": -0.00024151921388693154, + "loss": 0.2604, + "rejected_geometric_mean": -12.932073593139648, + "step": 7757 + }, + { + "chosen_geometric_mean": -0.9040548801422119, + "epoch": 1.92, + "grad_norm": 4.3125, + "learning_rate": 1.9382774704153894e-08, + "log_odds": 11.25718879699707, + "log_odds_ratio": -0.06760738790035248, + "loss": 0.2127, + "rejected_geometric_mean": -11.681918144226074, + "step": 7758 + }, + { + "chosen_geometric_mean": -2.4440722465515137, + "epoch": 1.92, + "grad_norm": 37.25, + "learning_rate": 1.9261977207453708e-08, + "log_odds": 12.041461944580078, + "log_odds_ratio": -0.008162706159055233, + "loss": 0.328, + "rejected_geometric_mean": -14.121782302856445, + "step": 7759 + }, + { + "chosen_geometric_mean": -1.0120737552642822, + "epoch": 1.92, + "grad_norm": 1.9921875, + "learning_rate": 1.9141555850185347e-08, + "log_odds": 2.8272297382354736, + "log_odds_ratio": -0.3517746329307556, + "loss": 0.24, + "rejected_geometric_mean": -3.605363368988037, + "step": 7760 + }, + { + "chosen_geometric_mean": -1.257384181022644, + "epoch": 1.92, + "grad_norm": 37.5, + "learning_rate": 1.9021510650607257e-08, + "log_odds": 2.928558826446533, + "log_odds_ratio": -0.3065820634365082, + "loss": 0.2874, + "rejected_geometric_mean": -3.9603259563446045, + "step": 7761 + }, + { + "chosen_geometric_mean": -0.8789927959442139, + "epoch": 1.92, + "grad_norm": 2.859375, + "learning_rate": 1.8901841626921548e-08, + "log_odds": 2.706587553024292, + "log_odds_ratio": -0.07654069364070892, + "loss": 0.2387, + "rejected_geometric_mean": -3.0841569900512695, + "step": 7762 + }, + { + "chosen_geometric_mean": -1.0674152374267578, + "epoch": 1.92, + "grad_norm": 12.0625, + "learning_rate": 1.8782548797272592e-08, + "log_odds": 7.881756782531738, + "log_odds_ratio": -0.05045595020055771, + "loss": 0.2635, + "rejected_geometric_mean": -8.518240928649902, + "step": 7763 + }, + { + "chosen_geometric_mean": -0.8798410892486572, + "epoch": 1.92, + "grad_norm": 3.796875, + "learning_rate": 1.8663632179748146e-08, + "log_odds": 7.689121246337891, + "log_odds_ratio": -0.12302587926387787, + "loss": 0.2478, + "rejected_geometric_mean": -8.13715934753418, + "step": 7764 + }, + { + "chosen_geometric_mean": -0.8576064705848694, + "epoch": 1.92, + "grad_norm": 3.546875, + "learning_rate": 1.8545091792379067e-08, + "log_odds": 8.29913330078125, + "log_odds_ratio": -0.00514760660007596, + "loss": 0.264, + "rejected_geometric_mean": -8.583736419677734, + "step": 7765 + }, + { + "chosen_geometric_mean": -0.8640305995941162, + "epoch": 1.92, + "grad_norm": 5.84375, + "learning_rate": 1.842692765313847e-08, + "log_odds": 17.075298309326172, + "log_odds_ratio": -5.960465188081798e-08, + "loss": 0.2496, + "rejected_geometric_mean": -17.372604370117188, + "step": 7766 + }, + { + "chosen_geometric_mean": -0.9731463193893433, + "epoch": 1.92, + "grad_norm": 3.140625, + "learning_rate": 1.8309139779943418e-08, + "log_odds": 5.915510177612305, + "log_odds_ratio": -0.03600709140300751, + "loss": 0.3015, + "rejected_geometric_mean": -6.42226505279541, + "step": 7767 + }, + { + "chosen_geometric_mean": -0.9015485048294067, + "epoch": 1.92, + "grad_norm": 11.375, + "learning_rate": 1.8191728190652956e-08, + "log_odds": 3.36085844039917, + "log_odds_ratio": -0.30625393986701965, + "loss": 0.2626, + "rejected_geometric_mean": -3.9189019203186035, + "step": 7768 + }, + { + "chosen_geometric_mean": -0.8075742721557617, + "epoch": 1.92, + "grad_norm": 1.90625, + "learning_rate": 1.8074692903069513e-08, + "log_odds": 14.173402786254883, + "log_odds_ratio": -0.011639471165835857, + "loss": 0.2398, + "rejected_geometric_mean": -14.382511138916016, + "step": 7769 + }, + { + "chosen_geometric_mean": -1.154268503189087, + "epoch": 1.92, + "grad_norm": 3.75, + "learning_rate": 1.7958033934938336e-08, + "log_odds": 8.333697319030762, + "log_odds_ratio": -0.14987385272979736, + "loss": 0.2692, + "rejected_geometric_mean": -9.171760559082031, + "step": 7770 + }, + { + "chosen_geometric_mean": -0.8255310654640198, + "epoch": 1.92, + "grad_norm": 2.703125, + "learning_rate": 1.784175130394833e-08, + "log_odds": 2.5072553157806396, + "log_odds_ratio": -0.5133100748062134, + "loss": 0.2793, + "rejected_geometric_mean": -3.1599183082580566, + "step": 7771 + }, + { + "chosen_geometric_mean": -0.9757820963859558, + "epoch": 1.92, + "grad_norm": 29.125, + "learning_rate": 1.7725845027730115e-08, + "log_odds": 11.680092811584473, + "log_odds_ratio": -0.03473488241434097, + "loss": 0.2706, + "rejected_geometric_mean": -12.1807222366333, + "step": 7772 + }, + { + "chosen_geometric_mean": -0.9482555389404297, + "epoch": 1.92, + "grad_norm": 2.296875, + "learning_rate": 1.7610315123858522e-08, + "log_odds": 9.264269828796387, + "log_odds_ratio": -0.2456952929496765, + "loss": 0.2957, + "rejected_geometric_mean": -9.830166816711426, + "step": 7773 + }, + { + "chosen_geometric_mean": -1.0579410791397095, + "epoch": 1.92, + "grad_norm": 1.9609375, + "learning_rate": 1.749516160984982e-08, + "log_odds": 3.5352981090545654, + "log_odds_ratio": -0.29419276118278503, + "loss": 0.2311, + "rejected_geometric_mean": -4.32286262512207, + "step": 7774 + }, + { + "chosen_geometric_mean": -1.0915157794952393, + "epoch": 1.92, + "grad_norm": 16.875, + "learning_rate": 1.738038450316448e-08, + "log_odds": 7.293014049530029, + "log_odds_ratio": -0.2352345734834671, + "loss": 0.2759, + "rejected_geometric_mean": -8.101503372192383, + "step": 7775 + }, + { + "chosen_geometric_mean": -0.9265494346618652, + "epoch": 1.93, + "grad_norm": 2.203125, + "learning_rate": 1.7265983821205535e-08, + "log_odds": 9.68597412109375, + "log_odds_ratio": -0.10348222404718399, + "loss": 0.2716, + "rejected_geometric_mean": -10.158638954162598, + "step": 7776 + }, + { + "chosen_geometric_mean": -0.8215509057044983, + "epoch": 1.93, + "grad_norm": 1.921875, + "learning_rate": 1.715195958131882e-08, + "log_odds": 13.331318855285645, + "log_odds_ratio": -0.00010463202488608658, + "loss": 0.2139, + "rejected_geometric_mean": -13.542972564697266, + "step": 7777 + }, + { + "chosen_geometric_mean": -0.9350025653839111, + "epoch": 1.93, + "grad_norm": 8.625, + "learning_rate": 1.7038311800793018e-08, + "log_odds": 10.780562400817871, + "log_odds_ratio": -0.31106507778167725, + "loss": 0.2194, + "rejected_geometric_mean": -11.4616117477417, + "step": 7778 + }, + { + "chosen_geometric_mean": -0.8679834008216858, + "epoch": 1.93, + "grad_norm": 34.75, + "learning_rate": 1.6925040496860167e-08, + "log_odds": 2.484689712524414, + "log_odds_ratio": -0.16310758888721466, + "loss": 0.2979, + "rejected_geometric_mean": -2.9315831661224365, + "step": 7779 + }, + { + "chosen_geometric_mean": -1.1596312522888184, + "epoch": 1.93, + "grad_norm": 2.046875, + "learning_rate": 1.6812145686694314e-08, + "log_odds": 5.4403533935546875, + "log_odds_ratio": -0.19254449009895325, + "loss": 0.2271, + "rejected_geometric_mean": -6.312087059020996, + "step": 7780 + }, + { + "chosen_geometric_mean": -0.9445978999137878, + "epoch": 1.93, + "grad_norm": 2.015625, + "learning_rate": 1.6699627387413707e-08, + "log_odds": 8.4794340133667, + "log_odds_ratio": -0.14771254360675812, + "loss": 0.2849, + "rejected_geometric_mean": -8.98353099822998, + "step": 7781 + }, + { + "chosen_geometric_mean": -0.9996431469917297, + "epoch": 1.93, + "grad_norm": 10.25, + "learning_rate": 1.6587485616078592e-08, + "log_odds": 8.887017250061035, + "log_odds_ratio": -0.049339838325977325, + "loss": 0.2799, + "rejected_geometric_mean": -9.449980735778809, + "step": 7782 + }, + { + "chosen_geometric_mean": -0.9825543165206909, + "epoch": 1.93, + "grad_norm": 2.359375, + "learning_rate": 1.6475720389692305e-08, + "log_odds": 6.257970809936523, + "log_odds_ratio": -0.013340714387595654, + "loss": 0.2732, + "rejected_geometric_mean": -6.743301868438721, + "step": 7783 + }, + { + "chosen_geometric_mean": -0.9601864814758301, + "epoch": 1.93, + "grad_norm": 2.640625, + "learning_rate": 1.6364331725200744e-08, + "log_odds": 6.363279342651367, + "log_odds_ratio": -0.027614567428827286, + "loss": 0.248, + "rejected_geometric_mean": -6.81938362121582, + "step": 7784 + }, + { + "chosen_geometric_mean": -1.0101038217544556, + "epoch": 1.93, + "grad_norm": 2.1875, + "learning_rate": 1.6253319639493724e-08, + "log_odds": 7.226508140563965, + "log_odds_ratio": -0.04998467117547989, + "loss": 0.2746, + "rejected_geometric_mean": -7.8018479347229, + "step": 7785 + }, + { + "chosen_geometric_mean": -0.8853747844696045, + "epoch": 1.93, + "grad_norm": 5.34375, + "learning_rate": 1.6142684149403066e-08, + "log_odds": 5.322540283203125, + "log_odds_ratio": -0.23747655749320984, + "loss": 0.2941, + "rejected_geometric_mean": -5.836056709289551, + "step": 7786 + }, + { + "chosen_geometric_mean": -0.7939239144325256, + "epoch": 1.93, + "grad_norm": 3.828125, + "learning_rate": 1.603242527170368e-08, + "log_odds": 8.421586990356445, + "log_odds_ratio": -0.12512052059173584, + "loss": 0.2582, + "rejected_geometric_mean": -8.71157455444336, + "step": 7787 + }, + { + "chosen_geometric_mean": -0.8574857711791992, + "epoch": 1.93, + "grad_norm": 2.0625, + "learning_rate": 1.5922543023113858e-08, + "log_odds": 8.228659629821777, + "log_odds_ratio": -0.02341489866375923, + "loss": 0.2111, + "rejected_geometric_mean": -8.504627227783203, + "step": 7788 + }, + { + "chosen_geometric_mean": -0.7332313060760498, + "epoch": 1.93, + "grad_norm": 4.0, + "learning_rate": 1.5813037420293886e-08, + "log_odds": 3.9702744483947754, + "log_odds_ratio": -0.18048706650733948, + "loss": 0.2665, + "rejected_geometric_mean": -4.214496612548828, + "step": 7789 + }, + { + "chosen_geometric_mean": -0.9026256799697876, + "epoch": 1.93, + "grad_norm": 8.375, + "learning_rate": 1.5703908479847708e-08, + "log_odds": 7.4777607917785645, + "log_odds_ratio": -0.25880947709083557, + "loss": 0.2715, + "rejected_geometric_mean": -8.008405685424805, + "step": 7790 + }, + { + "chosen_geometric_mean": -0.9864448308944702, + "epoch": 1.93, + "grad_norm": 13.6875, + "learning_rate": 1.5595156218322082e-08, + "log_odds": 3.078423500061035, + "log_odds_ratio": -0.29695817828178406, + "loss": 0.248, + "rejected_geometric_mean": -3.7738823890686035, + "step": 7791 + }, + { + "chosen_geometric_mean": -1.0909090042114258, + "epoch": 1.93, + "grad_norm": 7.90625, + "learning_rate": 1.548678065220577e-08, + "log_odds": 4.482979774475098, + "log_odds_ratio": -0.15094350278377533, + "loss": 0.31, + "rejected_geometric_mean": -5.215359687805176, + "step": 7792 + }, + { + "chosen_geometric_mean": -0.8319804668426514, + "epoch": 1.93, + "grad_norm": 12.75, + "learning_rate": 1.5378781797932007e-08, + "log_odds": 7.7839155197143555, + "log_odds_ratio": -0.12985514104366302, + "loss": 0.2603, + "rejected_geometric_mean": -8.1344575881958, + "step": 7793 + }, + { + "chosen_geometric_mean": -0.8807843923568726, + "epoch": 1.93, + "grad_norm": 5.28125, + "learning_rate": 1.5271159671875202e-08, + "log_odds": 9.458359718322754, + "log_odds_ratio": -0.005060829222202301, + "loss": 0.2421, + "rejected_geometric_mean": -9.801025390625, + "step": 7794 + }, + { + "chosen_geometric_mean": -0.98921799659729, + "epoch": 1.93, + "grad_norm": 2.421875, + "learning_rate": 1.5163914290353966e-08, + "log_odds": 11.335996627807617, + "log_odds_ratio": -0.0003226494009140879, + "loss": 0.2643, + "rejected_geometric_mean": -11.814797401428223, + "step": 7795 + }, + { + "chosen_geometric_mean": -1.0435572862625122, + "epoch": 1.93, + "grad_norm": 33.75, + "learning_rate": 1.5057045669629177e-08, + "log_odds": 11.67658519744873, + "log_odds_ratio": -0.0794747844338417, + "loss": 0.2689, + "rejected_geometric_mean": -12.325478553771973, + "step": 7796 + }, + { + "chosen_geometric_mean": -0.995025634765625, + "epoch": 1.93, + "grad_norm": 8.625, + "learning_rate": 1.4950553825904547e-08, + "log_odds": 11.94180679321289, + "log_odds_ratio": -0.19218890368938446, + "loss": 0.2916, + "rejected_geometric_mean": -12.545093536376953, + "step": 7797 + }, + { + "chosen_geometric_mean": -1.0050239562988281, + "epoch": 1.93, + "grad_norm": 11.8125, + "learning_rate": 1.4844438775326875e-08, + "log_odds": 9.113969802856445, + "log_odds_ratio": -0.01105615682899952, + "loss": 0.2866, + "rejected_geometric_mean": -9.644132614135742, + "step": 7798 + }, + { + "chosen_geometric_mean": -0.9166523218154907, + "epoch": 1.93, + "grad_norm": 8.5, + "learning_rate": 1.4738700533985795e-08, + "log_odds": 10.090944290161133, + "log_odds_ratio": -0.1332223266363144, + "loss": 0.2427, + "rejected_geometric_mean": -10.588167190551758, + "step": 7799 + }, + { + "chosen_geometric_mean": -0.9836181998252869, + "epoch": 1.93, + "grad_norm": 4.6875, + "learning_rate": 1.4633339117913759e-08, + "log_odds": 12.320749282836914, + "log_odds_ratio": -0.2164849191904068, + "loss": 0.2522, + "rejected_geometric_mean": -12.924378395080566, + "step": 7800 + }, + { + "chosen_geometric_mean": -1.198701024055481, + "epoch": 1.93, + "grad_norm": 2.8125, + "learning_rate": 1.4528354543086043e-08, + "log_odds": 4.242321014404297, + "log_odds_ratio": -0.03144155442714691, + "loss": 0.2643, + "rejected_geometric_mean": -5.085897922515869, + "step": 7801 + }, + { + "chosen_geometric_mean": -0.9163945317268372, + "epoch": 1.93, + "grad_norm": 44.25, + "learning_rate": 1.4423746825420748e-08, + "log_odds": 8.88627815246582, + "log_odds_ratio": -0.08905285596847534, + "loss": 0.3361, + "rejected_geometric_mean": -9.358476638793945, + "step": 7802 + }, + { + "chosen_geometric_mean": -1.0148873329162598, + "epoch": 1.93, + "grad_norm": 5.3125, + "learning_rate": 1.4319515980779075e-08, + "log_odds": 7.006296634674072, + "log_odds_ratio": -0.19827225804328918, + "loss": 0.3294, + "rejected_geometric_mean": -7.626696586608887, + "step": 7803 + }, + { + "chosen_geometric_mean": -1.0038889646530151, + "epoch": 1.93, + "grad_norm": 2.515625, + "learning_rate": 1.4215662024964772e-08, + "log_odds": 2.891799211502075, + "log_odds_ratio": -0.2556203603744507, + "loss": 0.2516, + "rejected_geometric_mean": -3.535639762878418, + "step": 7804 + }, + { + "chosen_geometric_mean": -0.8404447436332703, + "epoch": 1.93, + "grad_norm": 2.234375, + "learning_rate": 1.4112184973724962e-08, + "log_odds": 7.729067802429199, + "log_odds_ratio": -0.1698203980922699, + "loss": 0.2748, + "rejected_geometric_mean": -8.110437393188477, + "step": 7805 + }, + { + "chosen_geometric_mean": -1.1312410831451416, + "epoch": 1.93, + "grad_norm": 4.21875, + "learning_rate": 1.4009084842748488e-08, + "log_odds": 8.023179054260254, + "log_odds_ratio": -0.08591262996196747, + "loss": 0.2549, + "rejected_geometric_mean": -8.777605056762695, + "step": 7806 + }, + { + "chosen_geometric_mean": -0.8723081350326538, + "epoch": 1.93, + "grad_norm": 2.921875, + "learning_rate": 1.3906361647668675e-08, + "log_odds": 7.536294460296631, + "log_odds_ratio": -0.10501611232757568, + "loss": 0.2538, + "rejected_geometric_mean": -7.911704063415527, + "step": 7807 + }, + { + "chosen_geometric_mean": -0.965650200843811, + "epoch": 1.93, + "grad_norm": 1.640625, + "learning_rate": 1.380401540406029e-08, + "log_odds": 8.7599458694458, + "log_odds_ratio": -0.1757277250289917, + "loss": 0.1994, + "rejected_geometric_mean": -9.362239837646484, + "step": 7808 + }, + { + "chosen_geometric_mean": -1.1290185451507568, + "epoch": 1.93, + "grad_norm": 14.625, + "learning_rate": 1.3702046127441471e-08, + "log_odds": 3.976736545562744, + "log_odds_ratio": -0.11321574449539185, + "loss": 0.2555, + "rejected_geometric_mean": -4.738347053527832, + "step": 7809 + }, + { + "chosen_geometric_mean": -0.9138315916061401, + "epoch": 1.93, + "grad_norm": 40.25, + "learning_rate": 1.3600453833273464e-08, + "log_odds": 13.631157875061035, + "log_odds_ratio": -0.00015475090185645968, + "loss": 0.2691, + "rejected_geometric_mean": -13.992795944213867, + "step": 7810 + }, + { + "chosen_geometric_mean": -1.2357962131500244, + "epoch": 1.93, + "grad_norm": 2.609375, + "learning_rate": 1.3499238536960058e-08, + "log_odds": 4.920932769775391, + "log_odds_ratio": -0.11071242392063141, + "loss": 0.2624, + "rejected_geometric_mean": -5.83235502243042, + "step": 7811 + }, + { + "chosen_geometric_mean": -0.9743651151657104, + "epoch": 1.93, + "grad_norm": 74.0, + "learning_rate": 1.3398400253847587e-08, + "log_odds": 4.620654106140137, + "log_odds_ratio": -0.15764956176280975, + "loss": 0.2197, + "rejected_geometric_mean": -5.205808162689209, + "step": 7812 + }, + { + "chosen_geometric_mean": -1.3359935283660889, + "epoch": 1.93, + "grad_norm": 40.0, + "learning_rate": 1.3297938999226045e-08, + "log_odds": 3.84476637840271, + "log_odds_ratio": -0.3679996430873871, + "loss": 0.3015, + "rejected_geometric_mean": -5.037067413330078, + "step": 7813 + }, + { + "chosen_geometric_mean": -1.147939682006836, + "epoch": 1.93, + "grad_norm": 10.8125, + "learning_rate": 1.3197854788327691e-08, + "log_odds": 4.453367233276367, + "log_odds_ratio": -0.18853774666786194, + "loss": 0.2542, + "rejected_geometric_mean": -5.301097869873047, + "step": 7814 + }, + { + "chosen_geometric_mean": -1.0138442516326904, + "epoch": 1.93, + "grad_norm": 1.9765625, + "learning_rate": 1.3098147636327053e-08, + "log_odds": 11.289741516113281, + "log_odds_ratio": -0.0016045395750552416, + "loss": 0.282, + "rejected_geometric_mean": -11.84081745147705, + "step": 7815 + }, + { + "chosen_geometric_mean": -0.7656903862953186, + "epoch": 1.94, + "grad_norm": 4.5, + "learning_rate": 1.2998817558343147e-08, + "log_odds": 7.518660545349121, + "log_odds_ratio": -0.15104946494102478, + "loss": 0.2717, + "rejected_geometric_mean": -7.72855281829834, + "step": 7816 + }, + { + "chosen_geometric_mean": -0.7765179872512817, + "epoch": 1.94, + "grad_norm": 11.6875, + "learning_rate": 1.2899864569436149e-08, + "log_odds": 11.770137786865234, + "log_odds_ratio": -0.12004117667675018, + "loss": 0.2777, + "rejected_geometric_mean": -12.046741485595703, + "step": 7817 + }, + { + "chosen_geometric_mean": -0.9176989793777466, + "epoch": 1.94, + "grad_norm": 34.75, + "learning_rate": 1.2801288684609614e-08, + "log_odds": 5.044650077819824, + "log_odds_ratio": -0.1987105756998062, + "loss": 0.2662, + "rejected_geometric_mean": -5.548555374145508, + "step": 7818 + }, + { + "chosen_geometric_mean": -1.0101323127746582, + "epoch": 1.94, + "grad_norm": 4.8125, + "learning_rate": 1.2703089918810197e-08, + "log_odds": 8.437098503112793, + "log_odds_ratio": -0.07543963193893433, + "loss": 0.2206, + "rejected_geometric_mean": -9.011848449707031, + "step": 7819 + }, + { + "chosen_geometric_mean": -0.9707581996917725, + "epoch": 1.94, + "grad_norm": 2.453125, + "learning_rate": 1.2605268286927374e-08, + "log_odds": 6.085050106048584, + "log_odds_ratio": -0.005191429518163204, + "loss": 0.219, + "rejected_geometric_mean": -6.562503814697266, + "step": 7820 + }, + { + "chosen_geometric_mean": -1.280526041984558, + "epoch": 1.94, + "grad_norm": 23.125, + "learning_rate": 1.2507823803793173e-08, + "log_odds": 4.834589004516602, + "log_odds_ratio": -0.2135108858346939, + "loss": 0.3402, + "rejected_geometric_mean": -5.859108924865723, + "step": 7821 + }, + { + "chosen_geometric_mean": -1.282860279083252, + "epoch": 1.94, + "grad_norm": 7.625, + "learning_rate": 1.241075648418244e-08, + "log_odds": 9.256888389587402, + "log_odds_ratio": -0.2145797461271286, + "loss": 0.2434, + "rejected_geometric_mean": -10.22659683227539, + "step": 7822 + }, + { + "chosen_geometric_mean": -1.0617165565490723, + "epoch": 1.94, + "grad_norm": 15.9375, + "learning_rate": 1.2314066342812847e-08, + "log_odds": 8.092954635620117, + "log_odds_ratio": -0.26882219314575195, + "loss": 0.29, + "rejected_geometric_mean": -8.848221778869629, + "step": 7823 + }, + { + "chosen_geometric_mean": -0.8255374431610107, + "epoch": 1.94, + "grad_norm": 12.5625, + "learning_rate": 1.2217753394344889e-08, + "log_odds": 1.473029375076294, + "log_odds_ratio": -0.4076598584651947, + "loss": 0.2079, + "rejected_geometric_mean": -1.9667855501174927, + "step": 7824 + }, + { + "chosen_geometric_mean": -0.9697144627571106, + "epoch": 1.94, + "grad_norm": 6.375, + "learning_rate": 1.2121817653382161e-08, + "log_odds": 8.497749328613281, + "log_odds_ratio": -0.15148767828941345, + "loss": 0.2849, + "rejected_geometric_mean": -9.04010009765625, + "step": 7825 + }, + { + "chosen_geometric_mean": -0.9864743947982788, + "epoch": 1.94, + "grad_norm": 5.0, + "learning_rate": 1.202625913447053e-08, + "log_odds": 7.977694034576416, + "log_odds_ratio": -0.06018216907978058, + "loss": 0.2415, + "rejected_geometric_mean": -8.527557373046875, + "step": 7826 + }, + { + "chosen_geometric_mean": -0.8332975506782532, + "epoch": 1.94, + "grad_norm": 2.96875, + "learning_rate": 1.1931077852099237e-08, + "log_odds": 14.286109924316406, + "log_odds_ratio": -3.9043585275067016e-05, + "loss": 0.2358, + "rejected_geometric_mean": -14.518674850463867, + "step": 7827 + }, + { + "chosen_geometric_mean": -0.9699909687042236, + "epoch": 1.94, + "grad_norm": 22.75, + "learning_rate": 1.1836273820700073e-08, + "log_odds": 7.755823612213135, + "log_odds_ratio": -0.01885053887963295, + "loss": 0.261, + "rejected_geometric_mean": -8.253799438476562, + "step": 7828 + }, + { + "chosen_geometric_mean": -0.9208240509033203, + "epoch": 1.94, + "grad_norm": 26.375, + "learning_rate": 1.174184705464737e-08, + "log_odds": 7.2678351402282715, + "log_odds_ratio": -0.0016683439025655389, + "loss": 0.2541, + "rejected_geometric_mean": -7.662841320037842, + "step": 7829 + }, + { + "chosen_geometric_mean": -1.1181690692901611, + "epoch": 1.94, + "grad_norm": 1.9765625, + "learning_rate": 1.1647797568258568e-08, + "log_odds": 5.9749436378479, + "log_odds_ratio": -0.18307146430015564, + "loss": 0.2331, + "rejected_geometric_mean": -6.774840354919434, + "step": 7830 + }, + { + "chosen_geometric_mean": -0.9904962778091431, + "epoch": 1.94, + "grad_norm": 64.0, + "learning_rate": 1.1554125375793923e-08, + "log_odds": 5.344152927398682, + "log_odds_ratio": -0.12038657069206238, + "loss": 0.4331, + "rejected_geometric_mean": -5.890412330627441, + "step": 7831 + }, + { + "chosen_geometric_mean": -1.5832370519638062, + "epoch": 1.94, + "grad_norm": 44.0, + "learning_rate": 1.1460830491456243e-08, + "log_odds": 0.54700767993927, + "log_odds_ratio": -0.48283156752586365, + "loss": 0.3039, + "rejected_geometric_mean": -2.0535974502563477, + "step": 7832 + }, + { + "chosen_geometric_mean": -0.8956912159919739, + "epoch": 1.94, + "grad_norm": 25.5, + "learning_rate": 1.1367912929391434e-08, + "log_odds": 8.149494171142578, + "log_odds_ratio": -0.04564296081662178, + "loss": 0.2763, + "rejected_geometric_mean": -8.541114807128906, + "step": 7833 + }, + { + "chosen_geometric_mean": -1.2342824935913086, + "epoch": 1.94, + "grad_norm": 5.4375, + "learning_rate": 1.1275372703687948e-08, + "log_odds": 7.816122531890869, + "log_odds_ratio": -0.19105438888072968, + "loss": 0.3045, + "rejected_geometric_mean": -8.74119758605957, + "step": 7834 + }, + { + "chosen_geometric_mean": -0.7842851877212524, + "epoch": 1.94, + "grad_norm": 2.078125, + "learning_rate": 1.1183209828377062e-08, + "log_odds": 7.440375328063965, + "log_odds_ratio": -0.1393802911043167, + "loss": 0.2516, + "rejected_geometric_mean": -7.7511396408081055, + "step": 7835 + }, + { + "chosen_geometric_mean": -0.9356662034988403, + "epoch": 1.94, + "grad_norm": 2.609375, + "learning_rate": 1.1091424317432876e-08, + "log_odds": 12.613632202148438, + "log_odds_ratio": -0.006092106457799673, + "loss": 0.3138, + "rejected_geometric_mean": -13.040969848632812, + "step": 7836 + }, + { + "chosen_geometric_mean": -1.1171026229858398, + "epoch": 1.94, + "grad_norm": 17.75, + "learning_rate": 1.100001618477231e-08, + "log_odds": 2.5605673789978027, + "log_odds_ratio": -0.22444301843643188, + "loss": 0.2366, + "rejected_geometric_mean": -3.419955015182495, + "step": 7837 + }, + { + "chosen_geometric_mean": -0.8212023377418518, + "epoch": 1.94, + "grad_norm": 2.078125, + "learning_rate": 1.0908985444255116e-08, + "log_odds": 2.1242408752441406, + "log_odds_ratio": -0.43663090467453003, + "loss": 0.2774, + "rejected_geometric_mean": -2.7469778060913086, + "step": 7838 + }, + { + "chosen_geometric_mean": -0.9031877517700195, + "epoch": 1.94, + "grad_norm": 3.734375, + "learning_rate": 1.0818332109683583e-08, + "log_odds": 7.1761579513549805, + "log_odds_ratio": -0.12028651684522629, + "loss": 0.2498, + "rejected_geometric_mean": -7.67193078994751, + "step": 7839 + }, + { + "chosen_geometric_mean": -0.9207801222801208, + "epoch": 1.94, + "grad_norm": 23.375, + "learning_rate": 1.0728056194803105e-08, + "log_odds": 10.196562767028809, + "log_odds_ratio": -0.16461926698684692, + "loss": 0.3182, + "rejected_geometric_mean": -10.664603233337402, + "step": 7840 + }, + { + "chosen_geometric_mean": -0.8837776184082031, + "epoch": 1.94, + "grad_norm": 3.640625, + "learning_rate": 1.0638157713301345e-08, + "log_odds": 5.833462715148926, + "log_odds_ratio": -0.08595582842826843, + "loss": 0.3083, + "rejected_geometric_mean": -6.228260040283203, + "step": 7841 + }, + { + "chosen_geometric_mean": -1.2671656608581543, + "epoch": 1.94, + "grad_norm": 15.5625, + "learning_rate": 1.054863667880962e-08, + "log_odds": 8.182716369628906, + "log_odds_ratio": -0.0744759812951088, + "loss": 0.2518, + "rejected_geometric_mean": -9.05687141418457, + "step": 7842 + }, + { + "chosen_geometric_mean": -1.2272751331329346, + "epoch": 1.94, + "grad_norm": 18.5, + "learning_rate": 1.0459493104901242e-08, + "log_odds": 10.78314208984375, + "log_odds_ratio": -0.0007868062821216881, + "loss": 0.2783, + "rejected_geometric_mean": -11.546701431274414, + "step": 7843 + }, + { + "chosen_geometric_mean": -1.081921100616455, + "epoch": 1.94, + "grad_norm": 2.4375, + "learning_rate": 1.0370727005092062e-08, + "log_odds": 3.958817481994629, + "log_odds_ratio": -0.21876274049282074, + "loss": 0.277, + "rejected_geometric_mean": -4.673555850982666, + "step": 7844 + }, + { + "chosen_geometric_mean": -0.9331817626953125, + "epoch": 1.94, + "grad_norm": 20.5, + "learning_rate": 1.0282338392841873e-08, + "log_odds": 3.291071891784668, + "log_odds_ratio": -0.259111613035202, + "loss": 0.2239, + "rejected_geometric_mean": -3.9147660732269287, + "step": 7845 + }, + { + "chosen_geometric_mean": -1.016156792640686, + "epoch": 1.94, + "grad_norm": 5.4375, + "learning_rate": 1.0194327281551897e-08, + "log_odds": 7.2381062507629395, + "log_odds_ratio": -0.0828418880701065, + "loss": 0.2816, + "rejected_geometric_mean": -7.834118366241455, + "step": 7846 + }, + { + "chosen_geometric_mean": -1.0478638410568237, + "epoch": 1.94, + "grad_norm": 8.875, + "learning_rate": 1.0106693684567014e-08, + "log_odds": 11.22488784790039, + "log_odds_ratio": -0.1539449244737625, + "loss": 0.2377, + "rejected_geometric_mean": -11.896171569824219, + "step": 7847 + }, + { + "chosen_geometric_mean": -1.0272198915481567, + "epoch": 1.94, + "grad_norm": 12.4375, + "learning_rate": 1.0019437615174654e-08, + "log_odds": 7.162747383117676, + "log_odds_ratio": -0.10954072326421738, + "loss": 0.2701, + "rejected_geometric_mean": -7.79522180557251, + "step": 7848 + }, + { + "chosen_geometric_mean": -1.1125609874725342, + "epoch": 1.94, + "grad_norm": 2.140625, + "learning_rate": 9.932559086605064e-09, + "log_odds": 4.89474630355835, + "log_odds_ratio": -0.22166210412979126, + "loss": 0.2494, + "rejected_geometric_mean": -5.69133186340332, + "step": 7849 + }, + { + "chosen_geometric_mean": -0.9038057327270508, + "epoch": 1.94, + "grad_norm": 2.578125, + "learning_rate": 9.846058112030488e-09, + "log_odds": 4.755582809448242, + "log_odds_ratio": -0.279531329870224, + "loss": 0.2738, + "rejected_geometric_mean": -5.29248046875, + "step": 7850 + }, + { + "chosen_geometric_mean": -1.1289615631103516, + "epoch": 1.94, + "grad_norm": 11.5625, + "learning_rate": 9.759934704567098e-09, + "log_odds": 7.796267032623291, + "log_odds_ratio": -0.029914023354649544, + "loss": 0.2741, + "rejected_geometric_mean": -8.544015884399414, + "step": 7851 + }, + { + "chosen_geometric_mean": -0.9481145739555359, + "epoch": 1.94, + "grad_norm": 10.4375, + "learning_rate": 9.674188877273339e-09, + "log_odds": 13.207454681396484, + "log_odds_ratio": -0.07040863484144211, + "loss": 0.2391, + "rejected_geometric_mean": -13.68432331085205, + "step": 7852 + }, + { + "chosen_geometric_mean": -0.7461572885513306, + "epoch": 1.94, + "grad_norm": 10.6875, + "learning_rate": 9.588820643149643e-09, + "log_odds": 15.95226764678955, + "log_odds_ratio": -3.218671054128208e-06, + "loss": 0.2471, + "rejected_geometric_mean": -16.046371459960938, + "step": 7853 + }, + { + "chosen_geometric_mean": -0.9492440223693848, + "epoch": 1.94, + "grad_norm": 2.140625, + "learning_rate": 9.503830015140936e-09, + "log_odds": 10.19039535522461, + "log_odds_ratio": -0.0005169653450138867, + "loss": 0.2448, + "rejected_geometric_mean": -10.646381378173828, + "step": 7854 + }, + { + "chosen_geometric_mean": -1.0328435897827148, + "epoch": 1.94, + "grad_norm": 48.5, + "learning_rate": 9.419217006132741e-09, + "log_odds": 5.958855152130127, + "log_odds_ratio": -0.17416910827159882, + "loss": 0.3226, + "rejected_geometric_mean": -6.689596176147461, + "step": 7855 + }, + { + "chosen_geometric_mean": -0.9560455083847046, + "epoch": 1.95, + "grad_norm": 2.484375, + "learning_rate": 9.334981628955353e-09, + "log_odds": 3.4327638149261475, + "log_odds_ratio": -0.2999393939971924, + "loss": 0.2848, + "rejected_geometric_mean": -4.020711421966553, + "step": 7856 + }, + { + "chosen_geometric_mean": -1.0206983089447021, + "epoch": 1.95, + "grad_norm": 2.34375, + "learning_rate": 9.25112389638022e-09, + "log_odds": 9.438212394714355, + "log_odds_ratio": -0.007735434919595718, + "loss": 0.2402, + "rejected_geometric_mean": -9.964862823486328, + "step": 7857 + }, + { + "chosen_geometric_mean": -1.1379978656768799, + "epoch": 1.95, + "grad_norm": 3.828125, + "learning_rate": 9.167643821122451e-09, + "log_odds": 3.578972816467285, + "log_odds_ratio": -0.37373673915863037, + "loss": 0.2591, + "rejected_geometric_mean": -4.5194478034973145, + "step": 7858 + }, + { + "chosen_geometric_mean": -0.9428999423980713, + "epoch": 1.95, + "grad_norm": 19.5, + "learning_rate": 9.084541415839699e-09, + "log_odds": 10.169512748718262, + "log_odds_ratio": -0.172055184841156, + "loss": 0.2391, + "rejected_geometric_mean": -10.658214569091797, + "step": 7859 + }, + { + "chosen_geometric_mean": -1.0175024271011353, + "epoch": 1.95, + "grad_norm": 2.109375, + "learning_rate": 9.001816693132159e-09, + "log_odds": 10.133636474609375, + "log_odds_ratio": -0.14241546392440796, + "loss": 0.2438, + "rejected_geometric_mean": -10.74738597869873, + "step": 7860 + }, + { + "chosen_geometric_mean": -0.7576503753662109, + "epoch": 1.95, + "grad_norm": 7.40625, + "learning_rate": 8.919469665542856e-09, + "log_odds": 9.343544006347656, + "log_odds_ratio": -0.05969797819852829, + "loss": 0.2305, + "rejected_geometric_mean": -9.502479553222656, + "step": 7861 + }, + { + "chosen_geometric_mean": -1.1345467567443848, + "epoch": 1.95, + "grad_norm": 13.4375, + "learning_rate": 8.837500345557914e-09, + "log_odds": 7.601813793182373, + "log_odds_ratio": -0.14844205975532532, + "loss": 0.3205, + "rejected_geometric_mean": -8.42044448852539, + "step": 7862 + }, + { + "chosen_geometric_mean": -0.90445876121521, + "epoch": 1.95, + "grad_norm": 20.625, + "learning_rate": 8.755908745605169e-09, + "log_odds": 6.959939956665039, + "log_odds_ratio": -0.06332342326641083, + "loss": 0.2685, + "rejected_geometric_mean": -7.3717803955078125, + "step": 7863 + }, + { + "chosen_geometric_mean": -0.8887491822242737, + "epoch": 1.95, + "grad_norm": 3.515625, + "learning_rate": 8.674694878056667e-09, + "log_odds": 15.379369735717773, + "log_odds_ratio": -0.14421623945236206, + "loss": 0.2855, + "rejected_geometric_mean": -15.826295852661133, + "step": 7864 + }, + { + "chosen_geometric_mean": -0.8861850500106812, + "epoch": 1.95, + "grad_norm": 3.1875, + "learning_rate": 8.593858755225892e-09, + "log_odds": 9.231315612792969, + "log_odds_ratio": -0.13378159701824188, + "loss": 0.2379, + "rejected_geometric_mean": -9.70630168914795, + "step": 7865 + }, + { + "chosen_geometric_mean": -0.8975731134414673, + "epoch": 1.95, + "grad_norm": 2.0625, + "learning_rate": 8.51340038936943e-09, + "log_odds": 10.776250839233398, + "log_odds_ratio": -0.14846614003181458, + "loss": 0.2721, + "rejected_geometric_mean": -11.263242721557617, + "step": 7866 + }, + { + "chosen_geometric_mean": -1.1565691232681274, + "epoch": 1.95, + "grad_norm": 6.53125, + "learning_rate": 8.433319792686967e-09, + "log_odds": 6.782962322235107, + "log_odds_ratio": -0.13204722106456757, + "loss": 0.2822, + "rejected_geometric_mean": -7.5841522216796875, + "step": 7867 + }, + { + "chosen_geometric_mean": -1.083181619644165, + "epoch": 1.95, + "grad_norm": 11.9375, + "learning_rate": 8.353616977320733e-09, + "log_odds": 9.279261589050293, + "log_odds_ratio": -0.07210859656333923, + "loss": 0.2948, + "rejected_geometric_mean": -9.987483978271484, + "step": 7868 + }, + { + "chosen_geometric_mean": -0.8986371159553528, + "epoch": 1.95, + "grad_norm": 2.875, + "learning_rate": 8.274291955355506e-09, + "log_odds": 5.249512195587158, + "log_odds_ratio": -0.16538092494010925, + "loss": 0.2649, + "rejected_geometric_mean": -5.745992660522461, + "step": 7869 + }, + { + "chosen_geometric_mean": -1.0630000829696655, + "epoch": 1.95, + "grad_norm": 2.71875, + "learning_rate": 8.19534473881861e-09, + "log_odds": 5.206341743469238, + "log_odds_ratio": -0.0805247575044632, + "loss": 0.2631, + "rejected_geometric_mean": -5.847667217254639, + "step": 7870 + }, + { + "chosen_geometric_mean": -1.0109021663665771, + "epoch": 1.95, + "grad_norm": 2.390625, + "learning_rate": 8.116775339680471e-09, + "log_odds": 1.0106446743011475, + "log_odds_ratio": -0.46003299951553345, + "loss": 0.3171, + "rejected_geometric_mean": -1.819610834121704, + "step": 7871 + }, + { + "chosen_geometric_mean": -0.9602853655815125, + "epoch": 1.95, + "grad_norm": 4.03125, + "learning_rate": 8.038583769854335e-09, + "log_odds": 11.964018821716309, + "log_odds_ratio": -0.06978089362382889, + "loss": 0.2668, + "rejected_geometric_mean": -12.494060516357422, + "step": 7872 + }, + { + "chosen_geometric_mean": -1.0518895387649536, + "epoch": 1.95, + "grad_norm": 22.5, + "learning_rate": 7.96077004119572e-09, + "log_odds": 7.849979400634766, + "log_odds_ratio": -0.005490329582244158, + "loss": 0.2708, + "rejected_geometric_mean": -8.409467697143555, + "step": 7873 + }, + { + "chosen_geometric_mean": -0.9952832460403442, + "epoch": 1.95, + "grad_norm": 6.59375, + "learning_rate": 7.883334165502964e-09, + "log_odds": 4.27530574798584, + "log_odds_ratio": -0.19265921413898468, + "loss": 0.2596, + "rejected_geometric_mean": -4.943055152893066, + "step": 7874 + }, + { + "chosen_geometric_mean": -1.269425868988037, + "epoch": 1.95, + "grad_norm": 31.25, + "learning_rate": 7.806276154517233e-09, + "log_odds": 8.320159912109375, + "log_odds_ratio": -0.11420141160488129, + "loss": 0.2481, + "rejected_geometric_mean": -9.183090209960938, + "step": 7875 + }, + { + "chosen_geometric_mean": -0.9637743830680847, + "epoch": 1.95, + "grad_norm": 3.8125, + "learning_rate": 7.729596019922513e-09, + "log_odds": 11.190141677856445, + "log_odds_ratio": -0.005349710583686829, + "loss": 0.212, + "rejected_geometric_mean": -11.65917682647705, + "step": 7876 + }, + { + "chosen_geometric_mean": -1.0699636936187744, + "epoch": 1.95, + "grad_norm": 2.9375, + "learning_rate": 7.653293773345339e-09, + "log_odds": 5.3765130043029785, + "log_odds_ratio": -0.2674770653247833, + "loss": 0.3591, + "rejected_geometric_mean": -6.126712799072266, + "step": 7877 + }, + { + "chosen_geometric_mean": -0.8000741004943848, + "epoch": 1.95, + "grad_norm": 3.625, + "learning_rate": 7.577369426355064e-09, + "log_odds": 1.5276007652282715, + "log_odds_ratio": -0.3395672142505646, + "loss": 0.2708, + "rejected_geometric_mean": -1.9775562286376953, + "step": 7878 + }, + { + "chosen_geometric_mean": -0.8763449192047119, + "epoch": 1.95, + "grad_norm": 11.0, + "learning_rate": 7.501822990463314e-09, + "log_odds": 0.8192778825759888, + "log_odds_ratio": -0.3732917904853821, + "loss": 0.2924, + "rejected_geometric_mean": -1.4361271858215332, + "step": 7879 + }, + { + "chosen_geometric_mean": -0.991348385810852, + "epoch": 1.95, + "grad_norm": 6.65625, + "learning_rate": 7.426654477124817e-09, + "log_odds": 10.904376983642578, + "log_odds_ratio": -0.1686069518327713, + "loss": 0.2632, + "rejected_geometric_mean": -11.531562805175781, + "step": 7880 + }, + { + "chosen_geometric_mean": -0.9970319867134094, + "epoch": 1.95, + "grad_norm": 19.625, + "learning_rate": 7.3518638977371216e-09, + "log_odds": 7.835449695587158, + "log_odds_ratio": -0.1346762478351593, + "loss": 0.2546, + "rejected_geometric_mean": -8.489887237548828, + "step": 7881 + }, + { + "chosen_geometric_mean": -0.9271398782730103, + "epoch": 1.95, + "grad_norm": 32.75, + "learning_rate": 7.277451263640323e-09, + "log_odds": 3.283224582672119, + "log_odds_ratio": -0.1826542317867279, + "loss": 0.2735, + "rejected_geometric_mean": -3.724567174911499, + "step": 7882 + }, + { + "chosen_geometric_mean": -1.1159385442733765, + "epoch": 1.95, + "grad_norm": 3.140625, + "learning_rate": 7.203416586117063e-09, + "log_odds": 6.896093845367432, + "log_odds_ratio": -0.09036200493574142, + "loss": 0.2682, + "rejected_geometric_mean": -7.664829254150391, + "step": 7883 + }, + { + "chosen_geometric_mean": -0.9520490169525146, + "epoch": 1.95, + "grad_norm": 19.0, + "learning_rate": 7.129759876392528e-09, + "log_odds": 7.729804992675781, + "log_odds_ratio": -0.04203050583600998, + "loss": 0.2643, + "rejected_geometric_mean": -8.197224617004395, + "step": 7884 + }, + { + "chosen_geometric_mean": -0.9547373056411743, + "epoch": 1.95, + "grad_norm": 25.25, + "learning_rate": 7.0564811456350076e-09, + "log_odds": 1.0153286457061768, + "log_odds_ratio": -0.3372264504432678, + "loss": 0.2248, + "rejected_geometric_mean": -1.6973211765289307, + "step": 7885 + }, + { + "chosen_geometric_mean": -1.141453504562378, + "epoch": 1.95, + "grad_norm": 13.875, + "learning_rate": 6.983580404955614e-09, + "log_odds": 6.125326156616211, + "log_odds_ratio": -0.014751981943845749, + "loss": 0.2584, + "rejected_geometric_mean": -6.877738952636719, + "step": 7886 + }, + { + "chosen_geometric_mean": -1.0742353200912476, + "epoch": 1.95, + "grad_norm": 6.09375, + "learning_rate": 6.911057665407173e-09, + "log_odds": 6.738471031188965, + "log_odds_ratio": -0.09520122408866882, + "loss": 0.2621, + "rejected_geometric_mean": -7.420548915863037, + "step": 7887 + }, + { + "chosen_geometric_mean": -0.8625958561897278, + "epoch": 1.95, + "grad_norm": 8.375, + "learning_rate": 6.838912937986719e-09, + "log_odds": 5.909633636474609, + "log_odds_ratio": -0.26912105083465576, + "loss": 0.2697, + "rejected_geometric_mean": -6.4080963134765625, + "step": 7888 + }, + { + "chosen_geometric_mean": -0.8453698754310608, + "epoch": 1.95, + "grad_norm": 23.875, + "learning_rate": 6.767146233632449e-09, + "log_odds": 6.810389518737793, + "log_odds_ratio": -0.13370424509048462, + "loss": 0.2608, + "rejected_geometric_mean": -7.157967567443848, + "step": 7889 + }, + { + "chosen_geometric_mean": -1.0112992525100708, + "epoch": 1.95, + "grad_norm": 2.59375, + "learning_rate": 6.695757563226213e-09, + "log_odds": 8.251901626586914, + "log_odds_ratio": -0.09359439462423325, + "loss": 0.2039, + "rejected_geometric_mean": -8.81969928741455, + "step": 7890 + }, + { + "chosen_geometric_mean": -1.019707441329956, + "epoch": 1.95, + "grad_norm": 3.078125, + "learning_rate": 6.624746937592408e-09, + "log_odds": 9.548020362854004, + "log_odds_ratio": -0.0309407077729702, + "loss": 0.2758, + "rejected_geometric_mean": -10.121822357177734, + "step": 7891 + }, + { + "chosen_geometric_mean": -0.8995111584663391, + "epoch": 1.95, + "grad_norm": 13.375, + "learning_rate": 6.554114367497421e-09, + "log_odds": 11.190155982971191, + "log_odds_ratio": -0.03176712244749069, + "loss": 0.2276, + "rejected_geometric_mean": -11.582611083984375, + "step": 7892 + }, + { + "chosen_geometric_mean": -1.1439005136489868, + "epoch": 1.95, + "grad_norm": 2.140625, + "learning_rate": 6.483859863651299e-09, + "log_odds": 8.95123291015625, + "log_odds_ratio": -0.06806347519159317, + "loss": 0.2505, + "rejected_geometric_mean": -9.732473373413086, + "step": 7893 + }, + { + "chosen_geometric_mean": -0.8796841502189636, + "epoch": 1.95, + "grad_norm": 21.75, + "learning_rate": 6.413983436706351e-09, + "log_odds": 3.6951122283935547, + "log_odds_ratio": -0.1392837017774582, + "loss": 0.2818, + "rejected_geometric_mean": -4.132889270782471, + "step": 7894 + }, + { + "chosen_geometric_mean": -0.9247472286224365, + "epoch": 1.95, + "grad_norm": 2.203125, + "learning_rate": 6.344485097257158e-09, + "log_odds": 8.412935256958008, + "log_odds_ratio": -0.15184687077999115, + "loss": 0.2224, + "rejected_geometric_mean": -8.926480293273926, + "step": 7895 + }, + { + "chosen_geometric_mean": -0.9161202907562256, + "epoch": 1.95, + "grad_norm": 6.9375, + "learning_rate": 6.2753648558414035e-09, + "log_odds": 12.377861976623535, + "log_odds_ratio": -0.021416857838630676, + "loss": 0.262, + "rejected_geometric_mean": -12.783084869384766, + "step": 7896 + }, + { + "chosen_geometric_mean": -0.9188679456710815, + "epoch": 1.96, + "grad_norm": 1.90625, + "learning_rate": 6.206622722939592e-09, + "log_odds": 2.937866449356079, + "log_odds_ratio": -0.18224243819713593, + "loss": 0.233, + "rejected_geometric_mean": -3.487060070037842, + "step": 7897 + }, + { + "chosen_geometric_mean": -0.8650883436203003, + "epoch": 1.96, + "grad_norm": 2.0, + "learning_rate": 6.1382587089747755e-09, + "log_odds": 11.559032440185547, + "log_odds_ratio": -0.12101154029369354, + "loss": 0.2562, + "rejected_geometric_mean": -11.957871437072754, + "step": 7898 + }, + { + "chosen_geometric_mean": -0.7895505428314209, + "epoch": 1.96, + "grad_norm": 2.5625, + "learning_rate": 6.070272824312273e-09, + "log_odds": 2.553586959838867, + "log_odds_ratio": -0.3177874684333801, + "loss": 0.2785, + "rejected_geometric_mean": -3.0409579277038574, + "step": 7899 + }, + { + "chosen_geometric_mean": -1.010512351989746, + "epoch": 1.96, + "grad_norm": 2.140625, + "learning_rate": 6.002665079260506e-09, + "log_odds": 3.1655771732330322, + "log_odds_ratio": -0.10002286732196808, + "loss": 0.2389, + "rejected_geometric_mean": -3.75923752784729, + "step": 7900 + }, + { + "chosen_geometric_mean": -0.887790322303772, + "epoch": 1.96, + "grad_norm": 4.84375, + "learning_rate": 5.93543548407044e-09, + "log_odds": 8.375597953796387, + "log_odds_ratio": -0.021352747455239296, + "loss": 0.2771, + "rejected_geometric_mean": -8.65578556060791, + "step": 7901 + }, + { + "chosen_geometric_mean": -1.1320829391479492, + "epoch": 1.96, + "grad_norm": 8.75, + "learning_rate": 5.868584048935588e-09, + "log_odds": 9.980752944946289, + "log_odds_ratio": -0.030605947598814964, + "loss": 0.2892, + "rejected_geometric_mean": -10.697561264038086, + "step": 7902 + }, + { + "chosen_geometric_mean": -0.8579392433166504, + "epoch": 1.96, + "grad_norm": 2.3125, + "learning_rate": 5.802110783992288e-09, + "log_odds": 10.04194164276123, + "log_odds_ratio": -0.0057593705132603645, + "loss": 0.283, + "rejected_geometric_mean": -10.311174392700195, + "step": 7903 + }, + { + "chosen_geometric_mean": -0.8435564637184143, + "epoch": 1.96, + "grad_norm": 1.71875, + "learning_rate": 5.736015699319419e-09, + "log_odds": 5.642480850219727, + "log_odds_ratio": -0.14349772036075592, + "loss": 0.2047, + "rejected_geometric_mean": -6.0512285232543945, + "step": 7904 + }, + { + "chosen_geometric_mean": -1.0576481819152832, + "epoch": 1.96, + "grad_norm": 25.25, + "learning_rate": 5.670298804938967e-09, + "log_odds": 5.878633499145508, + "log_odds_ratio": -0.11269810795783997, + "loss": 0.2528, + "rejected_geometric_mean": -6.558002471923828, + "step": 7905 + }, + { + "chosen_geometric_mean": -1.1232349872589111, + "epoch": 1.96, + "grad_norm": 4.125, + "learning_rate": 5.604960110814905e-09, + "log_odds": 6.958186149597168, + "log_odds_ratio": -0.2201208770275116, + "loss": 0.2789, + "rejected_geometric_mean": -7.754985809326172, + "step": 7906 + }, + { + "chosen_geometric_mean": -1.0199921131134033, + "epoch": 1.96, + "grad_norm": 1.984375, + "learning_rate": 5.539999626853754e-09, + "log_odds": 13.320352554321289, + "log_odds_ratio": -0.01171250268816948, + "loss": 0.243, + "rejected_geometric_mean": -13.857913970947266, + "step": 7907 + }, + { + "chosen_geometric_mean": -1.0512882471084595, + "epoch": 1.96, + "grad_norm": 2.84375, + "learning_rate": 5.475417362905966e-09, + "log_odds": 5.574810028076172, + "log_odds_ratio": -0.10262813419103622, + "loss": 0.2696, + "rejected_geometric_mean": -6.247775554656982, + "step": 7908 + }, + { + "chosen_geometric_mean": -0.9900555610656738, + "epoch": 1.96, + "grad_norm": 2.203125, + "learning_rate": 5.411213328762876e-09, + "log_odds": 5.143619537353516, + "log_odds_ratio": -0.044662781059741974, + "loss": 0.252, + "rejected_geometric_mean": -5.680181980133057, + "step": 7909 + }, + { + "chosen_geometric_mean": -0.9331337213516235, + "epoch": 1.96, + "grad_norm": 13.0625, + "learning_rate": 5.3473875341600315e-09, + "log_odds": 15.200324058532715, + "log_odds_ratio": -0.0009586370433680713, + "loss": 0.2811, + "rejected_geometric_mean": -15.618912696838379, + "step": 7910 + }, + { + "chosen_geometric_mean": -0.9655944108963013, + "epoch": 1.96, + "grad_norm": 2.234375, + "learning_rate": 5.2839399887744116e-09, + "log_odds": 4.109334468841553, + "log_odds_ratio": -0.38170135021209717, + "loss": 0.2535, + "rejected_geometric_mean": -4.853941440582275, + "step": 7911 + }, + { + "chosen_geometric_mean": -0.9791080951690674, + "epoch": 1.96, + "grad_norm": 7.53125, + "learning_rate": 5.220870702226655e-09, + "log_odds": 5.581691741943359, + "log_odds_ratio": -0.10829748958349228, + "loss": 0.3201, + "rejected_geometric_mean": -6.146742343902588, + "step": 7912 + }, + { + "chosen_geometric_mean": -1.1859506368637085, + "epoch": 1.96, + "grad_norm": 11.5625, + "learning_rate": 5.1581796840793896e-09, + "log_odds": 4.910512447357178, + "log_odds_ratio": -0.016937995329499245, + "loss": 0.2477, + "rejected_geometric_mean": -5.688093185424805, + "step": 7913 + }, + { + "chosen_geometric_mean": -1.0049846172332764, + "epoch": 1.96, + "grad_norm": 21.125, + "learning_rate": 5.09586694383779e-09, + "log_odds": 10.011600494384766, + "log_odds_ratio": -0.065719373524189, + "loss": 0.2564, + "rejected_geometric_mean": -10.604164123535156, + "step": 7914 + }, + { + "chosen_geometric_mean": -1.100434422492981, + "epoch": 1.96, + "grad_norm": 29.5, + "learning_rate": 5.033932490950411e-09, + "log_odds": 12.471464157104492, + "log_odds_ratio": -0.0007049007690511644, + "loss": 0.3521, + "rejected_geometric_mean": -13.157078742980957, + "step": 7915 + }, + { + "chosen_geometric_mean": -1.1078484058380127, + "epoch": 1.96, + "grad_norm": 15.875, + "learning_rate": 4.972376334808071e-09, + "log_odds": 5.7659912109375, + "log_odds_ratio": -0.09707564115524292, + "loss": 0.2529, + "rejected_geometric_mean": -6.517267227172852, + "step": 7916 + }, + { + "chosen_geometric_mean": -0.9850471615791321, + "epoch": 1.96, + "grad_norm": 3.59375, + "learning_rate": 4.911198484743585e-09, + "log_odds": 7.134466171264648, + "log_odds_ratio": -0.18989400565624237, + "loss": 0.2161, + "rejected_geometric_mean": -7.732442855834961, + "step": 7917 + }, + { + "chosen_geometric_mean": -0.9726846814155579, + "epoch": 1.96, + "grad_norm": 9.625, + "learning_rate": 4.850398950033419e-09, + "log_odds": 3.5364789962768555, + "log_odds_ratio": -0.14146165549755096, + "loss": 0.2661, + "rejected_geometric_mean": -4.103529930114746, + "step": 7918 + }, + { + "chosen_geometric_mean": -0.9894282817840576, + "epoch": 1.96, + "grad_norm": 3.65625, + "learning_rate": 4.789977739896035e-09, + "log_odds": 2.868020534515381, + "log_odds_ratio": -0.25867992639541626, + "loss": 0.273, + "rejected_geometric_mean": -3.543513298034668, + "step": 7919 + }, + { + "chosen_geometric_mean": -1.541076898574829, + "epoch": 1.96, + "grad_norm": 49.25, + "learning_rate": 4.729934863492991e-09, + "log_odds": 2.044959545135498, + "log_odds_ratio": -0.2199656069278717, + "loss": 0.3254, + "rejected_geometric_mean": -3.381105422973633, + "step": 7920 + }, + { + "chosen_geometric_mean": -1.7051035165786743, + "epoch": 1.96, + "grad_norm": 20.875, + "learning_rate": 4.6702703299281175e-09, + "log_odds": 4.551398277282715, + "log_odds_ratio": -0.13758334517478943, + "loss": 0.322, + "rejected_geometric_mean": -5.928224563598633, + "step": 7921 + }, + { + "chosen_geometric_mean": -0.8745890855789185, + "epoch": 1.96, + "grad_norm": 15.625, + "learning_rate": 4.610984148247788e-09, + "log_odds": 2.6477184295654297, + "log_odds_ratio": -0.198851078748703, + "loss": 0.2878, + "rejected_geometric_mean": -3.1423022747039795, + "step": 7922 + }, + { + "chosen_geometric_mean": -0.8562115430831909, + "epoch": 1.96, + "grad_norm": 6.15625, + "learning_rate": 4.552076327441479e-09, + "log_odds": 8.28179931640625, + "log_odds_ratio": -0.07376701384782791, + "loss": 0.229, + "rejected_geometric_mean": -8.61721134185791, + "step": 7923 + }, + { + "chosen_geometric_mean": -0.9066446423530579, + "epoch": 1.96, + "grad_norm": 6.125, + "learning_rate": 4.493546876440935e-09, + "log_odds": 2.898770332336426, + "log_odds_ratio": -0.2746509313583374, + "loss": 0.2841, + "rejected_geometric_mean": -3.4640467166900635, + "step": 7924 + }, + { + "chosen_geometric_mean": -0.8847521543502808, + "epoch": 1.96, + "grad_norm": 4.59375, + "learning_rate": 4.435395804120446e-09, + "log_odds": 11.842835426330566, + "log_odds_ratio": -0.0025772808585315943, + "loss": 0.267, + "rejected_geometric_mean": -12.182952880859375, + "step": 7925 + }, + { + "chosen_geometric_mean": -0.8452416062355042, + "epoch": 1.96, + "grad_norm": 7.59375, + "learning_rate": 4.3776231192974026e-09, + "log_odds": 9.818626403808594, + "log_odds_ratio": -0.15718230605125427, + "loss": 0.2488, + "rejected_geometric_mean": -10.21577262878418, + "step": 7926 + }, + { + "chosen_geometric_mean": -0.9421037435531616, + "epoch": 1.96, + "grad_norm": 2.90625, + "learning_rate": 4.3202288307314655e-09, + "log_odds": 4.424831390380859, + "log_odds_ratio": -0.326433002948761, + "loss": 0.2463, + "rejected_geometric_mean": -5.136711597442627, + "step": 7927 + }, + { + "chosen_geometric_mean": -1.1856482028961182, + "epoch": 1.96, + "grad_norm": 6.125, + "learning_rate": 4.263212947124839e-09, + "log_odds": 7.042908668518066, + "log_odds_ratio": -0.040286365896463394, + "loss": 0.2356, + "rejected_geometric_mean": -7.870708465576172, + "step": 7928 + }, + { + "chosen_geometric_mean": -1.237474799156189, + "epoch": 1.96, + "grad_norm": 13.4375, + "learning_rate": 4.2065754771225545e-09, + "log_odds": 5.740861415863037, + "log_odds_ratio": -0.21882593631744385, + "loss": 0.2658, + "rejected_geometric_mean": -6.73994779586792, + "step": 7929 + }, + { + "chosen_geometric_mean": -1.108968734741211, + "epoch": 1.96, + "grad_norm": 128.0, + "learning_rate": 4.150316429312185e-09, + "log_odds": 5.362966060638428, + "log_odds_ratio": -0.1895608901977539, + "loss": 0.3115, + "rejected_geometric_mean": -6.117568492889404, + "step": 7930 + }, + { + "chosen_geometric_mean": -1.1208897829055786, + "epoch": 1.96, + "grad_norm": 6.71875, + "learning_rate": 4.094435812224407e-09, + "log_odds": 6.711319446563721, + "log_odds_ratio": -0.19691625237464905, + "loss": 0.2611, + "rejected_geometric_mean": -7.519865989685059, + "step": 7931 + }, + { + "chosen_geometric_mean": -0.931597113609314, + "epoch": 1.96, + "grad_norm": 6.65625, + "learning_rate": 4.038933634331332e-09, + "log_odds": 7.392413139343262, + "log_odds_ratio": -0.11206796765327454, + "loss": 0.2957, + "rejected_geometric_mean": -7.8817853927612305, + "step": 7932 + }, + { + "chosen_geometric_mean": -1.4079465866088867, + "epoch": 1.96, + "grad_norm": 13.625, + "learning_rate": 3.98380990404873e-09, + "log_odds": 4.187163352966309, + "log_odds_ratio": -0.3995503783226013, + "loss": 0.2601, + "rejected_geometric_mean": -5.336270332336426, + "step": 7933 + }, + { + "chosen_geometric_mean": -1.025444507598877, + "epoch": 1.96, + "grad_norm": 5.0, + "learning_rate": 3.929064629734914e-09, + "log_odds": 3.9578161239624023, + "log_odds_ratio": -0.11266465485095978, + "loss": 0.2713, + "rejected_geometric_mean": -4.593589782714844, + "step": 7934 + }, + { + "chosen_geometric_mean": -0.8484172224998474, + "epoch": 1.96, + "grad_norm": 31.75, + "learning_rate": 3.874697819690187e-09, + "log_odds": 7.550390720367432, + "log_odds_ratio": -0.08318788558244705, + "loss": 0.2994, + "rejected_geometric_mean": -7.816211700439453, + "step": 7935 + }, + { + "chosen_geometric_mean": -1.0800089836120605, + "epoch": 1.96, + "grad_norm": 2.46875, + "learning_rate": 3.820709482158236e-09, + "log_odds": 6.624329566955566, + "log_odds_ratio": -0.006787898484617472, + "loss": 0.2534, + "rejected_geometric_mean": -7.267298221588135, + "step": 7936 + }, + { + "chosen_geometric_mean": -0.8373773694038391, + "epoch": 1.97, + "grad_norm": 2.65625, + "learning_rate": 3.767099625324733e-09, + "log_odds": 10.001363754272461, + "log_odds_ratio": -0.07523497194051743, + "loss": 0.2837, + "rejected_geometric_mean": -10.309026718139648, + "step": 7937 + }, + { + "chosen_geometric_mean": -1.0728968381881714, + "epoch": 1.97, + "grad_norm": 1.984375, + "learning_rate": 3.7138682573184536e-09, + "log_odds": 6.422186851501465, + "log_odds_ratio": -0.15838097035884857, + "loss": 0.245, + "rejected_geometric_mean": -7.160152435302734, + "step": 7938 + }, + { + "chosen_geometric_mean": -1.1331208944320679, + "epoch": 1.97, + "grad_norm": 3.5, + "learning_rate": 3.661015386210165e-09, + "log_odds": 2.53456449508667, + "log_odds_ratio": -0.22547729313373566, + "loss": 0.2145, + "rejected_geometric_mean": -3.398979663848877, + "step": 7939 + }, + { + "chosen_geometric_mean": -0.7863420248031616, + "epoch": 1.97, + "grad_norm": 11.0625, + "learning_rate": 3.6085410200137337e-09, + "log_odds": 1.7674529552459717, + "log_odds_ratio": -0.23238703608512878, + "loss": 0.2793, + "rejected_geometric_mean": -2.1534838676452637, + "step": 7940 + }, + { + "chosen_geometric_mean": -1.0184025764465332, + "epoch": 1.97, + "grad_norm": 29.125, + "learning_rate": 3.556445166685851e-09, + "log_odds": 4.476401329040527, + "log_odds_ratio": -0.1312282383441925, + "loss": 0.251, + "rejected_geometric_mean": -5.097591876983643, + "step": 7941 + }, + { + "chosen_geometric_mean": -1.0870496034622192, + "epoch": 1.97, + "grad_norm": 4.59375, + "learning_rate": 3.5047278341254767e-09, + "log_odds": 10.740991592407227, + "log_odds_ratio": -0.0001819915050873533, + "loss": 0.2397, + "rejected_geometric_mean": -11.399751663208008, + "step": 7942 + }, + { + "chosen_geometric_mean": -0.9908583164215088, + "epoch": 1.97, + "grad_norm": 16.875, + "learning_rate": 3.453389030173837e-09, + "log_odds": 6.620548248291016, + "log_odds_ratio": -0.04199964553117752, + "loss": 0.2889, + "rejected_geometric_mean": -7.165963172912598, + "step": 7943 + }, + { + "chosen_geometric_mean": -1.0313925743103027, + "epoch": 1.97, + "grad_norm": 9.375, + "learning_rate": 3.402428762615262e-09, + "log_odds": 3.4404215812683105, + "log_odds_ratio": -0.30309468507766724, + "loss": 0.2323, + "rejected_geometric_mean": -4.160595893859863, + "step": 7944 + }, + { + "chosen_geometric_mean": -1.1415661573410034, + "epoch": 1.97, + "grad_norm": 9.4375, + "learning_rate": 3.3518470391766255e-09, + "log_odds": 12.178837776184082, + "log_odds_ratio": -0.002605757210403681, + "loss": 0.2415, + "rejected_geometric_mean": -12.915240287780762, + "step": 7945 + }, + { + "chosen_geometric_mean": -0.9903182983398438, + "epoch": 1.97, + "grad_norm": 3.34375, + "learning_rate": 3.3016438675276265e-09, + "log_odds": 2.7158169746398926, + "log_odds_ratio": -0.2597854435443878, + "loss": 0.3367, + "rejected_geometric_mean": -3.357480525970459, + "step": 7946 + }, + { + "chosen_geometric_mean": -0.8625970482826233, + "epoch": 1.97, + "grad_norm": 1.875, + "learning_rate": 3.2518192552796757e-09, + "log_odds": 8.078117370605469, + "log_odds_ratio": -0.128932386636734, + "loss": 0.2464, + "rejected_geometric_mean": -8.491077423095703, + "step": 7947 + }, + { + "chosen_geometric_mean": -1.3146203756332397, + "epoch": 1.97, + "grad_norm": 78.5, + "learning_rate": 3.2023732099875638e-09, + "log_odds": 5.151424407958984, + "log_odds_ratio": -0.1977749466896057, + "loss": 0.3587, + "rejected_geometric_mean": -6.190862655639648, + "step": 7948 + }, + { + "chosen_geometric_mean": -0.9480583667755127, + "epoch": 1.97, + "grad_norm": 95.0, + "learning_rate": 3.153305739148904e-09, + "log_odds": 2.869100332260132, + "log_odds_ratio": -0.2565206289291382, + "loss": 0.2372, + "rejected_geometric_mean": -3.501896858215332, + "step": 7949 + }, + { + "chosen_geometric_mean": -1.0091358423233032, + "epoch": 1.97, + "grad_norm": 2.46875, + "learning_rate": 3.104616850203024e-09, + "log_odds": 7.373143672943115, + "log_odds_ratio": -0.011336044408380985, + "loss": 0.2705, + "rejected_geometric_mean": -7.837989807128906, + "step": 7950 + }, + { + "chosen_geometric_mean": -0.9589006900787354, + "epoch": 1.97, + "grad_norm": 1.8515625, + "learning_rate": 3.056306550532351e-09, + "log_odds": 7.026039123535156, + "log_odds_ratio": -0.01257803849875927, + "loss": 0.2403, + "rejected_geometric_mean": -7.476808547973633, + "step": 7951 + }, + { + "chosen_geometric_mean": -0.7879126071929932, + "epoch": 1.97, + "grad_norm": 4.125, + "learning_rate": 3.0083748474621366e-09, + "log_odds": 10.924427032470703, + "log_odds_ratio": -0.10695565491914749, + "loss": 0.2137, + "rejected_geometric_mean": -11.195219039916992, + "step": 7952 + }, + { + "chosen_geometric_mean": -0.9719210267066956, + "epoch": 1.97, + "grad_norm": 17.0, + "learning_rate": 2.960821748259901e-09, + "log_odds": 7.8834123611450195, + "log_odds_ratio": -0.009981823153793812, + "loss": 0.2645, + "rejected_geometric_mean": -8.361205101013184, + "step": 7953 + }, + { + "chosen_geometric_mean": -1.3360706567764282, + "epoch": 1.97, + "grad_norm": 19.375, + "learning_rate": 2.9136472601359876e-09, + "log_odds": 9.601217269897461, + "log_odds_ratio": -0.20961153507232666, + "loss": 0.2997, + "rejected_geometric_mean": -10.684381484985352, + "step": 7954 + }, + { + "chosen_geometric_mean": -1.1101791858673096, + "epoch": 1.97, + "grad_norm": 4.34375, + "learning_rate": 2.866851390242731e-09, + "log_odds": 11.500268936157227, + "log_odds_ratio": -0.17022605240345, + "loss": 0.2923, + "rejected_geometric_mean": -12.307085037231445, + "step": 7955 + }, + { + "chosen_geometric_mean": -1.0603355169296265, + "epoch": 1.97, + "grad_norm": 7.40625, + "learning_rate": 2.8204341456758435e-09, + "log_odds": 8.641439437866211, + "log_odds_ratio": -0.01838338002562523, + "loss": 0.2757, + "rejected_geometric_mean": -9.27393913269043, + "step": 7956 + }, + { + "chosen_geometric_mean": -0.9030596017837524, + "epoch": 1.97, + "grad_norm": 22.375, + "learning_rate": 2.7743955334733066e-09, + "log_odds": 8.948042869567871, + "log_odds_ratio": -0.010400203987956047, + "loss": 0.2898, + "rejected_geometric_mean": -9.277256965637207, + "step": 7957 + }, + { + "chosen_geometric_mean": -1.0544276237487793, + "epoch": 1.97, + "grad_norm": 3.03125, + "learning_rate": 2.7287355606156473e-09, + "log_odds": 12.973066329956055, + "log_odds_ratio": -0.0002874579222407192, + "loss": 0.2593, + "rejected_geometric_mean": -13.589245796203613, + "step": 7958 + }, + { + "chosen_geometric_mean": -0.8032742142677307, + "epoch": 1.97, + "grad_norm": 2.671875, + "learning_rate": 2.6834542340259396e-09, + "log_odds": 9.399215698242188, + "log_odds_ratio": -0.08531053364276886, + "loss": 0.2536, + "rejected_geometric_mean": -9.689638137817383, + "step": 7959 + }, + { + "chosen_geometric_mean": -1.011443018913269, + "epoch": 1.97, + "grad_norm": 5.03125, + "learning_rate": 2.6385515605700797e-09, + "log_odds": 1.586347222328186, + "log_odds_ratio": -0.36322665214538574, + "loss": 0.2786, + "rejected_geometric_mean": -2.378153085708618, + "step": 7960 + }, + { + "chosen_geometric_mean": -0.9863644242286682, + "epoch": 1.97, + "grad_norm": 23.5, + "learning_rate": 2.594027547055955e-09, + "log_odds": 7.412690162658691, + "log_odds_ratio": -0.08597737550735474, + "loss": 0.2745, + "rejected_geometric_mean": -7.895268440246582, + "step": 7961 + }, + { + "chosen_geometric_mean": -1.128102421760559, + "epoch": 1.97, + "grad_norm": 43.25, + "learning_rate": 2.5498822002351097e-09, + "log_odds": 6.342683792114258, + "log_odds_ratio": -0.025481482967734337, + "loss": 0.2714, + "rejected_geometric_mean": -6.94549560546875, + "step": 7962 + }, + { + "chosen_geometric_mean": -1.0028553009033203, + "epoch": 1.97, + "grad_norm": 2.78125, + "learning_rate": 2.5061155268005235e-09, + "log_odds": 12.215170860290527, + "log_odds_ratio": -0.0017473583575338125, + "loss": 0.2515, + "rejected_geometric_mean": -12.741239547729492, + "step": 7963 + }, + { + "chosen_geometric_mean": -0.9286350607872009, + "epoch": 1.97, + "grad_norm": 1.7265625, + "learning_rate": 2.462727533388276e-09, + "log_odds": 10.389469146728516, + "log_odds_ratio": -0.00010558504436630756, + "loss": 0.2077, + "rejected_geometric_mean": -10.80970573425293, + "step": 7964 + }, + { + "chosen_geometric_mean": -0.9738286733627319, + "epoch": 1.97, + "grad_norm": 16.25, + "learning_rate": 2.41971822657755e-09, + "log_odds": 3.0784101486206055, + "log_odds_ratio": -0.2992500364780426, + "loss": 0.3091, + "rejected_geometric_mean": -3.7457032203674316, + "step": 7965 + }, + { + "chosen_geometric_mean": -0.9057321548461914, + "epoch": 1.97, + "grad_norm": 2.046875, + "learning_rate": 2.377087612888962e-09, + "log_odds": 8.246062278747559, + "log_odds_ratio": -0.16395805776119232, + "loss": 0.2364, + "rejected_geometric_mean": -8.742753982543945, + "step": 7966 + }, + { + "chosen_geometric_mean": -1.0381847620010376, + "epoch": 1.97, + "grad_norm": 37.25, + "learning_rate": 2.334835698786786e-09, + "log_odds": 8.614456176757812, + "log_odds_ratio": -0.1408858448266983, + "loss": 0.2851, + "rejected_geometric_mean": -9.269876480102539, + "step": 7967 + }, + { + "chosen_geometric_mean": -0.9495768547058105, + "epoch": 1.97, + "grad_norm": 14.0625, + "learning_rate": 2.2929624906772864e-09, + "log_odds": 8.63593864440918, + "log_odds_ratio": -0.09077639877796173, + "loss": 0.2643, + "rejected_geometric_mean": -9.152229309082031, + "step": 7968 + }, + { + "chosen_geometric_mean": -1.240838646888733, + "epoch": 1.97, + "grad_norm": 5.125, + "learning_rate": 2.251467994909551e-09, + "log_odds": 4.057651042938232, + "log_odds_ratio": -0.07930153608322144, + "loss": 0.258, + "rejected_geometric_mean": -4.944290637969971, + "step": 7969 + }, + { + "chosen_geometric_mean": -1.3282862901687622, + "epoch": 1.97, + "grad_norm": 2.734375, + "learning_rate": 2.2103522177746582e-09, + "log_odds": 2.420635223388672, + "log_odds_ratio": -0.2324732095003128, + "loss": 0.265, + "rejected_geometric_mean": -3.5126383304595947, + "step": 7970 + }, + { + "chosen_geometric_mean": -1.2070412635803223, + "epoch": 1.97, + "grad_norm": 22.375, + "learning_rate": 2.169615165507344e-09, + "log_odds": 4.066312313079834, + "log_odds_ratio": -0.3476353883743286, + "loss": 0.3266, + "rejected_geometric_mean": -5.0772528648376465, + "step": 7971 + }, + { + "chosen_geometric_mean": -0.8726689219474792, + "epoch": 1.97, + "grad_norm": 8.375, + "learning_rate": 2.1292568442840554e-09, + "log_odds": 6.4971537590026855, + "log_odds_ratio": -0.008454935625195503, + "loss": 0.2515, + "rejected_geometric_mean": -6.832799911499023, + "step": 7972 + }, + { + "chosen_geometric_mean": -0.9501616358757019, + "epoch": 1.97, + "grad_norm": 2.0625, + "learning_rate": 2.089277260224343e-09, + "log_odds": 9.866826057434082, + "log_odds_ratio": -0.009272503666579723, + "loss": 0.2266, + "rejected_geometric_mean": -10.326421737670898, + "step": 7973 + }, + { + "chosen_geometric_mean": -0.838463306427002, + "epoch": 1.97, + "grad_norm": 5.0, + "learning_rate": 2.049676419389468e-09, + "log_odds": 10.550124168395996, + "log_odds_ratio": -0.04681985452771187, + "loss": 0.2448, + "rejected_geometric_mean": -10.846055030822754, + "step": 7974 + }, + { + "chosen_geometric_mean": -0.8408775925636292, + "epoch": 1.97, + "grad_norm": 2.34375, + "learning_rate": 2.0104543277843503e-09, + "log_odds": 6.2521281242370605, + "log_odds_ratio": -0.014068215154111385, + "loss": 0.268, + "rejected_geometric_mean": -6.531536102294922, + "step": 7975 + }, + { + "chosen_geometric_mean": -0.9673066139221191, + "epoch": 1.97, + "grad_norm": 3.796875, + "learning_rate": 1.9716109913558988e-09, + "log_odds": 3.975430965423584, + "log_odds_ratio": -0.26893723011016846, + "loss": 0.258, + "rejected_geometric_mean": -4.644568920135498, + "step": 7976 + }, + { + "chosen_geometric_mean": -0.8617404699325562, + "epoch": 1.97, + "grad_norm": 8.1875, + "learning_rate": 1.9331464159938475e-09, + "log_odds": 12.85844612121582, + "log_odds_ratio": -0.0007913138251751661, + "loss": 0.2915, + "rejected_geometric_mean": -13.133705139160156, + "step": 7977 + }, + { + "chosen_geometric_mean": -0.9314951300621033, + "epoch": 1.98, + "grad_norm": 8.625, + "learning_rate": 1.8950606075299195e-09, + "log_odds": 13.01147174835205, + "log_odds_ratio": -0.04430196061730385, + "loss": 0.2216, + "rejected_geometric_mean": -13.464631080627441, + "step": 7978 + }, + { + "chosen_geometric_mean": -1.0906529426574707, + "epoch": 1.98, + "grad_norm": 3.765625, + "learning_rate": 1.8573535717392177e-09, + "log_odds": 6.289061546325684, + "log_odds_ratio": -0.08350235968828201, + "loss": 0.2553, + "rejected_geometric_mean": -7.002262115478516, + "step": 7979 + }, + { + "chosen_geometric_mean": -0.9246584177017212, + "epoch": 1.98, + "grad_norm": 2.84375, + "learning_rate": 1.8200253143391134e-09, + "log_odds": 6.376605033874512, + "log_odds_ratio": -0.12803930044174194, + "loss": 0.2571, + "rejected_geometric_mean": -6.85554838180542, + "step": 7980 + }, + { + "chosen_geometric_mean": -1.024962067604065, + "epoch": 1.98, + "grad_norm": 2.46875, + "learning_rate": 1.7830758409889681e-09, + "log_odds": 1.9427359104156494, + "log_odds_ratio": -0.5182445049285889, + "loss": 0.2308, + "rejected_geometric_mean": -2.977536201477051, + "step": 7981 + }, + { + "chosen_geometric_mean": -1.0227315425872803, + "epoch": 1.98, + "grad_norm": 38.75, + "learning_rate": 1.7465051572918e-09, + "log_odds": 5.298476219177246, + "log_odds_ratio": -0.13614031672477722, + "loss": 0.2611, + "rejected_geometric_mean": -5.938607215881348, + "step": 7982 + }, + { + "chosen_geometric_mean": -1.0183870792388916, + "epoch": 1.98, + "grad_norm": 2.453125, + "learning_rate": 1.7103132687920631e-09, + "log_odds": 10.27796459197998, + "log_odds_ratio": -0.0002298351319041103, + "loss": 0.2491, + "rejected_geometric_mean": -10.799211502075195, + "step": 7983 + }, + { + "chosen_geometric_mean": -1.1135214567184448, + "epoch": 1.98, + "grad_norm": 6.34375, + "learning_rate": 1.6745001809778672e-09, + "log_odds": 0.7536089420318604, + "log_odds_ratio": -0.42343807220458984, + "loss": 0.2926, + "rejected_geometric_mean": -1.6813199520111084, + "step": 7984 + }, + { + "chosen_geometric_mean": -0.9347261190414429, + "epoch": 1.98, + "grad_norm": 31.0, + "learning_rate": 1.6390658992787578e-09, + "log_odds": 1.8369123935699463, + "log_odds_ratio": -0.3765556514263153, + "loss": 0.2523, + "rejected_geometric_mean": -2.5517730712890625, + "step": 7985 + }, + { + "chosen_geometric_mean": -0.9203327298164368, + "epoch": 1.98, + "grad_norm": 2.734375, + "learning_rate": 1.604010429067937e-09, + "log_odds": 8.90195369720459, + "log_odds_ratio": -0.02392999641597271, + "loss": 0.2676, + "rejected_geometric_mean": -9.327240943908691, + "step": 7986 + }, + { + "chosen_geometric_mean": -0.9298981428146362, + "epoch": 1.98, + "grad_norm": 2.046875, + "learning_rate": 1.56933377566032e-09, + "log_odds": 5.123627662658691, + "log_odds_ratio": -0.12264976650476456, + "loss": 0.2146, + "rejected_geometric_mean": -5.633297920227051, + "step": 7987 + }, + { + "chosen_geometric_mean": -1.0174646377563477, + "epoch": 1.98, + "grad_norm": 5.28125, + "learning_rate": 1.5350359443136454e-09, + "log_odds": 9.782157897949219, + "log_odds_ratio": -0.00199390877969563, + "loss": 0.2358, + "rejected_geometric_mean": -10.339240074157715, + "step": 7988 + }, + { + "chosen_geometric_mean": -0.8680022954940796, + "epoch": 1.98, + "grad_norm": 4.875, + "learning_rate": 1.501116940228753e-09, + "log_odds": 7.769988059997559, + "log_odds_ratio": -0.07231290638446808, + "loss": 0.3068, + "rejected_geometric_mean": -8.149744987487793, + "step": 7989 + }, + { + "chosen_geometric_mean": -0.815664529800415, + "epoch": 1.98, + "grad_norm": 25.625, + "learning_rate": 1.4675767685481956e-09, + "log_odds": 2.132969856262207, + "log_odds_ratio": -0.37194064259529114, + "loss": 0.264, + "rejected_geometric_mean": -2.67004656791687, + "step": 7990 + }, + { + "chosen_geometric_mean": -0.8795682787895203, + "epoch": 1.98, + "grad_norm": 9.9375, + "learning_rate": 1.4344154343576278e-09, + "log_odds": 1.4682414531707764, + "log_odds_ratio": -0.28532782196998596, + "loss": 0.2683, + "rejected_geometric_mean": -2.037374973297119, + "step": 7991 + }, + { + "chosen_geometric_mean": -0.807104766368866, + "epoch": 1.98, + "grad_norm": 4.0625, + "learning_rate": 1.4016329426849717e-09, + "log_odds": 4.247438430786133, + "log_odds_ratio": -0.10494396090507507, + "loss": 0.2162, + "rejected_geometric_mean": -4.553997039794922, + "step": 7992 + }, + { + "chosen_geometric_mean": -0.8647661209106445, + "epoch": 1.98, + "grad_norm": 3.40625, + "learning_rate": 1.3692292985009736e-09, + "log_odds": 5.817791938781738, + "log_odds_ratio": -0.1960255205631256, + "loss": 0.2429, + "rejected_geometric_mean": -6.310423374176025, + "step": 7993 + }, + { + "chosen_geometric_mean": -1.5653489828109741, + "epoch": 1.98, + "grad_norm": 28.25, + "learning_rate": 1.3372045067186478e-09, + "log_odds": 7.236949920654297, + "log_odds_ratio": -0.15972566604614258, + "loss": 0.2714, + "rejected_geometric_mean": -8.590921401977539, + "step": 7994 + }, + { + "chosen_geometric_mean": -0.8997841477394104, + "epoch": 1.98, + "grad_norm": 7.71875, + "learning_rate": 1.3055585721938323e-09, + "log_odds": 5.455108165740967, + "log_odds_ratio": -0.19747507572174072, + "loss": 0.2332, + "rejected_geometric_mean": -5.993626117706299, + "step": 7995 + }, + { + "chosen_geometric_mean": -0.8942203521728516, + "epoch": 1.98, + "grad_norm": 3.734375, + "learning_rate": 1.2742914997246337e-09, + "log_odds": 9.735343933105469, + "log_odds_ratio": -0.025226356461644173, + "loss": 0.2446, + "rejected_geometric_mean": -10.110061645507812, + "step": 7996 + }, + { + "chosen_geometric_mean": -0.9666078686714172, + "epoch": 1.98, + "grad_norm": 3.5625, + "learning_rate": 1.2434032940522588e-09, + "log_odds": 8.801399230957031, + "log_odds_ratio": -0.08855453878641129, + "loss": 0.2526, + "rejected_geometric_mean": -9.296699523925781, + "step": 7997 + }, + { + "chosen_geometric_mean": -0.8811635971069336, + "epoch": 1.98, + "grad_norm": 28.125, + "learning_rate": 1.2128939598599066e-09, + "log_odds": 0.5609064698219299, + "log_odds_ratio": -0.45309916138648987, + "loss": 0.2549, + "rejected_geometric_mean": -1.2331042289733887, + "step": 7998 + }, + { + "chosen_geometric_mean": -1.0639994144439697, + "epoch": 1.98, + "grad_norm": 2.9375, + "learning_rate": 1.1827635017735983e-09, + "log_odds": 3.379880905151367, + "log_odds_ratio": -0.19847789406776428, + "loss": 0.2558, + "rejected_geometric_mean": -4.075312614440918, + "step": 7999 + }, + { + "chosen_geometric_mean": -0.9750704169273376, + "epoch": 1.98, + "grad_norm": 5.28125, + "learning_rate": 1.1530119243613468e-09, + "log_odds": 3.1400136947631836, + "log_odds_ratio": -0.3167404532432556, + "loss": 0.2632, + "rejected_geometric_mean": -3.8870596885681152, + "step": 8000 + }, + { + "chosen_geometric_mean": -0.8203004598617554, + "epoch": 1.98, + "grad_norm": 15.8125, + "learning_rate": 1.1236392321350987e-09, + "log_odds": 6.174983978271484, + "log_odds_ratio": -0.1644214540719986, + "loss": 0.2388, + "rejected_geometric_mean": -6.540092468261719, + "step": 8001 + }, + { + "chosen_geometric_mean": -0.9474231004714966, + "epoch": 1.98, + "grad_norm": 2.203125, + "learning_rate": 1.0946454295476805e-09, + "log_odds": 6.1221842765808105, + "log_odds_ratio": -0.12541578710079193, + "loss": 0.2561, + "rejected_geometric_mean": -6.63397741317749, + "step": 8002 + }, + { + "chosen_geometric_mean": -1.1179848909378052, + "epoch": 1.98, + "grad_norm": 4.34375, + "learning_rate": 1.0660305209955757e-09, + "log_odds": 7.313684463500977, + "log_odds_ratio": -0.1242571696639061, + "loss": 0.2595, + "rejected_geometric_mean": -8.095130920410156, + "step": 8003 + }, + { + "chosen_geometric_mean": -1.242103099822998, + "epoch": 1.98, + "grad_norm": 35.0, + "learning_rate": 1.0377945108175357e-09, + "log_odds": 5.7947998046875, + "log_odds_ratio": -0.14154258370399475, + "loss": 0.2836, + "rejected_geometric_mean": -6.771282196044922, + "step": 8004 + }, + { + "chosen_geometric_mean": -0.8263195157051086, + "epoch": 1.98, + "grad_norm": 2.5625, + "learning_rate": 1.0099374032945807e-09, + "log_odds": 3.8198142051696777, + "log_odds_ratio": -0.31509947776794434, + "loss": 0.2928, + "rejected_geometric_mean": -4.343708038330078, + "step": 8005 + }, + { + "chosen_geometric_mean": -0.8925220966339111, + "epoch": 1.98, + "grad_norm": 3.515625, + "learning_rate": 9.824592026508317e-10, + "log_odds": 3.2144129276275635, + "log_odds_ratio": -0.1304849535226822, + "loss": 0.2426, + "rejected_geometric_mean": -3.59975004196167, + "step": 8006 + }, + { + "chosen_geometric_mean": -0.8923498392105103, + "epoch": 1.98, + "grad_norm": 8.25, + "learning_rate": 9.553599130524006e-10, + "log_odds": 6.160147666931152, + "log_odds_ratio": -0.1072222962975502, + "loss": 0.239, + "rejected_geometric_mean": -6.56431770324707, + "step": 8007 + }, + { + "chosen_geometric_mean": -1.3966591358184814, + "epoch": 1.98, + "grad_norm": 14.9375, + "learning_rate": 9.286395386082225e-10, + "log_odds": 2.908048629760742, + "log_odds_ratio": -0.16961771249771118, + "loss": 0.2689, + "rejected_geometric_mean": -4.07751989364624, + "step": 8008 + }, + { + "chosen_geometric_mean": -1.0421416759490967, + "epoch": 1.98, + "grad_norm": 3.625, + "learning_rate": 9.022980833697792e-10, + "log_odds": 11.80734634399414, + "log_odds_ratio": -0.12444815039634705, + "loss": 0.2805, + "rejected_geometric_mean": -12.4818754196167, + "step": 8009 + }, + { + "chosen_geometric_mean": -1.1361560821533203, + "epoch": 1.98, + "grad_norm": 10.625, + "learning_rate": 8.763355513310978e-10, + "log_odds": 6.156696319580078, + "log_odds_ratio": -0.0832642912864685, + "loss": 0.2798, + "rejected_geometric_mean": -6.93868350982666, + "step": 8010 + }, + { + "chosen_geometric_mean": -1.0948184728622437, + "epoch": 1.98, + "grad_norm": 11.875, + "learning_rate": 8.507519464287517e-10, + "log_odds": 6.244352340698242, + "log_odds_ratio": -0.19833414256572723, + "loss": 0.2543, + "rejected_geometric_mean": -7.032164573669434, + "step": 8011 + }, + { + "chosen_geometric_mean": -1.016422986984253, + "epoch": 1.98, + "grad_norm": 23.25, + "learning_rate": 8.255472725415825e-10, + "log_odds": 1.1406407356262207, + "log_odds_ratio": -0.3586401343345642, + "loss": 0.2307, + "rejected_geometric_mean": -1.9523100852966309, + "step": 8012 + }, + { + "chosen_geometric_mean": -0.9575146436691284, + "epoch": 1.98, + "grad_norm": 5.78125, + "learning_rate": 8.007215334918106e-10, + "log_odds": 8.987473487854004, + "log_odds_ratio": -0.003426617942750454, + "loss": 0.2891, + "rejected_geometric_mean": -9.450614929199219, + "step": 8013 + }, + { + "chosen_geometric_mean": -0.9856367111206055, + "epoch": 1.98, + "grad_norm": 8.4375, + "learning_rate": 7.762747330430919e-10, + "log_odds": 9.543699264526367, + "log_odds_ratio": -0.0005209465743973851, + "loss": 0.2726, + "rejected_geometric_mean": -10.050539016723633, + "step": 8014 + }, + { + "chosen_geometric_mean": -1.1152509450912476, + "epoch": 1.98, + "grad_norm": 11.3125, + "learning_rate": 7.522068749021838e-10, + "log_odds": 11.59343147277832, + "log_odds_ratio": -0.014651285484433174, + "loss": 0.2744, + "rejected_geometric_mean": -12.296929359436035, + "step": 8015 + }, + { + "chosen_geometric_mean": -1.0535857677459717, + "epoch": 1.98, + "grad_norm": 4.375, + "learning_rate": 7.285179627183892e-10, + "log_odds": 10.233869552612305, + "log_odds_ratio": -0.004017618950456381, + "loss": 0.2255, + "rejected_geometric_mean": -10.8571138381958, + "step": 8016 + }, + { + "chosen_geometric_mean": -1.0414087772369385, + "epoch": 1.98, + "grad_norm": 6.21875, + "learning_rate": 7.052080000835571e-10, + "log_odds": 14.45539379119873, + "log_odds_ratio": -0.006378585938364267, + "loss": 0.2585, + "rejected_geometric_mean": -15.032472610473633, + "step": 8017 + }, + { + "chosen_geometric_mean": -0.9319993257522583, + "epoch": 1.99, + "grad_norm": 14.625, + "learning_rate": 6.822769905323601e-10, + "log_odds": 3.337839365005493, + "log_odds_ratio": -0.2366023063659668, + "loss": 0.2725, + "rejected_geometric_mean": -3.904207229614258, + "step": 8018 + }, + { + "chosen_geometric_mean": -1.0116877555847168, + "epoch": 1.99, + "grad_norm": 2.859375, + "learning_rate": 6.597249375411841e-10, + "log_odds": 4.723593711853027, + "log_odds_ratio": -0.09093921631574631, + "loss": 0.2555, + "rejected_geometric_mean": -5.337559700012207, + "step": 8019 + }, + { + "chosen_geometric_mean": -1.1073092222213745, + "epoch": 1.99, + "grad_norm": 4.125, + "learning_rate": 6.375518445295159e-10, + "log_odds": 6.9587297439575195, + "log_odds_ratio": -0.014429303817451, + "loss": 0.2354, + "rejected_geometric_mean": -7.6692280769348145, + "step": 8020 + }, + { + "chosen_geometric_mean": -1.0508925914764404, + "epoch": 1.99, + "grad_norm": 28.625, + "learning_rate": 6.15757714859666e-10, + "log_odds": 5.009456157684326, + "log_odds_ratio": -0.12139242887496948, + "loss": 0.2815, + "rejected_geometric_mean": -5.6568379402160645, + "step": 8021 + }, + { + "chosen_geometric_mean": -0.9145260453224182, + "epoch": 1.99, + "grad_norm": 19.375, + "learning_rate": 5.943425518359358e-10, + "log_odds": 11.803861618041992, + "log_odds_ratio": -0.001013864646665752, + "loss": 0.2475, + "rejected_geometric_mean": -12.18336009979248, + "step": 8022 + }, + { + "chosen_geometric_mean": -1.1179413795471191, + "epoch": 1.99, + "grad_norm": 2.5, + "learning_rate": 5.733063587054499e-10, + "log_odds": 10.0337553024292, + "log_odds_ratio": -0.1655777245759964, + "loss": 0.261, + "rejected_geometric_mean": -10.807994842529297, + "step": 8023 + }, + { + "chosen_geometric_mean": -2.6380538940429688, + "epoch": 1.99, + "grad_norm": 54.75, + "learning_rate": 5.526491386578791e-10, + "log_odds": 4.7791056632995605, + "log_odds_ratio": -0.5075699687004089, + "loss": 0.3703, + "rejected_geometric_mean": -7.029623031616211, + "step": 8024 + }, + { + "chosen_geometric_mean": -0.9135137796401978, + "epoch": 1.99, + "grad_norm": 6.65625, + "learning_rate": 5.323708948248851e-10, + "log_odds": 6.724203586578369, + "log_odds_ratio": -0.016263362020254135, + "loss": 0.2499, + "rejected_geometric_mean": -7.117358684539795, + "step": 8025 + }, + { + "chosen_geometric_mean": -0.6930125951766968, + "epoch": 1.99, + "grad_norm": 2.78125, + "learning_rate": 5.124716302817856e-10, + "log_odds": 11.099998474121094, + "log_odds_ratio": -0.011410384438931942, + "loss": 0.2639, + "rejected_geometric_mean": -11.108816146850586, + "step": 8026 + }, + { + "chosen_geometric_mean": -1.0068254470825195, + "epoch": 1.99, + "grad_norm": 45.5, + "learning_rate": 4.929513480453341e-10, + "log_odds": 10.615976333618164, + "log_odds_ratio": -0.0006532781990244985, + "loss": 0.2817, + "rejected_geometric_mean": -11.146138191223145, + "step": 8027 + }, + { + "chosen_geometric_mean": -0.8595067262649536, + "epoch": 1.99, + "grad_norm": 2.3125, + "learning_rate": 4.738100510753851e-10, + "log_odds": 8.078604698181152, + "log_odds_ratio": -0.056027643382549286, + "loss": 0.2726, + "rejected_geometric_mean": -8.407389640808105, + "step": 8028 + }, + { + "chosen_geometric_mean": -0.887291669845581, + "epoch": 1.99, + "grad_norm": 21.25, + "learning_rate": 4.5504774227433936e-10, + "log_odds": 6.571572780609131, + "log_odds_ratio": -0.008780419826507568, + "loss": 0.299, + "rejected_geometric_mean": -6.930011749267578, + "step": 8029 + }, + { + "chosen_geometric_mean": -1.0653049945831299, + "epoch": 1.99, + "grad_norm": 7.0, + "learning_rate": 4.366644244868656e-10, + "log_odds": 15.822362899780273, + "log_odds_ratio": -2.741826165220118e-06, + "loss": 0.2479, + "rejected_geometric_mean": -16.464183807373047, + "step": 8030 + }, + { + "chosen_geometric_mean": -0.8557647466659546, + "epoch": 1.99, + "grad_norm": 10.5625, + "learning_rate": 4.186601005004565e-10, + "log_odds": 0.5709657073020935, + "log_odds_ratio": -0.4785590171813965, + "loss": 0.2466, + "rejected_geometric_mean": -1.2638583183288574, + "step": 8031 + }, + { + "chosen_geometric_mean": -1.046603798866272, + "epoch": 1.99, + "grad_norm": 5.65625, + "learning_rate": 4.010347730448727e-10, + "log_odds": 6.7058305740356445, + "log_odds_ratio": -0.15375180542469025, + "loss": 0.2754, + "rejected_geometric_mean": -7.397397994995117, + "step": 8032 + }, + { + "chosen_geometric_mean": -1.1011289358139038, + "epoch": 1.99, + "grad_norm": 4.71875, + "learning_rate": 3.8378844479242117e-10, + "log_odds": 5.778884410858154, + "log_odds_ratio": -0.034440621733665466, + "loss": 0.1889, + "rejected_geometric_mean": -6.488032341003418, + "step": 8033 + }, + { + "chosen_geometric_mean": -0.8431394696235657, + "epoch": 1.99, + "grad_norm": 5.59375, + "learning_rate": 3.6692111835850974e-10, + "log_odds": 8.601766586303711, + "log_odds_ratio": -0.15204744040966034, + "loss": 0.2603, + "rejected_geometric_mean": -9.02830982208252, + "step": 8034 + }, + { + "chosen_geometric_mean": -1.0084753036499023, + "epoch": 1.99, + "grad_norm": 13.125, + "learning_rate": 3.504327963002596e-10, + "log_odds": 11.956315040588379, + "log_odds_ratio": -0.033008672297000885, + "loss": 0.2638, + "rejected_geometric_mean": -12.530024528503418, + "step": 8035 + }, + { + "chosen_geometric_mean": -1.0411432981491089, + "epoch": 1.99, + "grad_norm": 8.875, + "learning_rate": 3.343234811176155e-10, + "log_odds": 2.769437313079834, + "log_odds_ratio": -0.3684413731098175, + "loss": 0.3288, + "rejected_geometric_mean": -3.607598066329956, + "step": 8036 + }, + { + "chosen_geometric_mean": -0.9462295770645142, + "epoch": 1.99, + "grad_norm": 15.4375, + "learning_rate": 3.1859317525334553e-10, + "log_odds": 5.3485894203186035, + "log_odds_ratio": -0.04366012290120125, + "loss": 0.2737, + "rejected_geometric_mean": -5.828009605407715, + "step": 8037 + }, + { + "chosen_geometric_mean": -1.2307524681091309, + "epoch": 1.99, + "grad_norm": 3.265625, + "learning_rate": 3.03241881092764e-10, + "log_odds": 12.990461349487305, + "log_odds_ratio": -0.01428256370127201, + "loss": 0.2378, + "rejected_geometric_mean": -13.874488830566406, + "step": 8038 + }, + { + "chosen_geometric_mean": -0.987601637840271, + "epoch": 1.99, + "grad_norm": 18.875, + "learning_rate": 2.8826960096289826e-10, + "log_odds": 3.0264384746551514, + "log_odds_ratio": -0.31360912322998047, + "loss": 0.3097, + "rejected_geometric_mean": -3.764937162399292, + "step": 8039 + }, + { + "chosen_geometric_mean": -0.8420879244804382, + "epoch": 1.99, + "grad_norm": 48.25, + "learning_rate": 2.73676337134432e-10, + "log_odds": 2.9423913955688477, + "log_odds_ratio": -0.16653616726398468, + "loss": 0.2412, + "rejected_geometric_mean": -3.358370304107666, + "step": 8040 + }, + { + "chosen_geometric_mean": -1.0455586910247803, + "epoch": 1.99, + "grad_norm": 8.625, + "learning_rate": 2.594620918197621e-10, + "log_odds": 5.13496732711792, + "log_odds_ratio": -0.14263205230236053, + "loss": 0.2568, + "rejected_geometric_mean": -5.838893413543701, + "step": 8041 + }, + { + "chosen_geometric_mean": -0.921492874622345, + "epoch": 1.99, + "grad_norm": 2.703125, + "learning_rate": 2.4562686717438667e-10, + "log_odds": 6.92446756362915, + "log_odds_ratio": -0.13976849615573883, + "loss": 0.2567, + "rejected_geometric_mean": -7.450933456420898, + "step": 8042 + }, + { + "chosen_geometric_mean": -1.1300475597381592, + "epoch": 1.99, + "grad_norm": 2.078125, + "learning_rate": 2.3217066529551692e-10, + "log_odds": 11.729787826538086, + "log_odds_ratio": -0.13120414316654205, + "loss": 0.2511, + "rejected_geometric_mean": -12.560482025146484, + "step": 8043 + }, + { + "chosen_geometric_mean": -0.9501501321792603, + "epoch": 1.99, + "grad_norm": 26.375, + "learning_rate": 2.1909348822402032e-10, + "log_odds": 8.126618385314941, + "log_odds_ratio": -0.08727338165044785, + "loss": 0.3441, + "rejected_geometric_mean": -8.658493041992188, + "step": 8044 + }, + { + "chosen_geometric_mean": -0.9835208058357239, + "epoch": 1.99, + "grad_norm": 3.09375, + "learning_rate": 2.0639533794247767e-10, + "log_odds": 7.741863250732422, + "log_odds_ratio": -0.18322357535362244, + "loss": 0.2636, + "rejected_geometric_mean": -8.405613899230957, + "step": 8045 + }, + { + "chosen_geometric_mean": -1.0241855382919312, + "epoch": 1.99, + "grad_norm": 16.625, + "learning_rate": 1.9407621637629325e-10, + "log_odds": 6.3026838302612305, + "log_odds_ratio": -0.08675908297300339, + "loss": 0.2367, + "rejected_geometric_mean": -6.91610050201416, + "step": 8046 + }, + { + "chosen_geometric_mean": -1.038915753364563, + "epoch": 1.99, + "grad_norm": 6.3125, + "learning_rate": 1.8213612539313975e-10, + "log_odds": 12.25772476196289, + "log_odds_ratio": -0.00041531454189680517, + "loss": 0.2653, + "rejected_geometric_mean": -12.843673706054688, + "step": 8047 + }, + { + "chosen_geometric_mean": -1.062429666519165, + "epoch": 1.99, + "grad_norm": 3.046875, + "learning_rate": 1.7057506680379087e-10, + "log_odds": 3.355395793914795, + "log_odds_ratio": -0.08055104315280914, + "loss": 0.2678, + "rejected_geometric_mean": -4.025697231292725, + "step": 8048 + }, + { + "chosen_geometric_mean": -0.9134645462036133, + "epoch": 1.99, + "grad_norm": 18.5, + "learning_rate": 1.5939304236073371e-10, + "log_odds": 6.789112091064453, + "log_odds_ratio": -0.05785893648862839, + "loss": 0.2701, + "rejected_geometric_mean": -7.231484889984131, + "step": 8049 + }, + { + "chosen_geometric_mean": -0.8843685984611511, + "epoch": 1.99, + "grad_norm": 38.75, + "learning_rate": 1.4859005375983392e-10, + "log_odds": 5.256277561187744, + "log_odds_ratio": -0.06428789347410202, + "loss": 0.2263, + "rejected_geometric_mean": -5.656999588012695, + "step": 8050 + }, + { + "chosen_geometric_mean": -0.9791165590286255, + "epoch": 1.99, + "grad_norm": 6.40625, + "learning_rate": 1.3816610263894802e-10, + "log_odds": 8.708626747131348, + "log_odds_ratio": -0.11875612288713455, + "loss": 0.2603, + "rejected_geometric_mean": -9.316865921020508, + "step": 8051 + }, + { + "chosen_geometric_mean": -0.7328838109970093, + "epoch": 1.99, + "grad_norm": 5.84375, + "learning_rate": 1.2812119057820095e-10, + "log_odds": 12.081432342529297, + "log_odds_ratio": -0.09896606206893921, + "loss": 0.234, + "rejected_geometric_mean": -12.267128944396973, + "step": 8052 + }, + { + "chosen_geometric_mean": -0.9265494346618652, + "epoch": 1.99, + "grad_norm": 9.25, + "learning_rate": 1.1845531910137377e-10, + "log_odds": 6.216374397277832, + "log_odds_ratio": -0.08077515661716461, + "loss": 0.2273, + "rejected_geometric_mean": -6.673104763031006, + "step": 8053 + }, + { + "chosen_geometric_mean": -0.8971859216690063, + "epoch": 1.99, + "grad_norm": 6.46875, + "learning_rate": 1.0916848967340577e-10, + "log_odds": 4.918268203735352, + "log_odds_ratio": -0.23714151978492737, + "loss": 0.2415, + "rejected_geometric_mean": -5.4727020263671875, + "step": 8054 + }, + { + "chosen_geometric_mean": -1.11082124710083, + "epoch": 1.99, + "grad_norm": 8.3125, + "learning_rate": 1.0026070370289243e-10, + "log_odds": 1.579697608947754, + "log_odds_ratio": -0.24248278141021729, + "loss": 0.3731, + "rejected_geometric_mean": -2.4296815395355225, + "step": 8055 + }, + { + "chosen_geometric_mean": -0.807015597820282, + "epoch": 1.99, + "grad_norm": 37.5, + "learning_rate": 9.173196254014249e-11, + "log_odds": 5.756215572357178, + "log_odds_ratio": -0.20425304770469666, + "loss": 0.2467, + "rejected_geometric_mean": -6.1438422203063965, + "step": 8056 + }, + { + "chosen_geometric_mean": -0.9084469079971313, + "epoch": 1.99, + "grad_norm": 8.8125, + "learning_rate": 8.358226747856579e-11, + "log_odds": 9.772018432617188, + "log_odds_ratio": -0.030358772724866867, + "loss": 0.3317, + "rejected_geometric_mean": -10.170450210571289, + "step": 8057 + }, + { + "chosen_geometric_mean": -0.9034774899482727, + "epoch": 2.0, + "grad_norm": 13.625, + "learning_rate": 7.581161975356299e-11, + "log_odds": 1.2277579307556152, + "log_odds_ratio": -0.41411691904067993, + "loss": 0.2682, + "rejected_geometric_mean": -1.9097981452941895, + "step": 8058 + }, + { + "chosen_geometric_mean": -1.0656358003616333, + "epoch": 2.0, + "grad_norm": 1.90625, + "learning_rate": 6.842002054335828e-11, + "log_odds": 3.05885910987854, + "log_odds_ratio": -0.21126708388328552, + "loss": 0.2452, + "rejected_geometric_mean": -3.7389402389526367, + "step": 8059 + }, + { + "chosen_geometric_mean": -0.9641014337539673, + "epoch": 2.0, + "grad_norm": 3.40625, + "learning_rate": 6.140747096899934e-11, + "log_odds": 4.482960224151611, + "log_odds_ratio": -0.07839308679103851, + "loss": 0.2296, + "rejected_geometric_mean": -4.86424446105957, + "step": 8060 + }, + { + "chosen_geometric_mean": -1.043420672416687, + "epoch": 2.0, + "grad_norm": 2.1875, + "learning_rate": 5.477397209352475e-11, + "log_odds": 12.538989067077637, + "log_odds_ratio": -0.048732757568359375, + "loss": 0.2511, + "rejected_geometric_mean": -13.13394546508789, + "step": 8061 + }, + { + "chosen_geometric_mean": -0.8465732932090759, + "epoch": 2.0, + "grad_norm": 9.1875, + "learning_rate": 4.8519524923074104e-11, + "log_odds": 4.3766984939575195, + "log_odds_ratio": -0.24461936950683594, + "loss": 0.2698, + "rejected_geometric_mean": -4.83010721206665, + "step": 8062 + }, + { + "chosen_geometric_mean": -0.8312300443649292, + "epoch": 2.0, + "grad_norm": 9.625, + "learning_rate": 4.264413040522275e-11, + "log_odds": 7.940499305725098, + "log_odds_ratio": -0.08288305997848511, + "loss": 0.2767, + "rejected_geometric_mean": -8.191433906555176, + "step": 8063 + }, + { + "chosen_geometric_mean": -0.9859530329704285, + "epoch": 2.0, + "grad_norm": 9.8125, + "learning_rate": 3.7147789431479786e-11, + "log_odds": 7.552896022796631, + "log_odds_ratio": -0.1606573909521103, + "loss": 0.2064, + "rejected_geometric_mean": -8.226659774780273, + "step": 8064 + }, + { + "chosen_geometric_mean": -0.872744083404541, + "epoch": 2.0, + "grad_norm": 6.03125, + "learning_rate": 3.203050283534515e-11, + "log_odds": 9.038351058959961, + "log_odds_ratio": -0.032306645065546036, + "loss": 0.2878, + "rejected_geometric_mean": -9.371352195739746, + "step": 8065 + }, + { + "chosen_geometric_mean": -0.9732475280761719, + "epoch": 2.0, + "grad_norm": 21.625, + "learning_rate": 2.7292271392032056e-11, + "log_odds": 5.348012447357178, + "log_odds_ratio": -0.07745516300201416, + "loss": 0.2751, + "rejected_geometric_mean": -5.891438007354736, + "step": 8066 + }, + { + "chosen_geometric_mean": -1.1130032539367676, + "epoch": 2.0, + "grad_norm": 2.03125, + "learning_rate": 2.2933095820409924e-11, + "log_odds": 7.842358112335205, + "log_odds_ratio": -0.2028646320104599, + "loss": 0.2633, + "rejected_geometric_mean": -8.674644470214844, + "step": 8067 + }, + { + "chosen_geometric_mean": -0.8363812565803528, + "epoch": 2.0, + "grad_norm": 1.8359375, + "learning_rate": 1.8952976781339005e-11, + "log_odds": 9.741300582885742, + "log_odds_ratio": -0.09820874780416489, + "loss": 0.2278, + "rejected_geometric_mean": -10.076972007751465, + "step": 8068 + }, + { + "chosen_geometric_mean": -0.7955410480499268, + "epoch": 2.0, + "grad_norm": 10.875, + "learning_rate": 1.5351914878503073e-11, + "log_odds": 7.319605350494385, + "log_odds_ratio": -0.15000461041927338, + "loss": 0.2426, + "rejected_geometric_mean": -7.66214656829834, + "step": 8069 + }, + { + "chosen_geometric_mean": -1.038684606552124, + "epoch": 2.0, + "grad_norm": 19.25, + "learning_rate": 1.2129910657576738e-11, + "log_odds": 5.255504608154297, + "log_odds_ratio": -0.097211554646492, + "loss": 0.2407, + "rejected_geometric_mean": -5.882931709289551, + "step": 8070 + }, + { + "chosen_geometric_mean": -0.8837145566940308, + "epoch": 2.0, + "grad_norm": 3.0625, + "learning_rate": 9.286964607335691e-12, + "log_odds": 10.772552490234375, + "log_odds_ratio": -0.17317132651805878, + "loss": 0.212, + "rejected_geometric_mean": -11.275376319885254, + "step": 8071 + }, + { + "chosen_geometric_mean": -0.9651635885238647, + "epoch": 2.0, + "grad_norm": 23.75, + "learning_rate": 6.823077158824021e-12, + "log_odds": 9.16014575958252, + "log_odds_ratio": -0.16525670886039734, + "loss": 0.245, + "rejected_geometric_mean": -9.689102172851562, + "step": 8072 + }, + { + "chosen_geometric_mean": -0.724663496017456, + "epoch": 2.0, + "grad_norm": 5.09375, + "learning_rate": 4.738248685631774e-12, + "log_odds": 9.561771392822266, + "log_odds_ratio": -0.005505032371729612, + "loss": 0.2463, + "rejected_geometric_mean": -9.627205848693848, + "step": 8073 + }, + { + "chosen_geometric_mean": -0.9745297431945801, + "epoch": 2.0, + "grad_norm": 2.265625, + "learning_rate": 3.0324795036174027e-12, + "log_odds": 4.360714912414551, + "log_odds_ratio": -0.2876715362071991, + "loss": 0.2123, + "rejected_geometric_mean": -5.071956634521484, + "step": 8074 + }, + { + "chosen_geometric_mean": -1.4076133966445923, + "epoch": 2.0, + "grad_norm": 37.0, + "learning_rate": 1.7057698717404258e-12, + "log_odds": 0.9644297361373901, + "log_odds_ratio": -0.3680565655231476, + "loss": 0.3423, + "rejected_geometric_mean": -2.2083988189697266, + "step": 8075 + }, + { + "chosen_geometric_mean": -1.0696707963943481, + "epoch": 2.0, + "grad_norm": 2.28125, + "learning_rate": 7.581199909512116e-13, + "log_odds": 4.79437255859375, + "log_odds_ratio": -0.08000592142343521, + "loss": 0.2701, + "rejected_geometric_mean": -5.465938568115234, + "step": 8076 + }, + { + "chosen_geometric_mean": -0.8226131200790405, + "epoch": 2.0, + "grad_norm": 3.609375, + "learning_rate": 1.8953000502364149e-13, + "log_odds": 6.110816955566406, + "log_odds_ratio": -0.12493787705898285, + "loss": 0.222, + "rejected_geometric_mean": -6.433416843414307, + "step": 8077 + }, + { + "chosen_geometric_mean": -1.0889161825180054, + "epoch": 2.0, + "grad_norm": 16.0, + "learning_rate": 0.0, + "log_odds": 3.9560446739196777, + "log_odds_ratio": -0.20549584925174713, + "loss": 0.2747, + "rejected_geometric_mean": -4.736770153045654, + "step": 8078 } ], "logging_steps": 1, @@ -44443,7 +88872,7 @@ "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 4039, - "total_flos": 1.1293146268726985e+19, + "total_flos": 2.258629253745397e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null