{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 0, "global_step": 763, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001310615989515072, "grad_norm": 0.0791015625, "learning_rate": 9.98689384010485e-06, "loss": 1.2556, "step": 1 }, { "epoch": 0.002621231979030144, "grad_norm": 0.0810546875, "learning_rate": 9.973787680209699e-06, "loss": 1.3767, "step": 2 }, { "epoch": 0.003931847968545216, "grad_norm": 0.0830078125, "learning_rate": 9.960681520314549e-06, "loss": 1.4053, "step": 3 }, { "epoch": 0.005242463958060288, "grad_norm": 0.08251953125, "learning_rate": 9.947575360419398e-06, "loss": 1.3944, "step": 4 }, { "epoch": 0.00655307994757536, "grad_norm": 0.0849609375, "learning_rate": 9.934469200524247e-06, "loss": 1.3895, "step": 5 }, { "epoch": 0.007863695937090432, "grad_norm": 0.0859375, "learning_rate": 9.921363040629096e-06, "loss": 1.3428, "step": 6 }, { "epoch": 0.009174311926605505, "grad_norm": 0.09228515625, "learning_rate": 9.908256880733946e-06, "loss": 1.465, "step": 7 }, { "epoch": 0.010484927916120577, "grad_norm": 0.091796875, "learning_rate": 9.895150720838795e-06, "loss": 1.3829, "step": 8 }, { "epoch": 0.011795543905635648, "grad_norm": 0.09423828125, "learning_rate": 9.882044560943644e-06, "loss": 1.4298, "step": 9 }, { "epoch": 0.01310615989515072, "grad_norm": 0.0947265625, "learning_rate": 9.868938401048493e-06, "loss": 1.3555, "step": 10 }, { "epoch": 0.014416775884665793, "grad_norm": 0.103515625, "learning_rate": 9.855832241153342e-06, "loss": 1.4013, "step": 11 }, { "epoch": 0.015727391874180863, "grad_norm": 0.10546875, "learning_rate": 9.842726081258192e-06, "loss": 1.4454, "step": 12 }, { "epoch": 0.01703800786369594, "grad_norm": 0.111328125, "learning_rate": 9.829619921363041e-06, "loss": 1.4422, "step": 13 }, { "epoch": 0.01834862385321101, "grad_norm": 0.111328125, "learning_rate": 9.81651376146789e-06, "loss": 1.3862, "step": 14 }, { "epoch": 0.019659239842726082, "grad_norm": 0.1064453125, "learning_rate": 9.803407601572739e-06, "loss": 1.2946, "step": 15 }, { "epoch": 0.020969855832241154, "grad_norm": 0.12451171875, "learning_rate": 9.790301441677588e-06, "loss": 1.4944, "step": 16 }, { "epoch": 0.022280471821756225, "grad_norm": 0.1240234375, "learning_rate": 9.777195281782439e-06, "loss": 1.461, "step": 17 }, { "epoch": 0.023591087811271297, "grad_norm": 0.11669921875, "learning_rate": 9.764089121887287e-06, "loss": 1.303, "step": 18 }, { "epoch": 0.02490170380078637, "grad_norm": 0.1240234375, "learning_rate": 9.750982961992136e-06, "loss": 1.3031, "step": 19 }, { "epoch": 0.02621231979030144, "grad_norm": 0.138671875, "learning_rate": 9.737876802096987e-06, "loss": 1.4607, "step": 20 }, { "epoch": 0.027522935779816515, "grad_norm": 0.140625, "learning_rate": 9.724770642201836e-06, "loss": 1.4104, "step": 21 }, { "epoch": 0.028833551769331587, "grad_norm": 0.1357421875, "learning_rate": 9.711664482306685e-06, "loss": 1.3664, "step": 22 }, { "epoch": 0.03014416775884666, "grad_norm": 0.142578125, "learning_rate": 9.698558322411533e-06, "loss": 1.3703, "step": 23 }, { "epoch": 0.03145478374836173, "grad_norm": 0.146484375, "learning_rate": 9.685452162516384e-06, "loss": 1.3818, "step": 24 }, { "epoch": 0.0327653997378768, "grad_norm": 0.1494140625, "learning_rate": 9.672346002621233e-06, "loss": 1.4122, "step": 25 }, { "epoch": 0.03407601572739188, "grad_norm": 0.15234375, "learning_rate": 9.659239842726082e-06, "loss": 1.3792, "step": 26 }, { "epoch": 0.035386631716906945, "grad_norm": 0.146484375, "learning_rate": 9.64613368283093e-06, "loss": 1.2953, "step": 27 }, { "epoch": 0.03669724770642202, "grad_norm": 0.162109375, "learning_rate": 9.633027522935781e-06, "loss": 1.2915, "step": 28 }, { "epoch": 0.03800786369593709, "grad_norm": 0.189453125, "learning_rate": 9.61992136304063e-06, "loss": 1.3475, "step": 29 }, { "epoch": 0.039318479685452164, "grad_norm": 0.1572265625, "learning_rate": 9.606815203145479e-06, "loss": 1.3437, "step": 30 }, { "epoch": 0.04062909567496723, "grad_norm": 0.16796875, "learning_rate": 9.59370904325033e-06, "loss": 1.364, "step": 31 }, { "epoch": 0.04193971166448231, "grad_norm": 0.1552734375, "learning_rate": 9.580602883355178e-06, "loss": 1.2421, "step": 32 }, { "epoch": 0.04325032765399738, "grad_norm": 0.166015625, "learning_rate": 9.567496723460027e-06, "loss": 1.2606, "step": 33 }, { "epoch": 0.04456094364351245, "grad_norm": 0.1640625, "learning_rate": 9.554390563564876e-06, "loss": 1.318, "step": 34 }, { "epoch": 0.045871559633027525, "grad_norm": 0.1875, "learning_rate": 9.541284403669727e-06, "loss": 1.4466, "step": 35 }, { "epoch": 0.047182175622542594, "grad_norm": 0.1923828125, "learning_rate": 9.528178243774576e-06, "loss": 1.4755, "step": 36 }, { "epoch": 0.04849279161205767, "grad_norm": 0.1708984375, "learning_rate": 9.515072083879425e-06, "loss": 1.2888, "step": 37 }, { "epoch": 0.04980340760157274, "grad_norm": 0.17578125, "learning_rate": 9.501965923984273e-06, "loss": 1.2959, "step": 38 }, { "epoch": 0.05111402359108781, "grad_norm": 0.18359375, "learning_rate": 9.488859764089122e-06, "loss": 1.3576, "step": 39 }, { "epoch": 0.05242463958060288, "grad_norm": 0.18359375, "learning_rate": 9.475753604193973e-06, "loss": 1.3013, "step": 40 }, { "epoch": 0.053735255570117955, "grad_norm": 0.1787109375, "learning_rate": 9.462647444298822e-06, "loss": 1.2869, "step": 41 }, { "epoch": 0.05504587155963303, "grad_norm": 0.1728515625, "learning_rate": 9.44954128440367e-06, "loss": 1.222, "step": 42 }, { "epoch": 0.0563564875491481, "grad_norm": 0.205078125, "learning_rate": 9.43643512450852e-06, "loss": 1.4095, "step": 43 }, { "epoch": 0.057667103538663174, "grad_norm": 0.1787109375, "learning_rate": 9.423328964613368e-06, "loss": 1.2354, "step": 44 }, { "epoch": 0.05897771952817824, "grad_norm": 0.19140625, "learning_rate": 9.410222804718219e-06, "loss": 1.3337, "step": 45 }, { "epoch": 0.06028833551769332, "grad_norm": 0.1962890625, "learning_rate": 9.397116644823068e-06, "loss": 1.3496, "step": 46 }, { "epoch": 0.061598951507208385, "grad_norm": 0.1826171875, "learning_rate": 9.384010484927917e-06, "loss": 1.2765, "step": 47 }, { "epoch": 0.06290956749672345, "grad_norm": 0.185546875, "learning_rate": 9.370904325032766e-06, "loss": 1.3376, "step": 48 }, { "epoch": 0.06422018348623854, "grad_norm": 0.189453125, "learning_rate": 9.357798165137616e-06, "loss": 1.2688, "step": 49 }, { "epoch": 0.0655307994757536, "grad_norm": 0.1875, "learning_rate": 9.344692005242465e-06, "loss": 1.3008, "step": 50 }, { "epoch": 0.06684141546526867, "grad_norm": 0.1904296875, "learning_rate": 9.331585845347314e-06, "loss": 1.2723, "step": 51 }, { "epoch": 0.06815203145478375, "grad_norm": 0.1767578125, "learning_rate": 9.318479685452163e-06, "loss": 1.2026, "step": 52 }, { "epoch": 0.06946264744429882, "grad_norm": 0.1787109375, "learning_rate": 9.305373525557012e-06, "loss": 1.231, "step": 53 }, { "epoch": 0.07077326343381389, "grad_norm": 0.1884765625, "learning_rate": 9.292267365661862e-06, "loss": 1.2807, "step": 54 }, { "epoch": 0.07208387942332896, "grad_norm": 0.197265625, "learning_rate": 9.279161205766711e-06, "loss": 1.3617, "step": 55 }, { "epoch": 0.07339449541284404, "grad_norm": 0.171875, "learning_rate": 9.26605504587156e-06, "loss": 1.1847, "step": 56 }, { "epoch": 0.07470511140235911, "grad_norm": 0.203125, "learning_rate": 9.252948885976409e-06, "loss": 1.4314, "step": 57 }, { "epoch": 0.07601572739187418, "grad_norm": 0.189453125, "learning_rate": 9.239842726081258e-06, "loss": 1.2855, "step": 58 }, { "epoch": 0.07732634338138926, "grad_norm": 0.173828125, "learning_rate": 9.226736566186108e-06, "loss": 1.2622, "step": 59 }, { "epoch": 0.07863695937090433, "grad_norm": 0.1728515625, "learning_rate": 9.213630406290957e-06, "loss": 1.2148, "step": 60 }, { "epoch": 0.0799475753604194, "grad_norm": 0.185546875, "learning_rate": 9.200524246395806e-06, "loss": 1.2849, "step": 61 }, { "epoch": 0.08125819134993446, "grad_norm": 0.1796875, "learning_rate": 9.187418086500655e-06, "loss": 1.2641, "step": 62 }, { "epoch": 0.08256880733944955, "grad_norm": 0.17578125, "learning_rate": 9.174311926605506e-06, "loss": 1.2309, "step": 63 }, { "epoch": 0.08387942332896461, "grad_norm": 0.171875, "learning_rate": 9.161205766710354e-06, "loss": 1.2382, "step": 64 }, { "epoch": 0.08519003931847968, "grad_norm": 0.1748046875, "learning_rate": 9.148099606815203e-06, "loss": 1.223, "step": 65 }, { "epoch": 0.08650065530799476, "grad_norm": 0.1767578125, "learning_rate": 9.134993446920052e-06, "loss": 1.2475, "step": 66 }, { "epoch": 0.08781127129750983, "grad_norm": 0.1640625, "learning_rate": 9.121887287024903e-06, "loss": 1.2017, "step": 67 }, { "epoch": 0.0891218872870249, "grad_norm": 0.1728515625, "learning_rate": 9.108781127129752e-06, "loss": 1.2381, "step": 68 }, { "epoch": 0.09043250327653997, "grad_norm": 0.16015625, "learning_rate": 9.0956749672346e-06, "loss": 1.1767, "step": 69 }, { "epoch": 0.09174311926605505, "grad_norm": 0.1640625, "learning_rate": 9.08256880733945e-06, "loss": 1.1802, "step": 70 }, { "epoch": 0.09305373525557012, "grad_norm": 0.162109375, "learning_rate": 9.0694626474443e-06, "loss": 1.1855, "step": 71 }, { "epoch": 0.09436435124508519, "grad_norm": 0.16796875, "learning_rate": 9.056356487549149e-06, "loss": 1.235, "step": 72 }, { "epoch": 0.09567496723460026, "grad_norm": 0.1630859375, "learning_rate": 9.043250327653998e-06, "loss": 1.2176, "step": 73 }, { "epoch": 0.09698558322411534, "grad_norm": 0.146484375, "learning_rate": 9.030144167758848e-06, "loss": 1.0679, "step": 74 }, { "epoch": 0.0982961992136304, "grad_norm": 0.154296875, "learning_rate": 9.017038007863697e-06, "loss": 1.1711, "step": 75 }, { "epoch": 0.09960681520314547, "grad_norm": 0.1826171875, "learning_rate": 9.003931847968546e-06, "loss": 1.2938, "step": 76 }, { "epoch": 0.10091743119266056, "grad_norm": 0.166015625, "learning_rate": 8.990825688073395e-06, "loss": 1.2285, "step": 77 }, { "epoch": 0.10222804718217562, "grad_norm": 0.154296875, "learning_rate": 8.977719528178246e-06, "loss": 1.1362, "step": 78 }, { "epoch": 0.10353866317169069, "grad_norm": 0.158203125, "learning_rate": 8.964613368283094e-06, "loss": 1.1319, "step": 79 }, { "epoch": 0.10484927916120576, "grad_norm": 0.150390625, "learning_rate": 8.951507208387943e-06, "loss": 1.1595, "step": 80 }, { "epoch": 0.10615989515072084, "grad_norm": 0.1484375, "learning_rate": 8.938401048492792e-06, "loss": 1.1481, "step": 81 }, { "epoch": 0.10747051114023591, "grad_norm": 0.185546875, "learning_rate": 8.925294888597643e-06, "loss": 1.2817, "step": 82 }, { "epoch": 0.10878112712975098, "grad_norm": 0.1513671875, "learning_rate": 8.912188728702492e-06, "loss": 1.1217, "step": 83 }, { "epoch": 0.11009174311926606, "grad_norm": 0.1689453125, "learning_rate": 8.89908256880734e-06, "loss": 1.2165, "step": 84 }, { "epoch": 0.11140235910878113, "grad_norm": 0.1533203125, "learning_rate": 8.88597640891219e-06, "loss": 1.1469, "step": 85 }, { "epoch": 0.1127129750982962, "grad_norm": 0.162109375, "learning_rate": 8.872870249017038e-06, "loss": 1.1946, "step": 86 }, { "epoch": 0.11402359108781127, "grad_norm": 0.1494140625, "learning_rate": 8.859764089121889e-06, "loss": 1.1542, "step": 87 }, { "epoch": 0.11533420707732635, "grad_norm": 0.1455078125, "learning_rate": 8.846657929226738e-06, "loss": 1.1733, "step": 88 }, { "epoch": 0.11664482306684142, "grad_norm": 0.140625, "learning_rate": 8.833551769331587e-06, "loss": 1.1442, "step": 89 }, { "epoch": 0.11795543905635648, "grad_norm": 0.1298828125, "learning_rate": 8.820445609436435e-06, "loss": 1.0989, "step": 90 }, { "epoch": 0.11926605504587157, "grad_norm": 0.1396484375, "learning_rate": 8.807339449541286e-06, "loss": 1.1514, "step": 91 }, { "epoch": 0.12057667103538663, "grad_norm": 0.142578125, "learning_rate": 8.794233289646135e-06, "loss": 1.1793, "step": 92 }, { "epoch": 0.1218872870249017, "grad_norm": 0.12158203125, "learning_rate": 8.781127129750984e-06, "loss": 1.0437, "step": 93 }, { "epoch": 0.12319790301441677, "grad_norm": 0.12353515625, "learning_rate": 8.768020969855833e-06, "loss": 1.1171, "step": 94 }, { "epoch": 0.12450851900393185, "grad_norm": 0.123046875, "learning_rate": 8.754914809960682e-06, "loss": 1.0807, "step": 95 }, { "epoch": 0.1258191349934469, "grad_norm": 0.12890625, "learning_rate": 8.741808650065532e-06, "loss": 1.1298, "step": 96 }, { "epoch": 0.127129750982962, "grad_norm": 0.140625, "learning_rate": 8.728702490170381e-06, "loss": 1.2133, "step": 97 }, { "epoch": 0.12844036697247707, "grad_norm": 0.1376953125, "learning_rate": 8.71559633027523e-06, "loss": 1.1301, "step": 98 }, { "epoch": 0.12975098296199214, "grad_norm": 0.1220703125, "learning_rate": 8.702490170380079e-06, "loss": 1.0789, "step": 99 }, { "epoch": 0.1310615989515072, "grad_norm": 0.1376953125, "learning_rate": 8.689384010484928e-06, "loss": 1.1856, "step": 100 }, { "epoch": 0.13237221494102228, "grad_norm": 0.1201171875, "learning_rate": 8.676277850589778e-06, "loss": 1.0449, "step": 101 }, { "epoch": 0.13368283093053734, "grad_norm": 0.134765625, "learning_rate": 8.663171690694627e-06, "loss": 1.1543, "step": 102 }, { "epoch": 0.1349934469200524, "grad_norm": 0.11181640625, "learning_rate": 8.650065530799476e-06, "loss": 1.0142, "step": 103 }, { "epoch": 0.1363040629095675, "grad_norm": 0.1474609375, "learning_rate": 8.636959370904325e-06, "loss": 1.2604, "step": 104 }, { "epoch": 0.13761467889908258, "grad_norm": 0.126953125, "learning_rate": 8.623853211009175e-06, "loss": 1.1036, "step": 105 }, { "epoch": 0.13892529488859764, "grad_norm": 0.1318359375, "learning_rate": 8.610747051114024e-06, "loss": 1.1137, "step": 106 }, { "epoch": 0.1402359108781127, "grad_norm": 0.1318359375, "learning_rate": 8.597640891218873e-06, "loss": 1.133, "step": 107 }, { "epoch": 0.14154652686762778, "grad_norm": 0.1220703125, "learning_rate": 8.584534731323722e-06, "loss": 1.0935, "step": 108 }, { "epoch": 0.14285714285714285, "grad_norm": 0.1171875, "learning_rate": 8.571428571428571e-06, "loss": 1.0659, "step": 109 }, { "epoch": 0.14416775884665792, "grad_norm": 0.12158203125, "learning_rate": 8.558322411533421e-06, "loss": 1.1022, "step": 110 }, { "epoch": 0.145478374836173, "grad_norm": 0.130859375, "learning_rate": 8.54521625163827e-06, "loss": 1.1436, "step": 111 }, { "epoch": 0.14678899082568808, "grad_norm": 0.11669921875, "learning_rate": 8.53211009174312e-06, "loss": 1.0806, "step": 112 }, { "epoch": 0.14809960681520315, "grad_norm": 0.123046875, "learning_rate": 8.519003931847968e-06, "loss": 1.1075, "step": 113 }, { "epoch": 0.14941022280471822, "grad_norm": 0.1123046875, "learning_rate": 8.505897771952819e-06, "loss": 1.0394, "step": 114 }, { "epoch": 0.15072083879423329, "grad_norm": 0.1357421875, "learning_rate": 8.492791612057668e-06, "loss": 1.138, "step": 115 }, { "epoch": 0.15203145478374835, "grad_norm": 0.12255859375, "learning_rate": 8.479685452162516e-06, "loss": 1.1298, "step": 116 }, { "epoch": 0.15334207077326342, "grad_norm": 0.1240234375, "learning_rate": 8.466579292267367e-06, "loss": 1.0441, "step": 117 }, { "epoch": 0.15465268676277852, "grad_norm": 0.1171875, "learning_rate": 8.453473132372216e-06, "loss": 1.078, "step": 118 }, { "epoch": 0.1559633027522936, "grad_norm": 0.130859375, "learning_rate": 8.440366972477065e-06, "loss": 1.0481, "step": 119 }, { "epoch": 0.15727391874180865, "grad_norm": 0.126953125, "learning_rate": 8.427260812581914e-06, "loss": 1.108, "step": 120 }, { "epoch": 0.15858453473132372, "grad_norm": 0.11181640625, "learning_rate": 8.414154652686764e-06, "loss": 1.0522, "step": 121 }, { "epoch": 0.1598951507208388, "grad_norm": 0.12353515625, "learning_rate": 8.401048492791613e-06, "loss": 1.1116, "step": 122 }, { "epoch": 0.16120576671035386, "grad_norm": 0.12890625, "learning_rate": 8.387942332896462e-06, "loss": 1.0993, "step": 123 }, { "epoch": 0.16251638269986893, "grad_norm": 0.11962890625, "learning_rate": 8.374836173001311e-06, "loss": 1.0644, "step": 124 }, { "epoch": 0.16382699868938402, "grad_norm": 0.11279296875, "learning_rate": 8.361730013106161e-06, "loss": 1.0056, "step": 125 }, { "epoch": 0.1651376146788991, "grad_norm": 0.11328125, "learning_rate": 8.34862385321101e-06, "loss": 1.0006, "step": 126 }, { "epoch": 0.16644823066841416, "grad_norm": 0.13671875, "learning_rate": 8.33551769331586e-06, "loss": 1.1509, "step": 127 }, { "epoch": 0.16775884665792923, "grad_norm": 0.11279296875, "learning_rate": 8.32241153342071e-06, "loss": 1.0404, "step": 128 }, { "epoch": 0.1690694626474443, "grad_norm": 0.11083984375, "learning_rate": 8.309305373525559e-06, "loss": 1.0368, "step": 129 }, { "epoch": 0.17038007863695936, "grad_norm": 0.1328125, "learning_rate": 8.296199213630408e-06, "loss": 1.154, "step": 130 }, { "epoch": 0.17169069462647443, "grad_norm": 0.1220703125, "learning_rate": 8.283093053735256e-06, "loss": 1.0531, "step": 131 }, { "epoch": 0.17300131061598953, "grad_norm": 0.123046875, "learning_rate": 8.269986893840105e-06, "loss": 1.0737, "step": 132 }, { "epoch": 0.1743119266055046, "grad_norm": 0.1259765625, "learning_rate": 8.256880733944956e-06, "loss": 1.0988, "step": 133 }, { "epoch": 0.17562254259501967, "grad_norm": 0.1171875, "learning_rate": 8.243774574049805e-06, "loss": 1.0644, "step": 134 }, { "epoch": 0.17693315858453473, "grad_norm": 0.11767578125, "learning_rate": 8.230668414154654e-06, "loss": 1.0439, "step": 135 }, { "epoch": 0.1782437745740498, "grad_norm": 0.12255859375, "learning_rate": 8.217562254259502e-06, "loss": 1.067, "step": 136 }, { "epoch": 0.17955439056356487, "grad_norm": 0.1220703125, "learning_rate": 8.204456094364351e-06, "loss": 1.0904, "step": 137 }, { "epoch": 0.18086500655307994, "grad_norm": 0.10986328125, "learning_rate": 8.191349934469202e-06, "loss": 0.9656, "step": 138 }, { "epoch": 0.182175622542595, "grad_norm": 0.123046875, "learning_rate": 8.17824377457405e-06, "loss": 1.0849, "step": 139 }, { "epoch": 0.1834862385321101, "grad_norm": 0.11669921875, "learning_rate": 8.1651376146789e-06, "loss": 1.0446, "step": 140 }, { "epoch": 0.18479685452162517, "grad_norm": 0.11865234375, "learning_rate": 8.152031454783749e-06, "loss": 1.0548, "step": 141 }, { "epoch": 0.18610747051114024, "grad_norm": 0.1015625, "learning_rate": 8.1389252948886e-06, "loss": 0.9563, "step": 142 }, { "epoch": 0.1874180865006553, "grad_norm": 0.1171875, "learning_rate": 8.125819134993448e-06, "loss": 1.0506, "step": 143 }, { "epoch": 0.18872870249017037, "grad_norm": 0.115234375, "learning_rate": 8.112712975098297e-06, "loss": 1.0642, "step": 144 }, { "epoch": 0.19003931847968544, "grad_norm": 0.10791015625, "learning_rate": 8.099606815203146e-06, "loss": 1.023, "step": 145 }, { "epoch": 0.1913499344692005, "grad_norm": 0.123046875, "learning_rate": 8.086500655307995e-06, "loss": 1.0591, "step": 146 }, { "epoch": 0.1926605504587156, "grad_norm": 0.11328125, "learning_rate": 8.073394495412845e-06, "loss": 1.0078, "step": 147 }, { "epoch": 0.19397116644823068, "grad_norm": 0.1220703125, "learning_rate": 8.060288335517694e-06, "loss": 1.0094, "step": 148 }, { "epoch": 0.19528178243774574, "grad_norm": 0.1142578125, "learning_rate": 8.047182175622543e-06, "loss": 1.0657, "step": 149 }, { "epoch": 0.1965923984272608, "grad_norm": 0.1318359375, "learning_rate": 8.034076015727392e-06, "loss": 1.0694, "step": 150 }, { "epoch": 0.19790301441677588, "grad_norm": 0.1240234375, "learning_rate": 8.02096985583224e-06, "loss": 1.0499, "step": 151 }, { "epoch": 0.19921363040629095, "grad_norm": 0.1201171875, "learning_rate": 8.007863695937091e-06, "loss": 1.0521, "step": 152 }, { "epoch": 0.20052424639580602, "grad_norm": 0.1279296875, "learning_rate": 7.99475753604194e-06, "loss": 1.0872, "step": 153 }, { "epoch": 0.2018348623853211, "grad_norm": 0.1123046875, "learning_rate": 7.981651376146789e-06, "loss": 0.9765, "step": 154 }, { "epoch": 0.20314547837483618, "grad_norm": 0.10205078125, "learning_rate": 7.968545216251638e-06, "loss": 0.95, "step": 155 }, { "epoch": 0.20445609436435125, "grad_norm": 0.10546875, "learning_rate": 7.955439056356489e-06, "loss": 0.9681, "step": 156 }, { "epoch": 0.20576671035386632, "grad_norm": 0.1337890625, "learning_rate": 7.942332896461337e-06, "loss": 1.0637, "step": 157 }, { "epoch": 0.20707732634338138, "grad_norm": 0.12353515625, "learning_rate": 7.929226736566186e-06, "loss": 1.0796, "step": 158 }, { "epoch": 0.20838794233289645, "grad_norm": 0.11962890625, "learning_rate": 7.916120576671035e-06, "loss": 1.0473, "step": 159 }, { "epoch": 0.20969855832241152, "grad_norm": 0.107421875, "learning_rate": 7.903014416775884e-06, "loss": 0.9582, "step": 160 }, { "epoch": 0.21100917431192662, "grad_norm": 0.11328125, "learning_rate": 7.889908256880735e-06, "loss": 1.0292, "step": 161 }, { "epoch": 0.21231979030144169, "grad_norm": 0.1044921875, "learning_rate": 7.876802096985583e-06, "loss": 1.027, "step": 162 }, { "epoch": 0.21363040629095675, "grad_norm": 0.109375, "learning_rate": 7.863695937090432e-06, "loss": 0.9905, "step": 163 }, { "epoch": 0.21494102228047182, "grad_norm": 0.109375, "learning_rate": 7.850589777195283e-06, "loss": 0.9685, "step": 164 }, { "epoch": 0.2162516382699869, "grad_norm": 0.111328125, "learning_rate": 7.837483617300132e-06, "loss": 0.9984, "step": 165 }, { "epoch": 0.21756225425950196, "grad_norm": 0.09765625, "learning_rate": 7.82437745740498e-06, "loss": 0.9624, "step": 166 }, { "epoch": 0.21887287024901703, "grad_norm": 0.1171875, "learning_rate": 7.81127129750983e-06, "loss": 1.0142, "step": 167 }, { "epoch": 0.22018348623853212, "grad_norm": 0.11767578125, "learning_rate": 7.79816513761468e-06, "loss": 1.0403, "step": 168 }, { "epoch": 0.2214941022280472, "grad_norm": 0.10791015625, "learning_rate": 7.785058977719529e-06, "loss": 0.9952, "step": 169 }, { "epoch": 0.22280471821756226, "grad_norm": 0.103515625, "learning_rate": 7.771952817824378e-06, "loss": 0.968, "step": 170 }, { "epoch": 0.22411533420707733, "grad_norm": 0.10546875, "learning_rate": 7.758846657929228e-06, "loss": 0.9589, "step": 171 }, { "epoch": 0.2254259501965924, "grad_norm": 0.109375, "learning_rate": 7.745740498034077e-06, "loss": 1.012, "step": 172 }, { "epoch": 0.22673656618610746, "grad_norm": 0.107421875, "learning_rate": 7.732634338138926e-06, "loss": 1.017, "step": 173 }, { "epoch": 0.22804718217562253, "grad_norm": 0.099609375, "learning_rate": 7.719528178243775e-06, "loss": 0.9515, "step": 174 }, { "epoch": 0.22935779816513763, "grad_norm": 0.10009765625, "learning_rate": 7.706422018348626e-06, "loss": 0.9591, "step": 175 }, { "epoch": 0.2306684141546527, "grad_norm": 0.10791015625, "learning_rate": 7.693315858453475e-06, "loss": 0.9612, "step": 176 }, { "epoch": 0.23197903014416776, "grad_norm": 0.09326171875, "learning_rate": 7.680209698558323e-06, "loss": 0.936, "step": 177 }, { "epoch": 0.23328964613368283, "grad_norm": 0.1025390625, "learning_rate": 7.667103538663172e-06, "loss": 0.987, "step": 178 }, { "epoch": 0.2346002621231979, "grad_norm": 0.10107421875, "learning_rate": 7.653997378768021e-06, "loss": 0.9288, "step": 179 }, { "epoch": 0.23591087811271297, "grad_norm": 0.103515625, "learning_rate": 7.640891218872872e-06, "loss": 0.9708, "step": 180 }, { "epoch": 0.23722149410222804, "grad_norm": 0.10107421875, "learning_rate": 7.627785058977721e-06, "loss": 0.9012, "step": 181 }, { "epoch": 0.23853211009174313, "grad_norm": 0.10009765625, "learning_rate": 7.6146788990825695e-06, "loss": 0.97, "step": 182 }, { "epoch": 0.2398427260812582, "grad_norm": 0.0947265625, "learning_rate": 7.601572739187418e-06, "loss": 0.9259, "step": 183 }, { "epoch": 0.24115334207077327, "grad_norm": 0.10546875, "learning_rate": 7.588466579292268e-06, "loss": 0.9549, "step": 184 }, { "epoch": 0.24246395806028834, "grad_norm": 0.10595703125, "learning_rate": 7.575360419397118e-06, "loss": 0.9635, "step": 185 }, { "epoch": 0.2437745740498034, "grad_norm": 0.1103515625, "learning_rate": 7.562254259501967e-06, "loss": 0.9415, "step": 186 }, { "epoch": 0.24508519003931847, "grad_norm": 0.1005859375, "learning_rate": 7.549148099606816e-06, "loss": 0.9457, "step": 187 }, { "epoch": 0.24639580602883354, "grad_norm": 0.0986328125, "learning_rate": 7.5360419397116645e-06, "loss": 0.9088, "step": 188 }, { "epoch": 0.24770642201834864, "grad_norm": 0.09130859375, "learning_rate": 7.522935779816515e-06, "loss": 0.8947, "step": 189 }, { "epoch": 0.2490170380078637, "grad_norm": 0.1044921875, "learning_rate": 7.509829619921364e-06, "loss": 0.9703, "step": 190 }, { "epoch": 0.2503276539973788, "grad_norm": 0.10205078125, "learning_rate": 7.496723460026213e-06, "loss": 0.9181, "step": 191 }, { "epoch": 0.2516382699868938, "grad_norm": 0.123046875, "learning_rate": 7.483617300131062e-06, "loss": 1.0153, "step": 192 }, { "epoch": 0.2529488859764089, "grad_norm": 0.1083984375, "learning_rate": 7.470511140235911e-06, "loss": 0.9802, "step": 193 }, { "epoch": 0.254259501965924, "grad_norm": 0.103515625, "learning_rate": 7.457404980340761e-06, "loss": 0.9592, "step": 194 }, { "epoch": 0.25557011795543905, "grad_norm": 0.083984375, "learning_rate": 7.44429882044561e-06, "loss": 0.8833, "step": 195 }, { "epoch": 0.25688073394495414, "grad_norm": 0.0947265625, "learning_rate": 7.431192660550459e-06, "loss": 0.941, "step": 196 }, { "epoch": 0.2581913499344692, "grad_norm": 0.09814453125, "learning_rate": 7.418086500655308e-06, "loss": 0.9528, "step": 197 }, { "epoch": 0.2595019659239843, "grad_norm": 0.09619140625, "learning_rate": 7.404980340760158e-06, "loss": 0.8998, "step": 198 }, { "epoch": 0.2608125819134993, "grad_norm": 0.09228515625, "learning_rate": 7.391874180865007e-06, "loss": 0.9324, "step": 199 }, { "epoch": 0.2621231979030144, "grad_norm": 0.10107421875, "learning_rate": 7.378768020969856e-06, "loss": 0.9305, "step": 200 }, { "epoch": 0.2634338138925295, "grad_norm": 0.111328125, "learning_rate": 7.365661861074706e-06, "loss": 0.9688, "step": 201 }, { "epoch": 0.26474442988204455, "grad_norm": 0.126953125, "learning_rate": 7.352555701179555e-06, "loss": 1.0586, "step": 202 }, { "epoch": 0.26605504587155965, "grad_norm": 0.09814453125, "learning_rate": 7.3394495412844045e-06, "loss": 0.8897, "step": 203 }, { "epoch": 0.2673656618610747, "grad_norm": 0.0869140625, "learning_rate": 7.326343381389253e-06, "loss": 0.8846, "step": 204 }, { "epoch": 0.2686762778505898, "grad_norm": 0.10302734375, "learning_rate": 7.313237221494103e-06, "loss": 0.9742, "step": 205 }, { "epoch": 0.2699868938401048, "grad_norm": 0.0947265625, "learning_rate": 7.300131061598952e-06, "loss": 0.9316, "step": 206 }, { "epoch": 0.2712975098296199, "grad_norm": 0.115234375, "learning_rate": 7.287024901703801e-06, "loss": 1.0154, "step": 207 }, { "epoch": 0.272608125819135, "grad_norm": 0.09765625, "learning_rate": 7.273918741808651e-06, "loss": 0.9601, "step": 208 }, { "epoch": 0.27391874180865006, "grad_norm": 0.08984375, "learning_rate": 7.2608125819135e-06, "loss": 0.8991, "step": 209 }, { "epoch": 0.27522935779816515, "grad_norm": 0.09912109375, "learning_rate": 7.247706422018349e-06, "loss": 0.936, "step": 210 }, { "epoch": 0.2765399737876802, "grad_norm": 0.1044921875, "learning_rate": 7.234600262123198e-06, "loss": 0.9497, "step": 211 }, { "epoch": 0.2778505897771953, "grad_norm": 0.10595703125, "learning_rate": 7.221494102228049e-06, "loss": 0.9594, "step": 212 }, { "epoch": 0.27916120576671033, "grad_norm": 0.095703125, "learning_rate": 7.2083879423328975e-06, "loss": 0.8151, "step": 213 }, { "epoch": 0.2804718217562254, "grad_norm": 0.11474609375, "learning_rate": 7.195281782437746e-06, "loss": 0.9903, "step": 214 }, { "epoch": 0.2817824377457405, "grad_norm": 0.10888671875, "learning_rate": 7.182175622542595e-06, "loss": 0.9934, "step": 215 }, { "epoch": 0.28309305373525556, "grad_norm": 0.10009765625, "learning_rate": 7.169069462647444e-06, "loss": 0.8955, "step": 216 }, { "epoch": 0.28440366972477066, "grad_norm": 0.0869140625, "learning_rate": 7.155963302752295e-06, "loss": 0.8998, "step": 217 }, { "epoch": 0.2857142857142857, "grad_norm": 0.09228515625, "learning_rate": 7.1428571428571436e-06, "loss": 0.9119, "step": 218 }, { "epoch": 0.2870249017038008, "grad_norm": 0.0966796875, "learning_rate": 7.1297509829619924e-06, "loss": 0.9361, "step": 219 }, { "epoch": 0.28833551769331583, "grad_norm": 0.1005859375, "learning_rate": 7.116644823066841e-06, "loss": 0.9309, "step": 220 }, { "epoch": 0.28964613368283093, "grad_norm": 0.11083984375, "learning_rate": 7.103538663171692e-06, "loss": 0.9438, "step": 221 }, { "epoch": 0.290956749672346, "grad_norm": 0.08935546875, "learning_rate": 7.090432503276541e-06, "loss": 0.8638, "step": 222 }, { "epoch": 0.29226736566186107, "grad_norm": 0.09326171875, "learning_rate": 7.07732634338139e-06, "loss": 0.8844, "step": 223 }, { "epoch": 0.29357798165137616, "grad_norm": 0.10205078125, "learning_rate": 7.0642201834862385e-06, "loss": 0.9305, "step": 224 }, { "epoch": 0.2948885976408912, "grad_norm": 0.1123046875, "learning_rate": 7.051114023591088e-06, "loss": 0.9847, "step": 225 }, { "epoch": 0.2961992136304063, "grad_norm": 0.12890625, "learning_rate": 7.038007863695938e-06, "loss": 0.9791, "step": 226 }, { "epoch": 0.29750982961992134, "grad_norm": 0.099609375, "learning_rate": 7.024901703800787e-06, "loss": 0.9613, "step": 227 }, { "epoch": 0.29882044560943644, "grad_norm": 0.083984375, "learning_rate": 7.011795543905637e-06, "loss": 0.8941, "step": 228 }, { "epoch": 0.30013106159895153, "grad_norm": 0.09912109375, "learning_rate": 6.9986893840104855e-06, "loss": 0.9126, "step": 229 }, { "epoch": 0.30144167758846657, "grad_norm": 0.10205078125, "learning_rate": 6.985583224115334e-06, "loss": 0.942, "step": 230 }, { "epoch": 0.30275229357798167, "grad_norm": 0.10595703125, "learning_rate": 6.972477064220184e-06, "loss": 0.917, "step": 231 }, { "epoch": 0.3040629095674967, "grad_norm": 0.11376953125, "learning_rate": 6.959370904325034e-06, "loss": 0.9761, "step": 232 }, { "epoch": 0.3053735255570118, "grad_norm": 0.109375, "learning_rate": 6.946264744429883e-06, "loss": 0.9472, "step": 233 }, { "epoch": 0.30668414154652685, "grad_norm": 0.10546875, "learning_rate": 6.9331585845347315e-06, "loss": 0.9342, "step": 234 }, { "epoch": 0.30799475753604194, "grad_norm": 0.1005859375, "learning_rate": 6.920052424639582e-06, "loss": 0.9272, "step": 235 }, { "epoch": 0.30930537352555704, "grad_norm": 0.09716796875, "learning_rate": 6.906946264744431e-06, "loss": 0.9006, "step": 236 }, { "epoch": 0.3106159895150721, "grad_norm": 0.08935546875, "learning_rate": 6.89384010484928e-06, "loss": 0.8775, "step": 237 }, { "epoch": 0.3119266055045872, "grad_norm": 0.1015625, "learning_rate": 6.880733944954129e-06, "loss": 0.93, "step": 238 }, { "epoch": 0.3132372214941022, "grad_norm": 0.1044921875, "learning_rate": 6.867627785058978e-06, "loss": 0.9344, "step": 239 }, { "epoch": 0.3145478374836173, "grad_norm": 0.08349609375, "learning_rate": 6.854521625163828e-06, "loss": 0.8809, "step": 240 }, { "epoch": 0.31585845347313235, "grad_norm": 0.1015625, "learning_rate": 6.841415465268677e-06, "loss": 0.9134, "step": 241 }, { "epoch": 0.31716906946264745, "grad_norm": 0.109375, "learning_rate": 6.828309305373526e-06, "loss": 0.9409, "step": 242 }, { "epoch": 0.31847968545216254, "grad_norm": 0.119140625, "learning_rate": 6.815203145478375e-06, "loss": 0.9688, "step": 243 }, { "epoch": 0.3197903014416776, "grad_norm": 0.10009765625, "learning_rate": 6.8020969855832246e-06, "loss": 0.9215, "step": 244 }, { "epoch": 0.3211009174311927, "grad_norm": 0.111328125, "learning_rate": 6.788990825688074e-06, "loss": 0.9651, "step": 245 }, { "epoch": 0.3224115334207077, "grad_norm": 0.10888671875, "learning_rate": 6.775884665792923e-06, "loss": 0.9618, "step": 246 }, { "epoch": 0.3237221494102228, "grad_norm": 0.10400390625, "learning_rate": 6.762778505897772e-06, "loss": 0.9181, "step": 247 }, { "epoch": 0.32503276539973786, "grad_norm": 0.09814453125, "learning_rate": 6.749672346002622e-06, "loss": 0.8983, "step": 248 }, { "epoch": 0.32634338138925295, "grad_norm": 0.10986328125, "learning_rate": 6.7365661861074715e-06, "loss": 0.8959, "step": 249 }, { "epoch": 0.32765399737876805, "grad_norm": 0.09814453125, "learning_rate": 6.72346002621232e-06, "loss": 0.8851, "step": 250 }, { "epoch": 0.3289646133682831, "grad_norm": 0.10498046875, "learning_rate": 6.710353866317169e-06, "loss": 0.8834, "step": 251 }, { "epoch": 0.3302752293577982, "grad_norm": 0.111328125, "learning_rate": 6.697247706422019e-06, "loss": 0.9838, "step": 252 }, { "epoch": 0.3315858453473132, "grad_norm": 0.09423828125, "learning_rate": 6.684141546526868e-06, "loss": 0.9082, "step": 253 }, { "epoch": 0.3328964613368283, "grad_norm": 0.09375, "learning_rate": 6.671035386631718e-06, "loss": 0.9052, "step": 254 }, { "epoch": 0.33420707732634336, "grad_norm": 0.1005859375, "learning_rate": 6.657929226736567e-06, "loss": 0.921, "step": 255 }, { "epoch": 0.33551769331585846, "grad_norm": 0.09375, "learning_rate": 6.644823066841416e-06, "loss": 0.912, "step": 256 }, { "epoch": 0.33682830930537355, "grad_norm": 0.09765625, "learning_rate": 6.631716906946265e-06, "loss": 0.9113, "step": 257 }, { "epoch": 0.3381389252948886, "grad_norm": 0.1123046875, "learning_rate": 6.618610747051114e-06, "loss": 0.92, "step": 258 }, { "epoch": 0.3394495412844037, "grad_norm": 0.09912109375, "learning_rate": 6.6055045871559645e-06, "loss": 0.9471, "step": 259 }, { "epoch": 0.34076015727391873, "grad_norm": 0.09716796875, "learning_rate": 6.592398427260813e-06, "loss": 0.9197, "step": 260 }, { "epoch": 0.3420707732634338, "grad_norm": 0.0927734375, "learning_rate": 6.579292267365662e-06, "loss": 0.8569, "step": 261 }, { "epoch": 0.34338138925294887, "grad_norm": 0.119140625, "learning_rate": 6.566186107470511e-06, "loss": 0.8676, "step": 262 }, { "epoch": 0.34469200524246396, "grad_norm": 0.11083984375, "learning_rate": 6.553079947575362e-06, "loss": 0.9409, "step": 263 }, { "epoch": 0.34600262123197906, "grad_norm": 0.115234375, "learning_rate": 6.539973787680211e-06, "loss": 0.9069, "step": 264 }, { "epoch": 0.3473132372214941, "grad_norm": 0.0908203125, "learning_rate": 6.5268676277850595e-06, "loss": 0.9058, "step": 265 }, { "epoch": 0.3486238532110092, "grad_norm": 0.10693359375, "learning_rate": 6.513761467889908e-06, "loss": 0.9314, "step": 266 }, { "epoch": 0.34993446920052423, "grad_norm": 0.09912109375, "learning_rate": 6.500655307994757e-06, "loss": 0.919, "step": 267 }, { "epoch": 0.35124508519003933, "grad_norm": 0.103515625, "learning_rate": 6.487549148099608e-06, "loss": 0.9022, "step": 268 }, { "epoch": 0.35255570117955437, "grad_norm": 0.1015625, "learning_rate": 6.474442988204457e-06, "loss": 0.873, "step": 269 }, { "epoch": 0.35386631716906947, "grad_norm": 0.0986328125, "learning_rate": 6.4613368283093056e-06, "loss": 0.8797, "step": 270 }, { "epoch": 0.35517693315858456, "grad_norm": 0.10546875, "learning_rate": 6.448230668414155e-06, "loss": 0.9295, "step": 271 }, { "epoch": 0.3564875491480996, "grad_norm": 0.09130859375, "learning_rate": 6.435124508519004e-06, "loss": 0.9065, "step": 272 }, { "epoch": 0.3577981651376147, "grad_norm": 0.08544921875, "learning_rate": 6.422018348623854e-06, "loss": 0.8883, "step": 273 }, { "epoch": 0.35910878112712974, "grad_norm": 0.10791015625, "learning_rate": 6.408912188728703e-06, "loss": 0.949, "step": 274 }, { "epoch": 0.36041939711664484, "grad_norm": 0.1103515625, "learning_rate": 6.3958060288335525e-06, "loss": 1.0008, "step": 275 }, { "epoch": 0.3617300131061599, "grad_norm": 0.10009765625, "learning_rate": 6.382699868938401e-06, "loss": 0.9013, "step": 276 }, { "epoch": 0.36304062909567497, "grad_norm": 0.1103515625, "learning_rate": 6.369593709043251e-06, "loss": 0.8853, "step": 277 }, { "epoch": 0.36435124508519, "grad_norm": 0.09814453125, "learning_rate": 6.3564875491481e-06, "loss": 0.8884, "step": 278 }, { "epoch": 0.3656618610747051, "grad_norm": 0.0927734375, "learning_rate": 6.34338138925295e-06, "loss": 0.8861, "step": 279 }, { "epoch": 0.3669724770642202, "grad_norm": 0.1201171875, "learning_rate": 6.330275229357799e-06, "loss": 0.9303, "step": 280 }, { "epoch": 0.36828309305373524, "grad_norm": 0.10302734375, "learning_rate": 6.3171690694626475e-06, "loss": 0.9221, "step": 281 }, { "epoch": 0.36959370904325034, "grad_norm": 0.0830078125, "learning_rate": 6.304062909567498e-06, "loss": 0.8284, "step": 282 }, { "epoch": 0.3709043250327654, "grad_norm": 0.10009765625, "learning_rate": 6.290956749672347e-06, "loss": 0.8977, "step": 283 }, { "epoch": 0.3722149410222805, "grad_norm": 0.44921875, "learning_rate": 6.277850589777196e-06, "loss": 0.9326, "step": 284 }, { "epoch": 0.3735255570117955, "grad_norm": 0.12255859375, "learning_rate": 6.264744429882045e-06, "loss": 0.9963, "step": 285 }, { "epoch": 0.3748361730013106, "grad_norm": 0.091796875, "learning_rate": 6.2516382699868936e-06, "loss": 0.8851, "step": 286 }, { "epoch": 0.3761467889908257, "grad_norm": 0.10400390625, "learning_rate": 6.238532110091744e-06, "loss": 0.9229, "step": 287 }, { "epoch": 0.37745740498034075, "grad_norm": 0.08984375, "learning_rate": 6.225425950196593e-06, "loss": 0.8724, "step": 288 }, { "epoch": 0.37876802096985585, "grad_norm": 0.10595703125, "learning_rate": 6.212319790301442e-06, "loss": 0.8938, "step": 289 }, { "epoch": 0.3800786369593709, "grad_norm": 0.1025390625, "learning_rate": 6.199213630406291e-06, "loss": 0.8916, "step": 290 }, { "epoch": 0.381389252948886, "grad_norm": 0.12060546875, "learning_rate": 6.186107470511141e-06, "loss": 0.9642, "step": 291 }, { "epoch": 0.382699868938401, "grad_norm": 0.09716796875, "learning_rate": 6.17300131061599e-06, "loss": 0.9056, "step": 292 }, { "epoch": 0.3840104849279161, "grad_norm": 0.09521484375, "learning_rate": 6.159895150720839e-06, "loss": 0.834, "step": 293 }, { "epoch": 0.3853211009174312, "grad_norm": 0.0966796875, "learning_rate": 6.146788990825688e-06, "loss": 0.866, "step": 294 }, { "epoch": 0.38663171690694625, "grad_norm": 0.099609375, "learning_rate": 6.133682830930538e-06, "loss": 0.8584, "step": 295 }, { "epoch": 0.38794233289646135, "grad_norm": 0.1025390625, "learning_rate": 6.120576671035387e-06, "loss": 0.8973, "step": 296 }, { "epoch": 0.3892529488859764, "grad_norm": 0.10888671875, "learning_rate": 6.107470511140236e-06, "loss": 0.9327, "step": 297 }, { "epoch": 0.3905635648754915, "grad_norm": 0.09765625, "learning_rate": 6.094364351245086e-06, "loss": 0.8952, "step": 298 }, { "epoch": 0.3918741808650065, "grad_norm": 0.08251953125, "learning_rate": 6.081258191349935e-06, "loss": 0.8542, "step": 299 }, { "epoch": 0.3931847968545216, "grad_norm": 0.09814453125, "learning_rate": 6.068152031454784e-06, "loss": 0.8459, "step": 300 }, { "epoch": 0.3944954128440367, "grad_norm": 0.12060546875, "learning_rate": 6.0550458715596335e-06, "loss": 0.9723, "step": 301 }, { "epoch": 0.39580602883355176, "grad_norm": 0.0859375, "learning_rate": 6.041939711664483e-06, "loss": 0.8052, "step": 302 }, { "epoch": 0.39711664482306686, "grad_norm": 0.1015625, "learning_rate": 6.028833551769332e-06, "loss": 0.8934, "step": 303 }, { "epoch": 0.3984272608125819, "grad_norm": 0.09423828125, "learning_rate": 6.015727391874181e-06, "loss": 0.8904, "step": 304 }, { "epoch": 0.399737876802097, "grad_norm": 0.08447265625, "learning_rate": 6.002621231979031e-06, "loss": 0.8462, "step": 305 }, { "epoch": 0.40104849279161203, "grad_norm": 0.09375, "learning_rate": 5.9895150720838804e-06, "loss": 0.8518, "step": 306 }, { "epoch": 0.40235910878112713, "grad_norm": 0.08642578125, "learning_rate": 5.976408912188729e-06, "loss": 0.8333, "step": 307 }, { "epoch": 0.4036697247706422, "grad_norm": 0.10791015625, "learning_rate": 5.963302752293578e-06, "loss": 0.9149, "step": 308 }, { "epoch": 0.40498034076015726, "grad_norm": 0.10302734375, "learning_rate": 5.950196592398427e-06, "loss": 0.9044, "step": 309 }, { "epoch": 0.40629095674967236, "grad_norm": 0.08544921875, "learning_rate": 5.937090432503278e-06, "loss": 0.8535, "step": 310 }, { "epoch": 0.4076015727391874, "grad_norm": 0.087890625, "learning_rate": 5.9239842726081265e-06, "loss": 0.8441, "step": 311 }, { "epoch": 0.4089121887287025, "grad_norm": 0.09228515625, "learning_rate": 5.910878112712975e-06, "loss": 0.9094, "step": 312 }, { "epoch": 0.41022280471821754, "grad_norm": 0.1044921875, "learning_rate": 5.897771952817824e-06, "loss": 0.9137, "step": 313 }, { "epoch": 0.41153342070773263, "grad_norm": 0.0927734375, "learning_rate": 5.884665792922674e-06, "loss": 0.8653, "step": 314 }, { "epoch": 0.41284403669724773, "grad_norm": 0.09423828125, "learning_rate": 5.871559633027524e-06, "loss": 0.8478, "step": 315 }, { "epoch": 0.41415465268676277, "grad_norm": 0.1064453125, "learning_rate": 5.858453473132373e-06, "loss": 0.9632, "step": 316 }, { "epoch": 0.41546526867627787, "grad_norm": 0.10498046875, "learning_rate": 5.8453473132372215e-06, "loss": 0.9283, "step": 317 }, { "epoch": 0.4167758846657929, "grad_norm": 0.095703125, "learning_rate": 5.832241153342071e-06, "loss": 0.8875, "step": 318 }, { "epoch": 0.418086500655308, "grad_norm": 0.1728515625, "learning_rate": 5.819134993446921e-06, "loss": 0.9899, "step": 319 }, { "epoch": 0.41939711664482304, "grad_norm": 0.08837890625, "learning_rate": 5.80602883355177e-06, "loss": 0.8889, "step": 320 }, { "epoch": 0.42070773263433814, "grad_norm": 0.1015625, "learning_rate": 5.792922673656619e-06, "loss": 0.89, "step": 321 }, { "epoch": 0.42201834862385323, "grad_norm": 0.09765625, "learning_rate": 5.7798165137614684e-06, "loss": 0.845, "step": 322 }, { "epoch": 0.4233289646133683, "grad_norm": 0.0966796875, "learning_rate": 5.766710353866317e-06, "loss": 0.8742, "step": 323 }, { "epoch": 0.42463958060288337, "grad_norm": 0.09228515625, "learning_rate": 5.753604193971167e-06, "loss": 0.8777, "step": 324 }, { "epoch": 0.4259501965923984, "grad_norm": 0.1025390625, "learning_rate": 5.740498034076017e-06, "loss": 0.8747, "step": 325 }, { "epoch": 0.4272608125819135, "grad_norm": 0.11962890625, "learning_rate": 5.727391874180866e-06, "loss": 0.8858, "step": 326 }, { "epoch": 0.42857142857142855, "grad_norm": 0.0908203125, "learning_rate": 5.7142857142857145e-06, "loss": 0.8502, "step": 327 }, { "epoch": 0.42988204456094364, "grad_norm": 0.09912109375, "learning_rate": 5.701179554390564e-06, "loss": 0.8953, "step": 328 }, { "epoch": 0.43119266055045874, "grad_norm": 0.10888671875, "learning_rate": 5.688073394495414e-06, "loss": 0.8451, "step": 329 }, { "epoch": 0.4325032765399738, "grad_norm": 0.0986328125, "learning_rate": 5.674967234600263e-06, "loss": 0.9342, "step": 330 }, { "epoch": 0.4338138925294889, "grad_norm": 0.09619140625, "learning_rate": 5.661861074705112e-06, "loss": 0.8322, "step": 331 }, { "epoch": 0.4351245085190039, "grad_norm": 0.1103515625, "learning_rate": 5.648754914809961e-06, "loss": 0.9098, "step": 332 }, { "epoch": 0.436435124508519, "grad_norm": 0.09814453125, "learning_rate": 5.635648754914811e-06, "loss": 0.8983, "step": 333 }, { "epoch": 0.43774574049803405, "grad_norm": 0.107421875, "learning_rate": 5.62254259501966e-06, "loss": 0.8822, "step": 334 }, { "epoch": 0.43905635648754915, "grad_norm": 0.11865234375, "learning_rate": 5.609436435124509e-06, "loss": 0.9757, "step": 335 }, { "epoch": 0.44036697247706424, "grad_norm": 0.09326171875, "learning_rate": 5.596330275229358e-06, "loss": 0.8545, "step": 336 }, { "epoch": 0.4416775884665793, "grad_norm": 0.09375, "learning_rate": 5.583224115334207e-06, "loss": 0.8864, "step": 337 }, { "epoch": 0.4429882044560944, "grad_norm": 0.09716796875, "learning_rate": 5.570117955439057e-06, "loss": 0.8633, "step": 338 }, { "epoch": 0.4442988204456094, "grad_norm": 0.10009765625, "learning_rate": 5.557011795543906e-06, "loss": 0.8907, "step": 339 }, { "epoch": 0.4456094364351245, "grad_norm": 0.1015625, "learning_rate": 5.543905635648755e-06, "loss": 0.9361, "step": 340 }, { "epoch": 0.44692005242463956, "grad_norm": 0.10302734375, "learning_rate": 5.530799475753605e-06, "loss": 0.8908, "step": 341 }, { "epoch": 0.44823066841415465, "grad_norm": 0.08984375, "learning_rate": 5.5176933158584545e-06, "loss": 0.8725, "step": 342 }, { "epoch": 0.44954128440366975, "grad_norm": 0.0869140625, "learning_rate": 5.504587155963303e-06, "loss": 0.8167, "step": 343 }, { "epoch": 0.4508519003931848, "grad_norm": 0.1005859375, "learning_rate": 5.491480996068152e-06, "loss": 0.9276, "step": 344 }, { "epoch": 0.4521625163826999, "grad_norm": 0.08984375, "learning_rate": 5.478374836173002e-06, "loss": 0.8772, "step": 345 }, { "epoch": 0.4534731323722149, "grad_norm": 0.0869140625, "learning_rate": 5.465268676277851e-06, "loss": 0.8358, "step": 346 }, { "epoch": 0.45478374836173, "grad_norm": 0.08837890625, "learning_rate": 5.4521625163827006e-06, "loss": 0.8637, "step": 347 }, { "epoch": 0.45609436435124506, "grad_norm": 0.09326171875, "learning_rate": 5.4390563564875494e-06, "loss": 0.9064, "step": 348 }, { "epoch": 0.45740498034076016, "grad_norm": 0.10009765625, "learning_rate": 5.425950196592399e-06, "loss": 0.8661, "step": 349 }, { "epoch": 0.45871559633027525, "grad_norm": 0.0927734375, "learning_rate": 5.412844036697248e-06, "loss": 0.847, "step": 350 }, { "epoch": 0.4600262123197903, "grad_norm": 0.0927734375, "learning_rate": 5.399737876802097e-06, "loss": 0.86, "step": 351 }, { "epoch": 0.4613368283093054, "grad_norm": 0.10302734375, "learning_rate": 5.3866317169069475e-06, "loss": 0.8319, "step": 352 }, { "epoch": 0.46264744429882043, "grad_norm": 0.0888671875, "learning_rate": 5.373525557011796e-06, "loss": 0.8655, "step": 353 }, { "epoch": 0.4639580602883355, "grad_norm": 0.08984375, "learning_rate": 5.360419397116645e-06, "loss": 0.8668, "step": 354 }, { "epoch": 0.46526867627785057, "grad_norm": 0.087890625, "learning_rate": 5.347313237221494e-06, "loss": 0.8541, "step": 355 }, { "epoch": 0.46657929226736566, "grad_norm": 0.08642578125, "learning_rate": 5.334207077326345e-06, "loss": 0.8526, "step": 356 }, { "epoch": 0.46788990825688076, "grad_norm": 0.0859375, "learning_rate": 5.3211009174311936e-06, "loss": 0.836, "step": 357 }, { "epoch": 0.4692005242463958, "grad_norm": 0.115234375, "learning_rate": 5.3079947575360424e-06, "loss": 0.8742, "step": 358 }, { "epoch": 0.4705111402359109, "grad_norm": 0.09423828125, "learning_rate": 5.294888597640891e-06, "loss": 0.8559, "step": 359 }, { "epoch": 0.47182175622542594, "grad_norm": 0.11328125, "learning_rate": 5.28178243774574e-06, "loss": 0.8729, "step": 360 }, { "epoch": 0.47313237221494103, "grad_norm": 0.0859375, "learning_rate": 5.268676277850591e-06, "loss": 0.8849, "step": 361 }, { "epoch": 0.4744429882044561, "grad_norm": 0.11328125, "learning_rate": 5.25557011795544e-06, "loss": 0.9169, "step": 362 }, { "epoch": 0.47575360419397117, "grad_norm": 0.09521484375, "learning_rate": 5.2424639580602885e-06, "loss": 0.911, "step": 363 }, { "epoch": 0.47706422018348627, "grad_norm": 0.095703125, "learning_rate": 5.229357798165137e-06, "loss": 0.9222, "step": 364 }, { "epoch": 0.4783748361730013, "grad_norm": 0.11181640625, "learning_rate": 5.216251638269987e-06, "loss": 0.9473, "step": 365 }, { "epoch": 0.4796854521625164, "grad_norm": 0.10546875, "learning_rate": 5.203145478374837e-06, "loss": 0.8922, "step": 366 }, { "epoch": 0.48099606815203144, "grad_norm": 0.09521484375, "learning_rate": 5.190039318479686e-06, "loss": 0.8529, "step": 367 }, { "epoch": 0.48230668414154654, "grad_norm": 0.08984375, "learning_rate": 5.1769331585845355e-06, "loss": 0.8587, "step": 368 }, { "epoch": 0.4836173001310616, "grad_norm": 0.0830078125, "learning_rate": 5.163826998689384e-06, "loss": 0.8101, "step": 369 }, { "epoch": 0.4849279161205767, "grad_norm": 0.10595703125, "learning_rate": 5.150720838794234e-06, "loss": 0.8956, "step": 370 }, { "epoch": 0.48623853211009177, "grad_norm": 0.08349609375, "learning_rate": 5.137614678899083e-06, "loss": 0.793, "step": 371 }, { "epoch": 0.4875491480996068, "grad_norm": 0.10107421875, "learning_rate": 5.124508519003933e-06, "loss": 0.901, "step": 372 }, { "epoch": 0.4888597640891219, "grad_norm": 0.09326171875, "learning_rate": 5.1114023591087816e-06, "loss": 0.8753, "step": 373 }, { "epoch": 0.49017038007863695, "grad_norm": 0.0888671875, "learning_rate": 5.0982961992136304e-06, "loss": 0.8777, "step": 374 }, { "epoch": 0.49148099606815204, "grad_norm": 0.09521484375, "learning_rate": 5.08519003931848e-06, "loss": 0.8757, "step": 375 }, { "epoch": 0.4927916120576671, "grad_norm": 0.08837890625, "learning_rate": 5.07208387942333e-06, "loss": 0.8753, "step": 376 }, { "epoch": 0.4941022280471822, "grad_norm": 0.08642578125, "learning_rate": 5.058977719528179e-06, "loss": 0.8613, "step": 377 }, { "epoch": 0.4954128440366973, "grad_norm": 0.08544921875, "learning_rate": 5.045871559633028e-06, "loss": 0.8793, "step": 378 }, { "epoch": 0.4967234600262123, "grad_norm": 0.09375, "learning_rate": 5.0327653997378765e-06, "loss": 0.8717, "step": 379 }, { "epoch": 0.4980340760157274, "grad_norm": 0.08984375, "learning_rate": 5.019659239842727e-06, "loss": 0.83, "step": 380 }, { "epoch": 0.49934469200524245, "grad_norm": 0.0966796875, "learning_rate": 5.006553079947576e-06, "loss": 0.8717, "step": 381 }, { "epoch": 0.5006553079947575, "grad_norm": 0.1005859375, "learning_rate": 4.993446920052425e-06, "loss": 0.9068, "step": 382 }, { "epoch": 0.5019659239842726, "grad_norm": 0.09375, "learning_rate": 4.9803407601572746e-06, "loss": 0.8594, "step": 383 }, { "epoch": 0.5032765399737876, "grad_norm": 0.08349609375, "learning_rate": 4.9672346002621235e-06, "loss": 0.8479, "step": 384 }, { "epoch": 0.5045871559633027, "grad_norm": 0.130859375, "learning_rate": 4.954128440366973e-06, "loss": 0.8989, "step": 385 }, { "epoch": 0.5058977719528178, "grad_norm": 0.1044921875, "learning_rate": 4.941022280471822e-06, "loss": 0.8959, "step": 386 }, { "epoch": 0.5072083879423329, "grad_norm": 0.08056640625, "learning_rate": 4.927916120576671e-06, "loss": 0.8326, "step": 387 }, { "epoch": 0.508519003931848, "grad_norm": 0.09375, "learning_rate": 4.914809960681521e-06, "loss": 0.8961, "step": 388 }, { "epoch": 0.509829619921363, "grad_norm": 0.10009765625, "learning_rate": 4.9017038007863695e-06, "loss": 0.8708, "step": 389 }, { "epoch": 0.5111402359108781, "grad_norm": 0.0927734375, "learning_rate": 4.888597640891219e-06, "loss": 0.8549, "step": 390 }, { "epoch": 0.5124508519003932, "grad_norm": 0.109375, "learning_rate": 4.875491480996068e-06, "loss": 0.8987, "step": 391 }, { "epoch": 0.5137614678899083, "grad_norm": 0.0966796875, "learning_rate": 4.862385321100918e-06, "loss": 0.8615, "step": 392 }, { "epoch": 0.5150720838794234, "grad_norm": 0.1044921875, "learning_rate": 4.849279161205767e-06, "loss": 0.8803, "step": 393 }, { "epoch": 0.5163826998689384, "grad_norm": 0.08056640625, "learning_rate": 4.8361730013106165e-06, "loss": 0.87, "step": 394 }, { "epoch": 0.5176933158584535, "grad_norm": 0.130859375, "learning_rate": 4.823066841415465e-06, "loss": 1.0097, "step": 395 }, { "epoch": 0.5190039318479686, "grad_norm": 0.0947265625, "learning_rate": 4.809960681520315e-06, "loss": 0.8873, "step": 396 }, { "epoch": 0.5203145478374837, "grad_norm": 0.09130859375, "learning_rate": 4.796854521625165e-06, "loss": 0.88, "step": 397 }, { "epoch": 0.5216251638269986, "grad_norm": 0.0966796875, "learning_rate": 4.783748361730014e-06, "loss": 0.8896, "step": 398 }, { "epoch": 0.5229357798165137, "grad_norm": 0.0859375, "learning_rate": 4.770642201834863e-06, "loss": 0.8432, "step": 399 }, { "epoch": 0.5242463958060288, "grad_norm": 0.1025390625, "learning_rate": 4.757536041939712e-06, "loss": 0.9044, "step": 400 }, { "epoch": 0.5255570117955439, "grad_norm": 0.09716796875, "learning_rate": 4.744429882044561e-06, "loss": 0.8561, "step": 401 }, { "epoch": 0.526867627785059, "grad_norm": 0.1005859375, "learning_rate": 4.731323722149411e-06, "loss": 0.8843, "step": 402 }, { "epoch": 0.528178243774574, "grad_norm": 0.09765625, "learning_rate": 4.71821756225426e-06, "loss": 0.8224, "step": 403 }, { "epoch": 0.5294888597640891, "grad_norm": 0.08740234375, "learning_rate": 4.7051114023591095e-06, "loss": 0.8476, "step": 404 }, { "epoch": 0.5307994757536042, "grad_norm": 0.0869140625, "learning_rate": 4.692005242463958e-06, "loss": 0.8652, "step": 405 }, { "epoch": 0.5321100917431193, "grad_norm": 0.1044921875, "learning_rate": 4.678899082568808e-06, "loss": 0.8609, "step": 406 }, { "epoch": 0.5334207077326344, "grad_norm": 0.08935546875, "learning_rate": 4.665792922673657e-06, "loss": 0.8656, "step": 407 }, { "epoch": 0.5347313237221494, "grad_norm": 0.09130859375, "learning_rate": 4.652686762778506e-06, "loss": 0.8402, "step": 408 }, { "epoch": 0.5360419397116645, "grad_norm": 0.0810546875, "learning_rate": 4.639580602883356e-06, "loss": 0.8096, "step": 409 }, { "epoch": 0.5373525557011796, "grad_norm": 0.0966796875, "learning_rate": 4.6264744429882045e-06, "loss": 0.8851, "step": 410 }, { "epoch": 0.5386631716906947, "grad_norm": 0.08447265625, "learning_rate": 4.613368283093054e-06, "loss": 0.8061, "step": 411 }, { "epoch": 0.5399737876802096, "grad_norm": 0.08935546875, "learning_rate": 4.600262123197903e-06, "loss": 0.8385, "step": 412 }, { "epoch": 0.5412844036697247, "grad_norm": 0.095703125, "learning_rate": 4.587155963302753e-06, "loss": 0.8718, "step": 413 }, { "epoch": 0.5425950196592398, "grad_norm": 0.0849609375, "learning_rate": 4.574049803407602e-06, "loss": 0.8323, "step": 414 }, { "epoch": 0.5439056356487549, "grad_norm": 0.09912109375, "learning_rate": 4.560943643512451e-06, "loss": 0.9231, "step": 415 }, { "epoch": 0.54521625163827, "grad_norm": 0.099609375, "learning_rate": 4.5478374836173e-06, "loss": 0.8611, "step": 416 }, { "epoch": 0.546526867627785, "grad_norm": 0.10595703125, "learning_rate": 4.53473132372215e-06, "loss": 0.862, "step": 417 }, { "epoch": 0.5478374836173001, "grad_norm": 0.08447265625, "learning_rate": 4.521625163826999e-06, "loss": 0.8414, "step": 418 }, { "epoch": 0.5491480996068152, "grad_norm": 0.0849609375, "learning_rate": 4.508519003931849e-06, "loss": 0.8363, "step": 419 }, { "epoch": 0.5504587155963303, "grad_norm": 0.09375, "learning_rate": 4.4954128440366975e-06, "loss": 0.8394, "step": 420 }, { "epoch": 0.5517693315858454, "grad_norm": 0.10009765625, "learning_rate": 4.482306684141547e-06, "loss": 0.8798, "step": 421 }, { "epoch": 0.5530799475753604, "grad_norm": 0.08544921875, "learning_rate": 4.469200524246396e-06, "loss": 0.8672, "step": 422 }, { "epoch": 0.5543905635648755, "grad_norm": 0.1044921875, "learning_rate": 4.456094364351246e-06, "loss": 0.8717, "step": 423 }, { "epoch": 0.5557011795543906, "grad_norm": 0.0830078125, "learning_rate": 4.442988204456095e-06, "loss": 0.8614, "step": 424 }, { "epoch": 0.5570117955439057, "grad_norm": 0.09912109375, "learning_rate": 4.429882044560944e-06, "loss": 0.8832, "step": 425 }, { "epoch": 0.5583224115334207, "grad_norm": 0.08935546875, "learning_rate": 4.416775884665793e-06, "loss": 0.8659, "step": 426 }, { "epoch": 0.5596330275229358, "grad_norm": 0.08251953125, "learning_rate": 4.403669724770643e-06, "loss": 0.827, "step": 427 }, { "epoch": 0.5609436435124509, "grad_norm": 0.08740234375, "learning_rate": 4.390563564875492e-06, "loss": 0.8626, "step": 428 }, { "epoch": 0.562254259501966, "grad_norm": 0.09130859375, "learning_rate": 4.377457404980341e-06, "loss": 0.8755, "step": 429 }, { "epoch": 0.563564875491481, "grad_norm": 0.08984375, "learning_rate": 4.3643512450851905e-06, "loss": 0.8616, "step": 430 }, { "epoch": 0.564875491480996, "grad_norm": 0.1298828125, "learning_rate": 4.351245085190039e-06, "loss": 0.9583, "step": 431 }, { "epoch": 0.5661861074705111, "grad_norm": 0.09765625, "learning_rate": 4.338138925294889e-06, "loss": 0.8579, "step": 432 }, { "epoch": 0.5674967234600262, "grad_norm": 0.0908203125, "learning_rate": 4.325032765399738e-06, "loss": 0.8487, "step": 433 }, { "epoch": 0.5688073394495413, "grad_norm": 0.095703125, "learning_rate": 4.311926605504588e-06, "loss": 0.9197, "step": 434 }, { "epoch": 0.5701179554390564, "grad_norm": 0.0810546875, "learning_rate": 4.298820445609437e-06, "loss": 0.8562, "step": 435 }, { "epoch": 0.5714285714285714, "grad_norm": 0.09521484375, "learning_rate": 4.2857142857142855e-06, "loss": 0.8741, "step": 436 }, { "epoch": 0.5727391874180865, "grad_norm": 0.0849609375, "learning_rate": 4.272608125819135e-06, "loss": 0.8329, "step": 437 }, { "epoch": 0.5740498034076016, "grad_norm": 0.0947265625, "learning_rate": 4.259501965923984e-06, "loss": 0.8682, "step": 438 }, { "epoch": 0.5753604193971167, "grad_norm": 0.0927734375, "learning_rate": 4.246395806028834e-06, "loss": 0.832, "step": 439 }, { "epoch": 0.5766710353866317, "grad_norm": 0.080078125, "learning_rate": 4.2332896461336835e-06, "loss": 0.7371, "step": 440 }, { "epoch": 0.5779816513761468, "grad_norm": 0.08642578125, "learning_rate": 4.220183486238532e-06, "loss": 0.787, "step": 441 }, { "epoch": 0.5792922673656619, "grad_norm": 0.091796875, "learning_rate": 4.207077326343382e-06, "loss": 0.8569, "step": 442 }, { "epoch": 0.580602883355177, "grad_norm": 0.09375, "learning_rate": 4.193971166448231e-06, "loss": 0.8624, "step": 443 }, { "epoch": 0.581913499344692, "grad_norm": 0.09716796875, "learning_rate": 4.180865006553081e-06, "loss": 0.8681, "step": 444 }, { "epoch": 0.583224115334207, "grad_norm": 0.09716796875, "learning_rate": 4.16775884665793e-06, "loss": 0.8535, "step": 445 }, { "epoch": 0.5845347313237221, "grad_norm": 0.0947265625, "learning_rate": 4.154652686762779e-06, "loss": 0.8643, "step": 446 }, { "epoch": 0.5858453473132372, "grad_norm": 0.099609375, "learning_rate": 4.141546526867628e-06, "loss": 0.8925, "step": 447 }, { "epoch": 0.5871559633027523, "grad_norm": 0.1064453125, "learning_rate": 4.128440366972478e-06, "loss": 0.8615, "step": 448 }, { "epoch": 0.5884665792922673, "grad_norm": 0.08544921875, "learning_rate": 4.115334207077327e-06, "loss": 0.8465, "step": 449 }, { "epoch": 0.5897771952817824, "grad_norm": 0.0830078125, "learning_rate": 4.102228047182176e-06, "loss": 0.7813, "step": 450 }, { "epoch": 0.5910878112712975, "grad_norm": 0.10302734375, "learning_rate": 4.089121887287025e-06, "loss": 0.8841, "step": 451 }, { "epoch": 0.5923984272608126, "grad_norm": 0.1025390625, "learning_rate": 4.076015727391874e-06, "loss": 0.9014, "step": 452 }, { "epoch": 0.5937090432503277, "grad_norm": 0.080078125, "learning_rate": 4.062909567496724e-06, "loss": 0.8079, "step": 453 }, { "epoch": 0.5950196592398427, "grad_norm": 0.09814453125, "learning_rate": 4.049803407601573e-06, "loss": 0.8983, "step": 454 }, { "epoch": 0.5963302752293578, "grad_norm": 0.08935546875, "learning_rate": 4.036697247706423e-06, "loss": 0.838, "step": 455 }, { "epoch": 0.5976408912188729, "grad_norm": 0.09033203125, "learning_rate": 4.0235910878112715e-06, "loss": 0.8754, "step": 456 }, { "epoch": 0.598951507208388, "grad_norm": 0.09765625, "learning_rate": 4.01048492791612e-06, "loss": 0.8545, "step": 457 }, { "epoch": 0.6002621231979031, "grad_norm": 0.09228515625, "learning_rate": 3.99737876802097e-06, "loss": 0.8233, "step": 458 }, { "epoch": 0.601572739187418, "grad_norm": 0.322265625, "learning_rate": 3.984272608125819e-06, "loss": 1.1009, "step": 459 }, { "epoch": 0.6028833551769331, "grad_norm": 0.10205078125, "learning_rate": 3.971166448230669e-06, "loss": 0.8428, "step": 460 }, { "epoch": 0.6041939711664482, "grad_norm": 0.09130859375, "learning_rate": 3.958060288335518e-06, "loss": 0.8053, "step": 461 }, { "epoch": 0.6055045871559633, "grad_norm": 0.1044921875, "learning_rate": 3.944954128440367e-06, "loss": 0.8954, "step": 462 }, { "epoch": 0.6068152031454783, "grad_norm": 0.0908203125, "learning_rate": 3.931847968545216e-06, "loss": 0.847, "step": 463 }, { "epoch": 0.6081258191349934, "grad_norm": 0.08203125, "learning_rate": 3.918741808650066e-06, "loss": 0.8027, "step": 464 }, { "epoch": 0.6094364351245085, "grad_norm": 0.1015625, "learning_rate": 3.905635648754915e-06, "loss": 0.8583, "step": 465 }, { "epoch": 0.6107470511140236, "grad_norm": 0.10400390625, "learning_rate": 3.8925294888597645e-06, "loss": 0.8867, "step": 466 }, { "epoch": 0.6120576671035387, "grad_norm": 0.0810546875, "learning_rate": 3.879423328964614e-06, "loss": 0.8511, "step": 467 }, { "epoch": 0.6133682830930537, "grad_norm": 0.08447265625, "learning_rate": 3.866317169069463e-06, "loss": 0.7994, "step": 468 }, { "epoch": 0.6146788990825688, "grad_norm": 0.08642578125, "learning_rate": 3.853211009174313e-06, "loss": 0.7958, "step": 469 }, { "epoch": 0.6159895150720839, "grad_norm": 0.09375, "learning_rate": 3.840104849279162e-06, "loss": 0.8538, "step": 470 }, { "epoch": 0.617300131061599, "grad_norm": 0.1142578125, "learning_rate": 3.826998689384011e-06, "loss": 0.8669, "step": 471 }, { "epoch": 0.6186107470511141, "grad_norm": 0.091796875, "learning_rate": 3.8138925294888603e-06, "loss": 0.844, "step": 472 }, { "epoch": 0.6199213630406291, "grad_norm": 0.17578125, "learning_rate": 3.800786369593709e-06, "loss": 0.8102, "step": 473 }, { "epoch": 0.6212319790301442, "grad_norm": 0.0947265625, "learning_rate": 3.787680209698559e-06, "loss": 0.8565, "step": 474 }, { "epoch": 0.6225425950196593, "grad_norm": 0.11572265625, "learning_rate": 3.774574049803408e-06, "loss": 0.8768, "step": 475 }, { "epoch": 0.6238532110091743, "grad_norm": 0.09326171875, "learning_rate": 3.7614678899082575e-06, "loss": 0.8301, "step": 476 }, { "epoch": 0.6251638269986893, "grad_norm": 0.10302734375, "learning_rate": 3.7483617300131064e-06, "loss": 0.8675, "step": 477 }, { "epoch": 0.6264744429882044, "grad_norm": 0.08642578125, "learning_rate": 3.7352555701179553e-06, "loss": 0.8506, "step": 478 }, { "epoch": 0.6277850589777195, "grad_norm": 0.0986328125, "learning_rate": 3.722149410222805e-06, "loss": 0.8497, "step": 479 }, { "epoch": 0.6290956749672346, "grad_norm": 0.08447265625, "learning_rate": 3.709043250327654e-06, "loss": 0.8406, "step": 480 }, { "epoch": 0.6304062909567497, "grad_norm": 0.0986328125, "learning_rate": 3.6959370904325036e-06, "loss": 0.8673, "step": 481 }, { "epoch": 0.6317169069462647, "grad_norm": 0.1064453125, "learning_rate": 3.682830930537353e-06, "loss": 0.8574, "step": 482 }, { "epoch": 0.6330275229357798, "grad_norm": 0.0986328125, "learning_rate": 3.6697247706422022e-06, "loss": 0.8517, "step": 483 }, { "epoch": 0.6343381389252949, "grad_norm": 0.10791015625, "learning_rate": 3.6566186107470515e-06, "loss": 0.8969, "step": 484 }, { "epoch": 0.63564875491481, "grad_norm": 0.08935546875, "learning_rate": 3.6435124508519004e-06, "loss": 0.828, "step": 485 }, { "epoch": 0.6369593709043251, "grad_norm": 0.1005859375, "learning_rate": 3.63040629095675e-06, "loss": 0.888, "step": 486 }, { "epoch": 0.6382699868938401, "grad_norm": 0.09765625, "learning_rate": 3.617300131061599e-06, "loss": 0.8397, "step": 487 }, { "epoch": 0.6395806028833552, "grad_norm": 0.08447265625, "learning_rate": 3.6041939711664487e-06, "loss": 0.816, "step": 488 }, { "epoch": 0.6408912188728703, "grad_norm": 0.0927734375, "learning_rate": 3.5910878112712976e-06, "loss": 0.8358, "step": 489 }, { "epoch": 0.6422018348623854, "grad_norm": 0.09130859375, "learning_rate": 3.5779816513761473e-06, "loss": 0.8721, "step": 490 }, { "epoch": 0.6435124508519003, "grad_norm": 0.08984375, "learning_rate": 3.5648754914809962e-06, "loss": 0.8537, "step": 491 }, { "epoch": 0.6448230668414154, "grad_norm": 0.0927734375, "learning_rate": 3.551769331585846e-06, "loss": 0.8555, "step": 492 }, { "epoch": 0.6461336828309305, "grad_norm": 0.08056640625, "learning_rate": 3.538663171690695e-06, "loss": 0.824, "step": 493 }, { "epoch": 0.6474442988204456, "grad_norm": 0.09326171875, "learning_rate": 3.525557011795544e-06, "loss": 0.861, "step": 494 }, { "epoch": 0.6487549148099607, "grad_norm": 0.1015625, "learning_rate": 3.5124508519003934e-06, "loss": 0.8718, "step": 495 }, { "epoch": 0.6500655307994757, "grad_norm": 0.08642578125, "learning_rate": 3.4993446920052427e-06, "loss": 0.8362, "step": 496 }, { "epoch": 0.6513761467889908, "grad_norm": 0.09521484375, "learning_rate": 3.486238532110092e-06, "loss": 0.8618, "step": 497 }, { "epoch": 0.6526867627785059, "grad_norm": 0.08837890625, "learning_rate": 3.4731323722149413e-06, "loss": 0.7983, "step": 498 }, { "epoch": 0.653997378768021, "grad_norm": 0.095703125, "learning_rate": 3.460026212319791e-06, "loss": 0.8572, "step": 499 }, { "epoch": 0.6553079947575361, "grad_norm": 0.08935546875, "learning_rate": 3.44692005242464e-06, "loss": 0.8217, "step": 500 }, { "epoch": 0.6566186107470511, "grad_norm": 0.09033203125, "learning_rate": 3.433813892529489e-06, "loss": 0.8224, "step": 501 }, { "epoch": 0.6579292267365662, "grad_norm": 0.09619140625, "learning_rate": 3.4207077326343385e-06, "loss": 0.8775, "step": 502 }, { "epoch": 0.6592398427260813, "grad_norm": 0.095703125, "learning_rate": 3.4076015727391874e-06, "loss": 0.8743, "step": 503 }, { "epoch": 0.6605504587155964, "grad_norm": 0.10888671875, "learning_rate": 3.394495412844037e-06, "loss": 0.8613, "step": 504 }, { "epoch": 0.6618610747051114, "grad_norm": 0.09326171875, "learning_rate": 3.381389252948886e-06, "loss": 0.8204, "step": 505 }, { "epoch": 0.6631716906946264, "grad_norm": 0.099609375, "learning_rate": 3.3682830930537357e-06, "loss": 0.9019, "step": 506 }, { "epoch": 0.6644823066841415, "grad_norm": 0.0849609375, "learning_rate": 3.3551769331585846e-06, "loss": 0.8578, "step": 507 }, { "epoch": 0.6657929226736566, "grad_norm": 0.095703125, "learning_rate": 3.342070773263434e-06, "loss": 0.8256, "step": 508 }, { "epoch": 0.6671035386631717, "grad_norm": 0.1201171875, "learning_rate": 3.3289646133682837e-06, "loss": 0.9122, "step": 509 }, { "epoch": 0.6684141546526867, "grad_norm": 0.087890625, "learning_rate": 3.3158584534731325e-06, "loss": 0.8423, "step": 510 }, { "epoch": 0.6697247706422018, "grad_norm": 0.125, "learning_rate": 3.3027522935779823e-06, "loss": 0.9521, "step": 511 }, { "epoch": 0.6710353866317169, "grad_norm": 0.08447265625, "learning_rate": 3.289646133682831e-06, "loss": 0.8626, "step": 512 }, { "epoch": 0.672346002621232, "grad_norm": 0.08837890625, "learning_rate": 3.276539973787681e-06, "loss": 0.85, "step": 513 }, { "epoch": 0.6736566186107471, "grad_norm": 0.1015625, "learning_rate": 3.2634338138925297e-06, "loss": 0.9004, "step": 514 }, { "epoch": 0.6749672346002621, "grad_norm": 0.0927734375, "learning_rate": 3.2503276539973786e-06, "loss": 0.8526, "step": 515 }, { "epoch": 0.6762778505897772, "grad_norm": 0.0986328125, "learning_rate": 3.2372214941022283e-06, "loss": 0.8744, "step": 516 }, { "epoch": 0.6775884665792923, "grad_norm": 0.0859375, "learning_rate": 3.2241153342070776e-06, "loss": 0.8476, "step": 517 }, { "epoch": 0.6788990825688074, "grad_norm": 0.103515625, "learning_rate": 3.211009174311927e-06, "loss": 0.8958, "step": 518 }, { "epoch": 0.6802096985583224, "grad_norm": 0.09326171875, "learning_rate": 3.1979030144167763e-06, "loss": 0.8467, "step": 519 }, { "epoch": 0.6815203145478375, "grad_norm": 0.09375, "learning_rate": 3.1847968545216256e-06, "loss": 0.8136, "step": 520 }, { "epoch": 0.6828309305373526, "grad_norm": 0.09814453125, "learning_rate": 3.171690694626475e-06, "loss": 0.8534, "step": 521 }, { "epoch": 0.6841415465268676, "grad_norm": 0.0947265625, "learning_rate": 3.1585845347313237e-06, "loss": 0.8944, "step": 522 }, { "epoch": 0.6854521625163827, "grad_norm": 0.0947265625, "learning_rate": 3.1454783748361735e-06, "loss": 0.8361, "step": 523 }, { "epoch": 0.6867627785058977, "grad_norm": 0.09765625, "learning_rate": 3.1323722149410223e-06, "loss": 0.879, "step": 524 }, { "epoch": 0.6880733944954128, "grad_norm": 0.09619140625, "learning_rate": 3.119266055045872e-06, "loss": 0.8758, "step": 525 }, { "epoch": 0.6893840104849279, "grad_norm": 0.11083984375, "learning_rate": 3.106159895150721e-06, "loss": 0.9117, "step": 526 }, { "epoch": 0.690694626474443, "grad_norm": 0.09130859375, "learning_rate": 3.0930537352555707e-06, "loss": 0.848, "step": 527 }, { "epoch": 0.6920052424639581, "grad_norm": 0.1025390625, "learning_rate": 3.0799475753604195e-06, "loss": 0.8588, "step": 528 }, { "epoch": 0.6933158584534731, "grad_norm": 0.09765625, "learning_rate": 3.066841415465269e-06, "loss": 0.8596, "step": 529 }, { "epoch": 0.6946264744429882, "grad_norm": 0.0849609375, "learning_rate": 3.053735255570118e-06, "loss": 0.8478, "step": 530 }, { "epoch": 0.6959370904325033, "grad_norm": 0.08544921875, "learning_rate": 3.0406290956749675e-06, "loss": 0.8543, "step": 531 }, { "epoch": 0.6972477064220184, "grad_norm": 0.0830078125, "learning_rate": 3.0275229357798168e-06, "loss": 0.8399, "step": 532 }, { "epoch": 0.6985583224115334, "grad_norm": 0.09375, "learning_rate": 3.014416775884666e-06, "loss": 0.8798, "step": 533 }, { "epoch": 0.6998689384010485, "grad_norm": 0.0966796875, "learning_rate": 3.0013106159895154e-06, "loss": 0.8754, "step": 534 }, { "epoch": 0.7011795543905636, "grad_norm": 0.09033203125, "learning_rate": 2.9882044560943647e-06, "loss": 0.825, "step": 535 }, { "epoch": 0.7024901703800787, "grad_norm": 0.08935546875, "learning_rate": 2.9750982961992135e-06, "loss": 0.822, "step": 536 }, { "epoch": 0.7038007863695938, "grad_norm": 0.091796875, "learning_rate": 2.9619921363040633e-06, "loss": 0.8401, "step": 537 }, { "epoch": 0.7051114023591087, "grad_norm": 0.08349609375, "learning_rate": 2.948885976408912e-06, "loss": 0.8338, "step": 538 }, { "epoch": 0.7064220183486238, "grad_norm": 0.0986328125, "learning_rate": 2.935779816513762e-06, "loss": 0.8804, "step": 539 }, { "epoch": 0.7077326343381389, "grad_norm": 0.08935546875, "learning_rate": 2.9226736566186107e-06, "loss": 0.8282, "step": 540 }, { "epoch": 0.709043250327654, "grad_norm": 0.09716796875, "learning_rate": 2.9095674967234605e-06, "loss": 0.8461, "step": 541 }, { "epoch": 0.7103538663171691, "grad_norm": 0.0927734375, "learning_rate": 2.8964613368283093e-06, "loss": 0.8664, "step": 542 }, { "epoch": 0.7116644823066841, "grad_norm": 0.1005859375, "learning_rate": 2.8833551769331587e-06, "loss": 0.8794, "step": 543 }, { "epoch": 0.7129750982961992, "grad_norm": 0.08984375, "learning_rate": 2.8702490170380084e-06, "loss": 0.8405, "step": 544 }, { "epoch": 0.7142857142857143, "grad_norm": 0.1044921875, "learning_rate": 2.8571428571428573e-06, "loss": 0.8692, "step": 545 }, { "epoch": 0.7155963302752294, "grad_norm": 0.09619140625, "learning_rate": 2.844036697247707e-06, "loss": 0.8795, "step": 546 }, { "epoch": 0.7169069462647444, "grad_norm": 0.0947265625, "learning_rate": 2.830930537352556e-06, "loss": 0.8931, "step": 547 }, { "epoch": 0.7182175622542595, "grad_norm": 0.10986328125, "learning_rate": 2.8178243774574056e-06, "loss": 0.937, "step": 548 }, { "epoch": 0.7195281782437746, "grad_norm": 0.09130859375, "learning_rate": 2.8047182175622545e-06, "loss": 0.8622, "step": 549 }, { "epoch": 0.7208387942332897, "grad_norm": 0.09521484375, "learning_rate": 2.7916120576671033e-06, "loss": 0.8853, "step": 550 }, { "epoch": 0.7221494102228048, "grad_norm": 0.08984375, "learning_rate": 2.778505897771953e-06, "loss": 0.8372, "step": 551 }, { "epoch": 0.7234600262123198, "grad_norm": 0.09033203125, "learning_rate": 2.7653997378768024e-06, "loss": 0.8319, "step": 552 }, { "epoch": 0.7247706422018348, "grad_norm": 0.09423828125, "learning_rate": 2.7522935779816517e-06, "loss": 0.8221, "step": 553 }, { "epoch": 0.7260812581913499, "grad_norm": 0.08154296875, "learning_rate": 2.739187418086501e-06, "loss": 0.7893, "step": 554 }, { "epoch": 0.727391874180865, "grad_norm": 0.09130859375, "learning_rate": 2.7260812581913503e-06, "loss": 0.8701, "step": 555 }, { "epoch": 0.72870249017038, "grad_norm": 0.08935546875, "learning_rate": 2.7129750982961996e-06, "loss": 0.8164, "step": 556 }, { "epoch": 0.7300131061598951, "grad_norm": 0.1005859375, "learning_rate": 2.6998689384010485e-06, "loss": 0.8293, "step": 557 }, { "epoch": 0.7313237221494102, "grad_norm": 0.1455078125, "learning_rate": 2.686762778505898e-06, "loss": 0.9142, "step": 558 }, { "epoch": 0.7326343381389253, "grad_norm": 0.09033203125, "learning_rate": 2.673656618610747e-06, "loss": 0.8517, "step": 559 }, { "epoch": 0.7339449541284404, "grad_norm": 0.09033203125, "learning_rate": 2.6605504587155968e-06, "loss": 0.8473, "step": 560 }, { "epoch": 0.7352555701179554, "grad_norm": 0.09326171875, "learning_rate": 2.6474442988204457e-06, "loss": 0.8502, "step": 561 }, { "epoch": 0.7365661861074705, "grad_norm": 0.1005859375, "learning_rate": 2.6343381389252954e-06, "loss": 0.8867, "step": 562 }, { "epoch": 0.7378768020969856, "grad_norm": 0.10693359375, "learning_rate": 2.6212319790301443e-06, "loss": 0.8873, "step": 563 }, { "epoch": 0.7391874180865007, "grad_norm": 0.11328125, "learning_rate": 2.6081258191349936e-06, "loss": 0.9348, "step": 564 }, { "epoch": 0.7404980340760158, "grad_norm": 0.08349609375, "learning_rate": 2.595019659239843e-06, "loss": 0.8116, "step": 565 }, { "epoch": 0.7418086500655308, "grad_norm": 0.1044921875, "learning_rate": 2.581913499344692e-06, "loss": 0.8726, "step": 566 }, { "epoch": 0.7431192660550459, "grad_norm": 0.10595703125, "learning_rate": 2.5688073394495415e-06, "loss": 0.9041, "step": 567 }, { "epoch": 0.744429882044561, "grad_norm": 0.0986328125, "learning_rate": 2.5557011795543908e-06, "loss": 0.8594, "step": 568 }, { "epoch": 0.745740498034076, "grad_norm": 0.09765625, "learning_rate": 2.54259501965924e-06, "loss": 0.8508, "step": 569 }, { "epoch": 0.747051114023591, "grad_norm": 0.095703125, "learning_rate": 2.5294888597640894e-06, "loss": 0.859, "step": 570 }, { "epoch": 0.7483617300131061, "grad_norm": 0.09716796875, "learning_rate": 2.5163826998689383e-06, "loss": 0.8472, "step": 571 }, { "epoch": 0.7496723460026212, "grad_norm": 0.10888671875, "learning_rate": 2.503276539973788e-06, "loss": 0.9117, "step": 572 }, { "epoch": 0.7509829619921363, "grad_norm": 0.0888671875, "learning_rate": 2.4901703800786373e-06, "loss": 0.7996, "step": 573 }, { "epoch": 0.7522935779816514, "grad_norm": 0.10986328125, "learning_rate": 2.4770642201834866e-06, "loss": 0.8995, "step": 574 }, { "epoch": 0.7536041939711664, "grad_norm": 0.0908203125, "learning_rate": 2.4639580602883355e-06, "loss": 0.8158, "step": 575 }, { "epoch": 0.7549148099606815, "grad_norm": 0.08837890625, "learning_rate": 2.4508519003931848e-06, "loss": 0.8351, "step": 576 }, { "epoch": 0.7562254259501966, "grad_norm": 0.09619140625, "learning_rate": 2.437745740498034e-06, "loss": 0.8231, "step": 577 }, { "epoch": 0.7575360419397117, "grad_norm": 0.0849609375, "learning_rate": 2.4246395806028834e-06, "loss": 0.8436, "step": 578 }, { "epoch": 0.7588466579292268, "grad_norm": 0.119140625, "learning_rate": 2.4115334207077327e-06, "loss": 0.951, "step": 579 }, { "epoch": 0.7601572739187418, "grad_norm": 0.099609375, "learning_rate": 2.3984272608125824e-06, "loss": 0.8615, "step": 580 }, { "epoch": 0.7614678899082569, "grad_norm": 0.095703125, "learning_rate": 2.3853211009174317e-06, "loss": 0.8217, "step": 581 }, { "epoch": 0.762778505897772, "grad_norm": 0.09521484375, "learning_rate": 2.3722149410222806e-06, "loss": 0.8587, "step": 582 }, { "epoch": 0.7640891218872871, "grad_norm": 0.09130859375, "learning_rate": 2.35910878112713e-06, "loss": 0.8474, "step": 583 }, { "epoch": 0.765399737876802, "grad_norm": 0.0986328125, "learning_rate": 2.346002621231979e-06, "loss": 0.922, "step": 584 }, { "epoch": 0.7667103538663171, "grad_norm": 0.10107421875, "learning_rate": 2.3328964613368285e-06, "loss": 0.8675, "step": 585 }, { "epoch": 0.7680209698558322, "grad_norm": 0.09619140625, "learning_rate": 2.319790301441678e-06, "loss": 0.8212, "step": 586 }, { "epoch": 0.7693315858453473, "grad_norm": 0.10791015625, "learning_rate": 2.306684141546527e-06, "loss": 0.8521, "step": 587 }, { "epoch": 0.7706422018348624, "grad_norm": 0.09814453125, "learning_rate": 2.2935779816513764e-06, "loss": 0.8703, "step": 588 }, { "epoch": 0.7719528178243774, "grad_norm": 0.08740234375, "learning_rate": 2.2804718217562257e-06, "loss": 0.8151, "step": 589 }, { "epoch": 0.7732634338138925, "grad_norm": 0.07666015625, "learning_rate": 2.267365661861075e-06, "loss": 0.7884, "step": 590 }, { "epoch": 0.7745740498034076, "grad_norm": 0.09033203125, "learning_rate": 2.2542595019659243e-06, "loss": 0.8229, "step": 591 }, { "epoch": 0.7758846657929227, "grad_norm": 0.09765625, "learning_rate": 2.2411533420707736e-06, "loss": 0.8589, "step": 592 }, { "epoch": 0.7771952817824378, "grad_norm": 0.09033203125, "learning_rate": 2.228047182175623e-06, "loss": 0.8547, "step": 593 }, { "epoch": 0.7785058977719528, "grad_norm": 0.109375, "learning_rate": 2.214941022280472e-06, "loss": 0.9085, "step": 594 }, { "epoch": 0.7798165137614679, "grad_norm": 0.1025390625, "learning_rate": 2.2018348623853215e-06, "loss": 0.8684, "step": 595 }, { "epoch": 0.781127129750983, "grad_norm": 0.08544921875, "learning_rate": 2.1887287024901704e-06, "loss": 0.8371, "step": 596 }, { "epoch": 0.7824377457404981, "grad_norm": 0.09912109375, "learning_rate": 2.1756225425950197e-06, "loss": 0.8456, "step": 597 }, { "epoch": 0.783748361730013, "grad_norm": 0.08740234375, "learning_rate": 2.162516382699869e-06, "loss": 0.8163, "step": 598 }, { "epoch": 0.7850589777195282, "grad_norm": 0.09521484375, "learning_rate": 2.1494102228047183e-06, "loss": 0.8369, "step": 599 }, { "epoch": 0.7863695937090432, "grad_norm": 0.0908203125, "learning_rate": 2.1363040629095676e-06, "loss": 0.8605, "step": 600 }, { "epoch": 0.7876802096985583, "grad_norm": 0.083984375, "learning_rate": 2.123197903014417e-06, "loss": 0.8092, "step": 601 }, { "epoch": 0.7889908256880734, "grad_norm": 0.09521484375, "learning_rate": 2.110091743119266e-06, "loss": 0.8318, "step": 602 }, { "epoch": 0.7903014416775884, "grad_norm": 0.1005859375, "learning_rate": 2.0969855832241155e-06, "loss": 0.8978, "step": 603 }, { "epoch": 0.7916120576671035, "grad_norm": 0.1064453125, "learning_rate": 2.083879423328965e-06, "loss": 0.8885, "step": 604 }, { "epoch": 0.7929226736566186, "grad_norm": 0.10107421875, "learning_rate": 2.070773263433814e-06, "loss": 0.868, "step": 605 }, { "epoch": 0.7942332896461337, "grad_norm": 0.0908203125, "learning_rate": 2.0576671035386634e-06, "loss": 0.872, "step": 606 }, { "epoch": 0.7955439056356488, "grad_norm": 0.0888671875, "learning_rate": 2.0445609436435127e-06, "loss": 0.8372, "step": 607 }, { "epoch": 0.7968545216251638, "grad_norm": 0.08837890625, "learning_rate": 2.031454783748362e-06, "loss": 0.8122, "step": 608 }, { "epoch": 0.7981651376146789, "grad_norm": 0.09130859375, "learning_rate": 2.0183486238532113e-06, "loss": 0.8637, "step": 609 }, { "epoch": 0.799475753604194, "grad_norm": 0.09521484375, "learning_rate": 2.00524246395806e-06, "loss": 0.8584, "step": 610 }, { "epoch": 0.8007863695937091, "grad_norm": 0.10205078125, "learning_rate": 1.9921363040629095e-06, "loss": 0.8644, "step": 611 }, { "epoch": 0.8020969855832241, "grad_norm": 0.0986328125, "learning_rate": 1.979030144167759e-06, "loss": 0.8899, "step": 612 }, { "epoch": 0.8034076015727392, "grad_norm": 0.0966796875, "learning_rate": 1.965923984272608e-06, "loss": 0.8957, "step": 613 }, { "epoch": 0.8047182175622543, "grad_norm": 0.09716796875, "learning_rate": 1.9528178243774574e-06, "loss": 0.8588, "step": 614 }, { "epoch": 0.8060288335517694, "grad_norm": 0.1083984375, "learning_rate": 1.939711664482307e-06, "loss": 0.8818, "step": 615 }, { "epoch": 0.8073394495412844, "grad_norm": 0.08642578125, "learning_rate": 1.9266055045871564e-06, "loss": 0.8423, "step": 616 }, { "epoch": 0.8086500655307994, "grad_norm": 0.10498046875, "learning_rate": 1.9134993446920053e-06, "loss": 0.9107, "step": 617 }, { "epoch": 0.8099606815203145, "grad_norm": 0.10400390625, "learning_rate": 1.9003931847968546e-06, "loss": 0.856, "step": 618 }, { "epoch": 0.8112712975098296, "grad_norm": 0.0908203125, "learning_rate": 1.887287024901704e-06, "loss": 0.8194, "step": 619 }, { "epoch": 0.8125819134993447, "grad_norm": 0.09765625, "learning_rate": 1.8741808650065532e-06, "loss": 0.8282, "step": 620 }, { "epoch": 0.8138925294888598, "grad_norm": 0.0966796875, "learning_rate": 1.8610747051114025e-06, "loss": 0.8685, "step": 621 }, { "epoch": 0.8152031454783748, "grad_norm": 0.0947265625, "learning_rate": 1.8479685452162518e-06, "loss": 0.8629, "step": 622 }, { "epoch": 0.8165137614678899, "grad_norm": 0.10107421875, "learning_rate": 1.8348623853211011e-06, "loss": 0.8451, "step": 623 }, { "epoch": 0.817824377457405, "grad_norm": 0.09375, "learning_rate": 1.8217562254259502e-06, "loss": 0.8584, "step": 624 }, { "epoch": 0.8191349934469201, "grad_norm": 0.091796875, "learning_rate": 1.8086500655307995e-06, "loss": 0.8165, "step": 625 }, { "epoch": 0.8204456094364351, "grad_norm": 0.1162109375, "learning_rate": 1.7955439056356488e-06, "loss": 0.8471, "step": 626 }, { "epoch": 0.8217562254259502, "grad_norm": 0.1279296875, "learning_rate": 1.7824377457404981e-06, "loss": 0.8677, "step": 627 }, { "epoch": 0.8230668414154653, "grad_norm": 0.0888671875, "learning_rate": 1.7693315858453474e-06, "loss": 0.8728, "step": 628 }, { "epoch": 0.8243774574049804, "grad_norm": 0.0927734375, "learning_rate": 1.7562254259501967e-06, "loss": 0.8703, "step": 629 }, { "epoch": 0.8256880733944955, "grad_norm": 0.09716796875, "learning_rate": 1.743119266055046e-06, "loss": 0.8602, "step": 630 }, { "epoch": 0.8269986893840104, "grad_norm": 0.091796875, "learning_rate": 1.7300131061598955e-06, "loss": 0.8554, "step": 631 }, { "epoch": 0.8283093053735255, "grad_norm": 0.0830078125, "learning_rate": 1.7169069462647444e-06, "loss": 0.7842, "step": 632 }, { "epoch": 0.8296199213630406, "grad_norm": 0.09619140625, "learning_rate": 1.7038007863695937e-06, "loss": 0.8738, "step": 633 }, { "epoch": 0.8309305373525557, "grad_norm": 0.099609375, "learning_rate": 1.690694626474443e-06, "loss": 0.8639, "step": 634 }, { "epoch": 0.8322411533420708, "grad_norm": 0.0966796875, "learning_rate": 1.6775884665792923e-06, "loss": 0.8823, "step": 635 }, { "epoch": 0.8335517693315858, "grad_norm": 0.10107421875, "learning_rate": 1.6644823066841418e-06, "loss": 0.8777, "step": 636 }, { "epoch": 0.8348623853211009, "grad_norm": 0.0791015625, "learning_rate": 1.6513761467889911e-06, "loss": 0.837, "step": 637 }, { "epoch": 0.836173001310616, "grad_norm": 0.0947265625, "learning_rate": 1.6382699868938404e-06, "loss": 0.8582, "step": 638 }, { "epoch": 0.8374836173001311, "grad_norm": 0.0947265625, "learning_rate": 1.6251638269986893e-06, "loss": 0.8156, "step": 639 }, { "epoch": 0.8387942332896461, "grad_norm": 0.09033203125, "learning_rate": 1.6120576671035388e-06, "loss": 0.8631, "step": 640 }, { "epoch": 0.8401048492791612, "grad_norm": 0.1171875, "learning_rate": 1.5989515072083881e-06, "loss": 0.8445, "step": 641 }, { "epoch": 0.8414154652686763, "grad_norm": 0.103515625, "learning_rate": 1.5858453473132374e-06, "loss": 0.838, "step": 642 }, { "epoch": 0.8427260812581914, "grad_norm": 0.1044921875, "learning_rate": 1.5727391874180867e-06, "loss": 0.8624, "step": 643 }, { "epoch": 0.8440366972477065, "grad_norm": 0.0986328125, "learning_rate": 1.559633027522936e-06, "loss": 0.8519, "step": 644 }, { "epoch": 0.8453473132372215, "grad_norm": 0.1279296875, "learning_rate": 1.5465268676277853e-06, "loss": 0.8694, "step": 645 }, { "epoch": 0.8466579292267365, "grad_norm": 0.08837890625, "learning_rate": 1.5334207077326344e-06, "loss": 0.8351, "step": 646 }, { "epoch": 0.8479685452162516, "grad_norm": 0.1162109375, "learning_rate": 1.5203145478374837e-06, "loss": 0.9536, "step": 647 }, { "epoch": 0.8492791612057667, "grad_norm": 0.08935546875, "learning_rate": 1.507208387942333e-06, "loss": 0.8671, "step": 648 }, { "epoch": 0.8505897771952818, "grad_norm": 0.08642578125, "learning_rate": 1.4941022280471823e-06, "loss": 0.8461, "step": 649 }, { "epoch": 0.8519003931847968, "grad_norm": 0.1044921875, "learning_rate": 1.4809960681520316e-06, "loss": 0.8974, "step": 650 }, { "epoch": 0.8532110091743119, "grad_norm": 0.09716796875, "learning_rate": 1.467889908256881e-06, "loss": 0.8587, "step": 651 }, { "epoch": 0.854521625163827, "grad_norm": 0.10009765625, "learning_rate": 1.4547837483617302e-06, "loss": 0.8808, "step": 652 }, { "epoch": 0.8558322411533421, "grad_norm": 0.0966796875, "learning_rate": 1.4416775884665793e-06, "loss": 0.857, "step": 653 }, { "epoch": 0.8571428571428571, "grad_norm": 0.09619140625, "learning_rate": 1.4285714285714286e-06, "loss": 0.8254, "step": 654 }, { "epoch": 0.8584534731323722, "grad_norm": 0.09375, "learning_rate": 1.415465268676278e-06, "loss": 0.8865, "step": 655 }, { "epoch": 0.8597640891218873, "grad_norm": 0.080078125, "learning_rate": 1.4023591087811272e-06, "loss": 0.8341, "step": 656 }, { "epoch": 0.8610747051114024, "grad_norm": 0.0927734375, "learning_rate": 1.3892529488859765e-06, "loss": 0.8145, "step": 657 }, { "epoch": 0.8623853211009175, "grad_norm": 0.09765625, "learning_rate": 1.3761467889908258e-06, "loss": 0.8296, "step": 658 }, { "epoch": 0.8636959370904325, "grad_norm": 0.14453125, "learning_rate": 1.3630406290956751e-06, "loss": 0.7887, "step": 659 }, { "epoch": 0.8650065530799476, "grad_norm": 0.10693359375, "learning_rate": 1.3499344692005242e-06, "loss": 0.8713, "step": 660 }, { "epoch": 0.8663171690694627, "grad_norm": 0.09814453125, "learning_rate": 1.3368283093053735e-06, "loss": 0.8357, "step": 661 }, { "epoch": 0.8676277850589778, "grad_norm": 0.10791015625, "learning_rate": 1.3237221494102228e-06, "loss": 0.8622, "step": 662 }, { "epoch": 0.8689384010484927, "grad_norm": 0.10205078125, "learning_rate": 1.3106159895150721e-06, "loss": 0.8534, "step": 663 }, { "epoch": 0.8702490170380078, "grad_norm": 0.0830078125, "learning_rate": 1.2975098296199214e-06, "loss": 0.8119, "step": 664 }, { "epoch": 0.8715596330275229, "grad_norm": 0.0810546875, "learning_rate": 1.2844036697247707e-06, "loss": 0.7674, "step": 665 }, { "epoch": 0.872870249017038, "grad_norm": 0.107421875, "learning_rate": 1.27129750982962e-06, "loss": 0.8659, "step": 666 }, { "epoch": 0.8741808650065531, "grad_norm": 0.0908203125, "learning_rate": 1.2581913499344691e-06, "loss": 0.8658, "step": 667 }, { "epoch": 0.8754914809960681, "grad_norm": 0.09033203125, "learning_rate": 1.2450851900393186e-06, "loss": 0.839, "step": 668 }, { "epoch": 0.8768020969855832, "grad_norm": 0.08642578125, "learning_rate": 1.2319790301441677e-06, "loss": 0.8186, "step": 669 }, { "epoch": 0.8781127129750983, "grad_norm": 0.09228515625, "learning_rate": 1.218872870249017e-06, "loss": 0.8901, "step": 670 }, { "epoch": 0.8794233289646134, "grad_norm": 0.09033203125, "learning_rate": 1.2057667103538663e-06, "loss": 0.8684, "step": 671 }, { "epoch": 0.8807339449541285, "grad_norm": 0.0986328125, "learning_rate": 1.1926605504587159e-06, "loss": 0.8821, "step": 672 }, { "epoch": 0.8820445609436435, "grad_norm": 0.08203125, "learning_rate": 1.179554390563565e-06, "loss": 0.8073, "step": 673 }, { "epoch": 0.8833551769331586, "grad_norm": 0.09765625, "learning_rate": 1.1664482306684142e-06, "loss": 0.8496, "step": 674 }, { "epoch": 0.8846657929226737, "grad_norm": 0.07958984375, "learning_rate": 1.1533420707732635e-06, "loss": 0.7955, "step": 675 }, { "epoch": 0.8859764089121888, "grad_norm": 0.0966796875, "learning_rate": 1.1402359108781128e-06, "loss": 0.8757, "step": 676 }, { "epoch": 0.8872870249017037, "grad_norm": 0.09619140625, "learning_rate": 1.1271297509829621e-06, "loss": 0.8424, "step": 677 }, { "epoch": 0.8885976408912188, "grad_norm": 0.08837890625, "learning_rate": 1.1140235910878115e-06, "loss": 0.8599, "step": 678 }, { "epoch": 0.8899082568807339, "grad_norm": 0.10302734375, "learning_rate": 1.1009174311926608e-06, "loss": 0.7905, "step": 679 }, { "epoch": 0.891218872870249, "grad_norm": 0.09912109375, "learning_rate": 1.0878112712975098e-06, "loss": 0.811, "step": 680 }, { "epoch": 0.8925294888597641, "grad_norm": 0.09375, "learning_rate": 1.0747051114023591e-06, "loss": 0.8825, "step": 681 }, { "epoch": 0.8938401048492791, "grad_norm": 0.0966796875, "learning_rate": 1.0615989515072084e-06, "loss": 0.8369, "step": 682 }, { "epoch": 0.8951507208387942, "grad_norm": 0.09326171875, "learning_rate": 1.0484927916120577e-06, "loss": 0.8533, "step": 683 }, { "epoch": 0.8964613368283093, "grad_norm": 0.095703125, "learning_rate": 1.035386631716907e-06, "loss": 0.8499, "step": 684 }, { "epoch": 0.8977719528178244, "grad_norm": 0.08984375, "learning_rate": 1.0222804718217564e-06, "loss": 0.831, "step": 685 }, { "epoch": 0.8990825688073395, "grad_norm": 0.10009765625, "learning_rate": 1.0091743119266057e-06, "loss": 0.8551, "step": 686 }, { "epoch": 0.9003931847968545, "grad_norm": 0.1044921875, "learning_rate": 9.960681520314547e-07, "loss": 0.8516, "step": 687 }, { "epoch": 0.9017038007863696, "grad_norm": 0.0830078125, "learning_rate": 9.82961992136304e-07, "loss": 0.7739, "step": 688 }, { "epoch": 0.9030144167758847, "grad_norm": 0.119140625, "learning_rate": 9.698558322411536e-07, "loss": 0.9754, "step": 689 }, { "epoch": 0.9043250327653998, "grad_norm": 0.10107421875, "learning_rate": 9.567496723460027e-07, "loss": 0.8462, "step": 690 }, { "epoch": 0.9056356487549148, "grad_norm": 0.09228515625, "learning_rate": 9.43643512450852e-07, "loss": 0.8372, "step": 691 }, { "epoch": 0.9069462647444299, "grad_norm": 0.09521484375, "learning_rate": 9.305373525557013e-07, "loss": 0.8669, "step": 692 }, { "epoch": 0.908256880733945, "grad_norm": 0.08984375, "learning_rate": 9.174311926605506e-07, "loss": 0.8267, "step": 693 }, { "epoch": 0.90956749672346, "grad_norm": 0.08984375, "learning_rate": 9.043250327653998e-07, "loss": 0.7513, "step": 694 }, { "epoch": 0.9108781127129751, "grad_norm": 0.0849609375, "learning_rate": 8.912188728702491e-07, "loss": 0.8241, "step": 695 }, { "epoch": 0.9121887287024901, "grad_norm": 0.08544921875, "learning_rate": 8.781127129750984e-07, "loss": 0.871, "step": 696 }, { "epoch": 0.9134993446920052, "grad_norm": 0.0849609375, "learning_rate": 8.650065530799478e-07, "loss": 0.8117, "step": 697 }, { "epoch": 0.9148099606815203, "grad_norm": 0.10400390625, "learning_rate": 8.519003931847969e-07, "loss": 0.9001, "step": 698 }, { "epoch": 0.9161205766710354, "grad_norm": 0.0751953125, "learning_rate": 8.387942332896462e-07, "loss": 0.7326, "step": 699 }, { "epoch": 0.9174311926605505, "grad_norm": 0.1005859375, "learning_rate": 8.256880733944956e-07, "loss": 0.8926, "step": 700 }, { "epoch": 0.9187418086500655, "grad_norm": 0.08837890625, "learning_rate": 8.125819134993447e-07, "loss": 0.8292, "step": 701 }, { "epoch": 0.9200524246395806, "grad_norm": 0.099609375, "learning_rate": 7.994757536041941e-07, "loss": 0.8898, "step": 702 }, { "epoch": 0.9213630406290957, "grad_norm": 0.10693359375, "learning_rate": 7.863695937090434e-07, "loss": 0.908, "step": 703 }, { "epoch": 0.9226736566186108, "grad_norm": 0.08837890625, "learning_rate": 7.732634338138927e-07, "loss": 0.878, "step": 704 }, { "epoch": 0.9239842726081258, "grad_norm": 0.09326171875, "learning_rate": 7.601572739187419e-07, "loss": 0.8028, "step": 705 }, { "epoch": 0.9252948885976409, "grad_norm": 0.0859375, "learning_rate": 7.470511140235912e-07, "loss": 0.8004, "step": 706 }, { "epoch": 0.926605504587156, "grad_norm": 0.1044921875, "learning_rate": 7.339449541284405e-07, "loss": 0.8892, "step": 707 }, { "epoch": 0.927916120576671, "grad_norm": 0.0966796875, "learning_rate": 7.208387942332897e-07, "loss": 0.8666, "step": 708 }, { "epoch": 0.9292267365661862, "grad_norm": 0.10205078125, "learning_rate": 7.07732634338139e-07, "loss": 0.8958, "step": 709 }, { "epoch": 0.9305373525557011, "grad_norm": 0.11279296875, "learning_rate": 6.946264744429883e-07, "loss": 0.9089, "step": 710 }, { "epoch": 0.9318479685452162, "grad_norm": 0.0966796875, "learning_rate": 6.815203145478376e-07, "loss": 0.8549, "step": 711 }, { "epoch": 0.9331585845347313, "grad_norm": 0.1015625, "learning_rate": 6.684141546526868e-07, "loss": 0.8955, "step": 712 }, { "epoch": 0.9344692005242464, "grad_norm": 0.0859375, "learning_rate": 6.553079947575361e-07, "loss": 0.8221, "step": 713 }, { "epoch": 0.9357798165137615, "grad_norm": 0.09130859375, "learning_rate": 6.422018348623854e-07, "loss": 0.8364, "step": 714 }, { "epoch": 0.9370904325032765, "grad_norm": 0.0830078125, "learning_rate": 6.290956749672346e-07, "loss": 0.8322, "step": 715 }, { "epoch": 0.9384010484927916, "grad_norm": 0.11572265625, "learning_rate": 6.159895150720839e-07, "loss": 0.8519, "step": 716 }, { "epoch": 0.9397116644823067, "grad_norm": 0.09375, "learning_rate": 6.028833551769332e-07, "loss": 0.8799, "step": 717 }, { "epoch": 0.9410222804718218, "grad_norm": 0.0849609375, "learning_rate": 5.897771952817825e-07, "loss": 0.8028, "step": 718 }, { "epoch": 0.9423328964613368, "grad_norm": 0.14453125, "learning_rate": 5.766710353866318e-07, "loss": 1.0117, "step": 719 }, { "epoch": 0.9436435124508519, "grad_norm": 0.0966796875, "learning_rate": 5.635648754914811e-07, "loss": 0.8829, "step": 720 }, { "epoch": 0.944954128440367, "grad_norm": 0.1005859375, "learning_rate": 5.504587155963304e-07, "loss": 0.8786, "step": 721 }, { "epoch": 0.9462647444298821, "grad_norm": 0.0849609375, "learning_rate": 5.373525557011796e-07, "loss": 0.7952, "step": 722 }, { "epoch": 0.9475753604193972, "grad_norm": 0.181640625, "learning_rate": 5.242463958060289e-07, "loss": 0.8757, "step": 723 }, { "epoch": 0.9488859764089121, "grad_norm": 0.0927734375, "learning_rate": 5.111402359108782e-07, "loss": 0.834, "step": 724 }, { "epoch": 0.9501965923984272, "grad_norm": 0.07861328125, "learning_rate": 4.980340760157274e-07, "loss": 0.7742, "step": 725 }, { "epoch": 0.9515072083879423, "grad_norm": 0.08984375, "learning_rate": 4.849279161205768e-07, "loss": 0.7577, "step": 726 }, { "epoch": 0.9528178243774574, "grad_norm": 0.091796875, "learning_rate": 4.71821756225426e-07, "loss": 0.8595, "step": 727 }, { "epoch": 0.9541284403669725, "grad_norm": 0.09521484375, "learning_rate": 4.587155963302753e-07, "loss": 0.8525, "step": 728 }, { "epoch": 0.9554390563564875, "grad_norm": 0.0869140625, "learning_rate": 4.4560943643512453e-07, "loss": 0.8467, "step": 729 }, { "epoch": 0.9567496723460026, "grad_norm": 0.09814453125, "learning_rate": 4.325032765399739e-07, "loss": 0.8545, "step": 730 }, { "epoch": 0.9580602883355177, "grad_norm": 0.09130859375, "learning_rate": 4.193971166448231e-07, "loss": 0.8175, "step": 731 }, { "epoch": 0.9593709043250328, "grad_norm": 0.0869140625, "learning_rate": 4.0629095674967233e-07, "loss": 0.8406, "step": 732 }, { "epoch": 0.9606815203145478, "grad_norm": 0.1044921875, "learning_rate": 3.931847968545217e-07, "loss": 0.8783, "step": 733 }, { "epoch": 0.9619921363040629, "grad_norm": 0.0947265625, "learning_rate": 3.8007863695937093e-07, "loss": 0.8492, "step": 734 }, { "epoch": 0.963302752293578, "grad_norm": 0.087890625, "learning_rate": 3.6697247706422023e-07, "loss": 0.7609, "step": 735 }, { "epoch": 0.9646133682830931, "grad_norm": 0.09375, "learning_rate": 3.538663171690695e-07, "loss": 0.8502, "step": 736 }, { "epoch": 0.9659239842726082, "grad_norm": 0.09130859375, "learning_rate": 3.407601572739188e-07, "loss": 0.8456, "step": 737 }, { "epoch": 0.9672346002621232, "grad_norm": 0.099609375, "learning_rate": 3.2765399737876803e-07, "loss": 0.873, "step": 738 }, { "epoch": 0.9685452162516383, "grad_norm": 0.0966796875, "learning_rate": 3.145478374836173e-07, "loss": 0.8559, "step": 739 }, { "epoch": 0.9698558322411533, "grad_norm": 0.09033203125, "learning_rate": 3.014416775884666e-07, "loss": 0.8279, "step": 740 }, { "epoch": 0.9711664482306684, "grad_norm": 0.09912109375, "learning_rate": 2.883355176933159e-07, "loss": 0.8649, "step": 741 }, { "epoch": 0.9724770642201835, "grad_norm": 0.0927734375, "learning_rate": 2.752293577981652e-07, "loss": 0.8299, "step": 742 }, { "epoch": 0.9737876802096985, "grad_norm": 0.0986328125, "learning_rate": 2.6212319790301444e-07, "loss": 0.8482, "step": 743 }, { "epoch": 0.9750982961992136, "grad_norm": 0.09326171875, "learning_rate": 2.490170380078637e-07, "loss": 0.8758, "step": 744 }, { "epoch": 0.9764089121887287, "grad_norm": 0.0986328125, "learning_rate": 2.35910878112713e-07, "loss": 0.8242, "step": 745 }, { "epoch": 0.9777195281782438, "grad_norm": 0.083984375, "learning_rate": 2.2280471821756226e-07, "loss": 0.8508, "step": 746 }, { "epoch": 0.9790301441677588, "grad_norm": 0.09521484375, "learning_rate": 2.0969855832241154e-07, "loss": 0.8623, "step": 747 }, { "epoch": 0.9803407601572739, "grad_norm": 0.1142578125, "learning_rate": 1.9659239842726084e-07, "loss": 0.8708, "step": 748 }, { "epoch": 0.981651376146789, "grad_norm": 0.1005859375, "learning_rate": 1.8348623853211012e-07, "loss": 0.847, "step": 749 }, { "epoch": 0.9829619921363041, "grad_norm": 0.08935546875, "learning_rate": 1.703800786369594e-07, "loss": 0.8746, "step": 750 }, { "epoch": 0.9842726081258192, "grad_norm": 0.09033203125, "learning_rate": 1.5727391874180864e-07, "loss": 0.8517, "step": 751 }, { "epoch": 0.9855832241153342, "grad_norm": 0.08935546875, "learning_rate": 1.4416775884665794e-07, "loss": 0.8401, "step": 752 }, { "epoch": 0.9868938401048493, "grad_norm": 0.083984375, "learning_rate": 1.3106159895150722e-07, "loss": 0.8376, "step": 753 }, { "epoch": 0.9882044560943644, "grad_norm": 0.08837890625, "learning_rate": 1.179554390563565e-07, "loss": 0.8049, "step": 754 }, { "epoch": 0.9895150720838795, "grad_norm": 0.11865234375, "learning_rate": 1.0484927916120577e-07, "loss": 0.9423, "step": 755 }, { "epoch": 0.9908256880733946, "grad_norm": 0.0859375, "learning_rate": 9.174311926605506e-08, "loss": 0.8444, "step": 756 }, { "epoch": 0.9921363040629095, "grad_norm": 0.0810546875, "learning_rate": 7.863695937090432e-08, "loss": 0.8502, "step": 757 }, { "epoch": 0.9934469200524246, "grad_norm": 0.10693359375, "learning_rate": 6.553079947575361e-08, "loss": 0.854, "step": 758 }, { "epoch": 0.9947575360419397, "grad_norm": 0.0947265625, "learning_rate": 5.2424639580602885e-08, "loss": 0.8693, "step": 759 }, { "epoch": 0.9960681520314548, "grad_norm": 0.1044921875, "learning_rate": 3.931847968545216e-08, "loss": 0.8721, "step": 760 }, { "epoch": 0.9973787680209698, "grad_norm": 0.087890625, "learning_rate": 2.6212319790301442e-08, "loss": 0.8678, "step": 761 }, { "epoch": 0.9986893840104849, "grad_norm": 0.0927734375, "learning_rate": 1.3106159895150721e-08, "loss": 0.8628, "step": 762 }, { "epoch": 1.0, "grad_norm": 0.0791015625, "learning_rate": 0.0, "loss": 0.7969, "step": 763 } ], "logging_steps": 1.0, "max_steps": 763, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.468456659819692e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }