{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999936, "eval_steps": 500, "global_step": 3906, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000256, "grad_norm": 592.9743317031632, "learning_rate": 1.0000000000000002e-06, "loss": 7.1027, "step": 1 }, { "epoch": 0.000512, "grad_norm": 599.1780379067313, "learning_rate": 2.0000000000000003e-06, "loss": 7.1034, "step": 2 }, { "epoch": 0.000768, "grad_norm": 542.8746698979221, "learning_rate": 3e-06, "loss": 6.5723, "step": 3 }, { "epoch": 0.001024, "grad_norm": 260.529659383525, "learning_rate": 4.000000000000001e-06, "loss": 5.2188, "step": 4 }, { "epoch": 0.00128, "grad_norm": 113.00533898587209, "learning_rate": 5e-06, "loss": 4.0969, "step": 5 }, { "epoch": 0.001536, "grad_norm": 49.74489646576834, "learning_rate": 6e-06, "loss": 3.4141, "step": 6 }, { "epoch": 0.001792, "grad_norm": 30.811416695900597, "learning_rate": 7e-06, "loss": 3.2294, "step": 7 }, { "epoch": 0.002048, "grad_norm": 14.379119643830325, "learning_rate": 8.000000000000001e-06, "loss": 2.7187, "step": 8 }, { "epoch": 0.002304, "grad_norm": 6.924380787635255, "learning_rate": 9e-06, "loss": 2.6165, "step": 9 }, { "epoch": 0.00256, "grad_norm": 8.303192647738951, "learning_rate": 1e-05, "loss": 2.5339, "step": 10 }, { "epoch": 0.002816, "grad_norm": 6.177706017502565, "learning_rate": 1.1000000000000001e-05, "loss": 2.4265, "step": 11 }, { "epoch": 0.003072, "grad_norm": 4.6524525901522855, "learning_rate": 1.2e-05, "loss": 2.3719, "step": 12 }, { "epoch": 0.003328, "grad_norm": 3.6718997401373885, "learning_rate": 1.3000000000000001e-05, "loss": 2.2678, "step": 13 }, { "epoch": 0.003584, "grad_norm": 3.3357787972855113, "learning_rate": 1.4e-05, "loss": 2.4163, "step": 14 }, { "epoch": 0.00384, "grad_norm": 2.7345388229485197, "learning_rate": 1.5000000000000002e-05, "loss": 2.2873, "step": 15 }, { "epoch": 0.004096, "grad_norm": 2.843523576780457, "learning_rate": 1.6000000000000003e-05, "loss": 2.3126, "step": 16 }, { "epoch": 0.004352, "grad_norm": 3.023816042800059, "learning_rate": 1.7e-05, "loss": 2.1398, "step": 17 }, { "epoch": 0.004608, "grad_norm": 3.3170405372890697, "learning_rate": 1.8e-05, "loss": 2.1752, "step": 18 }, { "epoch": 0.004864, "grad_norm": 2.8559468885914283, "learning_rate": 1.9e-05, "loss": 2.2289, "step": 19 }, { "epoch": 0.00512, "grad_norm": 3.2080620203206287, "learning_rate": 2e-05, "loss": 2.2723, "step": 20 }, { "epoch": 0.005376, "grad_norm": 2.755611282975557, "learning_rate": 2.1000000000000002e-05, "loss": 2.1972, "step": 21 }, { "epoch": 0.005632, "grad_norm": 3.0930080601750194, "learning_rate": 2.2000000000000003e-05, "loss": 2.1221, "step": 22 }, { "epoch": 0.005888, "grad_norm": 2.939046946640738, "learning_rate": 2.3e-05, "loss": 2.144, "step": 23 }, { "epoch": 0.006144, "grad_norm": 3.164263131287616, "learning_rate": 2.4e-05, "loss": 2.1498, "step": 24 }, { "epoch": 0.0064, "grad_norm": 2.526804917170411, "learning_rate": 2.5e-05, "loss": 2.2104, "step": 25 }, { "epoch": 0.006656, "grad_norm": 2.2373273892483345, "learning_rate": 2.6000000000000002e-05, "loss": 2.1118, "step": 26 }, { "epoch": 0.006912, "grad_norm": 2.4966928094222616, "learning_rate": 2.7000000000000002e-05, "loss": 2.0654, "step": 27 }, { "epoch": 0.007168, "grad_norm": 2.7912251583606733, "learning_rate": 2.8e-05, "loss": 2.0889, "step": 28 }, { "epoch": 0.007424, "grad_norm": 2.431054160394137, "learning_rate": 2.9e-05, "loss": 2.0099, "step": 29 }, { "epoch": 0.00768, "grad_norm": 2.902786261237885, "learning_rate": 3.0000000000000004e-05, "loss": 2.0453, "step": 30 }, { "epoch": 0.007936, "grad_norm": 2.6168819637809824, "learning_rate": 3.1e-05, "loss": 2.0467, "step": 31 }, { "epoch": 0.008192, "grad_norm": 2.429081373618289, "learning_rate": 3.2000000000000005e-05, "loss": 2.064, "step": 32 }, { "epoch": 0.008448, "grad_norm": 2.5494920231573066, "learning_rate": 3.3e-05, "loss": 2.0269, "step": 33 }, { "epoch": 0.008704, "grad_norm": 2.057227818655378, "learning_rate": 3.4e-05, "loss": 2.0346, "step": 34 }, { "epoch": 0.00896, "grad_norm": 2.3567588259996497, "learning_rate": 3.5000000000000004e-05, "loss": 1.9448, "step": 35 }, { "epoch": 0.009216, "grad_norm": 2.3677974863708133, "learning_rate": 3.6e-05, "loss": 1.9426, "step": 36 }, { "epoch": 0.009472, "grad_norm": 2.170912625075084, "learning_rate": 3.7000000000000005e-05, "loss": 2.0256, "step": 37 }, { "epoch": 0.009728, "grad_norm": 2.471503225761493, "learning_rate": 3.8e-05, "loss": 1.9751, "step": 38 }, { "epoch": 0.009984, "grad_norm": 2.0645422510246627, "learning_rate": 3.9e-05, "loss": 1.9972, "step": 39 }, { "epoch": 0.01024, "grad_norm": 2.5945416121942295, "learning_rate": 4e-05, "loss": 1.9717, "step": 40 }, { "epoch": 0.010496, "grad_norm": 2.256990687799562, "learning_rate": 3.999999339647203e-05, "loss": 2.0233, "step": 41 }, { "epoch": 0.010752, "grad_norm": 2.856060581961514, "learning_rate": 3.9999973585892455e-05, "loss": 2.051, "step": 42 }, { "epoch": 0.011008, "grad_norm": 2.18339692829619, "learning_rate": 3.999994056827438e-05, "loss": 1.9142, "step": 43 }, { "epoch": 0.011264, "grad_norm": 2.2749191731255403, "learning_rate": 3.999989434363959e-05, "loss": 2.0122, "step": 44 }, { "epoch": 0.01152, "grad_norm": 2.3442105171322742, "learning_rate": 3.999983491201863e-05, "loss": 1.9614, "step": 45 }, { "epoch": 0.011776, "grad_norm": 3.060340586762112, "learning_rate": 3.999976227345072e-05, "loss": 1.9205, "step": 46 }, { "epoch": 0.012032, "grad_norm": 2.161941312538491, "learning_rate": 3.999967642798385e-05, "loss": 1.9214, "step": 47 }, { "epoch": 0.012288, "grad_norm": 2.170351449205052, "learning_rate": 3.99995773756747e-05, "loss": 1.9153, "step": 48 }, { "epoch": 0.012544, "grad_norm": 1.8040235993227136, "learning_rate": 3.9999465116588664e-05, "loss": 1.8511, "step": 49 }, { "epoch": 0.0128, "grad_norm": 2.1919388547398637, "learning_rate": 3.99993396507999e-05, "loss": 1.8317, "step": 50 }, { "epoch": 0.013056, "grad_norm": 1.9694273776898437, "learning_rate": 3.999920097839124e-05, "loss": 1.8839, "step": 51 }, { "epoch": 0.013312, "grad_norm": 1.8146358869514165, "learning_rate": 3.999904909945427e-05, "loss": 1.8511, "step": 52 }, { "epoch": 0.013568, "grad_norm": 1.6933734738418547, "learning_rate": 3.999888401408927e-05, "loss": 1.8425, "step": 53 }, { "epoch": 0.013824, "grad_norm": 1.8648277774057493, "learning_rate": 3.999870572240527e-05, "loss": 1.841, "step": 54 }, { "epoch": 0.01408, "grad_norm": 1.6319589161530719, "learning_rate": 3.999851422451999e-05, "loss": 1.8339, "step": 55 }, { "epoch": 0.014336, "grad_norm": 2.0773191877566846, "learning_rate": 3.9998309520559904e-05, "loss": 1.9235, "step": 56 }, { "epoch": 0.014592, "grad_norm": 1.9786939222445252, "learning_rate": 3.999809161066017e-05, "loss": 1.9388, "step": 57 }, { "epoch": 0.014848, "grad_norm": 2.112419570215387, "learning_rate": 3.999786049496469e-05, "loss": 1.815, "step": 58 }, { "epoch": 0.015104, "grad_norm": 1.681719024460609, "learning_rate": 3.999761617362609e-05, "loss": 1.8619, "step": 59 }, { "epoch": 0.01536, "grad_norm": 1.6898626214306998, "learning_rate": 3.99973586468057e-05, "loss": 1.8577, "step": 60 }, { "epoch": 0.015616, "grad_norm": 1.5779901470944477, "learning_rate": 3.9997087914673585e-05, "loss": 1.8654, "step": 61 }, { "epoch": 0.015872, "grad_norm": 1.838507428017589, "learning_rate": 3.999680397740852e-05, "loss": 1.8033, "step": 62 }, { "epoch": 0.016128, "grad_norm": 1.6109226658384828, "learning_rate": 3.9996506835198005e-05, "loss": 1.7934, "step": 63 }, { "epoch": 0.016384, "grad_norm": 1.5550114322341122, "learning_rate": 3.999619648823826e-05, "loss": 1.8388, "step": 64 }, { "epoch": 0.01664, "grad_norm": 1.5712910294227422, "learning_rate": 3.999587293673422e-05, "loss": 1.8471, "step": 65 }, { "epoch": 0.016896, "grad_norm": 1.518387212706681, "learning_rate": 3.9995536180899544e-05, "loss": 1.7755, "step": 66 }, { "epoch": 0.017152, "grad_norm": 1.6764288290558913, "learning_rate": 3.9995186220956616e-05, "loss": 1.8353, "step": 67 }, { "epoch": 0.017408, "grad_norm": 1.7902844247368257, "learning_rate": 3.999482305713653e-05, "loss": 1.8891, "step": 68 }, { "epoch": 0.017664, "grad_norm": 1.457627249670965, "learning_rate": 3.9994446689679085e-05, "loss": 1.8241, "step": 69 }, { "epoch": 0.01792, "grad_norm": 1.7805484350169922, "learning_rate": 3.999405711883285e-05, "loss": 1.7861, "step": 70 }, { "epoch": 0.018176, "grad_norm": 1.7669361023561936, "learning_rate": 3.999365434485504e-05, "loss": 1.7952, "step": 71 }, { "epoch": 0.018432, "grad_norm": 1.3036511554189805, "learning_rate": 3.999323836801166e-05, "loss": 1.8357, "step": 72 }, { "epoch": 0.018688, "grad_norm": 1.4614422147700734, "learning_rate": 3.999280918857739e-05, "loss": 1.7782, "step": 73 }, { "epoch": 0.018944, "grad_norm": 1.507240616354275, "learning_rate": 3.999236680683564e-05, "loss": 1.817, "step": 74 }, { "epoch": 0.0192, "grad_norm": 1.503660314941665, "learning_rate": 3.9991911223078537e-05, "loss": 1.7895, "step": 75 }, { "epoch": 0.019456, "grad_norm": 1.391065206748873, "learning_rate": 3.999144243760693e-05, "loss": 1.7798, "step": 76 }, { "epoch": 0.019712, "grad_norm": 1.4092222546201962, "learning_rate": 3.9990960450730376e-05, "loss": 1.7809, "step": 77 }, { "epoch": 0.019968, "grad_norm": 1.3330729002785804, "learning_rate": 3.999046526276716e-05, "loss": 1.7588, "step": 78 }, { "epoch": 0.020224, "grad_norm": 1.2940922859791728, "learning_rate": 3.998995687404429e-05, "loss": 1.7642, "step": 79 }, { "epoch": 0.02048, "grad_norm": 1.152265835668214, "learning_rate": 3.998943528489746e-05, "loss": 1.741, "step": 80 }, { "epoch": 0.020736, "grad_norm": 1.6601432070064535, "learning_rate": 3.998890049567113e-05, "loss": 1.7976, "step": 81 }, { "epoch": 0.020992, "grad_norm": 1.019135351254906, "learning_rate": 3.9988352506718426e-05, "loss": 1.7517, "step": 82 }, { "epoch": 0.021248, "grad_norm": 1.4038086263714684, "learning_rate": 3.998779131840123e-05, "loss": 1.721, "step": 83 }, { "epoch": 0.021504, "grad_norm": 1.1440209713679739, "learning_rate": 3.998721693109011e-05, "loss": 1.735, "step": 84 }, { "epoch": 0.02176, "grad_norm": 1.2241475040897805, "learning_rate": 3.998662934516438e-05, "loss": 1.7742, "step": 85 }, { "epoch": 0.022016, "grad_norm": 1.4360186297999553, "learning_rate": 3.998602856101205e-05, "loss": 1.7198, "step": 86 }, { "epoch": 0.022272, "grad_norm": 1.3780396600933922, "learning_rate": 3.9985414579029836e-05, "loss": 1.7398, "step": 87 }, { "epoch": 0.022528, "grad_norm": 1.2041825111331663, "learning_rate": 3.99847873996232e-05, "loss": 1.735, "step": 88 }, { "epoch": 0.022784, "grad_norm": 1.4226095111819237, "learning_rate": 3.9984147023206295e-05, "loss": 1.7487, "step": 89 }, { "epoch": 0.02304, "grad_norm": 1.2299632105832863, "learning_rate": 3.9983493450202e-05, "loss": 1.7461, "step": 90 }, { "epoch": 0.023296, "grad_norm": 1.0311434872404308, "learning_rate": 3.998282668104188e-05, "loss": 1.7403, "step": 91 }, { "epoch": 0.023552, "grad_norm": 1.21132035127431, "learning_rate": 3.998214671616627e-05, "loss": 1.6954, "step": 92 }, { "epoch": 0.023808, "grad_norm": 1.2658549348037387, "learning_rate": 3.9981453556024175e-05, "loss": 1.8071, "step": 93 }, { "epoch": 0.024064, "grad_norm": 1.039194378894388, "learning_rate": 3.998074720107332e-05, "loss": 1.711, "step": 94 }, { "epoch": 0.02432, "grad_norm": 1.3722879638106795, "learning_rate": 3.998002765178015e-05, "loss": 1.7698, "step": 95 }, { "epoch": 0.024576, "grad_norm": 1.1862235393233689, "learning_rate": 3.997929490861983e-05, "loss": 1.7301, "step": 96 }, { "epoch": 0.024832, "grad_norm": 1.1550824312771037, "learning_rate": 3.997854897207621e-05, "loss": 1.7179, "step": 97 }, { "epoch": 0.025088, "grad_norm": 1.3087085960813287, "learning_rate": 3.997778984264189e-05, "loss": 1.7119, "step": 98 }, { "epoch": 0.025344, "grad_norm": 1.0807007825087622, "learning_rate": 3.997701752081816e-05, "loss": 1.7394, "step": 99 }, { "epoch": 0.0256, "grad_norm": 1.3084776122166415, "learning_rate": 3.9976232007115015e-05, "loss": 1.6803, "step": 100 }, { "epoch": 0.025856, "grad_norm": 1.0923354482196317, "learning_rate": 3.9975433302051184e-05, "loss": 1.7406, "step": 101 }, { "epoch": 0.026112, "grad_norm": 1.0883912475361128, "learning_rate": 3.997462140615408e-05, "loss": 1.7512, "step": 102 }, { "epoch": 0.026368, "grad_norm": 1.438650846748812, "learning_rate": 3.9973796319959854e-05, "loss": 1.7349, "step": 103 }, { "epoch": 0.026624, "grad_norm": 1.1863717155104916, "learning_rate": 3.997295804401335e-05, "loss": 1.8028, "step": 104 }, { "epoch": 0.02688, "grad_norm": 1.1285889950460126, "learning_rate": 3.997210657886813e-05, "loss": 1.6684, "step": 105 }, { "epoch": 0.027136, "grad_norm": 1.042248894772584, "learning_rate": 3.9971241925086444e-05, "loss": 1.6906, "step": 106 }, { "epoch": 0.027392, "grad_norm": 1.1054056878331868, "learning_rate": 3.9970364083239284e-05, "loss": 1.6753, "step": 107 }, { "epoch": 0.027648, "grad_norm": 1.106329538499644, "learning_rate": 3.996947305390634e-05, "loss": 1.6673, "step": 108 }, { "epoch": 0.027904, "grad_norm": 1.1496847174342597, "learning_rate": 3.9968568837675986e-05, "loss": 1.6906, "step": 109 }, { "epoch": 0.02816, "grad_norm": 1.0523190733701966, "learning_rate": 3.996765143514534e-05, "loss": 1.7352, "step": 110 }, { "epoch": 0.028416, "grad_norm": 1.0398196895661893, "learning_rate": 3.996672084692021e-05, "loss": 1.6813, "step": 111 }, { "epoch": 0.028672, "grad_norm": 0.9599547428898851, "learning_rate": 3.9965777073615105e-05, "loss": 1.6841, "step": 112 }, { "epoch": 0.028928, "grad_norm": 1.0842378041962715, "learning_rate": 3.996482011585325e-05, "loss": 1.7104, "step": 113 }, { "epoch": 0.029184, "grad_norm": 1.2179306231138718, "learning_rate": 3.996384997426658e-05, "loss": 1.6565, "step": 114 }, { "epoch": 0.02944, "grad_norm": 0.8890508847263513, "learning_rate": 3.996286664949573e-05, "loss": 1.6614, "step": 115 }, { "epoch": 0.029696, "grad_norm": 0.8812111686417768, "learning_rate": 3.996187014219004e-05, "loss": 1.716, "step": 116 }, { "epoch": 0.029952, "grad_norm": 0.9325038134241184, "learning_rate": 3.9960860453007555e-05, "loss": 1.6901, "step": 117 }, { "epoch": 0.030208, "grad_norm": 0.9671295040562344, "learning_rate": 3.995983758261502e-05, "loss": 1.7093, "step": 118 }, { "epoch": 0.030464, "grad_norm": 0.9505147489163117, "learning_rate": 3.9958801531687904e-05, "loss": 1.6738, "step": 119 }, { "epoch": 0.03072, "grad_norm": 0.9724182748888957, "learning_rate": 3.995775230091035e-05, "loss": 1.6735, "step": 120 }, { "epoch": 0.030976, "grad_norm": 1.1693639746457531, "learning_rate": 3.995668989097525e-05, "loss": 1.7062, "step": 121 }, { "epoch": 0.031232, "grad_norm": 0.9683984161249218, "learning_rate": 3.995561430258413e-05, "loss": 1.6642, "step": 122 }, { "epoch": 0.031488, "grad_norm": 0.9383252696760273, "learning_rate": 3.995452553644728e-05, "loss": 1.6459, "step": 123 }, { "epoch": 0.031744, "grad_norm": 0.9501958624559937, "learning_rate": 3.995342359328367e-05, "loss": 1.6897, "step": 124 }, { "epoch": 0.032, "grad_norm": 0.8995019479668482, "learning_rate": 3.995230847382096e-05, "loss": 1.7132, "step": 125 }, { "epoch": 0.032256, "grad_norm": 0.8132439493043349, "learning_rate": 3.995118017879554e-05, "loss": 1.581, "step": 126 }, { "epoch": 0.032512, "grad_norm": 1.2119019710159098, "learning_rate": 3.995003870895247e-05, "loss": 1.6804, "step": 127 }, { "epoch": 0.032768, "grad_norm": 0.9120155940827004, "learning_rate": 3.994888406504552e-05, "loss": 1.7045, "step": 128 }, { "epoch": 0.033024, "grad_norm": 1.0861585776784026, "learning_rate": 3.994771624783718e-05, "loss": 1.6389, "step": 129 }, { "epoch": 0.03328, "grad_norm": 1.0207882622665796, "learning_rate": 3.994653525809859e-05, "loss": 1.7005, "step": 130 }, { "epoch": 0.033536, "grad_norm": 0.9773135378532466, "learning_rate": 3.9945341096609655e-05, "loss": 1.6992, "step": 131 }, { "epoch": 0.033792, "grad_norm": 1.0278290435463915, "learning_rate": 3.9944133764158925e-05, "loss": 1.6899, "step": 132 }, { "epoch": 0.034048, "grad_norm": 0.9079032183424462, "learning_rate": 3.9942913261543664e-05, "loss": 1.6429, "step": 133 }, { "epoch": 0.034304, "grad_norm": 0.8323298818892745, "learning_rate": 3.9941679589569836e-05, "loss": 1.6891, "step": 134 }, { "epoch": 0.03456, "grad_norm": 0.8409270975746009, "learning_rate": 3.99404327490521e-05, "loss": 1.6226, "step": 135 }, { "epoch": 0.034816, "grad_norm": 0.9149078134374603, "learning_rate": 3.9939172740813825e-05, "loss": 1.6255, "step": 136 }, { "epoch": 0.035072, "grad_norm": 1.0196329454781172, "learning_rate": 3.993789956568704e-05, "loss": 1.7171, "step": 137 }, { "epoch": 0.035328, "grad_norm": 0.7946758350260431, "learning_rate": 3.9936613224512495e-05, "loss": 1.5579, "step": 138 }, { "epoch": 0.035584, "grad_norm": 0.8874319573940996, "learning_rate": 3.993531371813963e-05, "loss": 1.6577, "step": 139 }, { "epoch": 0.03584, "grad_norm": 1.2978489551059718, "learning_rate": 3.9934001047426585e-05, "loss": 1.6809, "step": 140 }, { "epoch": 0.036096, "grad_norm": 0.8042620114066384, "learning_rate": 3.9932675213240175e-05, "loss": 1.627, "step": 141 }, { "epoch": 0.036352, "grad_norm": 0.9706716408253456, "learning_rate": 3.993133621645593e-05, "loss": 1.6764, "step": 142 }, { "epoch": 0.036608, "grad_norm": 1.16280871089314, "learning_rate": 3.9929984057958055e-05, "loss": 1.6521, "step": 143 }, { "epoch": 0.036864, "grad_norm": 0.8791842830765326, "learning_rate": 3.992861873863944e-05, "loss": 1.6497, "step": 144 }, { "epoch": 0.03712, "grad_norm": 1.2155169707095028, "learning_rate": 3.9927240259401694e-05, "loss": 1.5884, "step": 145 }, { "epoch": 0.037376, "grad_norm": 0.7562842582963687, "learning_rate": 3.992584862115509e-05, "loss": 1.5987, "step": 146 }, { "epoch": 0.037632, "grad_norm": 0.7795309651797402, "learning_rate": 3.992444382481862e-05, "loss": 1.5948, "step": 147 }, { "epoch": 0.037888, "grad_norm": 0.7771885458088762, "learning_rate": 3.992302587131991e-05, "loss": 1.6598, "step": 148 }, { "epoch": 0.038144, "grad_norm": 0.8098660490108356, "learning_rate": 3.9921594761595334e-05, "loss": 1.5991, "step": 149 }, { "epoch": 0.0384, "grad_norm": 0.7498591706711951, "learning_rate": 3.992015049658992e-05, "loss": 1.6543, "step": 150 }, { "epoch": 0.038656, "grad_norm": 0.7736378513746694, "learning_rate": 3.991869307725741e-05, "loss": 1.6648, "step": 151 }, { "epoch": 0.038912, "grad_norm": 0.742377517409635, "learning_rate": 3.9917222504560184e-05, "loss": 1.6041, "step": 152 }, { "epoch": 0.039168, "grad_norm": 0.7783701921138761, "learning_rate": 3.991573877946937e-05, "loss": 1.6443, "step": 153 }, { "epoch": 0.039424, "grad_norm": 0.7659432386168707, "learning_rate": 3.9914241902964725e-05, "loss": 1.7052, "step": 154 }, { "epoch": 0.03968, "grad_norm": 0.7518527516141134, "learning_rate": 3.991273187603473e-05, "loss": 1.6251, "step": 155 }, { "epoch": 0.039936, "grad_norm": 0.7822919113116653, "learning_rate": 3.9911208699676537e-05, "loss": 1.6287, "step": 156 }, { "epoch": 0.040192, "grad_norm": 0.8066856650406288, "learning_rate": 3.990967237489597e-05, "loss": 1.6348, "step": 157 }, { "epoch": 0.040448, "grad_norm": 0.9105078454075806, "learning_rate": 3.990812290270755e-05, "loss": 1.6133, "step": 158 }, { "epoch": 0.040704, "grad_norm": 0.8941931068770252, "learning_rate": 3.990656028413448e-05, "loss": 1.6297, "step": 159 }, { "epoch": 0.04096, "grad_norm": 0.8696780893605999, "learning_rate": 3.9904984520208626e-05, "loss": 1.6098, "step": 160 }, { "epoch": 0.041216, "grad_norm": 0.7305904830360131, "learning_rate": 3.9903395611970565e-05, "loss": 1.6131, "step": 161 }, { "epoch": 0.041472, "grad_norm": 0.7714314658140741, "learning_rate": 3.990179356046952e-05, "loss": 1.5826, "step": 162 }, { "epoch": 0.041728, "grad_norm": 0.7610912423863428, "learning_rate": 3.990017836676344e-05, "loss": 1.6436, "step": 163 }, { "epoch": 0.041984, "grad_norm": 2.4705363679369245, "learning_rate": 3.9898550031918886e-05, "loss": 1.6775, "step": 164 }, { "epoch": 0.04224, "grad_norm": 0.9467340535163156, "learning_rate": 3.9896908557011146e-05, "loss": 1.6481, "step": 165 }, { "epoch": 0.042496, "grad_norm": 0.9246624477542396, "learning_rate": 3.989525394312418e-05, "loss": 1.6508, "step": 166 }, { "epoch": 0.042752, "grad_norm": 0.9503701562561346, "learning_rate": 3.989358619135062e-05, "loss": 1.6493, "step": 167 }, { "epoch": 0.043008, "grad_norm": 0.8252572441701449, "learning_rate": 3.989190530279175e-05, "loss": 1.6417, "step": 168 }, { "epoch": 0.043264, "grad_norm": 0.848455041237527, "learning_rate": 3.9890211278557576e-05, "loss": 1.5947, "step": 169 }, { "epoch": 0.04352, "grad_norm": 0.9366609881558517, "learning_rate": 3.988850411976674e-05, "loss": 1.6538, "step": 170 }, { "epoch": 0.043776, "grad_norm": 0.8233618081995571, "learning_rate": 3.988678382754656e-05, "loss": 1.5596, "step": 171 }, { "epoch": 0.044032, "grad_norm": 0.795394583475737, "learning_rate": 3.988505040303304e-05, "loss": 1.6272, "step": 172 }, { "epoch": 0.044288, "grad_norm": 0.9468737748519184, "learning_rate": 3.9883303847370866e-05, "loss": 1.6455, "step": 173 }, { "epoch": 0.044544, "grad_norm": 0.8779097843647329, "learning_rate": 3.988154416171336e-05, "loss": 1.6205, "step": 174 }, { "epoch": 0.0448, "grad_norm": 0.7626697331087257, "learning_rate": 3.987977134722255e-05, "loss": 1.6421, "step": 175 }, { "epoch": 0.045056, "grad_norm": 0.7911748234927005, "learning_rate": 3.987798540506911e-05, "loss": 1.6395, "step": 176 }, { "epoch": 0.045312, "grad_norm": 0.8298848464014686, "learning_rate": 3.9876186336432404e-05, "loss": 1.5974, "step": 177 }, { "epoch": 0.045568, "grad_norm": 0.8004652482579814, "learning_rate": 3.987437414250044e-05, "loss": 1.6324, "step": 178 }, { "epoch": 0.045824, "grad_norm": 1.048446607740693, "learning_rate": 3.9872548824469913e-05, "loss": 1.6155, "step": 179 }, { "epoch": 0.04608, "grad_norm": 0.7739830340237577, "learning_rate": 3.9870710383546166e-05, "loss": 1.6201, "step": 180 }, { "epoch": 0.046336, "grad_norm": 0.7468121321303345, "learning_rate": 3.986885882094324e-05, "loss": 1.6282, "step": 181 }, { "epoch": 0.046592, "grad_norm": 0.7784300337192723, "learning_rate": 3.98669941378838e-05, "loss": 1.6151, "step": 182 }, { "epoch": 0.046848, "grad_norm": 0.7887247249132889, "learning_rate": 3.986511633559919e-05, "loss": 1.6322, "step": 183 }, { "epoch": 0.047104, "grad_norm": 0.7763175775630177, "learning_rate": 3.986322541532944e-05, "loss": 1.5858, "step": 184 }, { "epoch": 0.04736, "grad_norm": 0.7029643627744828, "learning_rate": 3.986132137832322e-05, "loss": 1.6501, "step": 185 }, { "epoch": 0.047616, "grad_norm": 0.7913393851592154, "learning_rate": 3.985940422583786e-05, "loss": 1.619, "step": 186 }, { "epoch": 0.047872, "grad_norm": 0.6954422281994223, "learning_rate": 3.985747395913936e-05, "loss": 1.6122, "step": 187 }, { "epoch": 0.048128, "grad_norm": 0.7370595672662306, "learning_rate": 3.9855530579502375e-05, "loss": 1.5991, "step": 188 }, { "epoch": 0.048384, "grad_norm": 0.6963060857346365, "learning_rate": 3.9853574088210224e-05, "loss": 1.6093, "step": 189 }, { "epoch": 0.04864, "grad_norm": 0.6999246871883297, "learning_rate": 3.985160448655488e-05, "loss": 1.644, "step": 190 }, { "epoch": 0.048896, "grad_norm": 0.8871350739368395, "learning_rate": 3.984962177583698e-05, "loss": 1.6516, "step": 191 }, { "epoch": 0.049152, "grad_norm": 0.665284498626241, "learning_rate": 3.984762595736581e-05, "loss": 1.5579, "step": 192 }, { "epoch": 0.049408, "grad_norm": 0.8004360626359813, "learning_rate": 3.98456170324593e-05, "loss": 1.6606, "step": 193 }, { "epoch": 0.049664, "grad_norm": 0.7551735939415242, "learning_rate": 3.9843595002444075e-05, "loss": 1.602, "step": 194 }, { "epoch": 0.04992, "grad_norm": 0.7206439722894225, "learning_rate": 3.984155986865537e-05, "loss": 1.5955, "step": 195 }, { "epoch": 0.050176, "grad_norm": 0.7753493657493119, "learning_rate": 3.9839511632437096e-05, "loss": 1.5357, "step": 196 }, { "epoch": 0.050432, "grad_norm": 0.6948348502128749, "learning_rate": 3.9837450295141816e-05, "loss": 1.5909, "step": 197 }, { "epoch": 0.050688, "grad_norm": 0.7107966792035627, "learning_rate": 3.983537585813073e-05, "loss": 1.6708, "step": 198 }, { "epoch": 0.050944, "grad_norm": 0.7395099525564277, "learning_rate": 3.98332883227737e-05, "loss": 1.5605, "step": 199 }, { "epoch": 0.0512, "grad_norm": 0.6647533169618578, "learning_rate": 3.9831187690449254e-05, "loss": 1.6156, "step": 200 }, { "epoch": 0.051456, "grad_norm": 0.7059101501205137, "learning_rate": 3.9829073962544525e-05, "loss": 1.624, "step": 201 }, { "epoch": 0.051712, "grad_norm": 0.891898916836492, "learning_rate": 3.9826947140455334e-05, "loss": 1.6255, "step": 202 }, { "epoch": 0.051968, "grad_norm": 0.6921158893254238, "learning_rate": 3.982480722558613e-05, "loss": 1.5554, "step": 203 }, { "epoch": 0.052224, "grad_norm": 0.7563556076434174, "learning_rate": 3.982265421935001e-05, "loss": 1.6158, "step": 204 }, { "epoch": 0.05248, "grad_norm": 0.7453240485860165, "learning_rate": 3.982048812316872e-05, "loss": 1.6056, "step": 205 }, { "epoch": 0.052736, "grad_norm": 0.7005042088111104, "learning_rate": 3.981830893847265e-05, "loss": 1.6017, "step": 206 }, { "epoch": 0.052992, "grad_norm": 1.3006069947635521, "learning_rate": 3.981611666670082e-05, "loss": 1.6227, "step": 207 }, { "epoch": 0.053248, "grad_norm": 0.9463170451403351, "learning_rate": 3.9813911309300916e-05, "loss": 1.6319, "step": 208 }, { "epoch": 0.053504, "grad_norm": 0.6555692155134595, "learning_rate": 3.981169286772925e-05, "loss": 1.627, "step": 209 }, { "epoch": 0.05376, "grad_norm": 0.6411962367215546, "learning_rate": 3.980946134345077e-05, "loss": 1.5803, "step": 210 }, { "epoch": 0.054016, "grad_norm": 0.7028659368248574, "learning_rate": 3.980721673793907e-05, "loss": 1.5945, "step": 211 }, { "epoch": 0.054272, "grad_norm": 0.7597298381504562, "learning_rate": 3.9804959052676386e-05, "loss": 1.6166, "step": 212 }, { "epoch": 0.054528, "grad_norm": 0.6597937276241083, "learning_rate": 3.980268828915359e-05, "loss": 1.6014, "step": 213 }, { "epoch": 0.054784, "grad_norm": 0.6527427801713436, "learning_rate": 3.980040444887017e-05, "loss": 1.6061, "step": 214 }, { "epoch": 0.05504, "grad_norm": 0.7050012981026503, "learning_rate": 3.979810753333428e-05, "loss": 1.5752, "step": 215 }, { "epoch": 0.055296, "grad_norm": 0.6641202789394471, "learning_rate": 3.979579754406271e-05, "loss": 1.5667, "step": 216 }, { "epoch": 0.055552, "grad_norm": 0.6478084200729878, "learning_rate": 3.979347448258083e-05, "loss": 1.577, "step": 217 }, { "epoch": 0.055808, "grad_norm": 0.8401856996554157, "learning_rate": 3.979113835042271e-05, "loss": 1.6513, "step": 218 }, { "epoch": 0.056064, "grad_norm": 0.6847093898858372, "learning_rate": 3.9788789149131003e-05, "loss": 1.5861, "step": 219 }, { "epoch": 0.05632, "grad_norm": 0.65843722138248, "learning_rate": 3.978642688025703e-05, "loss": 1.6002, "step": 220 }, { "epoch": 0.056576, "grad_norm": 0.6968805088274576, "learning_rate": 3.978405154536071e-05, "loss": 1.5682, "step": 221 }, { "epoch": 0.056832, "grad_norm": 0.7287039864040735, "learning_rate": 3.9781663146010595e-05, "loss": 1.5688, "step": 222 }, { "epoch": 0.057088, "grad_norm": 0.7064532206170948, "learning_rate": 3.977926168378389e-05, "loss": 1.5846, "step": 223 }, { "epoch": 0.057344, "grad_norm": 0.712986811749775, "learning_rate": 3.977684716026639e-05, "loss": 1.6461, "step": 224 }, { "epoch": 0.0576, "grad_norm": 0.646962393577212, "learning_rate": 3.977441957705254e-05, "loss": 1.5788, "step": 225 }, { "epoch": 0.057856, "grad_norm": 0.667563683766247, "learning_rate": 3.97719789357454e-05, "loss": 1.6239, "step": 226 }, { "epoch": 0.058112, "grad_norm": 0.6765771586778769, "learning_rate": 3.9769525237956655e-05, "loss": 1.5473, "step": 227 }, { "epoch": 0.058368, "grad_norm": 0.7418106909192511, "learning_rate": 3.976705848530662e-05, "loss": 1.5923, "step": 228 }, { "epoch": 0.058624, "grad_norm": 0.673324853811566, "learning_rate": 3.9764578679424204e-05, "loss": 1.603, "step": 229 }, { "epoch": 0.05888, "grad_norm": 0.6836905468874301, "learning_rate": 3.976208582194696e-05, "loss": 1.5565, "step": 230 }, { "epoch": 0.059136, "grad_norm": 0.682245161126949, "learning_rate": 3.975957991452107e-05, "loss": 1.5812, "step": 231 }, { "epoch": 0.059392, "grad_norm": 0.6865147007579201, "learning_rate": 3.9757060958801286e-05, "loss": 1.581, "step": 232 }, { "epoch": 0.059648, "grad_norm": 0.6619829854909499, "learning_rate": 3.9754528956451034e-05, "loss": 1.5698, "step": 233 }, { "epoch": 0.059904, "grad_norm": 0.6436552022457742, "learning_rate": 3.975198390914232e-05, "loss": 1.6069, "step": 234 }, { "epoch": 0.06016, "grad_norm": 0.8291322366946088, "learning_rate": 3.974942581855577e-05, "loss": 1.586, "step": 235 }, { "epoch": 0.060416, "grad_norm": 1.5559952573778277, "learning_rate": 3.974685468638063e-05, "loss": 1.5755, "step": 236 }, { "epoch": 0.060672, "grad_norm": 0.8304033995709409, "learning_rate": 3.9744270514314756e-05, "loss": 1.5765, "step": 237 }, { "epoch": 0.060928, "grad_norm": 0.7498847339030442, "learning_rate": 3.974167330406461e-05, "loss": 1.5478, "step": 238 }, { "epoch": 0.061184, "grad_norm": 0.720015328955355, "learning_rate": 3.973906305734526e-05, "loss": 1.575, "step": 239 }, { "epoch": 0.06144, "grad_norm": 1.0936969289583933, "learning_rate": 3.973643977588041e-05, "loss": 1.5564, "step": 240 }, { "epoch": 0.061696, "grad_norm": 0.7098563782536219, "learning_rate": 3.973380346140233e-05, "loss": 1.5936, "step": 241 }, { "epoch": 0.061952, "grad_norm": 0.7078867886190099, "learning_rate": 3.973115411565193e-05, "loss": 1.5702, "step": 242 }, { "epoch": 0.062208, "grad_norm": 0.7017526979035191, "learning_rate": 3.9728491740378705e-05, "loss": 1.5163, "step": 243 }, { "epoch": 0.062464, "grad_norm": 0.6909325418368992, "learning_rate": 3.972581633734077e-05, "loss": 1.5727, "step": 244 }, { "epoch": 0.06272, "grad_norm": 0.6765399870693833, "learning_rate": 3.972312790830483e-05, "loss": 1.5451, "step": 245 }, { "epoch": 0.062976, "grad_norm": 0.6690891973736529, "learning_rate": 3.9720426455046195e-05, "loss": 1.5725, "step": 246 }, { "epoch": 0.063232, "grad_norm": 0.6761236687452795, "learning_rate": 3.971771197934878e-05, "loss": 1.5935, "step": 247 }, { "epoch": 0.063488, "grad_norm": 0.6451034482316899, "learning_rate": 3.97149844830051e-05, "loss": 1.5908, "step": 248 }, { "epoch": 0.063744, "grad_norm": 0.6514685314428951, "learning_rate": 3.971224396781626e-05, "loss": 1.5662, "step": 249 }, { "epoch": 0.064, "grad_norm": 0.6542085356401529, "learning_rate": 3.970949043559197e-05, "loss": 1.5763, "step": 250 }, { "epoch": 0.064256, "grad_norm": 0.7120449728175606, "learning_rate": 3.970672388815052e-05, "loss": 1.5633, "step": 251 }, { "epoch": 0.064512, "grad_norm": 0.6360640695812156, "learning_rate": 3.970394432731883e-05, "loss": 1.5489, "step": 252 }, { "epoch": 0.064768, "grad_norm": 0.6225201283579339, "learning_rate": 3.9701151754932376e-05, "loss": 1.5615, "step": 253 }, { "epoch": 0.065024, "grad_norm": 0.6511260383517505, "learning_rate": 3.969834617283525e-05, "loss": 1.5535, "step": 254 }, { "epoch": 0.06528, "grad_norm": 0.5810087078492836, "learning_rate": 3.9695527582880105e-05, "loss": 1.6105, "step": 255 }, { "epoch": 0.065536, "grad_norm": 0.6998932628986286, "learning_rate": 3.969269598692823e-05, "loss": 1.6258, "step": 256 }, { "epoch": 0.065792, "grad_norm": 0.627737701805962, "learning_rate": 3.968985138684947e-05, "loss": 1.595, "step": 257 }, { "epoch": 0.066048, "grad_norm": 0.7087387166396533, "learning_rate": 3.968699378452226e-05, "loss": 1.5708, "step": 258 }, { "epoch": 0.066304, "grad_norm": 0.6527691127897426, "learning_rate": 3.968412318183362e-05, "loss": 1.524, "step": 259 }, { "epoch": 0.06656, "grad_norm": 0.7232076900724689, "learning_rate": 3.968123958067917e-05, "loss": 1.5344, "step": 260 }, { "epoch": 0.066816, "grad_norm": 0.6464886874713757, "learning_rate": 3.9678342982963105e-05, "loss": 1.557, "step": 261 }, { "epoch": 0.067072, "grad_norm": 0.642111277654415, "learning_rate": 3.96754333905982e-05, "loss": 1.5299, "step": 262 }, { "epoch": 0.067328, "grad_norm": 0.720672927076375, "learning_rate": 3.96725108055058e-05, "loss": 1.5723, "step": 263 }, { "epoch": 0.067584, "grad_norm": 0.6068722626184577, "learning_rate": 3.966957522961586e-05, "loss": 1.5963, "step": 264 }, { "epoch": 0.06784, "grad_norm": 0.6410058022344428, "learning_rate": 3.966662666486689e-05, "loss": 1.5364, "step": 265 }, { "epoch": 0.068096, "grad_norm": 0.6469635873979933, "learning_rate": 3.966366511320598e-05, "loss": 1.5923, "step": 266 }, { "epoch": 0.068352, "grad_norm": 0.6507160149449662, "learning_rate": 3.966069057658881e-05, "loss": 1.5576, "step": 267 }, { "epoch": 0.068608, "grad_norm": 0.7065198705386274, "learning_rate": 3.96577030569796e-05, "loss": 1.6256, "step": 268 }, { "epoch": 0.068864, "grad_norm": 0.7234247232057596, "learning_rate": 3.965470255635119e-05, "loss": 1.5431, "step": 269 }, { "epoch": 0.06912, "grad_norm": 0.6837089532034915, "learning_rate": 3.965168907668496e-05, "loss": 1.5816, "step": 270 }, { "epoch": 0.069376, "grad_norm": 0.6261301256414645, "learning_rate": 3.9648662619970866e-05, "loss": 1.59, "step": 271 }, { "epoch": 0.069632, "grad_norm": 0.6152968498369733, "learning_rate": 3.964562318820744e-05, "loss": 1.5574, "step": 272 }, { "epoch": 0.069888, "grad_norm": 0.6163553404258021, "learning_rate": 3.964257078340178e-05, "loss": 1.5142, "step": 273 }, { "epoch": 0.070144, "grad_norm": 0.5977857213091625, "learning_rate": 3.963950540756955e-05, "loss": 1.5168, "step": 274 }, { "epoch": 0.0704, "grad_norm": 0.6719016393928003, "learning_rate": 3.963642706273499e-05, "loss": 1.581, "step": 275 }, { "epoch": 0.070656, "grad_norm": 0.7004705040443666, "learning_rate": 3.9633335750930874e-05, "loss": 1.5589, "step": 276 }, { "epoch": 0.070912, "grad_norm": 0.6755307879840072, "learning_rate": 3.963023147419858e-05, "loss": 1.5933, "step": 277 }, { "epoch": 0.071168, "grad_norm": 0.6603843133572679, "learning_rate": 3.9627114234588e-05, "loss": 1.5605, "step": 278 }, { "epoch": 0.071424, "grad_norm": 0.6593151994858428, "learning_rate": 3.962398403415763e-05, "loss": 1.6025, "step": 279 }, { "epoch": 0.07168, "grad_norm": 0.6588011410619836, "learning_rate": 3.9620840874974505e-05, "loss": 1.5318, "step": 280 }, { "epoch": 0.071936, "grad_norm": 0.635351412311554, "learning_rate": 3.961768475911421e-05, "loss": 1.5637, "step": 281 }, { "epoch": 0.072192, "grad_norm": 0.6333914275501125, "learning_rate": 3.9614515688660906e-05, "loss": 1.574, "step": 282 }, { "epoch": 0.072448, "grad_norm": 0.5984690025743336, "learning_rate": 3.961133366570729e-05, "loss": 1.5605, "step": 283 }, { "epoch": 0.072704, "grad_norm": 0.6245904528965746, "learning_rate": 3.960813869235462e-05, "loss": 1.5433, "step": 284 }, { "epoch": 0.07296, "grad_norm": 0.6343112722289147, "learning_rate": 3.960493077071272e-05, "loss": 1.5768, "step": 285 }, { "epoch": 0.073216, "grad_norm": 0.6383739522234918, "learning_rate": 3.960170990289992e-05, "loss": 1.5973, "step": 286 }, { "epoch": 0.073472, "grad_norm": 0.6175087904971406, "learning_rate": 3.9598476091043165e-05, "loss": 1.5591, "step": 287 }, { "epoch": 0.073728, "grad_norm": 0.605286476330089, "learning_rate": 3.9595229337277884e-05, "loss": 1.5594, "step": 288 }, { "epoch": 0.073984, "grad_norm": 0.6150810409680053, "learning_rate": 3.9591969643748094e-05, "loss": 1.6103, "step": 289 }, { "epoch": 0.07424, "grad_norm": 0.6015092265179158, "learning_rate": 3.958869701260633e-05, "loss": 1.5677, "step": 290 }, { "epoch": 0.074496, "grad_norm": 0.5794872235125188, "learning_rate": 3.95854114460137e-05, "loss": 1.5817, "step": 291 }, { "epoch": 0.074752, "grad_norm": 0.6355098279529137, "learning_rate": 3.958211294613983e-05, "loss": 1.6012, "step": 292 }, { "epoch": 0.075008, "grad_norm": 0.6846550048372965, "learning_rate": 3.957880151516289e-05, "loss": 1.5017, "step": 293 }, { "epoch": 0.075264, "grad_norm": 0.6284211989407984, "learning_rate": 3.957547715526959e-05, "loss": 1.5916, "step": 294 }, { "epoch": 0.07552, "grad_norm": 0.6280891054495534, "learning_rate": 3.957213986865519e-05, "loss": 1.6245, "step": 295 }, { "epoch": 0.075776, "grad_norm": 0.6723872157589631, "learning_rate": 3.956878965752348e-05, "loss": 1.569, "step": 296 }, { "epoch": 0.076032, "grad_norm": 0.593574319122968, "learning_rate": 3.956542652408676e-05, "loss": 1.5321, "step": 297 }, { "epoch": 0.076288, "grad_norm": 0.629001881571098, "learning_rate": 3.9562050470565896e-05, "loss": 1.5182, "step": 298 }, { "epoch": 0.076544, "grad_norm": 0.6026761828018135, "learning_rate": 3.9558661499190287e-05, "loss": 1.5448, "step": 299 }, { "epoch": 0.0768, "grad_norm": 0.6084003125242101, "learning_rate": 3.955525961219783e-05, "loss": 1.5386, "step": 300 }, { "epoch": 0.077056, "grad_norm": 0.7230722911197127, "learning_rate": 3.9551844811834984e-05, "loss": 1.521, "step": 301 }, { "epoch": 0.077312, "grad_norm": 0.5916391928344684, "learning_rate": 3.954841710035672e-05, "loss": 1.5479, "step": 302 }, { "epoch": 0.077568, "grad_norm": 0.5655274326827907, "learning_rate": 3.954497648002654e-05, "loss": 1.5287, "step": 303 }, { "epoch": 0.077824, "grad_norm": 0.5753627416365539, "learning_rate": 3.9541522953116446e-05, "loss": 1.5556, "step": 304 }, { "epoch": 0.07808, "grad_norm": 0.5495398557131904, "learning_rate": 3.953805652190701e-05, "loss": 1.539, "step": 305 }, { "epoch": 0.078336, "grad_norm": 0.5816682760424461, "learning_rate": 3.9534577188687285e-05, "loss": 1.5823, "step": 306 }, { "epoch": 0.078592, "grad_norm": 0.5588654675495193, "learning_rate": 3.9531084955754865e-05, "loss": 1.5869, "step": 307 }, { "epoch": 0.078848, "grad_norm": 0.5739225611433424, "learning_rate": 3.952757982541585e-05, "loss": 1.5601, "step": 308 }, { "epoch": 0.079104, "grad_norm": 0.5651453272839616, "learning_rate": 3.952406179998487e-05, "loss": 1.5167, "step": 309 }, { "epoch": 0.07936, "grad_norm": 0.5759801794621782, "learning_rate": 3.952053088178506e-05, "loss": 1.5454, "step": 310 }, { "epoch": 0.079616, "grad_norm": 0.5675550904221729, "learning_rate": 3.951698707314808e-05, "loss": 1.5442, "step": 311 }, { "epoch": 0.079872, "grad_norm": 0.5798120547386769, "learning_rate": 3.9513430376414065e-05, "loss": 1.5223, "step": 312 }, { "epoch": 0.080128, "grad_norm": 0.5610484973346276, "learning_rate": 3.9509860793931716e-05, "loss": 1.5353, "step": 313 }, { "epoch": 0.080384, "grad_norm": 0.5558679322440181, "learning_rate": 3.9506278328058217e-05, "loss": 1.5443, "step": 314 }, { "epoch": 0.08064, "grad_norm": 0.5720633332565486, "learning_rate": 3.950268298115925e-05, "loss": 1.548, "step": 315 }, { "epoch": 0.080896, "grad_norm": 0.5811075172223369, "learning_rate": 3.949907475560901e-05, "loss": 1.6164, "step": 316 }, { "epoch": 0.081152, "grad_norm": 0.5749882097168282, "learning_rate": 3.9495453653790207e-05, "loss": 1.5858, "step": 317 }, { "epoch": 0.081408, "grad_norm": 0.591646361410766, "learning_rate": 3.949181967809404e-05, "loss": 1.5514, "step": 318 }, { "epoch": 0.081664, "grad_norm": 0.5754571775610835, "learning_rate": 3.948817283092022e-05, "loss": 1.5811, "step": 319 }, { "epoch": 0.08192, "grad_norm": 0.5919672737133586, "learning_rate": 3.948451311467695e-05, "loss": 1.5182, "step": 320 }, { "epoch": 0.082176, "grad_norm": 0.5434999284296101, "learning_rate": 3.948084053178094e-05, "loss": 1.5497, "step": 321 }, { "epoch": 0.082432, "grad_norm": 0.5753099865967742, "learning_rate": 3.947715508465738e-05, "loss": 1.5614, "step": 322 }, { "epoch": 0.082688, "grad_norm": 0.5889106315903021, "learning_rate": 3.947345677573997e-05, "loss": 1.5218, "step": 323 }, { "epoch": 0.082944, "grad_norm": 0.5462459473200533, "learning_rate": 3.946974560747089e-05, "loss": 1.5016, "step": 324 }, { "epoch": 0.0832, "grad_norm": 0.5698011542861564, "learning_rate": 3.946602158230084e-05, "loss": 1.5219, "step": 325 }, { "epoch": 0.083456, "grad_norm": 0.5501883442642922, "learning_rate": 3.946228470268898e-05, "loss": 1.5709, "step": 326 }, { "epoch": 0.083712, "grad_norm": 0.6146766806342341, "learning_rate": 3.945853497110296e-05, "loss": 1.5026, "step": 327 }, { "epoch": 0.083968, "grad_norm": 0.57807403453325, "learning_rate": 3.945477239001893e-05, "loss": 1.5638, "step": 328 }, { "epoch": 0.084224, "grad_norm": 0.5651786098641621, "learning_rate": 3.9450996961921536e-05, "loss": 1.5849, "step": 329 }, { "epoch": 0.08448, "grad_norm": 0.5945279558511242, "learning_rate": 3.944720868930388e-05, "loss": 1.5437, "step": 330 }, { "epoch": 0.084736, "grad_norm": 0.5805692011478812, "learning_rate": 3.944340757466756e-05, "loss": 1.5299, "step": 331 }, { "epoch": 0.084992, "grad_norm": 0.5489198981177734, "learning_rate": 3.9439593620522644e-05, "loss": 1.5006, "step": 332 }, { "epoch": 0.085248, "grad_norm": 0.5364138535319419, "learning_rate": 3.9435766829387706e-05, "loss": 1.5268, "step": 333 }, { "epoch": 0.085504, "grad_norm": 0.5350672861028023, "learning_rate": 3.943192720378976e-05, "loss": 1.5115, "step": 334 }, { "epoch": 0.08576, "grad_norm": 0.5778481008977208, "learning_rate": 3.942807474626433e-05, "loss": 1.556, "step": 335 }, { "epoch": 0.086016, "grad_norm": 0.5836054489967638, "learning_rate": 3.9424209459355385e-05, "loss": 1.5597, "step": 336 }, { "epoch": 0.086272, "grad_norm": 0.5252916782088496, "learning_rate": 3.942033134561538e-05, "loss": 1.4676, "step": 337 }, { "epoch": 0.086528, "grad_norm": 0.5523934566599342, "learning_rate": 3.941644040760523e-05, "loss": 1.5329, "step": 338 }, { "epoch": 0.086784, "grad_norm": 0.5396239268232077, "learning_rate": 3.941253664789435e-05, "loss": 1.5304, "step": 339 }, { "epoch": 0.08704, "grad_norm": 0.5922437894247253, "learning_rate": 3.9408620069060584e-05, "loss": 1.5164, "step": 340 }, { "epoch": 0.087296, "grad_norm": 0.5716311630949655, "learning_rate": 3.9404690673690255e-05, "loss": 1.5121, "step": 341 }, { "epoch": 0.087552, "grad_norm": 0.6120273215323037, "learning_rate": 3.940074846437815e-05, "loss": 1.525, "step": 342 }, { "epoch": 0.087808, "grad_norm": 0.5668241402599921, "learning_rate": 3.939679344372753e-05, "loss": 1.4974, "step": 343 }, { "epoch": 0.088064, "grad_norm": 0.6192851077538529, "learning_rate": 3.9392825614350085e-05, "loss": 1.5145, "step": 344 }, { "epoch": 0.08832, "grad_norm": 0.607306396434443, "learning_rate": 3.9388844978865994e-05, "loss": 1.5629, "step": 345 }, { "epoch": 0.088576, "grad_norm": 0.574407967776693, "learning_rate": 3.938485153990388e-05, "loss": 1.4852, "step": 346 }, { "epoch": 0.088832, "grad_norm": 0.5829761810171102, "learning_rate": 3.9380845300100816e-05, "loss": 1.5562, "step": 347 }, { "epoch": 0.089088, "grad_norm": 0.5684824398811541, "learning_rate": 3.937682626210234e-05, "loss": 1.533, "step": 348 }, { "epoch": 0.089344, "grad_norm": 0.6066649082850831, "learning_rate": 3.9372794428562426e-05, "loss": 1.5696, "step": 349 }, { "epoch": 0.0896, "grad_norm": 0.6138752672638471, "learning_rate": 3.9368749802143524e-05, "loss": 1.5335, "step": 350 }, { "epoch": 0.089856, "grad_norm": 0.5473396472418754, "learning_rate": 3.93646923855165e-05, "loss": 1.5339, "step": 351 }, { "epoch": 0.090112, "grad_norm": 0.7374851741318385, "learning_rate": 3.9360622181360675e-05, "loss": 1.5391, "step": 352 }, { "epoch": 0.090368, "grad_norm": 0.6221145074383628, "learning_rate": 3.9356539192363834e-05, "loss": 1.5424, "step": 353 }, { "epoch": 0.090624, "grad_norm": 0.6070785891521857, "learning_rate": 3.935244342122219e-05, "loss": 1.5176, "step": 354 }, { "epoch": 0.09088, "grad_norm": 0.6117861488177805, "learning_rate": 3.934833487064038e-05, "loss": 1.5632, "step": 355 }, { "epoch": 0.091136, "grad_norm": 0.5872912565281746, "learning_rate": 3.9344213543331524e-05, "loss": 1.5766, "step": 356 }, { "epoch": 0.091392, "grad_norm": 0.6171017746113925, "learning_rate": 3.934007944201713e-05, "loss": 1.5205, "step": 357 }, { "epoch": 0.091648, "grad_norm": 0.5922959018496421, "learning_rate": 3.933593256942717e-05, "loss": 1.5189, "step": 358 }, { "epoch": 0.091904, "grad_norm": 0.5645013823801353, "learning_rate": 3.933177292830004e-05, "loss": 1.498, "step": 359 }, { "epoch": 0.09216, "grad_norm": 0.5748892967721703, "learning_rate": 3.932760052138258e-05, "loss": 1.5586, "step": 360 }, { "epoch": 0.092416, "grad_norm": 0.5550513608401035, "learning_rate": 3.932341535143003e-05, "loss": 1.4637, "step": 361 }, { "epoch": 0.092672, "grad_norm": 0.5778780256127651, "learning_rate": 3.931921742120611e-05, "loss": 1.5315, "step": 362 }, { "epoch": 0.092928, "grad_norm": 0.5787100847474165, "learning_rate": 3.931500673348291e-05, "loss": 1.4683, "step": 363 }, { "epoch": 0.093184, "grad_norm": 0.6816375015592419, "learning_rate": 3.931078329104098e-05, "loss": 1.5282, "step": 364 }, { "epoch": 0.09344, "grad_norm": 0.6070308439722151, "learning_rate": 3.930654709666927e-05, "loss": 1.4843, "step": 365 }, { "epoch": 0.093696, "grad_norm": 0.611855774907062, "learning_rate": 3.930229815316517e-05, "loss": 1.507, "step": 366 }, { "epoch": 0.093952, "grad_norm": 0.5920806948004058, "learning_rate": 3.92980364633345e-05, "loss": 1.5102, "step": 367 }, { "epoch": 0.094208, "grad_norm": 0.5789481306796512, "learning_rate": 3.9293762029991454e-05, "loss": 1.4859, "step": 368 }, { "epoch": 0.094464, "grad_norm": 0.5545210379685646, "learning_rate": 3.9289474855958675e-05, "loss": 1.5334, "step": 369 }, { "epoch": 0.09472, "grad_norm": 0.5693708754070363, "learning_rate": 3.928517494406721e-05, "loss": 1.5007, "step": 370 }, { "epoch": 0.094976, "grad_norm": 0.5631181720644017, "learning_rate": 3.928086229715652e-05, "loss": 1.5516, "step": 371 }, { "epoch": 0.095232, "grad_norm": 0.5698899536539186, "learning_rate": 3.927653691807447e-05, "loss": 1.5632, "step": 372 }, { "epoch": 0.095488, "grad_norm": 0.5411018284376289, "learning_rate": 3.927219880967733e-05, "loss": 1.5356, "step": 373 }, { "epoch": 0.095744, "grad_norm": 0.5720335654312722, "learning_rate": 3.92678479748298e-05, "loss": 1.495, "step": 374 }, { "epoch": 0.096, "grad_norm": 0.5713261648400483, "learning_rate": 3.926348441640495e-05, "loss": 1.5237, "step": 375 }, { "epoch": 0.096256, "grad_norm": 0.5684117488173724, "learning_rate": 3.9259108137284275e-05, "loss": 1.5611, "step": 376 }, { "epoch": 0.096512, "grad_norm": 0.5619551640402388, "learning_rate": 3.9254719140357656e-05, "loss": 1.5373, "step": 377 }, { "epoch": 0.096768, "grad_norm": 0.5808577701688464, "learning_rate": 3.925031742852339e-05, "loss": 1.5063, "step": 378 }, { "epoch": 0.097024, "grad_norm": 0.5261948663689826, "learning_rate": 3.9245903004688156e-05, "loss": 1.5304, "step": 379 }, { "epoch": 0.09728, "grad_norm": 0.5545784152397776, "learning_rate": 3.9241475871767026e-05, "loss": 1.5237, "step": 380 }, { "epoch": 0.097536, "grad_norm": 0.5415429526824642, "learning_rate": 3.923703603268348e-05, "loss": 1.5362, "step": 381 }, { "epoch": 0.097792, "grad_norm": 0.5448497604090797, "learning_rate": 3.9232583490369363e-05, "loss": 1.4965, "step": 382 }, { "epoch": 0.098048, "grad_norm": 0.5058967944470406, "learning_rate": 3.922811824776494e-05, "loss": 1.5107, "step": 383 }, { "epoch": 0.098304, "grad_norm": 0.5658118095971683, "learning_rate": 3.922364030781884e-05, "loss": 1.4619, "step": 384 }, { "epoch": 0.09856, "grad_norm": 0.5407229307860603, "learning_rate": 3.921914967348807e-05, "loss": 1.5492, "step": 385 }, { "epoch": 0.098816, "grad_norm": 0.6391689257858224, "learning_rate": 3.9214646347738055e-05, "loss": 1.5979, "step": 386 }, { "epoch": 0.099072, "grad_norm": 0.5354689680191062, "learning_rate": 3.921013033354256e-05, "loss": 1.4744, "step": 387 }, { "epoch": 0.099328, "grad_norm": 0.5449863102426697, "learning_rate": 3.920560163388377e-05, "loss": 1.4878, "step": 388 }, { "epoch": 0.099584, "grad_norm": 0.5234049424534529, "learning_rate": 3.92010602517522e-05, "loss": 1.4809, "step": 389 }, { "epoch": 0.09984, "grad_norm": 0.5577205865798737, "learning_rate": 3.919650619014678e-05, "loss": 1.5365, "step": 390 }, { "epoch": 0.100096, "grad_norm": 0.5620202759361268, "learning_rate": 3.9191939452074786e-05, "loss": 1.5147, "step": 391 }, { "epoch": 0.100352, "grad_norm": 0.5425613914169927, "learning_rate": 3.9187360040551886e-05, "loss": 1.5106, "step": 392 }, { "epoch": 0.100608, "grad_norm": 0.5695548764067173, "learning_rate": 3.9182767958602104e-05, "loss": 1.4999, "step": 393 }, { "epoch": 0.100864, "grad_norm": 0.5402815061869861, "learning_rate": 3.917816320925784e-05, "loss": 1.5021, "step": 394 }, { "epoch": 0.10112, "grad_norm": 0.5897767546820807, "learning_rate": 3.917354579555984e-05, "loss": 1.4991, "step": 395 }, { "epoch": 0.101376, "grad_norm": 0.5548953840790886, "learning_rate": 3.916891572055724e-05, "loss": 1.5445, "step": 396 }, { "epoch": 0.101632, "grad_norm": 0.5869716128855055, "learning_rate": 3.916427298730751e-05, "loss": 1.5418, "step": 397 }, { "epoch": 0.101888, "grad_norm": 0.5846028031929945, "learning_rate": 3.9159617598876495e-05, "loss": 1.4997, "step": 398 }, { "epoch": 0.102144, "grad_norm": 0.5418112164138075, "learning_rate": 3.9154949558338404e-05, "loss": 1.4576, "step": 399 }, { "epoch": 0.1024, "grad_norm": 0.5778710196897984, "learning_rate": 3.915026886877578e-05, "loss": 1.4819, "step": 400 }, { "epoch": 0.102656, "grad_norm": 0.5655149496241458, "learning_rate": 3.914557553327954e-05, "loss": 1.4622, "step": 401 }, { "epoch": 0.102912, "grad_norm": 0.5647033525492865, "learning_rate": 3.914086955494893e-05, "loss": 1.5045, "step": 402 }, { "epoch": 0.103168, "grad_norm": 0.7016332090993277, "learning_rate": 3.913615093689155e-05, "loss": 1.5452, "step": 403 }, { "epoch": 0.103424, "grad_norm": 0.6138038403589344, "learning_rate": 3.9131419682223376e-05, "loss": 1.5164, "step": 404 }, { "epoch": 0.10368, "grad_norm": 0.5950084337866036, "learning_rate": 3.9126675794068686e-05, "loss": 1.5413, "step": 405 }, { "epoch": 0.103936, "grad_norm": 0.5549327135312124, "learning_rate": 3.912191927556013e-05, "loss": 1.5168, "step": 406 }, { "epoch": 0.104192, "grad_norm": 0.6062882109514001, "learning_rate": 3.911715012983868e-05, "loss": 1.5974, "step": 407 }, { "epoch": 0.104448, "grad_norm": 0.5412167037175736, "learning_rate": 3.911236836005366e-05, "loss": 1.506, "step": 408 }, { "epoch": 0.104704, "grad_norm": 0.6335027417269128, "learning_rate": 3.910757396936273e-05, "loss": 1.5209, "step": 409 }, { "epoch": 0.10496, "grad_norm": 0.5517968775497638, "learning_rate": 3.910276696093186e-05, "loss": 1.4858, "step": 410 }, { "epoch": 0.105216, "grad_norm": 0.6864776560063669, "learning_rate": 3.90979473379354e-05, "loss": 1.54, "step": 411 }, { "epoch": 0.105472, "grad_norm": 0.5512986659989538, "learning_rate": 3.909311510355598e-05, "loss": 1.4793, "step": 412 }, { "epoch": 0.105728, "grad_norm": 0.5330731696329115, "learning_rate": 3.908827026098458e-05, "loss": 1.4989, "step": 413 }, { "epoch": 0.105984, "grad_norm": 0.569680860290677, "learning_rate": 3.908341281342052e-05, "loss": 1.5122, "step": 414 }, { "epoch": 0.10624, "grad_norm": 0.5655454222855373, "learning_rate": 3.907854276407141e-05, "loss": 1.4959, "step": 415 }, { "epoch": 0.106496, "grad_norm": 0.5408547908496869, "learning_rate": 3.907366011615322e-05, "loss": 1.5024, "step": 416 }, { "epoch": 0.106752, "grad_norm": 0.5775268900256835, "learning_rate": 3.90687648728902e-05, "loss": 1.5268, "step": 417 }, { "epoch": 0.107008, "grad_norm": 0.5694947000413031, "learning_rate": 3.906385703751496e-05, "loss": 1.5009, "step": 418 }, { "epoch": 0.107264, "grad_norm": 0.5427655898991233, "learning_rate": 3.905893661326839e-05, "loss": 1.5121, "step": 419 }, { "epoch": 0.10752, "grad_norm": 0.5642574792361763, "learning_rate": 3.9054003603399706e-05, "loss": 1.4864, "step": 420 }, { "epoch": 0.107776, "grad_norm": 0.5416495096619954, "learning_rate": 3.904905801116644e-05, "loss": 1.5056, "step": 421 }, { "epoch": 0.108032, "grad_norm": 0.5776572675505547, "learning_rate": 3.904409983983442e-05, "loss": 1.5013, "step": 422 }, { "epoch": 0.108288, "grad_norm": 0.5234234814164501, "learning_rate": 3.903912909267779e-05, "loss": 1.5228, "step": 423 }, { "epoch": 0.108544, "grad_norm": 0.5601952743601876, "learning_rate": 3.9034145772979e-05, "loss": 1.4789, "step": 424 }, { "epoch": 0.1088, "grad_norm": 0.5202010532139367, "learning_rate": 3.90291498840288e-05, "loss": 1.5558, "step": 425 }, { "epoch": 0.109056, "grad_norm": 0.5464202938890733, "learning_rate": 3.9024141429126234e-05, "loss": 1.4734, "step": 426 }, { "epoch": 0.109312, "grad_norm": 0.5168379327374141, "learning_rate": 3.901912041157866e-05, "loss": 1.5119, "step": 427 }, { "epoch": 0.109568, "grad_norm": 0.587502511308583, "learning_rate": 3.9014086834701704e-05, "loss": 1.5305, "step": 428 }, { "epoch": 0.109824, "grad_norm": 0.5231067644776921, "learning_rate": 3.900904070181932e-05, "loss": 1.5085, "step": 429 }, { "epoch": 0.11008, "grad_norm": 0.5542602007102981, "learning_rate": 3.900398201626372e-05, "loss": 1.4959, "step": 430 }, { "epoch": 0.110336, "grad_norm": 0.5379469729738892, "learning_rate": 3.8998910781375443e-05, "loss": 1.4824, "step": 431 }, { "epoch": 0.110592, "grad_norm": 0.5207824755557448, "learning_rate": 3.8993827000503266e-05, "loss": 1.4901, "step": 432 }, { "epoch": 0.110848, "grad_norm": 0.5512719568744536, "learning_rate": 3.8988730677004294e-05, "loss": 1.515, "step": 433 }, { "epoch": 0.111104, "grad_norm": 0.5121929398097549, "learning_rate": 3.8983621814243903e-05, "loss": 1.4651, "step": 434 }, { "epoch": 0.11136, "grad_norm": 0.5634050326224734, "learning_rate": 3.897850041559573e-05, "loss": 1.493, "step": 435 }, { "epoch": 0.111616, "grad_norm": 0.5670568845406377, "learning_rate": 3.897336648444172e-05, "loss": 1.4999, "step": 436 }, { "epoch": 0.111872, "grad_norm": 0.5111885029033698, "learning_rate": 3.896822002417206e-05, "loss": 1.456, "step": 437 }, { "epoch": 0.112128, "grad_norm": 0.5590287667908839, "learning_rate": 3.896306103818525e-05, "loss": 1.4698, "step": 438 }, { "epoch": 0.112384, "grad_norm": 0.5665805787207899, "learning_rate": 3.895788952988803e-05, "loss": 1.4595, "step": 439 }, { "epoch": 0.11264, "grad_norm": 0.5219584430832925, "learning_rate": 3.8952705502695424e-05, "loss": 1.5176, "step": 440 }, { "epoch": 0.112896, "grad_norm": 0.6025385181768772, "learning_rate": 3.8947508960030715e-05, "loss": 1.5346, "step": 441 }, { "epoch": 0.113152, "grad_norm": 0.5060068922884791, "learning_rate": 3.894229990532545e-05, "loss": 1.5245, "step": 442 }, { "epoch": 0.113408, "grad_norm": 0.5843206639819737, "learning_rate": 3.893707834201945e-05, "loss": 1.538, "step": 443 }, { "epoch": 0.113664, "grad_norm": 0.5244030530306381, "learning_rate": 3.8931844273560796e-05, "loss": 1.5176, "step": 444 }, { "epoch": 0.11392, "grad_norm": 0.559066700032701, "learning_rate": 3.8926597703405805e-05, "loss": 1.5139, "step": 445 }, { "epoch": 0.114176, "grad_norm": 0.5672067440229976, "learning_rate": 3.892133863501907e-05, "loss": 1.4721, "step": 446 }, { "epoch": 0.114432, "grad_norm": 0.5614283167360169, "learning_rate": 3.891606707187344e-05, "loss": 1.5847, "step": 447 }, { "epoch": 0.114688, "grad_norm": 0.5397568887526905, "learning_rate": 3.891078301744999e-05, "loss": 1.4329, "step": 448 }, { "epoch": 0.114944, "grad_norm": 0.5588517674522153, "learning_rate": 3.890548647523807e-05, "loss": 1.4528, "step": 449 }, { "epoch": 0.1152, "grad_norm": 0.529750087303853, "learning_rate": 3.890017744873527e-05, "loss": 1.5092, "step": 450 }, { "epoch": 0.115456, "grad_norm": 0.5759067484700479, "learning_rate": 3.8894855941447417e-05, "loss": 1.5124, "step": 451 }, { "epoch": 0.115712, "grad_norm": 0.5231313240554187, "learning_rate": 3.888952195688858e-05, "loss": 1.4831, "step": 452 }, { "epoch": 0.115968, "grad_norm": 0.5129406310715484, "learning_rate": 3.888417549858106e-05, "loss": 1.4628, "step": 453 }, { "epoch": 0.116224, "grad_norm": 0.4968070937138596, "learning_rate": 3.887881657005544e-05, "loss": 1.4634, "step": 454 }, { "epoch": 0.11648, "grad_norm": 0.5019519687196918, "learning_rate": 3.8873445174850466e-05, "loss": 1.4818, "step": 455 }, { "epoch": 0.116736, "grad_norm": 0.5636711660602768, "learning_rate": 3.8868061316513175e-05, "loss": 1.5089, "step": 456 }, { "epoch": 0.116992, "grad_norm": 0.5625467989322707, "learning_rate": 3.886266499859881e-05, "loss": 1.5297, "step": 457 }, { "epoch": 0.117248, "grad_norm": 0.4986056452907962, "learning_rate": 3.8857256224670845e-05, "loss": 1.4898, "step": 458 }, { "epoch": 0.117504, "grad_norm": 0.5566384911612768, "learning_rate": 3.885183499830097e-05, "loss": 1.4806, "step": 459 }, { "epoch": 0.11776, "grad_norm": 0.5060479806690903, "learning_rate": 3.884640132306911e-05, "loss": 1.5382, "step": 460 }, { "epoch": 0.118016, "grad_norm": 0.6047495315011246, "learning_rate": 3.8840955202563416e-05, "loss": 1.5075, "step": 461 }, { "epoch": 0.118272, "grad_norm": 0.5481938397051778, "learning_rate": 3.8835496640380246e-05, "loss": 1.4574, "step": 462 }, { "epoch": 0.118528, "grad_norm": 0.5811921477566268, "learning_rate": 3.883002564012417e-05, "loss": 1.4763, "step": 463 }, { "epoch": 0.118784, "grad_norm": 0.5117611751208486, "learning_rate": 3.8824542205407986e-05, "loss": 1.5086, "step": 464 }, { "epoch": 0.11904, "grad_norm": 0.5287742632770791, "learning_rate": 3.881904633985269e-05, "loss": 1.526, "step": 465 }, { "epoch": 0.119296, "grad_norm": 0.5141498015718505, "learning_rate": 3.88135380470875e-05, "loss": 1.4689, "step": 466 }, { "epoch": 0.119552, "grad_norm": 0.5650669180391636, "learning_rate": 3.880801733074982e-05, "loss": 1.4704, "step": 467 }, { "epoch": 0.119808, "grad_norm": 0.519768278528569, "learning_rate": 3.8802484194485286e-05, "loss": 1.497, "step": 468 }, { "epoch": 0.120064, "grad_norm": 0.516148321621722, "learning_rate": 3.87969386419477e-05, "loss": 1.4748, "step": 469 }, { "epoch": 0.12032, "grad_norm": 0.5741498108887203, "learning_rate": 3.87913806767991e-05, "loss": 1.5364, "step": 470 }, { "epoch": 0.120576, "grad_norm": 0.5370193113419436, "learning_rate": 3.8785810302709705e-05, "loss": 1.4869, "step": 471 }, { "epoch": 0.120832, "grad_norm": 0.5141958953741277, "learning_rate": 3.878022752335792e-05, "loss": 1.483, "step": 472 }, { "epoch": 0.121088, "grad_norm": 0.5227953184172967, "learning_rate": 3.8774632342430335e-05, "loss": 1.5261, "step": 473 }, { "epoch": 0.121344, "grad_norm": 0.5680322108237085, "learning_rate": 3.876902476362178e-05, "loss": 1.4864, "step": 474 }, { "epoch": 0.1216, "grad_norm": 0.5148615981419378, "learning_rate": 3.8763404790635196e-05, "loss": 1.5101, "step": 475 }, { "epoch": 0.121856, "grad_norm": 0.5278168427572507, "learning_rate": 3.875777242718177e-05, "loss": 1.4869, "step": 476 }, { "epoch": 0.122112, "grad_norm": 0.5262349561788399, "learning_rate": 3.875212767698084e-05, "loss": 1.5193, "step": 477 }, { "epoch": 0.122368, "grad_norm": 0.5088167825577808, "learning_rate": 3.874647054375995e-05, "loss": 1.5216, "step": 478 }, { "epoch": 0.122624, "grad_norm": 0.515389026746513, "learning_rate": 3.874080103125479e-05, "loss": 1.5152, "step": 479 }, { "epoch": 0.12288, "grad_norm": 0.5424517329693198, "learning_rate": 3.873511914320922e-05, "loss": 1.4984, "step": 480 }, { "epoch": 0.123136, "grad_norm": 0.5246491931065252, "learning_rate": 3.8729424883375324e-05, "loss": 1.464, "step": 481 }, { "epoch": 0.123392, "grad_norm": 0.4988730498367362, "learning_rate": 3.872371825551331e-05, "loss": 1.4557, "step": 482 }, { "epoch": 0.123648, "grad_norm": 0.5029333644406483, "learning_rate": 3.871799926339156e-05, "loss": 1.4854, "step": 483 }, { "epoch": 0.123904, "grad_norm": 0.9383990698675945, "learning_rate": 3.8712267910786625e-05, "loss": 1.5167, "step": 484 }, { "epoch": 0.12416, "grad_norm": 0.5139016651407603, "learning_rate": 3.870652420148323e-05, "loss": 1.5332, "step": 485 }, { "epoch": 0.124416, "grad_norm": 0.5959931001212663, "learning_rate": 3.8700768139274246e-05, "loss": 1.4974, "step": 486 }, { "epoch": 0.124672, "grad_norm": 0.5187408664632372, "learning_rate": 3.86949997279607e-05, "loss": 1.5043, "step": 487 }, { "epoch": 0.124928, "grad_norm": 0.5443000788345685, "learning_rate": 3.868921897135178e-05, "loss": 1.5182, "step": 488 }, { "epoch": 0.125184, "grad_norm": 0.5072292115459439, "learning_rate": 3.868342587326483e-05, "loss": 1.4451, "step": 489 }, { "epoch": 0.12544, "grad_norm": 0.5518760340694461, "learning_rate": 3.867762043752534e-05, "loss": 1.5107, "step": 490 }, { "epoch": 0.125696, "grad_norm": 0.5051192418000028, "learning_rate": 3.867180266796693e-05, "loss": 1.5035, "step": 491 }, { "epoch": 0.125952, "grad_norm": 0.52273969926132, "learning_rate": 3.86659725684314e-05, "loss": 1.4893, "step": 492 }, { "epoch": 0.126208, "grad_norm": 0.5137312599365389, "learning_rate": 3.866013014276866e-05, "loss": 1.5142, "step": 493 }, { "epoch": 0.126464, "grad_norm": 0.5296237991513308, "learning_rate": 3.865427539483678e-05, "loss": 1.4812, "step": 494 }, { "epoch": 0.12672, "grad_norm": 0.5387163912432142, "learning_rate": 3.864840832850195e-05, "loss": 1.4925, "step": 495 }, { "epoch": 0.126976, "grad_norm": 0.5684312270648573, "learning_rate": 3.864252894763851e-05, "loss": 1.5178, "step": 496 }, { "epoch": 0.127232, "grad_norm": 0.5322176602251488, "learning_rate": 3.863663725612892e-05, "loss": 1.4826, "step": 497 }, { "epoch": 0.127488, "grad_norm": 0.5825864174252922, "learning_rate": 3.863073325786379e-05, "loss": 1.4694, "step": 498 }, { "epoch": 0.127744, "grad_norm": 0.5276903306974429, "learning_rate": 3.862481695674183e-05, "loss": 1.488, "step": 499 }, { "epoch": 0.128, "grad_norm": 0.5260002029782144, "learning_rate": 3.861888835666988e-05, "loss": 1.4839, "step": 500 }, { "epoch": 0.128256, "grad_norm": 0.5300556397109144, "learning_rate": 3.8612947461562916e-05, "loss": 1.4207, "step": 501 }, { "epoch": 0.128512, "grad_norm": 0.5314092141445704, "learning_rate": 3.8606994275344026e-05, "loss": 1.5032, "step": 502 }, { "epoch": 0.128768, "grad_norm": 0.5330073607818647, "learning_rate": 3.860102880194441e-05, "loss": 1.5406, "step": 503 }, { "epoch": 0.129024, "grad_norm": 0.49913744413529537, "learning_rate": 3.8595051045303386e-05, "loss": 1.4283, "step": 504 }, { "epoch": 0.12928, "grad_norm": 0.5087414500773146, "learning_rate": 3.858906100936838e-05, "loss": 1.4167, "step": 505 }, { "epoch": 0.129536, "grad_norm": 0.5152256616225279, "learning_rate": 3.858305869809493e-05, "loss": 1.4878, "step": 506 }, { "epoch": 0.129792, "grad_norm": 0.5698963239266701, "learning_rate": 3.857704411544668e-05, "loss": 1.486, "step": 507 }, { "epoch": 0.130048, "grad_norm": 0.5311600179370706, "learning_rate": 3.8571017265395386e-05, "loss": 1.4734, "step": 508 }, { "epoch": 0.130304, "grad_norm": 0.5320096482787684, "learning_rate": 3.8564978151920876e-05, "loss": 1.4958, "step": 509 }, { "epoch": 0.13056, "grad_norm": 0.5483941145995145, "learning_rate": 3.8558926779011105e-05, "loss": 1.4803, "step": 510 }, { "epoch": 0.130816, "grad_norm": 0.5204917336788906, "learning_rate": 3.855286315066212e-05, "loss": 1.5323, "step": 511 }, { "epoch": 0.131072, "grad_norm": 0.5099999328110443, "learning_rate": 3.8546787270878046e-05, "loss": 1.4984, "step": 512 }, { "epoch": 0.131328, "grad_norm": 0.5685714710884843, "learning_rate": 3.854069914367111e-05, "loss": 1.489, "step": 513 }, { "epoch": 0.131584, "grad_norm": 0.5191929125191872, "learning_rate": 3.8534598773061624e-05, "loss": 1.4582, "step": 514 }, { "epoch": 0.13184, "grad_norm": 0.49900355580993516, "learning_rate": 3.852848616307799e-05, "loss": 1.506, "step": 515 }, { "epoch": 0.132096, "grad_norm": 0.5523320180053961, "learning_rate": 3.8522361317756674e-05, "loss": 1.529, "step": 516 }, { "epoch": 0.132352, "grad_norm": 0.5312634485733235, "learning_rate": 3.851622424114225e-05, "loss": 1.4752, "step": 517 }, { "epoch": 0.132608, "grad_norm": 0.5289885881469029, "learning_rate": 3.851007493728735e-05, "loss": 1.5559, "step": 518 }, { "epoch": 0.132864, "grad_norm": 0.521808855036893, "learning_rate": 3.8503913410252674e-05, "loss": 1.4967, "step": 519 }, { "epoch": 0.13312, "grad_norm": 0.5293852897838439, "learning_rate": 3.849773966410701e-05, "loss": 1.5073, "step": 520 }, { "epoch": 0.133376, "grad_norm": 0.6082067323529153, "learning_rate": 3.849155370292721e-05, "loss": 1.4817, "step": 521 }, { "epoch": 0.133632, "grad_norm": 0.5673519866765149, "learning_rate": 3.848535553079819e-05, "loss": 1.467, "step": 522 }, { "epoch": 0.133888, "grad_norm": 0.5226145357068925, "learning_rate": 3.847914515181293e-05, "loss": 1.4625, "step": 523 }, { "epoch": 0.134144, "grad_norm": 0.5276843816043325, "learning_rate": 3.847292257007247e-05, "loss": 1.4948, "step": 524 }, { "epoch": 0.1344, "grad_norm": 0.5370620172020053, "learning_rate": 3.84666877896859e-05, "loss": 1.4742, "step": 525 }, { "epoch": 0.134656, "grad_norm": 0.5453507354738568, "learning_rate": 3.846044081477041e-05, "loss": 1.4685, "step": 526 }, { "epoch": 0.134912, "grad_norm": 0.5438162366383884, "learning_rate": 3.845418164945116e-05, "loss": 1.4708, "step": 527 }, { "epoch": 0.135168, "grad_norm": 0.5234516812112022, "learning_rate": 3.844791029786144e-05, "loss": 1.4996, "step": 528 }, { "epoch": 0.135424, "grad_norm": 0.48639610207845557, "learning_rate": 3.8441626764142546e-05, "loss": 1.4946, "step": 529 }, { "epoch": 0.13568, "grad_norm": 0.496595221426787, "learning_rate": 3.843533105244382e-05, "loss": 1.4811, "step": 530 }, { "epoch": 0.135936, "grad_norm": 0.512180114016878, "learning_rate": 3.842902316692266e-05, "loss": 1.4363, "step": 531 }, { "epoch": 0.136192, "grad_norm": 0.5141028053289458, "learning_rate": 3.842270311174449e-05, "loss": 1.4342, "step": 532 }, { "epoch": 0.136448, "grad_norm": 0.4910910351768579, "learning_rate": 3.8416370891082785e-05, "loss": 1.4621, "step": 533 }, { "epoch": 0.136704, "grad_norm": 0.5641400246998611, "learning_rate": 3.841002650911904e-05, "loss": 1.5031, "step": 534 }, { "epoch": 0.13696, "grad_norm": 0.5355794269147915, "learning_rate": 3.840366997004278e-05, "loss": 1.4621, "step": 535 }, { "epoch": 0.137216, "grad_norm": 0.5183559275271968, "learning_rate": 3.839730127805157e-05, "loss": 1.5286, "step": 536 }, { "epoch": 0.137472, "grad_norm": 0.5517484333135494, "learning_rate": 3.8390920437351e-05, "loss": 1.4496, "step": 537 }, { "epoch": 0.137728, "grad_norm": 0.5250823206171249, "learning_rate": 3.8384527452154656e-05, "loss": 1.4718, "step": 538 }, { "epoch": 0.137984, "grad_norm": 0.527386533733061, "learning_rate": 3.837812232668419e-05, "loss": 1.4355, "step": 539 }, { "epoch": 0.13824, "grad_norm": 0.5301297939746005, "learning_rate": 3.837170506516922e-05, "loss": 1.5279, "step": 540 }, { "epoch": 0.138496, "grad_norm": 0.4965953653101356, "learning_rate": 3.836527567184741e-05, "loss": 1.4578, "step": 541 }, { "epoch": 0.138752, "grad_norm": 0.4987058389995067, "learning_rate": 3.8358834150964435e-05, "loss": 1.4939, "step": 542 }, { "epoch": 0.139008, "grad_norm": 0.5199682402091151, "learning_rate": 3.835238050677397e-05, "loss": 1.5284, "step": 543 }, { "epoch": 0.139264, "grad_norm": 0.5221150016537505, "learning_rate": 3.83459147435377e-05, "loss": 1.4753, "step": 544 }, { "epoch": 0.13952, "grad_norm": 0.49808617554982365, "learning_rate": 3.833943686552529e-05, "loss": 1.4573, "step": 545 }, { "epoch": 0.139776, "grad_norm": 0.5187924396388296, "learning_rate": 3.8332946877014444e-05, "loss": 1.5393, "step": 546 }, { "epoch": 0.140032, "grad_norm": 0.518681013396759, "learning_rate": 3.8326444782290844e-05, "loss": 1.4876, "step": 547 }, { "epoch": 0.140288, "grad_norm": 0.5246208067375517, "learning_rate": 3.8319930585648155e-05, "loss": 1.5283, "step": 548 }, { "epoch": 0.140544, "grad_norm": 0.5144409469740104, "learning_rate": 3.831340429138805e-05, "loss": 1.4892, "step": 549 }, { "epoch": 0.1408, "grad_norm": 0.516972607434327, "learning_rate": 3.830686590382019e-05, "loss": 1.5056, "step": 550 }, { "epoch": 0.141056, "grad_norm": 0.5068358833896857, "learning_rate": 3.8300315427262213e-05, "loss": 1.4458, "step": 551 }, { "epoch": 0.141312, "grad_norm": 0.511934063044909, "learning_rate": 3.829375286603975e-05, "loss": 1.4648, "step": 552 }, { "epoch": 0.141568, "grad_norm": 0.5005688295598651, "learning_rate": 3.82871782244864e-05, "loss": 1.4702, "step": 553 }, { "epoch": 0.141824, "grad_norm": 0.5184019607769594, "learning_rate": 3.8280591506943747e-05, "loss": 1.4841, "step": 554 }, { "epoch": 0.14208, "grad_norm": 0.4899580643761107, "learning_rate": 3.827399271776135e-05, "loss": 1.4575, "step": 555 }, { "epoch": 0.142336, "grad_norm": 0.5289866400452454, "learning_rate": 3.826738186129674e-05, "loss": 1.4765, "step": 556 }, { "epoch": 0.142592, "grad_norm": 0.5200607580905917, "learning_rate": 3.826075894191541e-05, "loss": 1.5443, "step": 557 }, { "epoch": 0.142848, "grad_norm": 0.47575269376247736, "learning_rate": 3.825412396399083e-05, "loss": 1.4438, "step": 558 }, { "epoch": 0.143104, "grad_norm": 0.4935999190617765, "learning_rate": 3.824747693190442e-05, "loss": 1.4821, "step": 559 }, { "epoch": 0.14336, "grad_norm": 0.5003064248614711, "learning_rate": 3.824081785004557e-05, "loss": 1.48, "step": 560 }, { "epoch": 0.143616, "grad_norm": 0.5051215704107598, "learning_rate": 3.8234146722811615e-05, "loss": 1.4567, "step": 561 }, { "epoch": 0.143872, "grad_norm": 0.4982539484872333, "learning_rate": 3.822746355460786e-05, "loss": 1.5261, "step": 562 }, { "epoch": 0.144128, "grad_norm": 0.45396684160573436, "learning_rate": 3.8220768349847556e-05, "loss": 1.4624, "step": 563 }, { "epoch": 0.144384, "grad_norm": 0.4772507381881317, "learning_rate": 3.82140611129519e-05, "loss": 1.4324, "step": 564 }, { "epoch": 0.14464, "grad_norm": 0.4800115566588089, "learning_rate": 3.8207341848350024e-05, "loss": 1.4993, "step": 565 }, { "epoch": 0.144896, "grad_norm": 0.46880468290110133, "learning_rate": 3.820061056047903e-05, "loss": 1.4359, "step": 566 }, { "epoch": 0.145152, "grad_norm": 0.5906299935246266, "learning_rate": 3.819386725378392e-05, "loss": 1.4459, "step": 567 }, { "epoch": 0.145408, "grad_norm": 0.514687473031849, "learning_rate": 3.8187111932717685e-05, "loss": 1.482, "step": 568 }, { "epoch": 0.145664, "grad_norm": 0.48950218466943946, "learning_rate": 3.8180344601741196e-05, "loss": 1.4797, "step": 569 }, { "epoch": 0.14592, "grad_norm": 0.5774990145961018, "learning_rate": 3.817356526532329e-05, "loss": 1.4983, "step": 570 }, { "epoch": 0.146176, "grad_norm": 0.4805298739520118, "learning_rate": 3.8166773927940716e-05, "loss": 1.4527, "step": 571 }, { "epoch": 0.146432, "grad_norm": 0.5426702797305496, "learning_rate": 3.815997059407815e-05, "loss": 1.4959, "step": 572 }, { "epoch": 0.146688, "grad_norm": 0.5192554251827838, "learning_rate": 3.8153155268228205e-05, "loss": 1.456, "step": 573 }, { "epoch": 0.146944, "grad_norm": 0.4827986566128428, "learning_rate": 3.814632795489139e-05, "loss": 1.4796, "step": 574 }, { "epoch": 0.1472, "grad_norm": 0.5661508656846217, "learning_rate": 3.813948865857614e-05, "loss": 1.4821, "step": 575 }, { "epoch": 0.147456, "grad_norm": 0.47518334928370554, "learning_rate": 3.813263738379882e-05, "loss": 1.4686, "step": 576 }, { "epoch": 0.147712, "grad_norm": 0.5053523822492876, "learning_rate": 3.812577413508366e-05, "loss": 1.475, "step": 577 }, { "epoch": 0.147968, "grad_norm": 0.4921007803106813, "learning_rate": 3.811889891696285e-05, "loss": 1.5357, "step": 578 }, { "epoch": 0.148224, "grad_norm": 0.47279084077275696, "learning_rate": 3.8112011733976445e-05, "loss": 1.4627, "step": 579 }, { "epoch": 0.14848, "grad_norm": 0.5151988977538712, "learning_rate": 3.810511259067242e-05, "loss": 1.4658, "step": 580 }, { "epoch": 0.148736, "grad_norm": 0.5937060221422091, "learning_rate": 3.8098201491606643e-05, "loss": 1.4658, "step": 581 }, { "epoch": 0.148992, "grad_norm": 0.5033060433188645, "learning_rate": 3.8091278441342884e-05, "loss": 1.5192, "step": 582 }, { "epoch": 0.149248, "grad_norm": 0.5537971605094045, "learning_rate": 3.808434344445279e-05, "loss": 1.5079, "step": 583 }, { "epoch": 0.149504, "grad_norm": 0.5461559473122115, "learning_rate": 3.80773965055159e-05, "loss": 1.4624, "step": 584 }, { "epoch": 0.14976, "grad_norm": 0.4971135641407436, "learning_rate": 3.807043762911967e-05, "loss": 1.4506, "step": 585 }, { "epoch": 0.150016, "grad_norm": 0.5801081312766178, "learning_rate": 3.8063466819859384e-05, "loss": 1.5032, "step": 586 }, { "epoch": 0.150272, "grad_norm": 0.5342732781667683, "learning_rate": 3.8056484082338264e-05, "loss": 1.5277, "step": 587 }, { "epoch": 0.150528, "grad_norm": 0.5233499402357944, "learning_rate": 3.804948942116735e-05, "loss": 1.4667, "step": 588 }, { "epoch": 0.150784, "grad_norm": 0.4809954602085254, "learning_rate": 3.8042482840965614e-05, "loss": 1.5182, "step": 589 }, { "epoch": 0.15104, "grad_norm": 0.5378209875866703, "learning_rate": 3.803546434635984e-05, "loss": 1.4919, "step": 590 }, { "epoch": 0.151296, "grad_norm": 0.5115462527699177, "learning_rate": 3.8028433941984745e-05, "loss": 1.4873, "step": 591 }, { "epoch": 0.151552, "grad_norm": 0.48884896510217246, "learning_rate": 3.8021391632482846e-05, "loss": 1.4909, "step": 592 }, { "epoch": 0.151808, "grad_norm": 0.5063767104197124, "learning_rate": 3.8014337422504575e-05, "loss": 1.4874, "step": 593 }, { "epoch": 0.152064, "grad_norm": 0.488906627315114, "learning_rate": 3.8007271316708186e-05, "loss": 1.4503, "step": 594 }, { "epoch": 0.15232, "grad_norm": 0.48264269817230804, "learning_rate": 3.8000193319759806e-05, "loss": 1.4654, "step": 595 }, { "epoch": 0.152576, "grad_norm": 0.45909432938574485, "learning_rate": 3.799310343633341e-05, "loss": 1.4605, "step": 596 }, { "epoch": 0.152832, "grad_norm": 0.4869282154270548, "learning_rate": 3.798600167111082e-05, "loss": 1.4774, "step": 597 }, { "epoch": 0.153088, "grad_norm": 0.4678186464166902, "learning_rate": 3.797888802878171e-05, "loss": 1.4291, "step": 598 }, { "epoch": 0.153344, "grad_norm": 0.4898434772262499, "learning_rate": 3.7971762514043585e-05, "loss": 1.4803, "step": 599 }, { "epoch": 0.1536, "grad_norm": 0.45885928624571815, "learning_rate": 3.7964625131601816e-05, "loss": 1.4063, "step": 600 }, { "epoch": 0.153856, "grad_norm": 0.4638369735793162, "learning_rate": 3.795747588616957e-05, "loss": 1.4827, "step": 601 }, { "epoch": 0.154112, "grad_norm": 0.48705182866429164, "learning_rate": 3.7950314782467895e-05, "loss": 1.4864, "step": 602 }, { "epoch": 0.154368, "grad_norm": 0.44948040749515983, "learning_rate": 3.794314182522563e-05, "loss": 1.4497, "step": 603 }, { "epoch": 0.154624, "grad_norm": 0.46998441867284246, "learning_rate": 3.793595701917946e-05, "loss": 1.4646, "step": 604 }, { "epoch": 0.15488, "grad_norm": 0.47487513935304704, "learning_rate": 3.79287603690739e-05, "loss": 1.4278, "step": 605 }, { "epoch": 0.155136, "grad_norm": 0.5071892523988517, "learning_rate": 3.792155187966127e-05, "loss": 1.5223, "step": 606 }, { "epoch": 0.155392, "grad_norm": 0.47046687460328607, "learning_rate": 3.791433155570172e-05, "loss": 1.4627, "step": 607 }, { "epoch": 0.155648, "grad_norm": 0.46365220599394413, "learning_rate": 3.790709940196321e-05, "loss": 1.4587, "step": 608 }, { "epoch": 0.155904, "grad_norm": 0.4618125397640924, "learning_rate": 3.7899855423221506e-05, "loss": 1.4614, "step": 609 }, { "epoch": 0.15616, "grad_norm": 0.46405342109371617, "learning_rate": 3.78925996242602e-05, "loss": 1.4658, "step": 610 }, { "epoch": 0.156416, "grad_norm": 0.4964360980835685, "learning_rate": 3.7885332009870674e-05, "loss": 1.4925, "step": 611 }, { "epoch": 0.156672, "grad_norm": 0.4729897541533447, "learning_rate": 3.787805258485212e-05, "loss": 1.502, "step": 612 }, { "epoch": 0.156928, "grad_norm": 0.4984420734854259, "learning_rate": 3.787076135401153e-05, "loss": 1.4908, "step": 613 }, { "epoch": 0.157184, "grad_norm": 0.4725106010755061, "learning_rate": 3.786345832216367e-05, "loss": 1.4515, "step": 614 }, { "epoch": 0.15744, "grad_norm": 0.503086806244627, "learning_rate": 3.7856143494131147e-05, "loss": 1.4209, "step": 615 }, { "epoch": 0.157696, "grad_norm": 0.4759086188205728, "learning_rate": 3.78488168747443e-05, "loss": 1.4461, "step": 616 }, { "epoch": 0.157952, "grad_norm": 0.4829840820219489, "learning_rate": 3.78414784688413e-05, "loss": 1.469, "step": 617 }, { "epoch": 0.158208, "grad_norm": 0.4729769255462357, "learning_rate": 3.7834128281268086e-05, "loss": 1.4876, "step": 618 }, { "epoch": 0.158464, "grad_norm": 0.49318652325924256, "learning_rate": 3.782676631687836e-05, "loss": 1.4559, "step": 619 }, { "epoch": 0.15872, "grad_norm": 0.48267335451024324, "learning_rate": 3.7819392580533625e-05, "loss": 1.4416, "step": 620 }, { "epoch": 0.158976, "grad_norm": 0.4940922132924232, "learning_rate": 3.781200707710315e-05, "loss": 1.4512, "step": 621 }, { "epoch": 0.159232, "grad_norm": 0.5243821942738289, "learning_rate": 3.7804609811463965e-05, "loss": 1.4621, "step": 622 }, { "epoch": 0.159488, "grad_norm": 0.49669911080722495, "learning_rate": 3.779720078850089e-05, "loss": 1.4747, "step": 623 }, { "epoch": 0.159744, "grad_norm": 0.5294685222181122, "learning_rate": 3.7789780013106473e-05, "loss": 1.4648, "step": 624 }, { "epoch": 0.16, "grad_norm": 0.4945110689001128, "learning_rate": 3.778234749018106e-05, "loss": 1.4973, "step": 625 }, { "epoch": 0.160256, "grad_norm": 0.5007043208705445, "learning_rate": 3.777490322463273e-05, "loss": 1.4288, "step": 626 }, { "epoch": 0.160512, "grad_norm": 0.48830457806052846, "learning_rate": 3.776744722137733e-05, "loss": 1.461, "step": 627 }, { "epoch": 0.160768, "grad_norm": 0.48616194667411944, "learning_rate": 3.7759979485338454e-05, "loss": 1.4579, "step": 628 }, { "epoch": 0.161024, "grad_norm": 0.4982998450825503, "learning_rate": 3.775250002144744e-05, "loss": 1.4687, "step": 629 }, { "epoch": 0.16128, "grad_norm": 0.5711337803292439, "learning_rate": 3.774500883464336e-05, "loss": 1.4386, "step": 630 }, { "epoch": 0.161536, "grad_norm": 0.5159006545962662, "learning_rate": 3.7737505929873064e-05, "loss": 1.4705, "step": 631 }, { "epoch": 0.161792, "grad_norm": 0.493604946037014, "learning_rate": 3.77299913120911e-05, "loss": 1.5111, "step": 632 }, { "epoch": 0.162048, "grad_norm": 0.5143390979430464, "learning_rate": 3.772246498625977e-05, "loss": 1.4665, "step": 633 }, { "epoch": 0.162304, "grad_norm": 0.5198568541339368, "learning_rate": 3.771492695734911e-05, "loss": 1.4839, "step": 634 }, { "epoch": 0.16256, "grad_norm": 0.5330600104577458, "learning_rate": 3.770737723033687e-05, "loss": 1.4897, "step": 635 }, { "epoch": 0.162816, "grad_norm": 0.5350729225915506, "learning_rate": 3.769981581020854e-05, "loss": 1.499, "step": 636 }, { "epoch": 0.163072, "grad_norm": 0.5133390460402681, "learning_rate": 3.769224270195731e-05, "loss": 1.4116, "step": 637 }, { "epoch": 0.163328, "grad_norm": 0.5493428046404916, "learning_rate": 3.7684657910584126e-05, "loss": 1.4792, "step": 638 }, { "epoch": 0.163584, "grad_norm": 0.5007097732837082, "learning_rate": 3.767706144109761e-05, "loss": 1.4847, "step": 639 }, { "epoch": 0.16384, "grad_norm": 0.544321180685805, "learning_rate": 3.766945329851412e-05, "loss": 1.4654, "step": 640 }, { "epoch": 0.164096, "grad_norm": 0.5138555751833251, "learning_rate": 3.766183348785772e-05, "loss": 1.4376, "step": 641 }, { "epoch": 0.164352, "grad_norm": 0.5316539176387364, "learning_rate": 3.7654202014160156e-05, "loss": 1.5147, "step": 642 }, { "epoch": 0.164608, "grad_norm": 0.7945075408055546, "learning_rate": 3.7646558882460905e-05, "loss": 1.4844, "step": 643 }, { "epoch": 0.164864, "grad_norm": 0.4873574610896713, "learning_rate": 3.7638904097807126e-05, "loss": 1.496, "step": 644 }, { "epoch": 0.16512, "grad_norm": 0.495015641625426, "learning_rate": 3.7631237665253674e-05, "loss": 1.4455, "step": 645 }, { "epoch": 0.165376, "grad_norm": 0.4838969037064851, "learning_rate": 3.762355958986312e-05, "loss": 1.4342, "step": 646 }, { "epoch": 0.165632, "grad_norm": 0.4970867423330155, "learning_rate": 3.7615869876705665e-05, "loss": 1.5032, "step": 647 }, { "epoch": 0.165888, "grad_norm": 0.49865463928292414, "learning_rate": 3.760816853085927e-05, "loss": 1.4649, "step": 648 }, { "epoch": 0.166144, "grad_norm": 0.5398464692283329, "learning_rate": 3.760045555740951e-05, "loss": 1.463, "step": 649 }, { "epoch": 0.1664, "grad_norm": 0.46044969075030867, "learning_rate": 3.7592730961449696e-05, "loss": 1.4439, "step": 650 }, { "epoch": 0.166656, "grad_norm": 0.4987451269097154, "learning_rate": 3.7584994748080766e-05, "loss": 1.495, "step": 651 }, { "epoch": 0.166912, "grad_norm": 0.45642589375875126, "learning_rate": 3.757724692241136e-05, "loss": 1.4331, "step": 652 }, { "epoch": 0.167168, "grad_norm": 0.4826557350612097, "learning_rate": 3.7569487489557774e-05, "loss": 1.4376, "step": 653 }, { "epoch": 0.167424, "grad_norm": 0.5181139554632707, "learning_rate": 3.756171645464397e-05, "loss": 1.4445, "step": 654 }, { "epoch": 0.16768, "grad_norm": 0.48851146823809705, "learning_rate": 3.755393382280158e-05, "loss": 1.4673, "step": 655 }, { "epoch": 0.167936, "grad_norm": 0.4690354425745252, "learning_rate": 3.754613959916988e-05, "loss": 1.4609, "step": 656 }, { "epoch": 0.168192, "grad_norm": 0.4724438289893957, "learning_rate": 3.75383337888958e-05, "loss": 1.4474, "step": 657 }, { "epoch": 0.168448, "grad_norm": 0.44555793362281726, "learning_rate": 3.7530516397133945e-05, "loss": 1.4201, "step": 658 }, { "epoch": 0.168704, "grad_norm": 0.6212787372444118, "learning_rate": 3.752268742904654e-05, "loss": 1.4805, "step": 659 }, { "epoch": 0.16896, "grad_norm": 0.46121297168423186, "learning_rate": 3.7514846889803464e-05, "loss": 1.4796, "step": 660 }, { "epoch": 0.169216, "grad_norm": 0.47184569588969577, "learning_rate": 3.750699478458225e-05, "loss": 1.4505, "step": 661 }, { "epoch": 0.169472, "grad_norm": 0.4656954533401971, "learning_rate": 3.749913111856805e-05, "loss": 1.4444, "step": 662 }, { "epoch": 0.169728, "grad_norm": 0.5259675670817636, "learning_rate": 3.749125589695366e-05, "loss": 1.4367, "step": 663 }, { "epoch": 0.169984, "grad_norm": 0.4873328702186002, "learning_rate": 3.7483369124939504e-05, "loss": 1.4749, "step": 664 }, { "epoch": 0.17024, "grad_norm": 0.4655814623317464, "learning_rate": 3.7475470807733636e-05, "loss": 1.4381, "step": 665 }, { "epoch": 0.170496, "grad_norm": 0.4931827109254648, "learning_rate": 3.746756095055173e-05, "loss": 1.5149, "step": 666 }, { "epoch": 0.170752, "grad_norm": 0.4911179567194494, "learning_rate": 3.745963955861707e-05, "loss": 1.4626, "step": 667 }, { "epoch": 0.171008, "grad_norm": 0.45248131791078317, "learning_rate": 3.7451706637160594e-05, "loss": 1.4393, "step": 668 }, { "epoch": 0.171264, "grad_norm": 0.49779655051956917, "learning_rate": 3.744376219142082e-05, "loss": 1.4999, "step": 669 }, { "epoch": 0.17152, "grad_norm": 0.7577418007010718, "learning_rate": 3.743580622664387e-05, "loss": 1.4981, "step": 670 }, { "epoch": 0.171776, "grad_norm": 0.6362415017807813, "learning_rate": 3.7427838748083505e-05, "loss": 1.4147, "step": 671 }, { "epoch": 0.172032, "grad_norm": 0.5072735060669674, "learning_rate": 3.7419859761001076e-05, "loss": 1.4226, "step": 672 }, { "epoch": 0.172288, "grad_norm": 0.48307694022744335, "learning_rate": 3.7411869270665504e-05, "loss": 1.4619, "step": 673 }, { "epoch": 0.172544, "grad_norm": 0.4859396496396663, "learning_rate": 3.740386728235336e-05, "loss": 1.4478, "step": 674 }, { "epoch": 0.1728, "grad_norm": 0.518520163527151, "learning_rate": 3.739585380134875e-05, "loss": 1.4659, "step": 675 }, { "epoch": 0.173056, "grad_norm": 0.4793495271657428, "learning_rate": 3.7387828832943426e-05, "loss": 1.4412, "step": 676 }, { "epoch": 0.173312, "grad_norm": 0.5723031771791277, "learning_rate": 3.7379792382436686e-05, "loss": 1.4516, "step": 677 }, { "epoch": 0.173568, "grad_norm": 0.7346066886288481, "learning_rate": 3.737174445513542e-05, "loss": 1.4862, "step": 678 }, { "epoch": 0.173824, "grad_norm": 0.5081885555916911, "learning_rate": 3.7363685056354107e-05, "loss": 1.4746, "step": 679 }, { "epoch": 0.17408, "grad_norm": 0.47154578283033843, "learning_rate": 3.735561419141479e-05, "loss": 1.4833, "step": 680 }, { "epoch": 0.174336, "grad_norm": 0.4821856955099171, "learning_rate": 3.7347531865647085e-05, "loss": 1.492, "step": 681 }, { "epoch": 0.174592, "grad_norm": 0.4808741372679512, "learning_rate": 3.7339438084388186e-05, "loss": 1.4682, "step": 682 }, { "epoch": 0.174848, "grad_norm": 0.4689926710044866, "learning_rate": 3.733133285298283e-05, "loss": 1.412, "step": 683 }, { "epoch": 0.175104, "grad_norm": 0.4739136666298373, "learning_rate": 3.7323216176783344e-05, "loss": 1.4777, "step": 684 }, { "epoch": 0.17536, "grad_norm": 0.48599165640258113, "learning_rate": 3.731508806114959e-05, "loss": 1.4491, "step": 685 }, { "epoch": 0.175616, "grad_norm": 0.48200465340790805, "learning_rate": 3.7306948511448995e-05, "loss": 1.4002, "step": 686 }, { "epoch": 0.175872, "grad_norm": 0.478024669313647, "learning_rate": 3.729879753305653e-05, "loss": 1.4036, "step": 687 }, { "epoch": 0.176128, "grad_norm": 0.4702945080370846, "learning_rate": 3.729063513135472e-05, "loss": 1.4725, "step": 688 }, { "epoch": 0.176384, "grad_norm": 0.48183401358468375, "learning_rate": 3.7282461311733625e-05, "loss": 1.442, "step": 689 }, { "epoch": 0.17664, "grad_norm": 0.48111172202528113, "learning_rate": 3.727427607959085e-05, "loss": 1.4793, "step": 690 }, { "epoch": 0.176896, "grad_norm": 0.4916252800067881, "learning_rate": 3.726607944033155e-05, "loss": 1.4696, "step": 691 }, { "epoch": 0.177152, "grad_norm": 0.508033560450045, "learning_rate": 3.725787139936837e-05, "loss": 1.476, "step": 692 }, { "epoch": 0.177408, "grad_norm": 0.4771154913664944, "learning_rate": 3.7249651962121546e-05, "loss": 1.4514, "step": 693 }, { "epoch": 0.177664, "grad_norm": 0.5409437490557761, "learning_rate": 3.724142113401878e-05, "loss": 1.4154, "step": 694 }, { "epoch": 0.17792, "grad_norm": 0.48600439887575736, "learning_rate": 3.7233178920495337e-05, "loss": 1.4357, "step": 695 }, { "epoch": 0.178176, "grad_norm": 0.4738338468476675, "learning_rate": 3.7224925326993984e-05, "loss": 1.4332, "step": 696 }, { "epoch": 0.178432, "grad_norm": 0.5001509463708051, "learning_rate": 3.7216660358965004e-05, "loss": 1.4811, "step": 697 }, { "epoch": 0.178688, "grad_norm": 0.48243325721776514, "learning_rate": 3.720838402186619e-05, "loss": 1.506, "step": 698 }, { "epoch": 0.178944, "grad_norm": 0.5061747044809768, "learning_rate": 3.720009632116284e-05, "loss": 1.4843, "step": 699 }, { "epoch": 0.1792, "grad_norm": 0.49368302377920203, "learning_rate": 3.719179726232776e-05, "loss": 1.4761, "step": 700 }, { "epoch": 0.179456, "grad_norm": 0.4880094254285324, "learning_rate": 3.718348685084127e-05, "loss": 1.4306, "step": 701 }, { "epoch": 0.179712, "grad_norm": 0.5616887770597707, "learning_rate": 3.717516509219116e-05, "loss": 1.4235, "step": 702 }, { "epoch": 0.179968, "grad_norm": 0.4605491705502186, "learning_rate": 3.716683199187273e-05, "loss": 1.4769, "step": 703 }, { "epoch": 0.180224, "grad_norm": 0.4806293115270143, "learning_rate": 3.7158487555388774e-05, "loss": 1.4758, "step": 704 }, { "epoch": 0.18048, "grad_norm": 0.4491839107398391, "learning_rate": 3.715013178824955e-05, "loss": 1.4049, "step": 705 }, { "epoch": 0.180736, "grad_norm": 0.4834227028108713, "learning_rate": 3.7141764695972826e-05, "loss": 1.4269, "step": 706 }, { "epoch": 0.180992, "grad_norm": 0.4689425321819967, "learning_rate": 3.7133386284083827e-05, "loss": 1.4878, "step": 707 }, { "epoch": 0.181248, "grad_norm": 0.47414622085498964, "learning_rate": 3.7124996558115264e-05, "loss": 1.3898, "step": 708 }, { "epoch": 0.181504, "grad_norm": 0.4829954020026905, "learning_rate": 3.711659552360731e-05, "loss": 1.4791, "step": 709 }, { "epoch": 0.18176, "grad_norm": 0.5978676606850087, "learning_rate": 3.710818318610762e-05, "loss": 1.4729, "step": 710 }, { "epoch": 0.182016, "grad_norm": 0.4939727208532775, "learning_rate": 3.70997595511713e-05, "loss": 1.4198, "step": 711 }, { "epoch": 0.182272, "grad_norm": 0.49515408707729713, "learning_rate": 3.7091324624360915e-05, "loss": 1.4738, "step": 712 }, { "epoch": 0.182528, "grad_norm": 0.47417440850950404, "learning_rate": 3.708287841124651e-05, "loss": 1.4235, "step": 713 }, { "epoch": 0.182784, "grad_norm": 0.5255752273278199, "learning_rate": 3.707442091740555e-05, "loss": 1.4892, "step": 714 }, { "epoch": 0.18304, "grad_norm": 0.5000673101521823, "learning_rate": 3.7065952148422964e-05, "loss": 1.4919, "step": 715 }, { "epoch": 0.183296, "grad_norm": 0.5019433803860871, "learning_rate": 3.705747210989114e-05, "loss": 1.4335, "step": 716 }, { "epoch": 0.183552, "grad_norm": 0.5053894054722073, "learning_rate": 3.704898080740989e-05, "loss": 1.4196, "step": 717 }, { "epoch": 0.183808, "grad_norm": 0.48255015197509354, "learning_rate": 3.7040478246586455e-05, "loss": 1.4798, "step": 718 }, { "epoch": 0.184064, "grad_norm": 0.49431846126325724, "learning_rate": 3.703196443303554e-05, "loss": 1.4619, "step": 719 }, { "epoch": 0.18432, "grad_norm": 0.4650252165379995, "learning_rate": 3.702343937237927e-05, "loss": 1.4242, "step": 720 }, { "epoch": 0.184576, "grad_norm": 0.49721326666157056, "learning_rate": 3.701490307024718e-05, "loss": 1.4753, "step": 721 }, { "epoch": 0.184832, "grad_norm": 0.477050808015703, "learning_rate": 3.700635553227624e-05, "loss": 1.4541, "step": 722 }, { "epoch": 0.185088, "grad_norm": 0.4837377632149249, "learning_rate": 3.699779676411085e-05, "loss": 1.4864, "step": 723 }, { "epoch": 0.185344, "grad_norm": 0.4791408518794529, "learning_rate": 3.698922677140282e-05, "loss": 1.4349, "step": 724 }, { "epoch": 0.1856, "grad_norm": 0.4766021409557764, "learning_rate": 3.698064555981136e-05, "loss": 1.4612, "step": 725 }, { "epoch": 0.185856, "grad_norm": 0.49333610372377656, "learning_rate": 3.6972053135003095e-05, "loss": 1.4764, "step": 726 }, { "epoch": 0.186112, "grad_norm": 0.46686646062848547, "learning_rate": 3.6963449502652065e-05, "loss": 1.4979, "step": 727 }, { "epoch": 0.186368, "grad_norm": 0.5033261014249186, "learning_rate": 3.6954834668439696e-05, "loss": 1.4581, "step": 728 }, { "epoch": 0.186624, "grad_norm": 0.46343053384391353, "learning_rate": 3.694620863805481e-05, "loss": 1.4686, "step": 729 }, { "epoch": 0.18688, "grad_norm": 0.47011260118187753, "learning_rate": 3.6937571417193655e-05, "loss": 1.4574, "step": 730 }, { "epoch": 0.187136, "grad_norm": 0.47261326716617896, "learning_rate": 3.6928923011559826e-05, "loss": 1.4756, "step": 731 }, { "epoch": 0.187392, "grad_norm": 0.49385632479513597, "learning_rate": 3.6920263426864326e-05, "loss": 1.4633, "step": 732 }, { "epoch": 0.187648, "grad_norm": 0.47960057867052397, "learning_rate": 3.691159266882553e-05, "loss": 1.4542, "step": 733 }, { "epoch": 0.187904, "grad_norm": 0.533628733946529, "learning_rate": 3.690291074316921e-05, "loss": 1.398, "step": 734 }, { "epoch": 0.18816, "grad_norm": 0.4724189989723257, "learning_rate": 3.689421765562848e-05, "loss": 1.4341, "step": 735 }, { "epoch": 0.188416, "grad_norm": 0.46042216846702017, "learning_rate": 3.688551341194387e-05, "loss": 1.4933, "step": 736 }, { "epoch": 0.188672, "grad_norm": 0.5143585371098637, "learning_rate": 3.6876798017863234e-05, "loss": 1.4663, "step": 737 }, { "epoch": 0.188928, "grad_norm": 0.470956616666743, "learning_rate": 3.686807147914181e-05, "loss": 1.4381, "step": 738 }, { "epoch": 0.189184, "grad_norm": 0.47002703687021247, "learning_rate": 3.68593338015422e-05, "loss": 1.4263, "step": 739 }, { "epoch": 0.18944, "grad_norm": 0.48844803001338105, "learning_rate": 3.685058499083434e-05, "loss": 1.4685, "step": 740 }, { "epoch": 0.189696, "grad_norm": 0.46472404799056205, "learning_rate": 3.684182505279555e-05, "loss": 1.4374, "step": 741 }, { "epoch": 0.189952, "grad_norm": 0.4905148059461764, "learning_rate": 3.6833053993210454e-05, "loss": 1.4932, "step": 742 }, { "epoch": 0.190208, "grad_norm": 0.4830046226099318, "learning_rate": 3.682427181787107e-05, "loss": 1.44, "step": 743 }, { "epoch": 0.190464, "grad_norm": 0.46966230620668026, "learning_rate": 3.681547853257673e-05, "loss": 1.4665, "step": 744 }, { "epoch": 0.19072, "grad_norm": 0.47445681840910275, "learning_rate": 3.6806674143134084e-05, "loss": 1.4422, "step": 745 }, { "epoch": 0.190976, "grad_norm": 0.4592002328987519, "learning_rate": 3.679785865535716e-05, "loss": 1.4172, "step": 746 }, { "epoch": 0.191232, "grad_norm": 0.49365903145792833, "learning_rate": 3.6789032075067276e-05, "loss": 1.473, "step": 747 }, { "epoch": 0.191488, "grad_norm": 0.4763355892831194, "learning_rate": 3.678019440809308e-05, "loss": 1.4549, "step": 748 }, { "epoch": 0.191744, "grad_norm": 0.45940345523933357, "learning_rate": 3.677134566027057e-05, "loss": 1.4833, "step": 749 }, { "epoch": 0.192, "grad_norm": 0.4830970937752208, "learning_rate": 3.676248583744304e-05, "loss": 1.4435, "step": 750 }, { "epoch": 0.192256, "grad_norm": 0.4512327338076334, "learning_rate": 3.675361494546108e-05, "loss": 1.4113, "step": 751 }, { "epoch": 0.192512, "grad_norm": 0.44796981375520895, "learning_rate": 3.674473299018263e-05, "loss": 1.435, "step": 752 }, { "epoch": 0.192768, "grad_norm": 0.44441736502781654, "learning_rate": 3.67358399774729e-05, "loss": 1.379, "step": 753 }, { "epoch": 0.193024, "grad_norm": 0.45911451601843406, "learning_rate": 3.672693591320441e-05, "loss": 1.4421, "step": 754 }, { "epoch": 0.19328, "grad_norm": 0.4610800402343418, "learning_rate": 3.671802080325701e-05, "loss": 1.4609, "step": 755 }, { "epoch": 0.193536, "grad_norm": 0.46315170781973786, "learning_rate": 3.6709094653517795e-05, "loss": 1.435, "step": 756 }, { "epoch": 0.193792, "grad_norm": 0.46377093430214594, "learning_rate": 3.670015746988118e-05, "loss": 1.4343, "step": 757 }, { "epoch": 0.194048, "grad_norm": 0.4510685806409105, "learning_rate": 3.6691209258248856e-05, "loss": 1.4727, "step": 758 }, { "epoch": 0.194304, "grad_norm": 0.45491905747998546, "learning_rate": 3.6682250024529806e-05, "loss": 1.4532, "step": 759 }, { "epoch": 0.19456, "grad_norm": 0.45986555720298133, "learning_rate": 3.667327977464028e-05, "loss": 1.4567, "step": 760 }, { "epoch": 0.194816, "grad_norm": 0.4781624059016721, "learning_rate": 3.666429851450381e-05, "loss": 1.4763, "step": 761 }, { "epoch": 0.195072, "grad_norm": 0.46519209759210833, "learning_rate": 3.66553062500512e-05, "loss": 1.4453, "step": 762 }, { "epoch": 0.195328, "grad_norm": 0.45677026801590037, "learning_rate": 3.66463029872205e-05, "loss": 1.4327, "step": 763 }, { "epoch": 0.195584, "grad_norm": 0.47067501655346805, "learning_rate": 3.663728873195706e-05, "loss": 1.4398, "step": 764 }, { "epoch": 0.19584, "grad_norm": 0.45734875871697284, "learning_rate": 3.662826349021346e-05, "loss": 1.4768, "step": 765 }, { "epoch": 0.196096, "grad_norm": 0.46940587574844156, "learning_rate": 3.6619227267949546e-05, "loss": 1.4841, "step": 766 }, { "epoch": 0.196352, "grad_norm": 0.5151508689949047, "learning_rate": 3.6610180071132406e-05, "loss": 1.4208, "step": 767 }, { "epoch": 0.196608, "grad_norm": 0.45931076078920907, "learning_rate": 3.660112190573639e-05, "loss": 1.4526, "step": 768 }, { "epoch": 0.196864, "grad_norm": 0.5532466662592809, "learning_rate": 3.659205277774308e-05, "loss": 1.4271, "step": 769 }, { "epoch": 0.19712, "grad_norm": 0.4545172639445102, "learning_rate": 3.65829726931413e-05, "loss": 1.426, "step": 770 }, { "epoch": 0.197376, "grad_norm": 0.4508756295326525, "learning_rate": 3.657388165792711e-05, "loss": 1.4362, "step": 771 }, { "epoch": 0.197632, "grad_norm": 0.4588418128081962, "learning_rate": 3.6564779678103795e-05, "loss": 1.4734, "step": 772 }, { "epoch": 0.197888, "grad_norm": 0.46396276381331936, "learning_rate": 3.6555666759681875e-05, "loss": 1.4817, "step": 773 }, { "epoch": 0.198144, "grad_norm": 0.4418862757637594, "learning_rate": 3.6546542908679105e-05, "loss": 1.4256, "step": 774 }, { "epoch": 0.1984, "grad_norm": 0.44269696032275824, "learning_rate": 3.6537408131120425e-05, "loss": 1.37, "step": 775 }, { "epoch": 0.198656, "grad_norm": 0.47068728215224725, "learning_rate": 3.652826243303802e-05, "loss": 1.4272, "step": 776 }, { "epoch": 0.198912, "grad_norm": 0.447381755116166, "learning_rate": 3.651910582047128e-05, "loss": 1.4493, "step": 777 }, { "epoch": 0.199168, "grad_norm": 0.45861520130904376, "learning_rate": 3.6509938299466796e-05, "loss": 1.4452, "step": 778 }, { "epoch": 0.199424, "grad_norm": 0.44731709964897715, "learning_rate": 3.650075987607837e-05, "loss": 1.4562, "step": 779 }, { "epoch": 0.19968, "grad_norm": 0.44168344296001966, "learning_rate": 3.649157055636699e-05, "loss": 1.432, "step": 780 }, { "epoch": 0.199936, "grad_norm": 0.45551124959587086, "learning_rate": 3.6482370346400866e-05, "loss": 1.4499, "step": 781 }, { "epoch": 0.200192, "grad_norm": 0.4680595228925858, "learning_rate": 3.6473159252255366e-05, "loss": 1.5188, "step": 782 }, { "epoch": 0.200448, "grad_norm": 0.45692743235566396, "learning_rate": 3.646393728001306e-05, "loss": 1.4702, "step": 783 }, { "epoch": 0.200704, "grad_norm": 0.44747454106392787, "learning_rate": 3.645470443576372e-05, "loss": 1.441, "step": 784 }, { "epoch": 0.20096, "grad_norm": 0.44838152911562196, "learning_rate": 3.6445460725604275e-05, "loss": 1.4515, "step": 785 }, { "epoch": 0.201216, "grad_norm": 0.44406753895378415, "learning_rate": 3.6436206155638826e-05, "loss": 1.4203, "step": 786 }, { "epoch": 0.201472, "grad_norm": 0.4701345650128569, "learning_rate": 3.642694073197866e-05, "loss": 1.4326, "step": 787 }, { "epoch": 0.201728, "grad_norm": 0.4564607821441837, "learning_rate": 3.641766446074223e-05, "loss": 1.4644, "step": 788 }, { "epoch": 0.201984, "grad_norm": 0.5001405194965799, "learning_rate": 3.640837734805513e-05, "loss": 1.3867, "step": 789 }, { "epoch": 0.20224, "grad_norm": 0.45280261818981854, "learning_rate": 3.6399079400050155e-05, "loss": 1.4541, "step": 790 }, { "epoch": 0.202496, "grad_norm": 0.4575027220844858, "learning_rate": 3.6389770622867216e-05, "loss": 1.4884, "step": 791 }, { "epoch": 0.202752, "grad_norm": 0.4559779231466351, "learning_rate": 3.6380451022653396e-05, "loss": 1.4331, "step": 792 }, { "epoch": 0.203008, "grad_norm": 0.452317123948773, "learning_rate": 3.637112060556291e-05, "loss": 1.4153, "step": 793 }, { "epoch": 0.203264, "grad_norm": 0.4646911337464457, "learning_rate": 3.6361779377757135e-05, "loss": 1.4635, "step": 794 }, { "epoch": 0.20352, "grad_norm": 0.4635149832446141, "learning_rate": 3.635242734540458e-05, "loss": 1.4731, "step": 795 }, { "epoch": 0.203776, "grad_norm": 0.45884534851476744, "learning_rate": 3.634306451468087e-05, "loss": 1.4468, "step": 796 }, { "epoch": 0.204032, "grad_norm": 0.4544762431441358, "learning_rate": 3.633369089176879e-05, "loss": 1.414, "step": 797 }, { "epoch": 0.204288, "grad_norm": 0.474944708134245, "learning_rate": 3.632430648285824e-05, "loss": 1.4567, "step": 798 }, { "epoch": 0.204544, "grad_norm": 0.44940316172856604, "learning_rate": 3.631491129414622e-05, "loss": 1.4213, "step": 799 }, { "epoch": 0.2048, "grad_norm": 0.4548769371713411, "learning_rate": 3.630550533183689e-05, "loss": 1.4889, "step": 800 }, { "epoch": 0.205056, "grad_norm": 0.45162231427773136, "learning_rate": 3.62960886021415e-05, "loss": 1.4544, "step": 801 }, { "epoch": 0.205312, "grad_norm": 0.45503051541640666, "learning_rate": 3.628666111127841e-05, "loss": 1.4252, "step": 802 }, { "epoch": 0.205568, "grad_norm": 0.46646098174781164, "learning_rate": 3.627722286547309e-05, "loss": 1.4586, "step": 803 }, { "epoch": 0.205824, "grad_norm": 0.5207617344446815, "learning_rate": 3.6267773870958116e-05, "loss": 1.4168, "step": 804 }, { "epoch": 0.20608, "grad_norm": 0.4348409570811274, "learning_rate": 3.625831413397315e-05, "loss": 1.4237, "step": 805 }, { "epoch": 0.206336, "grad_norm": 0.5123807862037675, "learning_rate": 3.624884366076496e-05, "loss": 1.448, "step": 806 }, { "epoch": 0.206592, "grad_norm": 0.4471707287448433, "learning_rate": 3.6239362457587405e-05, "loss": 1.4571, "step": 807 }, { "epoch": 0.206848, "grad_norm": 0.4767021845627687, "learning_rate": 3.622987053070143e-05, "loss": 1.4618, "step": 808 }, { "epoch": 0.207104, "grad_norm": 0.4736383286213043, "learning_rate": 3.6220367886375025e-05, "loss": 1.4976, "step": 809 }, { "epoch": 0.20736, "grad_norm": 0.5230317497824801, "learning_rate": 3.621085453088332e-05, "loss": 1.4596, "step": 810 }, { "epoch": 0.207616, "grad_norm": 0.459736186899135, "learning_rate": 3.620133047050846e-05, "loss": 1.4626, "step": 811 }, { "epoch": 0.207872, "grad_norm": 0.4504462983907288, "learning_rate": 3.619179571153971e-05, "loss": 1.4321, "step": 812 }, { "epoch": 0.208128, "grad_norm": 0.4660932448028296, "learning_rate": 3.618225026027336e-05, "loss": 1.439, "step": 813 }, { "epoch": 0.208384, "grad_norm": 0.44896562896314646, "learning_rate": 3.617269412301278e-05, "loss": 1.4705, "step": 814 }, { "epoch": 0.20864, "grad_norm": 0.45092059364560955, "learning_rate": 3.616312730606839e-05, "loss": 1.448, "step": 815 }, { "epoch": 0.208896, "grad_norm": 0.4619945074066421, "learning_rate": 3.6153549815757665e-05, "loss": 1.4701, "step": 816 }, { "epoch": 0.209152, "grad_norm": 0.4638576733714117, "learning_rate": 3.6143961658405125e-05, "loss": 1.4519, "step": 817 }, { "epoch": 0.209408, "grad_norm": 0.48783427361540643, "learning_rate": 3.6134362840342336e-05, "loss": 1.387, "step": 818 }, { "epoch": 0.209664, "grad_norm": 0.47238950806209135, "learning_rate": 3.612475336790792e-05, "loss": 1.4555, "step": 819 }, { "epoch": 0.20992, "grad_norm": 0.4461048762613441, "learning_rate": 3.6115133247447495e-05, "loss": 1.4379, "step": 820 }, { "epoch": 0.210176, "grad_norm": 0.4497081123664542, "learning_rate": 3.610550248531375e-05, "loss": 1.417, "step": 821 }, { "epoch": 0.210432, "grad_norm": 0.4478174355250229, "learning_rate": 3.609586108786638e-05, "loss": 1.3755, "step": 822 }, { "epoch": 0.210688, "grad_norm": 0.6191801428730218, "learning_rate": 3.608620906147211e-05, "loss": 1.46, "step": 823 }, { "epoch": 0.210944, "grad_norm": 0.43865434380083596, "learning_rate": 3.6076546412504685e-05, "loss": 1.3665, "step": 824 }, { "epoch": 0.2112, "grad_norm": 0.46102716217578815, "learning_rate": 3.6066873147344855e-05, "loss": 1.4565, "step": 825 }, { "epoch": 0.211456, "grad_norm": 0.46596669761121484, "learning_rate": 3.60571892723804e-05, "loss": 1.4264, "step": 826 }, { "epoch": 0.211712, "grad_norm": 0.47108078258071157, "learning_rate": 3.6047494794006076e-05, "loss": 1.4807, "step": 827 }, { "epoch": 0.211968, "grad_norm": 0.447382530612611, "learning_rate": 3.603778971862368e-05, "loss": 1.4189, "step": 828 }, { "epoch": 0.212224, "grad_norm": 0.45889576853147074, "learning_rate": 3.602807405264197e-05, "loss": 1.4696, "step": 829 }, { "epoch": 0.21248, "grad_norm": 0.4470715047470063, "learning_rate": 3.6018347802476726e-05, "loss": 1.4355, "step": 830 }, { "epoch": 0.212736, "grad_norm": 0.44725003576501904, "learning_rate": 3.6008610974550695e-05, "loss": 1.4349, "step": 831 }, { "epoch": 0.212992, "grad_norm": 0.44511956010906895, "learning_rate": 3.5998863575293615e-05, "loss": 1.4006, "step": 832 }, { "epoch": 0.213248, "grad_norm": 0.4725224443802951, "learning_rate": 3.598910561114221e-05, "loss": 1.4823, "step": 833 }, { "epoch": 0.213504, "grad_norm": 0.4468391985029001, "learning_rate": 3.5979337088540196e-05, "loss": 1.4267, "step": 834 }, { "epoch": 0.21376, "grad_norm": 0.45018489097674963, "learning_rate": 3.596955801393822e-05, "loss": 1.4197, "step": 835 }, { "epoch": 0.214016, "grad_norm": 0.4473563636509467, "learning_rate": 3.595976839379394e-05, "loss": 1.441, "step": 836 }, { "epoch": 0.214272, "grad_norm": 0.4686279590465284, "learning_rate": 3.594996823457195e-05, "loss": 1.4295, "step": 837 }, { "epoch": 0.214528, "grad_norm": 0.4695134403853054, "learning_rate": 3.594015754274381e-05, "loss": 1.3955, "step": 838 }, { "epoch": 0.214784, "grad_norm": 0.4614248889234679, "learning_rate": 3.5930336324788044e-05, "loss": 1.4499, "step": 839 }, { "epoch": 0.21504, "grad_norm": 0.4731496378151073, "learning_rate": 3.592050458719012e-05, "loss": 1.4287, "step": 840 }, { "epoch": 0.215296, "grad_norm": 0.6173954696775312, "learning_rate": 3.591066233644245e-05, "loss": 1.4411, "step": 841 }, { "epoch": 0.215552, "grad_norm": 0.46537449849811974, "learning_rate": 3.590080957904439e-05, "loss": 1.4431, "step": 842 }, { "epoch": 0.215808, "grad_norm": 0.4863362936600211, "learning_rate": 3.589094632150225e-05, "loss": 1.4503, "step": 843 }, { "epoch": 0.216064, "grad_norm": 0.4540882864080526, "learning_rate": 3.5881072570329246e-05, "loss": 1.4193, "step": 844 }, { "epoch": 0.21632, "grad_norm": 0.47353306881464596, "learning_rate": 3.587118833204554e-05, "loss": 1.4313, "step": 845 }, { "epoch": 0.216576, "grad_norm": 0.43443280437311405, "learning_rate": 3.586129361317821e-05, "loss": 1.3893, "step": 846 }, { "epoch": 0.216832, "grad_norm": 0.46863556095653064, "learning_rate": 3.585138842026128e-05, "loss": 1.4558, "step": 847 }, { "epoch": 0.217088, "grad_norm": 0.46147326841517294, "learning_rate": 3.584147275983565e-05, "loss": 1.4208, "step": 848 }, { "epoch": 0.217344, "grad_norm": 0.4419767536875721, "learning_rate": 3.5831546638449176e-05, "loss": 1.4774, "step": 849 }, { "epoch": 0.2176, "grad_norm": 0.46709183364111473, "learning_rate": 3.5821610062656576e-05, "loss": 1.4332, "step": 850 }, { "epoch": 0.217856, "grad_norm": 0.43366345622333197, "learning_rate": 3.5811663039019515e-05, "loss": 1.4589, "step": 851 }, { "epoch": 0.218112, "grad_norm": 0.8690166031861539, "learning_rate": 3.580170557410653e-05, "loss": 1.4928, "step": 852 }, { "epoch": 0.218368, "grad_norm": 0.48403149174316906, "learning_rate": 3.579173767449306e-05, "loss": 1.4997, "step": 853 }, { "epoch": 0.218624, "grad_norm": 0.4682968680756923, "learning_rate": 3.5781759346761436e-05, "loss": 1.4918, "step": 854 }, { "epoch": 0.21888, "grad_norm": 0.4457127520054066, "learning_rate": 3.577177059750088e-05, "loss": 1.4432, "step": 855 }, { "epoch": 0.219136, "grad_norm": 0.47481852528620266, "learning_rate": 3.5761771433307484e-05, "loss": 1.4319, "step": 856 }, { "epoch": 0.219392, "grad_norm": 0.44455023741613414, "learning_rate": 3.575176186078423e-05, "loss": 1.4044, "step": 857 }, { "epoch": 0.219648, "grad_norm": 0.4384367619677731, "learning_rate": 3.574174188654095e-05, "loss": 1.3796, "step": 858 }, { "epoch": 0.219904, "grad_norm": 0.4519523412451532, "learning_rate": 3.573171151719439e-05, "loss": 1.4814, "step": 859 }, { "epoch": 0.22016, "grad_norm": 0.4316309401500639, "learning_rate": 3.572167075936812e-05, "loss": 1.421, "step": 860 }, { "epoch": 0.220416, "grad_norm": 0.4520666384599931, "learning_rate": 3.571161961969257e-05, "loss": 1.4671, "step": 861 }, { "epoch": 0.220672, "grad_norm": 0.42921942682765796, "learning_rate": 3.5701558104805056e-05, "loss": 1.4075, "step": 862 }, { "epoch": 0.220928, "grad_norm": 0.4596437013341911, "learning_rate": 3.5691486221349716e-05, "loss": 1.4885, "step": 863 }, { "epoch": 0.221184, "grad_norm": 0.4410163086601325, "learning_rate": 3.568140397597755e-05, "loss": 1.4476, "step": 864 }, { "epoch": 0.22144, "grad_norm": 0.448878647999851, "learning_rate": 3.5671311375346396e-05, "loss": 1.4394, "step": 865 }, { "epoch": 0.221696, "grad_norm": 0.4407542992747404, "learning_rate": 3.5661208426120936e-05, "loss": 1.4153, "step": 866 }, { "epoch": 0.221952, "grad_norm": 0.4471569353757589, "learning_rate": 3.5651095134972675e-05, "loss": 1.4128, "step": 867 }, { "epoch": 0.222208, "grad_norm": 0.46010435781203535, "learning_rate": 3.564097150857996e-05, "loss": 1.4565, "step": 868 }, { "epoch": 0.222464, "grad_norm": 0.4635963573709651, "learning_rate": 3.563083755362795e-05, "loss": 1.4753, "step": 869 }, { "epoch": 0.22272, "grad_norm": 0.45296636411408797, "learning_rate": 3.5620693276808636e-05, "loss": 1.43, "step": 870 }, { "epoch": 0.222976, "grad_norm": 0.48554147437404693, "learning_rate": 3.56105386848208e-05, "loss": 1.4635, "step": 871 }, { "epoch": 0.223232, "grad_norm": 0.4412503509821529, "learning_rate": 3.560037378437009e-05, "loss": 1.4546, "step": 872 }, { "epoch": 0.223488, "grad_norm": 0.4616564937072791, "learning_rate": 3.55901985821689e-05, "loss": 1.4781, "step": 873 }, { "epoch": 0.223744, "grad_norm": 0.4410249479054077, "learning_rate": 3.558001308493646e-05, "loss": 1.4383, "step": 874 }, { "epoch": 0.224, "grad_norm": 0.45713132279155816, "learning_rate": 3.556981729939879e-05, "loss": 1.4322, "step": 875 }, { "epoch": 0.224256, "grad_norm": 0.44717743337287874, "learning_rate": 3.555961123228871e-05, "loss": 1.4378, "step": 876 }, { "epoch": 0.224512, "grad_norm": 0.45932042445201127, "learning_rate": 3.554939489034582e-05, "loss": 1.4151, "step": 877 }, { "epoch": 0.224768, "grad_norm": 0.44338562280806193, "learning_rate": 3.5539168280316514e-05, "loss": 1.425, "step": 878 }, { "epoch": 0.225024, "grad_norm": 0.4652361071614257, "learning_rate": 3.552893140895396e-05, "loss": 1.4614, "step": 879 }, { "epoch": 0.22528, "grad_norm": 0.4927838123745844, "learning_rate": 3.551868428301811e-05, "loss": 1.5102, "step": 880 }, { "epoch": 0.225536, "grad_norm": 0.4658091994670612, "learning_rate": 3.550842690927567e-05, "loss": 1.4125, "step": 881 }, { "epoch": 0.225792, "grad_norm": 0.4562348811538113, "learning_rate": 3.549815929450015e-05, "loss": 1.4112, "step": 882 }, { "epoch": 0.226048, "grad_norm": 0.45055802924084537, "learning_rate": 3.548788144547176e-05, "loss": 1.4397, "step": 883 }, { "epoch": 0.226304, "grad_norm": 0.44549937773237536, "learning_rate": 3.547759336897755e-05, "loss": 1.4188, "step": 884 }, { "epoch": 0.22656, "grad_norm": 0.4518598072026557, "learning_rate": 3.546729507181124e-05, "loss": 1.4139, "step": 885 }, { "epoch": 0.226816, "grad_norm": 0.46965867493303237, "learning_rate": 3.545698656077336e-05, "loss": 1.4642, "step": 886 }, { "epoch": 0.227072, "grad_norm": 0.447646521939099, "learning_rate": 3.5446667842671165e-05, "loss": 1.4297, "step": 887 }, { "epoch": 0.227328, "grad_norm": 0.4653145025348672, "learning_rate": 3.5436338924318655e-05, "loss": 1.4619, "step": 888 }, { "epoch": 0.227584, "grad_norm": 0.44904405354389765, "learning_rate": 3.542599981253654e-05, "loss": 1.4244, "step": 889 }, { "epoch": 0.22784, "grad_norm": 0.4502027699866497, "learning_rate": 3.541565051415229e-05, "loss": 1.372, "step": 890 }, { "epoch": 0.228096, "grad_norm": 0.4573099145423407, "learning_rate": 3.5405291036000106e-05, "loss": 1.4387, "step": 891 }, { "epoch": 0.228352, "grad_norm": 0.45492528684437367, "learning_rate": 3.539492138492088e-05, "loss": 1.3732, "step": 892 }, { "epoch": 0.228608, "grad_norm": 0.44881247790223366, "learning_rate": 3.538454156776225e-05, "loss": 1.4001, "step": 893 }, { "epoch": 0.228864, "grad_norm": 0.46080139940224124, "learning_rate": 3.537415159137856e-05, "loss": 1.4577, "step": 894 }, { "epoch": 0.22912, "grad_norm": 0.4633337052245069, "learning_rate": 3.536375146263085e-05, "loss": 1.4956, "step": 895 }, { "epoch": 0.229376, "grad_norm": 0.45084353848061176, "learning_rate": 3.535334118838688e-05, "loss": 1.4344, "step": 896 }, { "epoch": 0.229632, "grad_norm": 0.47855408463606197, "learning_rate": 3.5342920775521105e-05, "loss": 1.3802, "step": 897 }, { "epoch": 0.229888, "grad_norm": 0.42919002715914023, "learning_rate": 3.5332490230914666e-05, "loss": 1.4556, "step": 898 }, { "epoch": 0.230144, "grad_norm": 0.45692038197991075, "learning_rate": 3.5322049561455415e-05, "loss": 1.4184, "step": 899 }, { "epoch": 0.2304, "grad_norm": 0.4240597464637973, "learning_rate": 3.531159877403787e-05, "loss": 1.4079, "step": 900 }, { "epoch": 0.230656, "grad_norm": 0.47541987226950216, "learning_rate": 3.5301137875563234e-05, "loss": 1.4152, "step": 901 }, { "epoch": 0.230912, "grad_norm": 0.4360433996520917, "learning_rate": 3.52906668729394e-05, "loss": 1.4499, "step": 902 }, { "epoch": 0.231168, "grad_norm": 0.4601324602891305, "learning_rate": 3.528018577308092e-05, "loss": 1.4248, "step": 903 }, { "epoch": 0.231424, "grad_norm": 0.44550797836289335, "learning_rate": 3.526969458290901e-05, "loss": 1.3967, "step": 904 }, { "epoch": 0.23168, "grad_norm": 0.46466046515146925, "learning_rate": 3.525919330935157e-05, "loss": 1.4707, "step": 905 }, { "epoch": 0.231936, "grad_norm": 0.4739550055141789, "learning_rate": 3.5248681959343134e-05, "loss": 1.4413, "step": 906 }, { "epoch": 0.232192, "grad_norm": 0.46647827174953926, "learning_rate": 3.523816053982491e-05, "loss": 1.4281, "step": 907 }, { "epoch": 0.232448, "grad_norm": 0.46069258024961396, "learning_rate": 3.522762905774473e-05, "loss": 1.4925, "step": 908 }, { "epoch": 0.232704, "grad_norm": 0.4578784012913905, "learning_rate": 3.5217087520057104e-05, "loss": 1.4534, "step": 909 }, { "epoch": 0.23296, "grad_norm": 0.4454753001477312, "learning_rate": 3.5206535933723176e-05, "loss": 1.4192, "step": 910 }, { "epoch": 0.233216, "grad_norm": 0.45421816655951225, "learning_rate": 3.5195974305710686e-05, "loss": 1.4427, "step": 911 }, { "epoch": 0.233472, "grad_norm": 0.4490127589491534, "learning_rate": 3.518540264299406e-05, "loss": 1.4016, "step": 912 }, { "epoch": 0.233728, "grad_norm": 0.4366203378136273, "learning_rate": 3.5174820952554315e-05, "loss": 1.3855, "step": 913 }, { "epoch": 0.233984, "grad_norm": 0.44468041682079235, "learning_rate": 3.51642292413791e-05, "loss": 1.4152, "step": 914 }, { "epoch": 0.23424, "grad_norm": 0.43867625176718084, "learning_rate": 3.5153627516462675e-05, "loss": 1.482, "step": 915 }, { "epoch": 0.234496, "grad_norm": 0.46086789768893743, "learning_rate": 3.5143015784805936e-05, "loss": 1.423, "step": 916 }, { "epoch": 0.234752, "grad_norm": 0.43066697230713963, "learning_rate": 3.513239405341635e-05, "loss": 1.4294, "step": 917 }, { "epoch": 0.235008, "grad_norm": 0.43799191965319134, "learning_rate": 3.512176232930802e-05, "loss": 1.4121, "step": 918 }, { "epoch": 0.235264, "grad_norm": 0.4842481596239167, "learning_rate": 3.5111120619501634e-05, "loss": 1.493, "step": 919 }, { "epoch": 0.23552, "grad_norm": 0.4279747857679062, "learning_rate": 3.510046893102446e-05, "loss": 1.4338, "step": 920 }, { "epoch": 0.235776, "grad_norm": 0.45756852139816373, "learning_rate": 3.508980727091039e-05, "loss": 1.4333, "step": 921 }, { "epoch": 0.236032, "grad_norm": 0.4486105560370268, "learning_rate": 3.5079135646199874e-05, "loss": 1.4665, "step": 922 }, { "epoch": 0.236288, "grad_norm": 0.43191304608258113, "learning_rate": 3.506845406393995e-05, "loss": 1.4383, "step": 923 }, { "epoch": 0.236544, "grad_norm": 0.4402610923494255, "learning_rate": 3.505776253118422e-05, "loss": 1.4572, "step": 924 }, { "epoch": 0.2368, "grad_norm": 0.4470400059832819, "learning_rate": 3.504706105499288e-05, "loss": 1.4452, "step": 925 }, { "epoch": 0.237056, "grad_norm": 0.4317959250025687, "learning_rate": 3.503634964243268e-05, "loss": 1.4008, "step": 926 }, { "epoch": 0.237312, "grad_norm": 0.4490832662406321, "learning_rate": 3.502562830057692e-05, "loss": 1.4472, "step": 927 }, { "epoch": 0.237568, "grad_norm": 0.4457825493520661, "learning_rate": 3.501489703650548e-05, "loss": 1.4335, "step": 928 }, { "epoch": 0.237824, "grad_norm": 0.4351634660935205, "learning_rate": 3.500415585730477e-05, "loss": 1.4003, "step": 929 }, { "epoch": 0.23808, "grad_norm": 0.4885225929546436, "learning_rate": 3.499340477006777e-05, "loss": 1.4734, "step": 930 }, { "epoch": 0.238336, "grad_norm": 0.4556235308025768, "learning_rate": 3.498264378189398e-05, "loss": 1.4339, "step": 931 }, { "epoch": 0.238592, "grad_norm": 0.4419259292218684, "learning_rate": 3.497187289988946e-05, "loss": 1.4713, "step": 932 }, { "epoch": 0.238848, "grad_norm": 0.44043595385150985, "learning_rate": 3.4961092131166783e-05, "loss": 1.3921, "step": 933 }, { "epoch": 0.239104, "grad_norm": 0.44455364945708115, "learning_rate": 3.495030148284506e-05, "loss": 1.4704, "step": 934 }, { "epoch": 0.23936, "grad_norm": 0.4420177524636941, "learning_rate": 3.4939500962049925e-05, "loss": 1.4188, "step": 935 }, { "epoch": 0.239616, "grad_norm": 0.42233265302722817, "learning_rate": 3.492869057591353e-05, "loss": 1.4258, "step": 936 }, { "epoch": 0.239872, "grad_norm": 0.4532793942780002, "learning_rate": 3.4917870331574555e-05, "loss": 1.4231, "step": 937 }, { "epoch": 0.240128, "grad_norm": 0.4349911554670856, "learning_rate": 3.490704023617817e-05, "loss": 1.4043, "step": 938 }, { "epoch": 0.240384, "grad_norm": 0.455098266486817, "learning_rate": 3.489620029687606e-05, "loss": 1.4187, "step": 939 }, { "epoch": 0.24064, "grad_norm": 0.4227489801709916, "learning_rate": 3.488535052082641e-05, "loss": 1.4038, "step": 940 }, { "epoch": 0.240896, "grad_norm": 0.4465570310466254, "learning_rate": 3.4874490915193904e-05, "loss": 1.3604, "step": 941 }, { "epoch": 0.241152, "grad_norm": 0.4278854623964549, "learning_rate": 3.48636214871497e-05, "loss": 1.4159, "step": 942 }, { "epoch": 0.241408, "grad_norm": 0.4314271056468017, "learning_rate": 3.485274224387147e-05, "loss": 1.3796, "step": 943 }, { "epoch": 0.241664, "grad_norm": 0.45032212849186587, "learning_rate": 3.484185319254334e-05, "loss": 1.4706, "step": 944 }, { "epoch": 0.24192, "grad_norm": 0.46227312038582874, "learning_rate": 3.483095434035594e-05, "loss": 1.4106, "step": 945 }, { "epoch": 0.242176, "grad_norm": 0.44790477242324117, "learning_rate": 3.4820045694506354e-05, "loss": 1.4689, "step": 946 }, { "epoch": 0.242432, "grad_norm": 0.44688685866097144, "learning_rate": 3.480912726219812e-05, "loss": 1.4331, "step": 947 }, { "epoch": 0.242688, "grad_norm": 0.45058692640925385, "learning_rate": 3.479819905064127e-05, "loss": 1.4484, "step": 948 }, { "epoch": 0.242944, "grad_norm": 0.42111017883033847, "learning_rate": 3.478726106705229e-05, "loss": 1.4302, "step": 949 }, { "epoch": 0.2432, "grad_norm": 0.43284840963226584, "learning_rate": 3.477631331865408e-05, "loss": 1.447, "step": 950 }, { "epoch": 0.243456, "grad_norm": 0.4282124874745718, "learning_rate": 3.476535581267604e-05, "loss": 1.4419, "step": 951 }, { "epoch": 0.243712, "grad_norm": 0.5000892409442088, "learning_rate": 3.4754388556353975e-05, "loss": 1.4384, "step": 952 }, { "epoch": 0.243968, "grad_norm": 0.437808391811369, "learning_rate": 3.474341155693015e-05, "loss": 1.4044, "step": 953 }, { "epoch": 0.244224, "grad_norm": 0.4261688588576915, "learning_rate": 3.4732424821653264e-05, "loss": 1.4221, "step": 954 }, { "epoch": 0.24448, "grad_norm": 0.45446959065478565, "learning_rate": 3.472142835777843e-05, "loss": 1.4587, "step": 955 }, { "epoch": 0.244736, "grad_norm": 0.4336286614191386, "learning_rate": 3.471042217256719e-05, "loss": 1.3963, "step": 956 }, { "epoch": 0.244992, "grad_norm": 0.4510498464342703, "learning_rate": 3.469940627328751e-05, "loss": 1.432, "step": 957 }, { "epoch": 0.245248, "grad_norm": 0.4336455219214784, "learning_rate": 3.468838066721378e-05, "loss": 1.4191, "step": 958 }, { "epoch": 0.245504, "grad_norm": 0.48064724030236716, "learning_rate": 3.4677345361626775e-05, "loss": 1.452, "step": 959 }, { "epoch": 0.24576, "grad_norm": 0.4274295696997247, "learning_rate": 3.4666300363813706e-05, "loss": 1.4437, "step": 960 }, { "epoch": 0.246016, "grad_norm": 0.578011845147695, "learning_rate": 3.465524568106816e-05, "loss": 1.4364, "step": 961 }, { "epoch": 0.246272, "grad_norm": 0.453456942161563, "learning_rate": 3.4644181320690134e-05, "loss": 1.4038, "step": 962 }, { "epoch": 0.246528, "grad_norm": 0.4983859308605308, "learning_rate": 3.4633107289985996e-05, "loss": 1.4394, "step": 963 }, { "epoch": 0.246784, "grad_norm": 0.473652556116915, "learning_rate": 3.462202359626852e-05, "loss": 1.4687, "step": 964 }, { "epoch": 0.24704, "grad_norm": 0.4392706175781353, "learning_rate": 3.461093024685686e-05, "loss": 1.4501, "step": 965 }, { "epoch": 0.247296, "grad_norm": 0.4493452178042283, "learning_rate": 3.459982724907654e-05, "loss": 1.428, "step": 966 }, { "epoch": 0.247552, "grad_norm": 0.434453604464245, "learning_rate": 3.4588714610259444e-05, "loss": 1.4207, "step": 967 }, { "epoch": 0.247808, "grad_norm": 0.45005027420718363, "learning_rate": 3.4577592337743834e-05, "loss": 1.3774, "step": 968 }, { "epoch": 0.248064, "grad_norm": 0.44151944570975715, "learning_rate": 3.456646043887435e-05, "loss": 1.4284, "step": 969 }, { "epoch": 0.24832, "grad_norm": 0.4537005100990312, "learning_rate": 3.455531892100196e-05, "loss": 1.4298, "step": 970 }, { "epoch": 0.248576, "grad_norm": 0.447138459673921, "learning_rate": 3.4544167791484e-05, "loss": 1.4121, "step": 971 }, { "epoch": 0.248832, "grad_norm": 0.4546885780839055, "learning_rate": 3.453300705768416e-05, "loss": 1.4408, "step": 972 }, { "epoch": 0.249088, "grad_norm": 0.4589454927084073, "learning_rate": 3.452183672697244e-05, "loss": 1.4117, "step": 973 }, { "epoch": 0.249344, "grad_norm": 0.4544498078183396, "learning_rate": 3.451065680672522e-05, "loss": 1.4374, "step": 974 }, { "epoch": 0.2496, "grad_norm": 0.4465668176926965, "learning_rate": 3.449946730432517e-05, "loss": 1.416, "step": 975 }, { "epoch": 0.249856, "grad_norm": 0.42919312920462227, "learning_rate": 3.448826822716133e-05, "loss": 1.4194, "step": 976 }, { "epoch": 0.250112, "grad_norm": 0.44610945903061555, "learning_rate": 3.447705958262903e-05, "loss": 1.4256, "step": 977 }, { "epoch": 0.250368, "grad_norm": 0.45220100030175053, "learning_rate": 3.446584137812993e-05, "loss": 1.4311, "step": 978 }, { "epoch": 0.250624, "grad_norm": 0.43055243003794796, "learning_rate": 3.445461362107201e-05, "loss": 1.4435, "step": 979 }, { "epoch": 0.25088, "grad_norm": 0.44172772018344914, "learning_rate": 3.444337631886955e-05, "loss": 1.4692, "step": 980 }, { "epoch": 0.251136, "grad_norm": 0.44030475318337275, "learning_rate": 3.443212947894312e-05, "loss": 1.4482, "step": 981 }, { "epoch": 0.251392, "grad_norm": 0.415691608094687, "learning_rate": 3.442087310871963e-05, "loss": 1.3929, "step": 982 }, { "epoch": 0.251648, "grad_norm": 0.44304165148469893, "learning_rate": 3.440960721563222e-05, "loss": 1.4521, "step": 983 }, { "epoch": 0.251904, "grad_norm": 0.4172736072418633, "learning_rate": 3.439833180712038e-05, "loss": 1.4027, "step": 984 }, { "epoch": 0.25216, "grad_norm": 0.45109771921711805, "learning_rate": 3.438704689062985e-05, "loss": 1.468, "step": 985 }, { "epoch": 0.252416, "grad_norm": 0.41347473200382073, "learning_rate": 3.437575247361266e-05, "loss": 1.4347, "step": 986 }, { "epoch": 0.252672, "grad_norm": 0.43592305242084495, "learning_rate": 3.43644485635271e-05, "loss": 1.3982, "step": 987 }, { "epoch": 0.252928, "grad_norm": 0.4278707954991748, "learning_rate": 3.4353135167837746e-05, "loss": 1.4202, "step": 988 }, { "epoch": 0.253184, "grad_norm": 0.43683805167400386, "learning_rate": 3.434181229401543e-05, "loss": 1.3886, "step": 989 }, { "epoch": 0.25344, "grad_norm": 0.4230244794105691, "learning_rate": 3.4330479949537244e-05, "loss": 1.4486, "step": 990 }, { "epoch": 0.253696, "grad_norm": 0.46454049363597516, "learning_rate": 3.431913814188653e-05, "loss": 1.4418, "step": 991 }, { "epoch": 0.253952, "grad_norm": 0.43683065193530624, "learning_rate": 3.430778687855288e-05, "loss": 1.4614, "step": 992 }, { "epoch": 0.254208, "grad_norm": 0.4280593917776047, "learning_rate": 3.429642616703214e-05, "loss": 1.426, "step": 993 }, { "epoch": 0.254464, "grad_norm": 0.44574625900574566, "learning_rate": 3.4285056014826384e-05, "loss": 1.444, "step": 994 }, { "epoch": 0.25472, "grad_norm": 0.47125727307256127, "learning_rate": 3.427367642944393e-05, "loss": 1.4313, "step": 995 }, { "epoch": 0.254976, "grad_norm": 0.4466182459406985, "learning_rate": 3.42622874183993e-05, "loss": 1.4231, "step": 996 }, { "epoch": 0.255232, "grad_norm": 0.43873867769444513, "learning_rate": 3.4250888989213284e-05, "loss": 1.4379, "step": 997 }, { "epoch": 0.255488, "grad_norm": 0.44269828190317606, "learning_rate": 3.4239481149412856e-05, "loss": 1.3736, "step": 998 }, { "epoch": 0.255744, "grad_norm": 0.4350156597314667, "learning_rate": 3.42280639065312e-05, "loss": 1.4446, "step": 999 }, { "epoch": 0.256, "grad_norm": 0.42429994440064317, "learning_rate": 3.4216637268107756e-05, "loss": 1.3959, "step": 1000 }, { "epoch": 0.256256, "grad_norm": 0.5709669353474612, "learning_rate": 3.420520124168811e-05, "loss": 1.433, "step": 1001 }, { "epoch": 0.256512, "grad_norm": 0.42665045231310056, "learning_rate": 3.419375583482409e-05, "loss": 1.4015, "step": 1002 }, { "epoch": 0.256768, "grad_norm": 0.4294521621964963, "learning_rate": 3.4182301055073685e-05, "loss": 1.4472, "step": 1003 }, { "epoch": 0.257024, "grad_norm": 0.45581245450102015, "learning_rate": 3.4170836910001106e-05, "loss": 1.4398, "step": 1004 }, { "epoch": 0.25728, "grad_norm": 0.4271422787799621, "learning_rate": 3.415936340717673e-05, "loss": 1.4221, "step": 1005 }, { "epoch": 0.257536, "grad_norm": 0.4369725844285113, "learning_rate": 3.414788055417711e-05, "loss": 1.3866, "step": 1006 }, { "epoch": 0.257792, "grad_norm": 0.4426497566352074, "learning_rate": 3.413638835858499e-05, "loss": 1.4413, "step": 1007 }, { "epoch": 0.258048, "grad_norm": 0.4259937511799618, "learning_rate": 3.4124886827989266e-05, "loss": 1.4084, "step": 1008 }, { "epoch": 0.258304, "grad_norm": 0.4322674807350434, "learning_rate": 3.4113375969985014e-05, "loss": 1.4824, "step": 1009 }, { "epoch": 0.25856, "grad_norm": 0.42823381043090136, "learning_rate": 3.410185579217346e-05, "loss": 1.446, "step": 1010 }, { "epoch": 0.258816, "grad_norm": 0.43528220386194627, "learning_rate": 3.409032630216197e-05, "loss": 1.427, "step": 1011 }, { "epoch": 0.259072, "grad_norm": 0.452054793422249, "learning_rate": 3.40787875075641e-05, "loss": 1.4119, "step": 1012 }, { "epoch": 0.259328, "grad_norm": 0.4250279989064599, "learning_rate": 3.4067239415999495e-05, "loss": 1.3977, "step": 1013 }, { "epoch": 0.259584, "grad_norm": 0.44126424842155326, "learning_rate": 3.4055682035093995e-05, "loss": 1.4529, "step": 1014 }, { "epoch": 0.25984, "grad_norm": 0.4256684350588182, "learning_rate": 3.404411537247954e-05, "loss": 1.4067, "step": 1015 }, { "epoch": 0.260096, "grad_norm": 0.4335721487876929, "learning_rate": 3.403253943579421e-05, "loss": 1.4464, "step": 1016 }, { "epoch": 0.260352, "grad_norm": 0.4303143452203573, "learning_rate": 3.4020954232682206e-05, "loss": 1.4215, "step": 1017 }, { "epoch": 0.260608, "grad_norm": 1.0123183357848653, "learning_rate": 3.400935977079385e-05, "loss": 1.4353, "step": 1018 }, { "epoch": 0.260864, "grad_norm": 0.6482977388833904, "learning_rate": 3.399775605778557e-05, "loss": 1.438, "step": 1019 }, { "epoch": 0.26112, "grad_norm": 0.44967285191479667, "learning_rate": 3.398614310131993e-05, "loss": 1.4229, "step": 1020 }, { "epoch": 0.261376, "grad_norm": 0.6057682988476251, "learning_rate": 3.397452090906555e-05, "loss": 1.4651, "step": 1021 }, { "epoch": 0.261632, "grad_norm": 0.43317326170891146, "learning_rate": 3.39628894886972e-05, "loss": 1.4256, "step": 1022 }, { "epoch": 0.261888, "grad_norm": 0.45491065500195377, "learning_rate": 3.395124884789571e-05, "loss": 1.4433, "step": 1023 }, { "epoch": 0.262144, "grad_norm": 0.4437203654837756, "learning_rate": 3.3939598994348013e-05, "loss": 1.4422, "step": 1024 }, { "epoch": 0.2624, "grad_norm": 0.4191056580718858, "learning_rate": 3.392793993574712e-05, "loss": 1.4045, "step": 1025 }, { "epoch": 0.262656, "grad_norm": 0.4398937026570217, "learning_rate": 3.3916271679792126e-05, "loss": 1.4321, "step": 1026 }, { "epoch": 0.262912, "grad_norm": 0.42128521009592906, "learning_rate": 3.39045942341882e-05, "loss": 1.4127, "step": 1027 }, { "epoch": 0.263168, "grad_norm": 0.4120713687464483, "learning_rate": 3.389290760664657e-05, "loss": 1.3566, "step": 1028 }, { "epoch": 0.263424, "grad_norm": 0.44944326334583873, "learning_rate": 3.388121180488453e-05, "loss": 1.4551, "step": 1029 }, { "epoch": 0.26368, "grad_norm": 0.4231223193237265, "learning_rate": 3.386950683662544e-05, "loss": 1.3996, "step": 1030 }, { "epoch": 0.263936, "grad_norm": 0.4179059780817227, "learning_rate": 3.385779270959872e-05, "loss": 1.393, "step": 1031 }, { "epoch": 0.264192, "grad_norm": 0.44701583616252405, "learning_rate": 3.38460694315398e-05, "loss": 1.4247, "step": 1032 }, { "epoch": 0.264448, "grad_norm": 0.4467420507604214, "learning_rate": 3.38343370101902e-05, "loss": 1.4173, "step": 1033 }, { "epoch": 0.264704, "grad_norm": 0.4203918013562224, "learning_rate": 3.382259545329746e-05, "loss": 1.4375, "step": 1034 }, { "epoch": 0.26496, "grad_norm": 0.44194798806286295, "learning_rate": 3.381084476861513e-05, "loss": 1.401, "step": 1035 }, { "epoch": 0.265216, "grad_norm": 0.4270039794321105, "learning_rate": 3.379908496390283e-05, "loss": 1.4238, "step": 1036 }, { "epoch": 0.265472, "grad_norm": 0.4519111170939524, "learning_rate": 3.3787316046926165e-05, "loss": 1.4104, "step": 1037 }, { "epoch": 0.265728, "grad_norm": 0.43028522250944884, "learning_rate": 3.377553802545677e-05, "loss": 1.372, "step": 1038 }, { "epoch": 0.265984, "grad_norm": 0.44533641862592893, "learning_rate": 3.376375090727231e-05, "loss": 1.4304, "step": 1039 }, { "epoch": 0.26624, "grad_norm": 0.4299567222911219, "learning_rate": 3.375195470015643e-05, "loss": 1.405, "step": 1040 }, { "epoch": 0.266496, "grad_norm": 0.4279312397017779, "learning_rate": 3.3740149411898786e-05, "loss": 1.3848, "step": 1041 }, { "epoch": 0.266752, "grad_norm": 0.454111960456516, "learning_rate": 3.3728335050295044e-05, "loss": 1.4131, "step": 1042 }, { "epoch": 0.267008, "grad_norm": 0.42596979865495277, "learning_rate": 3.3716511623146843e-05, "loss": 1.3527, "step": 1043 }, { "epoch": 0.267264, "grad_norm": 0.4728077247376842, "learning_rate": 3.370467913826181e-05, "loss": 1.4494, "step": 1044 }, { "epoch": 0.26752, "grad_norm": 0.41880870179750784, "learning_rate": 3.369283760345358e-05, "loss": 1.3987, "step": 1045 }, { "epoch": 0.267776, "grad_norm": 0.46394536859563795, "learning_rate": 3.3680987026541726e-05, "loss": 1.41, "step": 1046 }, { "epoch": 0.268032, "grad_norm": 0.43104738083304367, "learning_rate": 3.3669127415351814e-05, "loss": 1.409, "step": 1047 }, { "epoch": 0.268288, "grad_norm": 0.5752830793020016, "learning_rate": 3.365725877771537e-05, "loss": 1.4452, "step": 1048 }, { "epoch": 0.268544, "grad_norm": 0.4307695707750591, "learning_rate": 3.364538112146989e-05, "loss": 1.414, "step": 1049 }, { "epoch": 0.2688, "grad_norm": 0.4408222347775491, "learning_rate": 3.3633494454458797e-05, "loss": 1.3881, "step": 1050 }, { "epoch": 0.269056, "grad_norm": 0.4472505855386507, "learning_rate": 3.3621598784531516e-05, "loss": 1.4303, "step": 1051 }, { "epoch": 0.269312, "grad_norm": 0.44575366365267904, "learning_rate": 3.360969411954336e-05, "loss": 1.458, "step": 1052 }, { "epoch": 0.269568, "grad_norm": 0.4488634034635231, "learning_rate": 3.3597780467355617e-05, "loss": 1.4352, "step": 1053 }, { "epoch": 0.269824, "grad_norm": 0.4518954947339265, "learning_rate": 3.3585857835835506e-05, "loss": 1.432, "step": 1054 }, { "epoch": 0.27008, "grad_norm": 0.4628353190928325, "learning_rate": 3.357392623285616e-05, "loss": 1.4005, "step": 1055 }, { "epoch": 0.270336, "grad_norm": 0.4222741642550358, "learning_rate": 3.356198566629666e-05, "loss": 1.4066, "step": 1056 }, { "epoch": 0.270592, "grad_norm": 0.45883862879413384, "learning_rate": 3.355003614404198e-05, "loss": 1.4518, "step": 1057 }, { "epoch": 0.270848, "grad_norm": 0.4324613345191155, "learning_rate": 3.353807767398302e-05, "loss": 1.3763, "step": 1058 }, { "epoch": 0.271104, "grad_norm": 0.4625868763703739, "learning_rate": 3.35261102640166e-05, "loss": 1.441, "step": 1059 }, { "epoch": 0.27136, "grad_norm": 0.438932783503285, "learning_rate": 3.351413392204542e-05, "loss": 1.3859, "step": 1060 }, { "epoch": 0.271616, "grad_norm": 0.4411975836219631, "learning_rate": 3.350214865597809e-05, "loss": 1.4521, "step": 1061 }, { "epoch": 0.271872, "grad_norm": 0.4505830947630525, "learning_rate": 3.349015447372913e-05, "loss": 1.4685, "step": 1062 }, { "epoch": 0.272128, "grad_norm": 0.42621480759800845, "learning_rate": 3.347815138321893e-05, "loss": 1.4029, "step": 1063 }, { "epoch": 0.272384, "grad_norm": 0.4273730439682329, "learning_rate": 3.3466139392373737e-05, "loss": 1.3969, "step": 1064 }, { "epoch": 0.27264, "grad_norm": 0.43379605570568897, "learning_rate": 3.345411850912574e-05, "loss": 1.4322, "step": 1065 }, { "epoch": 0.272896, "grad_norm": 0.45587258642895095, "learning_rate": 3.3442088741412936e-05, "loss": 1.4512, "step": 1066 }, { "epoch": 0.273152, "grad_norm": 0.43330064555663333, "learning_rate": 3.3430050097179236e-05, "loss": 1.373, "step": 1067 }, { "epoch": 0.273408, "grad_norm": 0.4539309118279507, "learning_rate": 3.341800258437437e-05, "loss": 1.3822, "step": 1068 }, { "epoch": 0.273664, "grad_norm": 0.42680792034258064, "learning_rate": 3.340594621095397e-05, "loss": 1.3836, "step": 1069 }, { "epoch": 0.27392, "grad_norm": 0.43629943792231574, "learning_rate": 3.3393880984879486e-05, "loss": 1.3919, "step": 1070 }, { "epoch": 0.274176, "grad_norm": 0.4318062252229854, "learning_rate": 3.338180691411821e-05, "loss": 1.4112, "step": 1071 }, { "epoch": 0.274432, "grad_norm": 0.48812451105607574, "learning_rate": 3.3369724006643306e-05, "loss": 1.3769, "step": 1072 }, { "epoch": 0.274688, "grad_norm": 0.4430550721916868, "learning_rate": 3.335763227043375e-05, "loss": 1.428, "step": 1073 }, { "epoch": 0.274944, "grad_norm": 0.4927411254144498, "learning_rate": 3.334553171347436e-05, "loss": 1.4132, "step": 1074 }, { "epoch": 0.2752, "grad_norm": 0.5198335599501936, "learning_rate": 3.3333422343755754e-05, "loss": 1.4295, "step": 1075 }, { "epoch": 0.275456, "grad_norm": 0.4181677523654909, "learning_rate": 3.332130416927442e-05, "loss": 1.4117, "step": 1076 }, { "epoch": 0.275712, "grad_norm": 0.4278501225422261, "learning_rate": 3.3309177198032596e-05, "loss": 1.399, "step": 1077 }, { "epoch": 0.275968, "grad_norm": 0.4381751446210983, "learning_rate": 3.3297041438038374e-05, "loss": 1.3807, "step": 1078 }, { "epoch": 0.276224, "grad_norm": 0.4259930691909271, "learning_rate": 3.3284896897305646e-05, "loss": 1.3801, "step": 1079 }, { "epoch": 0.27648, "grad_norm": 0.43998456590072904, "learning_rate": 3.327274358385408e-05, "loss": 1.4196, "step": 1080 }, { "epoch": 0.276736, "grad_norm": 0.43467970031842434, "learning_rate": 3.3260581505709155e-05, "loss": 1.4248, "step": 1081 }, { "epoch": 0.276992, "grad_norm": 0.42766015316242284, "learning_rate": 3.3248410670902135e-05, "loss": 1.4662, "step": 1082 }, { "epoch": 0.277248, "grad_norm": 0.42400473999462046, "learning_rate": 3.323623108747007e-05, "loss": 1.3948, "step": 1083 }, { "epoch": 0.277504, "grad_norm": 0.4089140295603216, "learning_rate": 3.322404276345577e-05, "loss": 1.382, "step": 1084 }, { "epoch": 0.27776, "grad_norm": 0.48292140338198125, "learning_rate": 3.321184570690784e-05, "loss": 1.4351, "step": 1085 }, { "epoch": 0.278016, "grad_norm": 0.4441478255860841, "learning_rate": 3.3199639925880635e-05, "loss": 1.4128, "step": 1086 }, { "epoch": 0.278272, "grad_norm": 0.43220173522750355, "learning_rate": 3.3187425428434274e-05, "loss": 1.4176, "step": 1087 }, { "epoch": 0.278528, "grad_norm": 0.4710304147933427, "learning_rate": 3.317520222263464e-05, "loss": 1.3941, "step": 1088 }, { "epoch": 0.278784, "grad_norm": 0.5787973525389272, "learning_rate": 3.316297031655336e-05, "loss": 1.4354, "step": 1089 }, { "epoch": 0.27904, "grad_norm": 0.45326684848791515, "learning_rate": 3.31507297182678e-05, "loss": 1.4445, "step": 1090 }, { "epoch": 0.279296, "grad_norm": 0.42260688416974257, "learning_rate": 3.313848043586109e-05, "loss": 1.4049, "step": 1091 }, { "epoch": 0.279552, "grad_norm": 0.43888705009446605, "learning_rate": 3.312622247742206e-05, "loss": 1.4002, "step": 1092 }, { "epoch": 0.279808, "grad_norm": 0.46982478052778404, "learning_rate": 3.3113955851045294e-05, "loss": 1.4006, "step": 1093 }, { "epoch": 0.280064, "grad_norm": 0.46131938755057716, "learning_rate": 3.31016805648311e-05, "loss": 1.3973, "step": 1094 }, { "epoch": 0.28032, "grad_norm": 0.42695791819364337, "learning_rate": 3.3089396626885484e-05, "loss": 1.4096, "step": 1095 }, { "epoch": 0.280576, "grad_norm": 0.48540496424955604, "learning_rate": 3.307710404532019e-05, "loss": 1.4218, "step": 1096 }, { "epoch": 0.280832, "grad_norm": 0.4181815020740165, "learning_rate": 3.3064802828252656e-05, "loss": 1.4089, "step": 1097 }, { "epoch": 0.281088, "grad_norm": 0.4624292426744591, "learning_rate": 3.3052492983806014e-05, "loss": 1.4146, "step": 1098 }, { "epoch": 0.281344, "grad_norm": 0.41945629540394497, "learning_rate": 3.304017452010913e-05, "loss": 1.3996, "step": 1099 }, { "epoch": 0.2816, "grad_norm": 0.487680540768905, "learning_rate": 3.302784744529651e-05, "loss": 1.4213, "step": 1100 }, { "epoch": 0.281856, "grad_norm": 0.4326681860030608, "learning_rate": 3.3015511767508384e-05, "loss": 1.4283, "step": 1101 }, { "epoch": 0.282112, "grad_norm": 0.4728747105607696, "learning_rate": 3.300316749489065e-05, "loss": 1.4193, "step": 1102 }, { "epoch": 0.282368, "grad_norm": 0.4395615466395717, "learning_rate": 3.2990814635594876e-05, "loss": 1.3998, "step": 1103 }, { "epoch": 0.282624, "grad_norm": 0.4577230782734413, "learning_rate": 3.2978453197778325e-05, "loss": 1.4163, "step": 1104 }, { "epoch": 0.28288, "grad_norm": 0.43701381588125393, "learning_rate": 3.296608318960388e-05, "loss": 1.4272, "step": 1105 }, { "epoch": 0.283136, "grad_norm": 0.42056232786760994, "learning_rate": 3.295370461924014e-05, "loss": 1.3768, "step": 1106 }, { "epoch": 0.283392, "grad_norm": 0.43698364999106304, "learning_rate": 3.2941317494861316e-05, "loss": 1.4219, "step": 1107 }, { "epoch": 0.283648, "grad_norm": 0.42205577557787666, "learning_rate": 3.2928921824647276e-05, "loss": 1.4239, "step": 1108 }, { "epoch": 0.283904, "grad_norm": 0.4541527606081171, "learning_rate": 3.2916517616783536e-05, "loss": 1.4737, "step": 1109 }, { "epoch": 0.28416, "grad_norm": 0.433999959387067, "learning_rate": 3.290410487946125e-05, "loss": 1.4476, "step": 1110 }, { "epoch": 0.284416, "grad_norm": 0.4310777615335861, "learning_rate": 3.289168362087721e-05, "loss": 1.4207, "step": 1111 }, { "epoch": 0.284672, "grad_norm": 0.42839054977842494, "learning_rate": 3.287925384923383e-05, "loss": 1.4163, "step": 1112 }, { "epoch": 0.284928, "grad_norm": 0.4420005420447975, "learning_rate": 3.286681557273913e-05, "loss": 1.4234, "step": 1113 }, { "epoch": 0.285184, "grad_norm": 0.42448549675239444, "learning_rate": 3.285436879960677e-05, "loss": 1.4396, "step": 1114 }, { "epoch": 0.28544, "grad_norm": 0.42545444935575516, "learning_rate": 3.284191353805602e-05, "loss": 1.3966, "step": 1115 }, { "epoch": 0.285696, "grad_norm": 0.43663761886815217, "learning_rate": 3.282944979631173e-05, "loss": 1.4307, "step": 1116 }, { "epoch": 0.285952, "grad_norm": 0.4192930986251806, "learning_rate": 3.281697758260437e-05, "loss": 1.4183, "step": 1117 }, { "epoch": 0.286208, "grad_norm": 0.4228677427831289, "learning_rate": 3.280449690517002e-05, "loss": 1.4396, "step": 1118 }, { "epoch": 0.286464, "grad_norm": 0.42687309118446376, "learning_rate": 3.2792007772250305e-05, "loss": 1.3996, "step": 1119 }, { "epoch": 0.28672, "grad_norm": 0.4213739266791507, "learning_rate": 3.277951019209248e-05, "loss": 1.3843, "step": 1120 }, { "epoch": 0.286976, "grad_norm": 0.4124062472240904, "learning_rate": 3.2767004172949346e-05, "loss": 1.3629, "step": 1121 }, { "epoch": 0.287232, "grad_norm": 0.4081569520840997, "learning_rate": 3.275448972307928e-05, "loss": 1.3957, "step": 1122 }, { "epoch": 0.287488, "grad_norm": 0.4134585607339923, "learning_rate": 3.2741966850746244e-05, "loss": 1.4243, "step": 1123 }, { "epoch": 0.287744, "grad_norm": 0.4133468632321385, "learning_rate": 3.272943556421976e-05, "loss": 1.3759, "step": 1124 }, { "epoch": 0.288, "grad_norm": 0.4354601600875955, "learning_rate": 3.2716895871774886e-05, "loss": 1.4441, "step": 1125 }, { "epoch": 0.288256, "grad_norm": 0.509393927859276, "learning_rate": 3.270434778169224e-05, "loss": 1.4118, "step": 1126 }, { "epoch": 0.288512, "grad_norm": 0.42639485732992566, "learning_rate": 3.2691791302258e-05, "loss": 1.4334, "step": 1127 }, { "epoch": 0.288768, "grad_norm": 0.44073563651309283, "learning_rate": 3.267922644176386e-05, "loss": 1.4655, "step": 1128 }, { "epoch": 0.289024, "grad_norm": 0.4154603477143575, "learning_rate": 3.2666653208507075e-05, "loss": 1.3896, "step": 1129 }, { "epoch": 0.28928, "grad_norm": 0.4302593491785022, "learning_rate": 3.265407161079041e-05, "loss": 1.403, "step": 1130 }, { "epoch": 0.289536, "grad_norm": 0.4317919009448384, "learning_rate": 3.264148165692214e-05, "loss": 1.3681, "step": 1131 }, { "epoch": 0.289792, "grad_norm": 0.4408316085454837, "learning_rate": 3.26288833552161e-05, "loss": 1.3998, "step": 1132 }, { "epoch": 0.290048, "grad_norm": 0.4480675731399566, "learning_rate": 3.2616276713991606e-05, "loss": 1.4056, "step": 1133 }, { "epoch": 0.290304, "grad_norm": 0.45969072756888313, "learning_rate": 3.2603661741573484e-05, "loss": 1.4175, "step": 1134 }, { "epoch": 0.29056, "grad_norm": 0.5901718115802729, "learning_rate": 3.259103844629208e-05, "loss": 1.3782, "step": 1135 }, { "epoch": 0.290816, "grad_norm": 0.6404182420252112, "learning_rate": 3.25784068364832e-05, "loss": 1.4055, "step": 1136 }, { "epoch": 0.291072, "grad_norm": 0.4544265445156285, "learning_rate": 3.2565766920488186e-05, "loss": 1.4107, "step": 1137 }, { "epoch": 0.291328, "grad_norm": 0.4281151328847092, "learning_rate": 3.255311870665383e-05, "loss": 1.3876, "step": 1138 }, { "epoch": 0.291584, "grad_norm": 0.4760937046716286, "learning_rate": 3.2540462203332414e-05, "loss": 1.426, "step": 1139 }, { "epoch": 0.29184, "grad_norm": 0.45430832309023095, "learning_rate": 3.25277974188817e-05, "loss": 1.4436, "step": 1140 }, { "epoch": 0.292096, "grad_norm": 0.44345885986922606, "learning_rate": 3.251512436166492e-05, "loss": 1.3907, "step": 1141 }, { "epoch": 0.292352, "grad_norm": 0.42117929236071266, "learning_rate": 3.250244304005074e-05, "loss": 1.3982, "step": 1142 }, { "epoch": 0.292608, "grad_norm": 0.44869795415933883, "learning_rate": 3.248975346241333e-05, "loss": 1.4127, "step": 1143 }, { "epoch": 0.292864, "grad_norm": 0.42775831456898844, "learning_rate": 3.247705563713228e-05, "loss": 1.4351, "step": 1144 }, { "epoch": 0.29312, "grad_norm": 0.44403054624269184, "learning_rate": 3.246434957259263e-05, "loss": 1.4263, "step": 1145 }, { "epoch": 0.293376, "grad_norm": 0.4131576238556579, "learning_rate": 3.2451635277184866e-05, "loss": 1.4178, "step": 1146 }, { "epoch": 0.293632, "grad_norm": 0.42608550129408124, "learning_rate": 3.2438912759304916e-05, "loss": 1.398, "step": 1147 }, { "epoch": 0.293888, "grad_norm": 0.42057962527261933, "learning_rate": 3.242618202735413e-05, "loss": 1.4502, "step": 1148 }, { "epoch": 0.294144, "grad_norm": 0.4364781298778433, "learning_rate": 3.241344308973927e-05, "loss": 1.4394, "step": 1149 }, { "epoch": 0.2944, "grad_norm": 0.41859349497917914, "learning_rate": 3.240069595487254e-05, "loss": 1.4237, "step": 1150 }, { "epoch": 0.294656, "grad_norm": 0.43178868912811974, "learning_rate": 3.2387940631171545e-05, "loss": 1.4788, "step": 1151 }, { "epoch": 0.294912, "grad_norm": 0.4375459849608849, "learning_rate": 3.23751771270593e-05, "loss": 1.4067, "step": 1152 }, { "epoch": 0.295168, "grad_norm": 0.4250862980316044, "learning_rate": 3.236240545096421e-05, "loss": 1.4313, "step": 1153 }, { "epoch": 0.295424, "grad_norm": 0.4315547237652792, "learning_rate": 3.2349625611320105e-05, "loss": 1.4184, "step": 1154 }, { "epoch": 0.29568, "grad_norm": 0.40328560367450433, "learning_rate": 3.233683761656617e-05, "loss": 1.3819, "step": 1155 }, { "epoch": 0.295936, "grad_norm": 0.4345655021400087, "learning_rate": 3.2324041475147004e-05, "loss": 1.4123, "step": 1156 }, { "epoch": 0.296192, "grad_norm": 0.40972345051257697, "learning_rate": 3.231123719551257e-05, "loss": 1.4267, "step": 1157 }, { "epoch": 0.296448, "grad_norm": 0.41479548372189257, "learning_rate": 3.2298424786118215e-05, "loss": 1.4014, "step": 1158 }, { "epoch": 0.296704, "grad_norm": 0.401898097166026, "learning_rate": 3.228560425542465e-05, "loss": 1.3984, "step": 1159 }, { "epoch": 0.29696, "grad_norm": 0.39568322630725306, "learning_rate": 3.227277561189793e-05, "loss": 1.3999, "step": 1160 }, { "epoch": 0.297216, "grad_norm": 0.43401559163454545, "learning_rate": 3.225993886400951e-05, "loss": 1.4252, "step": 1161 }, { "epoch": 0.297472, "grad_norm": 0.40806443584462176, "learning_rate": 3.224709402023616e-05, "loss": 1.406, "step": 1162 }, { "epoch": 0.297728, "grad_norm": 0.4339995130953313, "learning_rate": 3.223424108906001e-05, "loss": 1.398, "step": 1163 }, { "epoch": 0.297984, "grad_norm": 0.43209963228877146, "learning_rate": 3.222138007896853e-05, "loss": 1.4249, "step": 1164 }, { "epoch": 0.29824, "grad_norm": 0.432238127372735, "learning_rate": 3.220851099845453e-05, "loss": 1.3565, "step": 1165 }, { "epoch": 0.298496, "grad_norm": 0.42107503053955303, "learning_rate": 3.219563385601613e-05, "loss": 1.426, "step": 1166 }, { "epoch": 0.298752, "grad_norm": 0.43054109840331417, "learning_rate": 3.2182748660156795e-05, "loss": 1.4325, "step": 1167 }, { "epoch": 0.299008, "grad_norm": 0.4121611963396635, "learning_rate": 3.21698554193853e-05, "loss": 1.3735, "step": 1168 }, { "epoch": 0.299264, "grad_norm": 0.4243039112207908, "learning_rate": 3.215695414221573e-05, "loss": 1.3839, "step": 1169 }, { "epoch": 0.29952, "grad_norm": 0.4194864084514799, "learning_rate": 3.214404483716749e-05, "loss": 1.4287, "step": 1170 }, { "epoch": 0.299776, "grad_norm": 0.442795350924567, "learning_rate": 3.2131127512765254e-05, "loss": 1.4376, "step": 1171 }, { "epoch": 0.300032, "grad_norm": 0.7291214163370128, "learning_rate": 3.211820217753903e-05, "loss": 1.3666, "step": 1172 }, { "epoch": 0.300288, "grad_norm": 0.43035110944910393, "learning_rate": 3.210526884002411e-05, "loss": 1.4319, "step": 1173 }, { "epoch": 0.300544, "grad_norm": 0.43007259932303177, "learning_rate": 3.209232750876103e-05, "loss": 1.4455, "step": 1174 }, { "epoch": 0.3008, "grad_norm": 0.4366584717399654, "learning_rate": 3.207937819229565e-05, "loss": 1.4037, "step": 1175 }, { "epoch": 0.301056, "grad_norm": 0.4242114268120552, "learning_rate": 3.206642089917909e-05, "loss": 1.4362, "step": 1176 }, { "epoch": 0.301312, "grad_norm": 0.4592443154762359, "learning_rate": 3.205345563796774e-05, "loss": 1.4522, "step": 1177 }, { "epoch": 0.301568, "grad_norm": 0.41578437846859323, "learning_rate": 3.2040482417223226e-05, "loss": 1.4031, "step": 1178 }, { "epoch": 0.301824, "grad_norm": 0.4308914903807655, "learning_rate": 3.2027501245512467e-05, "loss": 1.4179, "step": 1179 }, { "epoch": 0.30208, "grad_norm": 0.4226380104191961, "learning_rate": 3.201451213140762e-05, "loss": 1.3722, "step": 1180 }, { "epoch": 0.302336, "grad_norm": 0.44056503127289953, "learning_rate": 3.2001515083486064e-05, "loss": 1.4113, "step": 1181 }, { "epoch": 0.302592, "grad_norm": 0.44529615595672334, "learning_rate": 3.1988510110330454e-05, "loss": 1.4006, "step": 1182 }, { "epoch": 0.302848, "grad_norm": 0.40886495600605616, "learning_rate": 3.1975497220528644e-05, "loss": 1.3829, "step": 1183 }, { "epoch": 0.303104, "grad_norm": 0.45877194127997917, "learning_rate": 3.196247642267375e-05, "loss": 1.4025, "step": 1184 }, { "epoch": 0.30336, "grad_norm": 0.4228189306259059, "learning_rate": 3.194944772536408e-05, "loss": 1.4354, "step": 1185 }, { "epoch": 0.303616, "grad_norm": 0.432010486512588, "learning_rate": 3.1936411137203174e-05, "loss": 1.4126, "step": 1186 }, { "epoch": 0.303872, "grad_norm": 0.41472261705120805, "learning_rate": 3.192336666679978e-05, "loss": 1.4233, "step": 1187 }, { "epoch": 0.304128, "grad_norm": 0.4077540182057449, "learning_rate": 3.191031432276785e-05, "loss": 1.3485, "step": 1188 }, { "epoch": 0.304384, "grad_norm": 0.4191130733988432, "learning_rate": 3.189725411372654e-05, "loss": 1.3602, "step": 1189 }, { "epoch": 0.30464, "grad_norm": 0.4181421316061501, "learning_rate": 3.188418604830019e-05, "loss": 1.3882, "step": 1190 }, { "epoch": 0.304896, "grad_norm": 0.42148518526798495, "learning_rate": 3.187111013511833e-05, "loss": 1.4192, "step": 1191 }, { "epoch": 0.305152, "grad_norm": 0.4246014981854268, "learning_rate": 3.185802638281569e-05, "loss": 1.4109, "step": 1192 }, { "epoch": 0.305408, "grad_norm": 0.41941954042274, "learning_rate": 3.184493480003214e-05, "loss": 1.426, "step": 1193 }, { "epoch": 0.305664, "grad_norm": 0.41303926450149886, "learning_rate": 3.183183539541277e-05, "loss": 1.4235, "step": 1194 }, { "epoch": 0.30592, "grad_norm": 0.41251837929526514, "learning_rate": 3.18187281776078e-05, "loss": 1.3894, "step": 1195 }, { "epoch": 0.306176, "grad_norm": 0.4128128018691263, "learning_rate": 3.18056131552726e-05, "loss": 1.3868, "step": 1196 }, { "epoch": 0.306432, "grad_norm": 0.4171460384979787, "learning_rate": 3.1792490337067725e-05, "loss": 1.3649, "step": 1197 }, { "epoch": 0.306688, "grad_norm": 0.42102456348689277, "learning_rate": 3.177935973165887e-05, "loss": 1.4214, "step": 1198 }, { "epoch": 0.306944, "grad_norm": 0.4340899247810748, "learning_rate": 3.176622134771687e-05, "loss": 1.3913, "step": 1199 }, { "epoch": 0.3072, "grad_norm": 0.4088220280950938, "learning_rate": 3.1753075193917674e-05, "loss": 1.3499, "step": 1200 }, { "epoch": 0.307456, "grad_norm": 0.4108606099279782, "learning_rate": 3.17399212789424e-05, "loss": 1.4277, "step": 1201 }, { "epoch": 0.307712, "grad_norm": 0.4307523834209843, "learning_rate": 3.1726759611477256e-05, "loss": 1.4361, "step": 1202 }, { "epoch": 0.307968, "grad_norm": 0.41750151385651174, "learning_rate": 3.17135902002136e-05, "loss": 1.4131, "step": 1203 }, { "epoch": 0.308224, "grad_norm": 0.4107146101173538, "learning_rate": 3.1700413053847885e-05, "loss": 1.378, "step": 1204 }, { "epoch": 0.30848, "grad_norm": 0.42862077911334096, "learning_rate": 3.168722818108168e-05, "loss": 1.4468, "step": 1205 }, { "epoch": 0.308736, "grad_norm": 0.4131203878404391, "learning_rate": 3.167403559062164e-05, "loss": 1.3732, "step": 1206 }, { "epoch": 0.308992, "grad_norm": 0.41794424909767036, "learning_rate": 3.1660835291179545e-05, "loss": 1.3917, "step": 1207 }, { "epoch": 0.309248, "grad_norm": 0.40816798427888107, "learning_rate": 3.164762729147224e-05, "loss": 1.4394, "step": 1208 }, { "epoch": 0.309504, "grad_norm": 0.43607272276646936, "learning_rate": 3.163441160022167e-05, "loss": 1.4557, "step": 1209 }, { "epoch": 0.30976, "grad_norm": 0.41024301357443327, "learning_rate": 3.162118822615485e-05, "loss": 1.3978, "step": 1210 }, { "epoch": 0.310016, "grad_norm": 0.4105175082791347, "learning_rate": 3.1607957178003874e-05, "loss": 1.3983, "step": 1211 }, { "epoch": 0.310272, "grad_norm": 0.4229197688012235, "learning_rate": 3.1594718464505896e-05, "loss": 1.4291, "step": 1212 }, { "epoch": 0.310528, "grad_norm": 0.4082222047043843, "learning_rate": 3.158147209440315e-05, "loss": 1.4169, "step": 1213 }, { "epoch": 0.310784, "grad_norm": 0.7096120274727995, "learning_rate": 3.1568218076442904e-05, "loss": 1.4024, "step": 1214 }, { "epoch": 0.31104, "grad_norm": 0.4159323049778268, "learning_rate": 3.1554956419377476e-05, "loss": 1.3848, "step": 1215 }, { "epoch": 0.311296, "grad_norm": 0.4193137849966752, "learning_rate": 3.154168713196426e-05, "loss": 1.3952, "step": 1216 }, { "epoch": 0.311552, "grad_norm": 0.41871491169588926, "learning_rate": 3.1528410222965664e-05, "loss": 1.4513, "step": 1217 }, { "epoch": 0.311808, "grad_norm": 0.4099675434317884, "learning_rate": 3.151512570114911e-05, "loss": 1.4055, "step": 1218 }, { "epoch": 0.312064, "grad_norm": 0.41434091244194343, "learning_rate": 3.150183357528709e-05, "loss": 1.3911, "step": 1219 }, { "epoch": 0.31232, "grad_norm": 0.4253088197848956, "learning_rate": 3.1488533854157096e-05, "loss": 1.4421, "step": 1220 }, { "epoch": 0.312576, "grad_norm": 0.40912407238603443, "learning_rate": 3.1475226546541624e-05, "loss": 1.4059, "step": 1221 }, { "epoch": 0.312832, "grad_norm": 0.41612533190360373, "learning_rate": 3.14619116612282e-05, "loss": 1.3935, "step": 1222 }, { "epoch": 0.313088, "grad_norm": 0.4035512041461475, "learning_rate": 3.144858920700935e-05, "loss": 1.367, "step": 1223 }, { "epoch": 0.313344, "grad_norm": 0.4088555183597548, "learning_rate": 3.1435259192682575e-05, "loss": 1.3806, "step": 1224 }, { "epoch": 0.3136, "grad_norm": 0.4038294794185272, "learning_rate": 3.1421921627050416e-05, "loss": 1.3536, "step": 1225 }, { "epoch": 0.313856, "grad_norm": 0.40885215306337575, "learning_rate": 3.1408576518920346e-05, "loss": 1.3924, "step": 1226 }, { "epoch": 0.314112, "grad_norm": 0.40657851070025025, "learning_rate": 3.139522387710485e-05, "loss": 1.3767, "step": 1227 }, { "epoch": 0.314368, "grad_norm": 0.4902339272017824, "learning_rate": 3.13818637104214e-05, "loss": 1.4227, "step": 1228 }, { "epoch": 0.314624, "grad_norm": 0.422462749919461, "learning_rate": 3.13684960276924e-05, "loss": 1.3923, "step": 1229 }, { "epoch": 0.31488, "grad_norm": 0.418395953053438, "learning_rate": 3.135512083774524e-05, "loss": 1.4874, "step": 1230 }, { "epoch": 0.315136, "grad_norm": 0.4251976016544273, "learning_rate": 3.134173814941227e-05, "loss": 1.434, "step": 1231 }, { "epoch": 0.315392, "grad_norm": 0.4212149400175426, "learning_rate": 3.132834797153079e-05, "loss": 1.4046, "step": 1232 }, { "epoch": 0.315648, "grad_norm": 0.43017280246410944, "learning_rate": 3.131495031294303e-05, "loss": 1.4447, "step": 1233 }, { "epoch": 0.315904, "grad_norm": 0.41643787912906144, "learning_rate": 3.130154518249617e-05, "loss": 1.4225, "step": 1234 }, { "epoch": 0.31616, "grad_norm": 0.4177713148189969, "learning_rate": 3.1288132589042335e-05, "loss": 1.4147, "step": 1235 }, { "epoch": 0.316416, "grad_norm": 0.4246517513297747, "learning_rate": 3.1274712541438564e-05, "loss": 1.4012, "step": 1236 }, { "epoch": 0.316672, "grad_norm": 0.41914662504676936, "learning_rate": 3.126128504854682e-05, "loss": 1.4041, "step": 1237 }, { "epoch": 0.316928, "grad_norm": 0.42552289495361123, "learning_rate": 3.1247850119233995e-05, "loss": 1.4056, "step": 1238 }, { "epoch": 0.317184, "grad_norm": 0.4235625622010515, "learning_rate": 3.1234407762371875e-05, "loss": 1.41, "step": 1239 }, { "epoch": 0.31744, "grad_norm": 0.42041864931760375, "learning_rate": 3.1220957986837165e-05, "loss": 1.4256, "step": 1240 }, { "epoch": 0.317696, "grad_norm": 0.46716382347814195, "learning_rate": 3.120750080151145e-05, "loss": 1.3845, "step": 1241 }, { "epoch": 0.317952, "grad_norm": 0.4288107361680559, "learning_rate": 3.119403621528122e-05, "loss": 1.3805, "step": 1242 }, { "epoch": 0.318208, "grad_norm": 0.42585324552708786, "learning_rate": 3.118056423703787e-05, "loss": 1.4314, "step": 1243 }, { "epoch": 0.318464, "grad_norm": 0.43413380000721746, "learning_rate": 3.116708487567764e-05, "loss": 1.4102, "step": 1244 }, { "epoch": 0.31872, "grad_norm": 0.4383729186698269, "learning_rate": 3.1153598140101676e-05, "loss": 1.3789, "step": 1245 }, { "epoch": 0.318976, "grad_norm": 0.4439772022027527, "learning_rate": 3.114010403921597e-05, "loss": 1.4489, "step": 1246 }, { "epoch": 0.319232, "grad_norm": 0.4661506932589023, "learning_rate": 3.11266025819314e-05, "loss": 1.4098, "step": 1247 }, { "epoch": 0.319488, "grad_norm": 0.4468540644831854, "learning_rate": 3.1113093777163686e-05, "loss": 1.4293, "step": 1248 }, { "epoch": 0.319744, "grad_norm": 0.40812565348578894, "learning_rate": 3.1099577633833406e-05, "loss": 1.364, "step": 1249 }, { "epoch": 0.32, "grad_norm": 0.4387320152984219, "learning_rate": 3.1086054160865985e-05, "loss": 1.4582, "step": 1250 }, { "epoch": 0.320256, "grad_norm": 0.42933384170518335, "learning_rate": 3.107252336719168e-05, "loss": 1.4147, "step": 1251 }, { "epoch": 0.320512, "grad_norm": 0.4191023901502762, "learning_rate": 3.1058985261745596e-05, "loss": 1.4306, "step": 1252 }, { "epoch": 0.320768, "grad_norm": 0.4354160868992651, "learning_rate": 3.104543985346765e-05, "loss": 1.4376, "step": 1253 }, { "epoch": 0.321024, "grad_norm": 0.45779761194874546, "learning_rate": 3.10318871513026e-05, "loss": 1.4405, "step": 1254 }, { "epoch": 0.32128, "grad_norm": 0.40230870893151843, "learning_rate": 3.101832716420001e-05, "loss": 1.4103, "step": 1255 }, { "epoch": 0.321536, "grad_norm": 0.422176785304716, "learning_rate": 3.1004759901114245e-05, "loss": 1.4084, "step": 1256 }, { "epoch": 0.321792, "grad_norm": 0.416659527570115, "learning_rate": 3.09911853710045e-05, "loss": 1.3816, "step": 1257 }, { "epoch": 0.322048, "grad_norm": 0.4679334693268822, "learning_rate": 3.097760358283473e-05, "loss": 1.3757, "step": 1258 }, { "epoch": 0.322304, "grad_norm": 0.4339406676317571, "learning_rate": 3.096401454557374e-05, "loss": 1.4663, "step": 1259 }, { "epoch": 0.32256, "grad_norm": 0.42229257580469615, "learning_rate": 3.095041826819506e-05, "loss": 1.4102, "step": 1260 }, { "epoch": 0.322816, "grad_norm": 0.4250543831838138, "learning_rate": 3.093681475967705e-05, "loss": 1.3755, "step": 1261 }, { "epoch": 0.323072, "grad_norm": 0.43920831376044683, "learning_rate": 3.0923204029002815e-05, "loss": 1.4312, "step": 1262 }, { "epoch": 0.323328, "grad_norm": 0.41301480771951404, "learning_rate": 3.090958608516023e-05, "loss": 1.3619, "step": 1263 }, { "epoch": 0.323584, "grad_norm": 0.4312385959997988, "learning_rate": 3.089596093714196e-05, "loss": 1.4412, "step": 1264 }, { "epoch": 0.32384, "grad_norm": 0.39918549346762466, "learning_rate": 3.088232859394541e-05, "loss": 1.3637, "step": 1265 }, { "epoch": 0.324096, "grad_norm": 0.41279532150483866, "learning_rate": 3.086868906457271e-05, "loss": 1.4028, "step": 1266 }, { "epoch": 0.324352, "grad_norm": 0.4108708369390747, "learning_rate": 3.0855042358030795e-05, "loss": 1.3657, "step": 1267 }, { "epoch": 0.324608, "grad_norm": 0.3986934058253757, "learning_rate": 3.0841388483331284e-05, "loss": 1.3671, "step": 1268 }, { "epoch": 0.324864, "grad_norm": 0.41476267719154275, "learning_rate": 3.082772744949056e-05, "loss": 1.3774, "step": 1269 }, { "epoch": 0.32512, "grad_norm": 0.3987211561793892, "learning_rate": 3.0814059265529726e-05, "loss": 1.3822, "step": 1270 }, { "epoch": 0.325376, "grad_norm": 0.431107851684075, "learning_rate": 3.08003839404746e-05, "loss": 1.4003, "step": 1271 }, { "epoch": 0.325632, "grad_norm": 0.40383602951971925, "learning_rate": 3.0786701483355716e-05, "loss": 1.4432, "step": 1272 }, { "epoch": 0.325888, "grad_norm": 0.4234532770760919, "learning_rate": 3.077301190320834e-05, "loss": 1.4062, "step": 1273 }, { "epoch": 0.326144, "grad_norm": 0.4253562186601696, "learning_rate": 3.075931520907241e-05, "loss": 1.4306, "step": 1274 }, { "epoch": 0.3264, "grad_norm": 0.43180697461774165, "learning_rate": 3.074561140999258e-05, "loss": 1.4227, "step": 1275 }, { "epoch": 0.326656, "grad_norm": 0.43759673557827045, "learning_rate": 3.073190051501819e-05, "loss": 1.4058, "step": 1276 }, { "epoch": 0.326912, "grad_norm": 0.3924255525754246, "learning_rate": 3.071818253320328e-05, "loss": 1.4032, "step": 1277 }, { "epoch": 0.327168, "grad_norm": 0.4422430576562213, "learning_rate": 3.070445747360654e-05, "loss": 1.4103, "step": 1278 }, { "epoch": 0.327424, "grad_norm": 0.42782128255942475, "learning_rate": 3.0690725345291364e-05, "loss": 1.459, "step": 1279 }, { "epoch": 0.32768, "grad_norm": 0.4180700988964281, "learning_rate": 3.06769861573258e-05, "loss": 1.4052, "step": 1280 }, { "epoch": 0.327936, "grad_norm": 0.4121329345059298, "learning_rate": 3.0663239918782555e-05, "loss": 1.4315, "step": 1281 }, { "epoch": 0.328192, "grad_norm": 0.39184343428686336, "learning_rate": 3.0649486638738994e-05, "loss": 1.3906, "step": 1282 }, { "epoch": 0.328448, "grad_norm": 0.41153734696899574, "learning_rate": 3.0635726326277145e-05, "loss": 1.4242, "step": 1283 }, { "epoch": 0.328704, "grad_norm": 0.42122867421333227, "learning_rate": 3.062195899048365e-05, "loss": 1.4153, "step": 1284 }, { "epoch": 0.32896, "grad_norm": 0.4173491850861933, "learning_rate": 3.060818464044984e-05, "loss": 1.3687, "step": 1285 }, { "epoch": 0.329216, "grad_norm": 0.4077471444427495, "learning_rate": 3.0594403285271604e-05, "loss": 1.4451, "step": 1286 }, { "epoch": 0.329472, "grad_norm": 0.41324433942453037, "learning_rate": 3.0580614934049524e-05, "loss": 1.4418, "step": 1287 }, { "epoch": 0.329728, "grad_norm": 0.399379532041928, "learning_rate": 3.0566819595888775e-05, "loss": 1.3672, "step": 1288 }, { "epoch": 0.329984, "grad_norm": 0.41275025901887585, "learning_rate": 3.055301727989914e-05, "loss": 1.3802, "step": 1289 }, { "epoch": 0.33024, "grad_norm": 0.41859219060050396, "learning_rate": 3.053920799519502e-05, "loss": 1.4245, "step": 1290 }, { "epoch": 0.330496, "grad_norm": 0.4076327428619652, "learning_rate": 3.0525391750895426e-05, "loss": 1.4043, "step": 1291 }, { "epoch": 0.330752, "grad_norm": 0.4001922313684657, "learning_rate": 3.0511568556123934e-05, "loss": 1.4273, "step": 1292 }, { "epoch": 0.331008, "grad_norm": 0.42313948842744575, "learning_rate": 3.049773842000874e-05, "loss": 1.4695, "step": 1293 }, { "epoch": 0.331264, "grad_norm": 0.40838141258835375, "learning_rate": 3.0483901351682612e-05, "loss": 1.3931, "step": 1294 }, { "epoch": 0.33152, "grad_norm": 0.41358465540726363, "learning_rate": 3.0470057360282896e-05, "loss": 1.4059, "step": 1295 }, { "epoch": 0.331776, "grad_norm": 2.507835536111388, "learning_rate": 3.0456206454951514e-05, "loss": 1.4334, "step": 1296 }, { "epoch": 0.332032, "grad_norm": 0.46880057278022064, "learning_rate": 3.0442348644834944e-05, "loss": 1.4683, "step": 1297 }, { "epoch": 0.332288, "grad_norm": 0.4173886689216587, "learning_rate": 3.042848393908423e-05, "loss": 1.3984, "step": 1298 }, { "epoch": 0.332544, "grad_norm": 0.42698107439204497, "learning_rate": 3.0414612346854972e-05, "loss": 1.4095, "step": 1299 }, { "epoch": 0.3328, "grad_norm": 1.1165984828329885, "learning_rate": 3.0400733877307316e-05, "loss": 1.4433, "step": 1300 }, { "epoch": 0.333056, "grad_norm": 0.4452702291973284, "learning_rate": 3.0386848539605947e-05, "loss": 1.3945, "step": 1301 }, { "epoch": 0.333312, "grad_norm": 0.48079409045222515, "learning_rate": 3.0372956342920088e-05, "loss": 1.4524, "step": 1302 }, { "epoch": 0.333568, "grad_norm": 0.4379537970147366, "learning_rate": 3.0359057296423487e-05, "loss": 1.3809, "step": 1303 }, { "epoch": 0.333824, "grad_norm": 0.4202475726051076, "learning_rate": 3.034515140929442e-05, "loss": 1.3242, "step": 1304 }, { "epoch": 0.33408, "grad_norm": 0.44835869342948403, "learning_rate": 3.0331238690715676e-05, "loss": 1.3987, "step": 1305 }, { "epoch": 0.334336, "grad_norm": 0.4319324120842725, "learning_rate": 3.0317319149874556e-05, "loss": 1.3937, "step": 1306 }, { "epoch": 0.334592, "grad_norm": 0.41385006731848517, "learning_rate": 3.0303392795962878e-05, "loss": 1.4053, "step": 1307 }, { "epoch": 0.334848, "grad_norm": 0.4336406684374519, "learning_rate": 3.028945963817694e-05, "loss": 1.3845, "step": 1308 }, { "epoch": 0.335104, "grad_norm": 0.4290693139712829, "learning_rate": 3.0275519685717546e-05, "loss": 1.4014, "step": 1309 }, { "epoch": 0.33536, "grad_norm": 0.42024043372411046, "learning_rate": 3.0261572947789975e-05, "loss": 1.3781, "step": 1310 }, { "epoch": 0.335616, "grad_norm": 0.4339449661013725, "learning_rate": 3.0247619433604e-05, "loss": 1.4144, "step": 1311 }, { "epoch": 0.335872, "grad_norm": 0.4309262942451594, "learning_rate": 3.0233659152373868e-05, "loss": 1.4027, "step": 1312 }, { "epoch": 0.336128, "grad_norm": 0.41669504307845745, "learning_rate": 3.0219692113318283e-05, "loss": 1.4412, "step": 1313 }, { "epoch": 0.336384, "grad_norm": 0.4403791338547325, "learning_rate": 3.0205718325660414e-05, "loss": 1.4148, "step": 1314 }, { "epoch": 0.33664, "grad_norm": 0.42834913537761277, "learning_rate": 3.01917377986279e-05, "loss": 1.4019, "step": 1315 }, { "epoch": 0.336896, "grad_norm": 0.41394411385160595, "learning_rate": 3.0177750541452826e-05, "loss": 1.3879, "step": 1316 }, { "epoch": 0.337152, "grad_norm": 0.42386207164349365, "learning_rate": 3.0163756563371697e-05, "loss": 1.3705, "step": 1317 }, { "epoch": 0.337408, "grad_norm": 0.4228294359643049, "learning_rate": 3.0149755873625486e-05, "loss": 1.4404, "step": 1318 }, { "epoch": 0.337664, "grad_norm": 0.41303349259162797, "learning_rate": 3.01357484814596e-05, "loss": 1.4013, "step": 1319 }, { "epoch": 0.33792, "grad_norm": 0.4312530347788422, "learning_rate": 3.0121734396123848e-05, "loss": 1.3926, "step": 1320 }, { "epoch": 0.338176, "grad_norm": 0.421555599824878, "learning_rate": 3.010771362687246e-05, "loss": 1.4442, "step": 1321 }, { "epoch": 0.338432, "grad_norm": 0.4380217840607509, "learning_rate": 3.0093686182964102e-05, "loss": 1.4205, "step": 1322 }, { "epoch": 0.338688, "grad_norm": 0.4214516089879668, "learning_rate": 3.0079652073661846e-05, "loss": 1.3799, "step": 1323 }, { "epoch": 0.338944, "grad_norm": 0.42724323935372926, "learning_rate": 3.0065611308233134e-05, "loss": 1.475, "step": 1324 }, { "epoch": 0.3392, "grad_norm": 0.40168310253274403, "learning_rate": 3.0051563895949836e-05, "loss": 1.3881, "step": 1325 }, { "epoch": 0.339456, "grad_norm": 0.42110570812271714, "learning_rate": 3.0037509846088197e-05, "loss": 1.4598, "step": 1326 }, { "epoch": 0.339712, "grad_norm": 0.4073135562375942, "learning_rate": 3.0023449167928854e-05, "loss": 1.3696, "step": 1327 }, { "epoch": 0.339968, "grad_norm": 0.41175537780242827, "learning_rate": 3.0009381870756808e-05, "loss": 1.3955, "step": 1328 }, { "epoch": 0.340224, "grad_norm": 0.4025994893286787, "learning_rate": 2.9995307963861443e-05, "loss": 1.4023, "step": 1329 }, { "epoch": 0.34048, "grad_norm": 0.3999826325095339, "learning_rate": 2.9981227456536502e-05, "loss": 1.3582, "step": 1330 }, { "epoch": 0.340736, "grad_norm": 0.39653252481757734, "learning_rate": 2.996714035808008e-05, "loss": 1.3384, "step": 1331 }, { "epoch": 0.340992, "grad_norm": 0.4017121823864137, "learning_rate": 2.9953046677794648e-05, "loss": 1.3892, "step": 1332 }, { "epoch": 0.341248, "grad_norm": 0.3952039600621715, "learning_rate": 2.993894642498699e-05, "loss": 1.3885, "step": 1333 }, { "epoch": 0.341504, "grad_norm": 0.4309079034094291, "learning_rate": 2.9924839608968254e-05, "loss": 1.3966, "step": 1334 }, { "epoch": 0.34176, "grad_norm": 0.3912068992899185, "learning_rate": 2.991072623905392e-05, "loss": 1.3983, "step": 1335 }, { "epoch": 0.342016, "grad_norm": 0.4096085625722772, "learning_rate": 2.989660632456379e-05, "loss": 1.4132, "step": 1336 }, { "epoch": 0.342272, "grad_norm": 0.39100270613168364, "learning_rate": 2.988247987482198e-05, "loss": 1.3827, "step": 1337 }, { "epoch": 0.342528, "grad_norm": 0.40875733850657575, "learning_rate": 2.9868346899156938e-05, "loss": 1.3821, "step": 1338 }, { "epoch": 0.342784, "grad_norm": 0.3983344888294015, "learning_rate": 2.9854207406901413e-05, "loss": 1.4158, "step": 1339 }, { "epoch": 0.34304, "grad_norm": 0.4133525821701678, "learning_rate": 2.984006140739246e-05, "loss": 1.3897, "step": 1340 }, { "epoch": 0.343296, "grad_norm": 0.41878414870417735, "learning_rate": 2.982590890997143e-05, "loss": 1.4366, "step": 1341 }, { "epoch": 0.343552, "grad_norm": 0.41036239739906116, "learning_rate": 2.9811749923983958e-05, "loss": 1.4035, "step": 1342 }, { "epoch": 0.343808, "grad_norm": 0.4099744812173714, "learning_rate": 2.9797584458779973e-05, "loss": 1.418, "step": 1343 }, { "epoch": 0.344064, "grad_norm": 0.39679300109156596, "learning_rate": 2.9783412523713688e-05, "loss": 1.366, "step": 1344 }, { "epoch": 0.34432, "grad_norm": 0.407202682154567, "learning_rate": 2.9769234128143565e-05, "loss": 1.428, "step": 1345 }, { "epoch": 0.344576, "grad_norm": 0.4653510458646779, "learning_rate": 2.9755049281432354e-05, "loss": 1.3757, "step": 1346 }, { "epoch": 0.344832, "grad_norm": 0.4268516829619504, "learning_rate": 2.974085799294707e-05, "loss": 1.4476, "step": 1347 }, { "epoch": 0.345088, "grad_norm": 0.39093214171547186, "learning_rate": 2.9726660272058948e-05, "loss": 1.3657, "step": 1348 }, { "epoch": 0.345344, "grad_norm": 0.4063958212976025, "learning_rate": 2.97124561281435e-05, "loss": 1.3682, "step": 1349 }, { "epoch": 0.3456, "grad_norm": 0.4204029768691132, "learning_rate": 2.9698245570580488e-05, "loss": 1.3964, "step": 1350 }, { "epoch": 0.345856, "grad_norm": 0.40915344851946944, "learning_rate": 2.9684028608753874e-05, "loss": 1.4048, "step": 1351 }, { "epoch": 0.346112, "grad_norm": 0.41143302070026544, "learning_rate": 2.9669805252051884e-05, "loss": 1.3801, "step": 1352 }, { "epoch": 0.346368, "grad_norm": 0.40408748499763036, "learning_rate": 2.9655575509866935e-05, "loss": 1.3964, "step": 1353 }, { "epoch": 0.346624, "grad_norm": 0.4070544059317756, "learning_rate": 2.9641339391595685e-05, "loss": 1.393, "step": 1354 }, { "epoch": 0.34688, "grad_norm": 0.4117257452047398, "learning_rate": 2.9627096906639e-05, "loss": 1.4146, "step": 1355 }, { "epoch": 0.347136, "grad_norm": 0.41523801930988, "learning_rate": 2.9612848064401934e-05, "loss": 1.3954, "step": 1356 }, { "epoch": 0.347392, "grad_norm": 0.40424233120719416, "learning_rate": 2.959859287429376e-05, "loss": 1.3973, "step": 1357 }, { "epoch": 0.347648, "grad_norm": 0.3952105902502218, "learning_rate": 2.9584331345727924e-05, "loss": 1.3594, "step": 1358 }, { "epoch": 0.347904, "grad_norm": 0.4002731102703411, "learning_rate": 2.9570063488122077e-05, "loss": 1.3644, "step": 1359 }, { "epoch": 0.34816, "grad_norm": 0.4062748863017373, "learning_rate": 2.9555789310898025e-05, "loss": 1.3972, "step": 1360 }, { "epoch": 0.348416, "grad_norm": 0.4006636196780595, "learning_rate": 2.954150882348177e-05, "loss": 1.3761, "step": 1361 }, { "epoch": 0.348672, "grad_norm": 0.42746420943661606, "learning_rate": 2.952722203530347e-05, "loss": 1.4657, "step": 1362 }, { "epoch": 0.348928, "grad_norm": 0.40353070498438187, "learning_rate": 2.9512928955797443e-05, "loss": 1.3721, "step": 1363 }, { "epoch": 0.349184, "grad_norm": 0.40839741594261064, "learning_rate": 2.9498629594402167e-05, "loss": 1.3842, "step": 1364 }, { "epoch": 0.34944, "grad_norm": 0.4088648815029996, "learning_rate": 2.9484323960560273e-05, "loss": 1.407, "step": 1365 }, { "epoch": 0.349696, "grad_norm": 0.4194551301113532, "learning_rate": 2.9470012063718508e-05, "loss": 1.4074, "step": 1366 }, { "epoch": 0.349952, "grad_norm": 0.41113900413829035, "learning_rate": 2.9455693913327788e-05, "loss": 1.3886, "step": 1367 }, { "epoch": 0.350208, "grad_norm": 0.4075661853838565, "learning_rate": 2.9441369518843138e-05, "loss": 1.421, "step": 1368 }, { "epoch": 0.350464, "grad_norm": 0.4326718183685464, "learning_rate": 2.9427038889723708e-05, "loss": 1.4226, "step": 1369 }, { "epoch": 0.35072, "grad_norm": 0.3978549833374469, "learning_rate": 2.941270203543278e-05, "loss": 1.386, "step": 1370 }, { "epoch": 0.350976, "grad_norm": 0.41513633886157214, "learning_rate": 2.9398358965437726e-05, "loss": 1.3876, "step": 1371 }, { "epoch": 0.351232, "grad_norm": 0.4072081871458697, "learning_rate": 2.9384009689210038e-05, "loss": 1.4214, "step": 1372 }, { "epoch": 0.351488, "grad_norm": 0.4102887266971176, "learning_rate": 2.9369654216225294e-05, "loss": 1.3733, "step": 1373 }, { "epoch": 0.351744, "grad_norm": 0.4247078164587722, "learning_rate": 2.9355292555963185e-05, "loss": 1.4051, "step": 1374 }, { "epoch": 0.352, "grad_norm": 0.3979339879640671, "learning_rate": 2.9340924717907455e-05, "loss": 1.3585, "step": 1375 }, { "epoch": 0.352256, "grad_norm": 0.4091242783323288, "learning_rate": 2.932655071154596e-05, "loss": 1.3668, "step": 1376 }, { "epoch": 0.352512, "grad_norm": 0.401848634888522, "learning_rate": 2.9312170546370605e-05, "loss": 1.3845, "step": 1377 }, { "epoch": 0.352768, "grad_norm": 0.39291242459777115, "learning_rate": 2.9297784231877382e-05, "loss": 1.3894, "step": 1378 }, { "epoch": 0.353024, "grad_norm": 0.4152876137595836, "learning_rate": 2.9283391777566325e-05, "loss": 1.3088, "step": 1379 }, { "epoch": 0.35328, "grad_norm": 0.39267878980640647, "learning_rate": 2.9268993192941547e-05, "loss": 1.4052, "step": 1380 }, { "epoch": 0.353536, "grad_norm": 0.4243710580844867, "learning_rate": 2.9254588487511175e-05, "loss": 1.4289, "step": 1381 }, { "epoch": 0.353792, "grad_norm": 0.4187107223001199, "learning_rate": 2.9240177670787406e-05, "loss": 1.4168, "step": 1382 }, { "epoch": 0.354048, "grad_norm": 0.3930573690233906, "learning_rate": 2.9225760752286452e-05, "loss": 1.3699, "step": 1383 }, { "epoch": 0.354304, "grad_norm": 0.41248128489925057, "learning_rate": 2.9211337741528587e-05, "loss": 1.4088, "step": 1384 }, { "epoch": 0.35456, "grad_norm": 0.40385969250358, "learning_rate": 2.9196908648038067e-05, "loss": 1.392, "step": 1385 }, { "epoch": 0.354816, "grad_norm": 0.41201070774800636, "learning_rate": 2.91824734813432e-05, "loss": 1.3823, "step": 1386 }, { "epoch": 0.355072, "grad_norm": 0.4111156865828313, "learning_rate": 2.9168032250976276e-05, "loss": 1.363, "step": 1387 }, { "epoch": 0.355328, "grad_norm": 0.4090791112503471, "learning_rate": 2.9153584966473603e-05, "loss": 1.4184, "step": 1388 }, { "epoch": 0.355584, "grad_norm": 0.39436573608160275, "learning_rate": 2.913913163737549e-05, "loss": 1.3679, "step": 1389 }, { "epoch": 0.35584, "grad_norm": 0.42828945672571866, "learning_rate": 2.9124672273226235e-05, "loss": 1.386, "step": 1390 }, { "epoch": 0.356096, "grad_norm": 0.4169046921649057, "learning_rate": 2.9110206883574116e-05, "loss": 1.4224, "step": 1391 }, { "epoch": 0.356352, "grad_norm": 0.4059191438661423, "learning_rate": 2.9095735477971394e-05, "loss": 1.392, "step": 1392 }, { "epoch": 0.356608, "grad_norm": 0.4010680595741339, "learning_rate": 2.908125806597431e-05, "loss": 1.4451, "step": 1393 }, { "epoch": 0.356864, "grad_norm": 0.4197912470550911, "learning_rate": 2.9066774657143044e-05, "loss": 1.3578, "step": 1394 }, { "epoch": 0.35712, "grad_norm": 0.40507250291805036, "learning_rate": 2.905228526104177e-05, "loss": 1.3947, "step": 1395 }, { "epoch": 0.357376, "grad_norm": 0.40100006589374415, "learning_rate": 2.9037789887238604e-05, "loss": 1.3946, "step": 1396 }, { "epoch": 0.357632, "grad_norm": 0.41565844507070737, "learning_rate": 2.9023288545305603e-05, "loss": 1.3639, "step": 1397 }, { "epoch": 0.357888, "grad_norm": 0.40506390419502336, "learning_rate": 2.9008781244818757e-05, "loss": 1.3902, "step": 1398 }, { "epoch": 0.358144, "grad_norm": 0.41078857891945475, "learning_rate": 2.8994267995358023e-05, "loss": 1.3904, "step": 1399 }, { "epoch": 0.3584, "grad_norm": 0.41349718420951437, "learning_rate": 2.8979748806507246e-05, "loss": 1.3436, "step": 1400 }, { "epoch": 0.358656, "grad_norm": 0.40497883809613155, "learning_rate": 2.8965223687854228e-05, "loss": 1.4263, "step": 1401 }, { "epoch": 0.358912, "grad_norm": 0.4186045316020322, "learning_rate": 2.895069264899067e-05, "loss": 1.4104, "step": 1402 }, { "epoch": 0.359168, "grad_norm": 0.4050900563293329, "learning_rate": 2.8936155699512177e-05, "loss": 1.3894, "step": 1403 }, { "epoch": 0.359424, "grad_norm": 0.4217906921509169, "learning_rate": 2.8921612849018264e-05, "loss": 1.4188, "step": 1404 }, { "epoch": 0.35968, "grad_norm": 0.406031775708933, "learning_rate": 2.8907064107112352e-05, "loss": 1.4038, "step": 1405 }, { "epoch": 0.359936, "grad_norm": 0.42501135712568533, "learning_rate": 2.8892509483401733e-05, "loss": 1.3864, "step": 1406 }, { "epoch": 0.360192, "grad_norm": 0.4004574305482252, "learning_rate": 2.88779489874976e-05, "loss": 1.3794, "step": 1407 }, { "epoch": 0.360448, "grad_norm": 0.4322073810561032, "learning_rate": 2.886338262901502e-05, "loss": 1.4033, "step": 1408 }, { "epoch": 0.360704, "grad_norm": 0.407532779928784, "learning_rate": 2.8848810417572926e-05, "loss": 1.3653, "step": 1409 }, { "epoch": 0.36096, "grad_norm": 0.4160188733666925, "learning_rate": 2.883423236279411e-05, "loss": 1.3987, "step": 1410 }, { "epoch": 0.361216, "grad_norm": 0.3946384821257591, "learning_rate": 2.8819648474305242e-05, "loss": 1.3528, "step": 1411 }, { "epoch": 0.361472, "grad_norm": 0.40946563151926035, "learning_rate": 2.8805058761736826e-05, "loss": 1.4338, "step": 1412 }, { "epoch": 0.361728, "grad_norm": 0.4140866205862416, "learning_rate": 2.8790463234723228e-05, "loss": 1.3277, "step": 1413 }, { "epoch": 0.361984, "grad_norm": 0.41329119127896946, "learning_rate": 2.8775861902902637e-05, "loss": 1.4364, "step": 1414 }, { "epoch": 0.36224, "grad_norm": 0.4148301414120206, "learning_rate": 2.876125477591709e-05, "loss": 1.3876, "step": 1415 }, { "epoch": 0.362496, "grad_norm": 0.42922962124142383, "learning_rate": 2.8746641863412442e-05, "loss": 1.4215, "step": 1416 }, { "epoch": 0.362752, "grad_norm": 0.3996671823822803, "learning_rate": 2.8732023175038367e-05, "loss": 1.4017, "step": 1417 }, { "epoch": 0.363008, "grad_norm": 0.4243935379713655, "learning_rate": 2.8717398720448354e-05, "loss": 1.4023, "step": 1418 }, { "epoch": 0.363264, "grad_norm": 0.40366481180258595, "learning_rate": 2.8702768509299713e-05, "loss": 1.3576, "step": 1419 }, { "epoch": 0.36352, "grad_norm": 0.40237319153594614, "learning_rate": 2.8688132551253546e-05, "loss": 1.3771, "step": 1420 }, { "epoch": 0.363776, "grad_norm": 0.3979352056097781, "learning_rate": 2.8673490855974734e-05, "loss": 1.3749, "step": 1421 }, { "epoch": 0.364032, "grad_norm": 0.411800551565504, "learning_rate": 2.865884343313197e-05, "loss": 1.3891, "step": 1422 }, { "epoch": 0.364288, "grad_norm": 0.4357601236293486, "learning_rate": 2.8644190292397724e-05, "loss": 1.4419, "step": 1423 }, { "epoch": 0.364544, "grad_norm": 0.41655976059382555, "learning_rate": 2.8629531443448235e-05, "loss": 1.3506, "step": 1424 }, { "epoch": 0.3648, "grad_norm": 0.41080576554614484, "learning_rate": 2.8614866895963516e-05, "loss": 1.3921, "step": 1425 }, { "epoch": 0.365056, "grad_norm": 0.41253353404570114, "learning_rate": 2.8600196659627338e-05, "loss": 1.3865, "step": 1426 }, { "epoch": 0.365312, "grad_norm": 0.3986542374866559, "learning_rate": 2.8585520744127237e-05, "loss": 1.3525, "step": 1427 }, { "epoch": 0.365568, "grad_norm": 0.4096729370444157, "learning_rate": 2.857083915915449e-05, "loss": 1.3653, "step": 1428 }, { "epoch": 0.365824, "grad_norm": 0.39774045543567765, "learning_rate": 2.8556151914404134e-05, "loss": 1.4021, "step": 1429 }, { "epoch": 0.36608, "grad_norm": 0.4049965435487163, "learning_rate": 2.8541459019574917e-05, "loss": 1.3832, "step": 1430 }, { "epoch": 0.366336, "grad_norm": 0.42629838563661254, "learning_rate": 2.8526760484369343e-05, "loss": 1.4137, "step": 1431 }, { "epoch": 0.366592, "grad_norm": 0.4116529028138325, "learning_rate": 2.851205631849363e-05, "loss": 1.4292, "step": 1432 }, { "epoch": 0.366848, "grad_norm": 0.4045372916833185, "learning_rate": 2.8497346531657707e-05, "loss": 1.3847, "step": 1433 }, { "epoch": 0.367104, "grad_norm": 0.38842958290686924, "learning_rate": 2.8482631133575234e-05, "loss": 1.3454, "step": 1434 }, { "epoch": 0.36736, "grad_norm": 0.41313710411587384, "learning_rate": 2.8467910133963563e-05, "loss": 1.4246, "step": 1435 }, { "epoch": 0.367616, "grad_norm": 0.415673512083582, "learning_rate": 2.8453183542543743e-05, "loss": 1.394, "step": 1436 }, { "epoch": 0.367872, "grad_norm": 0.40776592561021396, "learning_rate": 2.8438451369040517e-05, "loss": 1.3712, "step": 1437 }, { "epoch": 0.368128, "grad_norm": 0.40050035240394743, "learning_rate": 2.8423713623182335e-05, "loss": 1.4022, "step": 1438 }, { "epoch": 0.368384, "grad_norm": 0.413546865878269, "learning_rate": 2.840897031470128e-05, "loss": 1.3992, "step": 1439 }, { "epoch": 0.36864, "grad_norm": 0.3977846604527369, "learning_rate": 2.8394221453333156e-05, "loss": 1.4011, "step": 1440 }, { "epoch": 0.368896, "grad_norm": 0.4465853157576206, "learning_rate": 2.8379467048817417e-05, "loss": 1.39, "step": 1441 }, { "epoch": 0.369152, "grad_norm": 0.4117433738404153, "learning_rate": 2.8364707110897164e-05, "loss": 1.3279, "step": 1442 }, { "epoch": 0.369408, "grad_norm": 0.4099006976335367, "learning_rate": 2.8349941649319177e-05, "loss": 1.4148, "step": 1443 }, { "epoch": 0.369664, "grad_norm": 0.4033069633736659, "learning_rate": 2.8335170673833853e-05, "loss": 1.4042, "step": 1444 }, { "epoch": 0.36992, "grad_norm": 0.42122434363078604, "learning_rate": 2.832039419419526e-05, "loss": 1.3894, "step": 1445 }, { "epoch": 0.370176, "grad_norm": 0.403945412759211, "learning_rate": 2.8305612220161083e-05, "loss": 1.4065, "step": 1446 }, { "epoch": 0.370432, "grad_norm": 0.41425040899401516, "learning_rate": 2.8290824761492638e-05, "loss": 1.4355, "step": 1447 }, { "epoch": 0.370688, "grad_norm": 0.3982234942522407, "learning_rate": 2.827603182795487e-05, "loss": 1.3782, "step": 1448 }, { "epoch": 0.370944, "grad_norm": 0.4202672535883716, "learning_rate": 2.8261233429316332e-05, "loss": 1.4379, "step": 1449 }, { "epoch": 0.3712, "grad_norm": 0.40147854079857526, "learning_rate": 2.824642957534918e-05, "loss": 1.4362, "step": 1450 }, { "epoch": 0.371456, "grad_norm": 0.39897217803227375, "learning_rate": 2.8231620275829195e-05, "loss": 1.407, "step": 1451 }, { "epoch": 0.371712, "grad_norm": 0.40318497864964664, "learning_rate": 2.8216805540535726e-05, "loss": 1.4066, "step": 1452 }, { "epoch": 0.371968, "grad_norm": 0.4103414428318573, "learning_rate": 2.820198537925174e-05, "loss": 1.3865, "step": 1453 }, { "epoch": 0.372224, "grad_norm": 0.4182121543077132, "learning_rate": 2.818715980176376e-05, "loss": 1.3869, "step": 1454 }, { "epoch": 0.37248, "grad_norm": 0.40288453253981876, "learning_rate": 2.8172328817861893e-05, "loss": 1.3704, "step": 1455 }, { "epoch": 0.372736, "grad_norm": 0.4069207416474802, "learning_rate": 2.815749243733983e-05, "loss": 1.4013, "step": 1456 }, { "epoch": 0.372992, "grad_norm": 0.39384935194889376, "learning_rate": 2.8142650669994815e-05, "loss": 1.3887, "step": 1457 }, { "epoch": 0.373248, "grad_norm": 0.41284920347155823, "learning_rate": 2.812780352562765e-05, "loss": 1.4167, "step": 1458 }, { "epoch": 0.373504, "grad_norm": 0.39494067135759336, "learning_rate": 2.811295101404269e-05, "loss": 1.4529, "step": 1459 }, { "epoch": 0.37376, "grad_norm": 0.39146528650555706, "learning_rate": 2.8098093145047835e-05, "loss": 1.3363, "step": 1460 }, { "epoch": 0.374016, "grad_norm": 0.3938517203877017, "learning_rate": 2.808322992845451e-05, "loss": 1.4158, "step": 1461 }, { "epoch": 0.374272, "grad_norm": 0.38717281782784313, "learning_rate": 2.8068361374077685e-05, "loss": 1.3325, "step": 1462 }, { "epoch": 0.374528, "grad_norm": 0.40039877511324534, "learning_rate": 2.805348749173586e-05, "loss": 1.4214, "step": 1463 }, { "epoch": 0.374784, "grad_norm": 0.4040339933397001, "learning_rate": 2.8038608291251034e-05, "loss": 1.4046, "step": 1464 }, { "epoch": 0.37504, "grad_norm": 0.4012907304983749, "learning_rate": 2.802372378244874e-05, "loss": 1.3418, "step": 1465 }, { "epoch": 0.375296, "grad_norm": 0.40684512393503225, "learning_rate": 2.8008833975158e-05, "loss": 1.4432, "step": 1466 }, { "epoch": 0.375552, "grad_norm": 0.4015132924502184, "learning_rate": 2.7993938879211326e-05, "loss": 1.3388, "step": 1467 }, { "epoch": 0.375808, "grad_norm": 0.39201937175882234, "learning_rate": 2.7979038504444758e-05, "loss": 1.3599, "step": 1468 }, { "epoch": 0.376064, "grad_norm": 0.3954785930528305, "learning_rate": 2.796413286069779e-05, "loss": 1.361, "step": 1469 }, { "epoch": 0.37632, "grad_norm": 0.3991007536549761, "learning_rate": 2.794922195781341e-05, "loss": 1.3837, "step": 1470 }, { "epoch": 0.376576, "grad_norm": 0.3962004416655387, "learning_rate": 2.793430580563807e-05, "loss": 1.3832, "step": 1471 }, { "epoch": 0.376832, "grad_norm": 0.4151718797988861, "learning_rate": 2.7919384414021694e-05, "loss": 1.3845, "step": 1472 }, { "epoch": 0.377088, "grad_norm": 0.4070998709006792, "learning_rate": 2.7904457792817657e-05, "loss": 1.4441, "step": 1473 }, { "epoch": 0.377344, "grad_norm": 0.40227154642403934, "learning_rate": 2.788952595188281e-05, "loss": 1.3828, "step": 1474 }, { "epoch": 0.3776, "grad_norm": 0.41053363456182507, "learning_rate": 2.7874588901077423e-05, "loss": 1.401, "step": 1475 }, { "epoch": 0.377856, "grad_norm": 0.4156256717670139, "learning_rate": 2.7859646650265234e-05, "loss": 1.4609, "step": 1476 }, { "epoch": 0.378112, "grad_norm": 0.42156722753830067, "learning_rate": 2.7844699209313386e-05, "loss": 1.3666, "step": 1477 }, { "epoch": 0.378368, "grad_norm": 0.39362018482759137, "learning_rate": 2.782974658809247e-05, "loss": 1.3902, "step": 1478 }, { "epoch": 0.378624, "grad_norm": 0.42342397930782977, "learning_rate": 2.7814788796476485e-05, "loss": 1.3568, "step": 1479 }, { "epoch": 0.37888, "grad_norm": 0.410550145888425, "learning_rate": 2.7799825844342855e-05, "loss": 1.3763, "step": 1480 }, { "epoch": 0.379136, "grad_norm": 0.42195017934877654, "learning_rate": 2.7784857741572417e-05, "loss": 1.4173, "step": 1481 }, { "epoch": 0.379392, "grad_norm": 0.41889711774759975, "learning_rate": 2.7769884498049386e-05, "loss": 1.4141, "step": 1482 }, { "epoch": 0.379648, "grad_norm": 0.40309358374814935, "learning_rate": 2.7754906123661387e-05, "loss": 1.3913, "step": 1483 }, { "epoch": 0.379904, "grad_norm": 0.4332191496375678, "learning_rate": 2.7739922628299437e-05, "loss": 1.3699, "step": 1484 }, { "epoch": 0.38016, "grad_norm": 0.3978072067686622, "learning_rate": 2.7724934021857918e-05, "loss": 1.4148, "step": 1485 }, { "epoch": 0.380416, "grad_norm": 0.4073415193597892, "learning_rate": 2.770994031423461e-05, "loss": 1.4123, "step": 1486 }, { "epoch": 0.380672, "grad_norm": 0.39202950987697, "learning_rate": 2.7694941515330647e-05, "loss": 1.3344, "step": 1487 }, { "epoch": 0.380928, "grad_norm": 0.39459818264273605, "learning_rate": 2.7679937635050528e-05, "loss": 1.3952, "step": 1488 }, { "epoch": 0.381184, "grad_norm": 0.41533187245370556, "learning_rate": 2.7664928683302105e-05, "loss": 1.4183, "step": 1489 }, { "epoch": 0.38144, "grad_norm": 0.38598544926438805, "learning_rate": 2.7649914669996586e-05, "loss": 1.3933, "step": 1490 }, { "epoch": 0.381696, "grad_norm": 0.3995132263263834, "learning_rate": 2.7634895605048507e-05, "loss": 1.3949, "step": 1491 }, { "epoch": 0.381952, "grad_norm": 0.38762983789706557, "learning_rate": 2.761987149837576e-05, "loss": 1.3958, "step": 1492 }, { "epoch": 0.382208, "grad_norm": 0.41064617496478406, "learning_rate": 2.7604842359899548e-05, "loss": 1.4421, "step": 1493 }, { "epoch": 0.382464, "grad_norm": 0.3878237625353223, "learning_rate": 2.758980819954441e-05, "loss": 1.3611, "step": 1494 }, { "epoch": 0.38272, "grad_norm": 0.4050652158905262, "learning_rate": 2.7574769027238192e-05, "loss": 1.4051, "step": 1495 }, { "epoch": 0.382976, "grad_norm": 0.40194902335667826, "learning_rate": 2.755972485291206e-05, "loss": 1.3822, "step": 1496 }, { "epoch": 0.383232, "grad_norm": 0.3942544952669704, "learning_rate": 2.7544675686500463e-05, "loss": 1.4076, "step": 1497 }, { "epoch": 0.383488, "grad_norm": 0.41318429138575724, "learning_rate": 2.7529621537941178e-05, "loss": 1.388, "step": 1498 }, { "epoch": 0.383744, "grad_norm": 0.39386145940840683, "learning_rate": 2.7514562417175245e-05, "loss": 1.3904, "step": 1499 }, { "epoch": 0.384, "grad_norm": 0.3990637429649315, "learning_rate": 2.749949833414699e-05, "loss": 1.3692, "step": 1500 }, { "epoch": 0.384256, "grad_norm": 0.401501946969665, "learning_rate": 2.7484429298804035e-05, "loss": 1.3961, "step": 1501 }, { "epoch": 0.384512, "grad_norm": 0.4128300084646836, "learning_rate": 2.746935532109725e-05, "loss": 1.3988, "step": 1502 }, { "epoch": 0.384768, "grad_norm": 0.39017572698396386, "learning_rate": 2.7454276410980782e-05, "loss": 1.3943, "step": 1503 }, { "epoch": 0.385024, "grad_norm": 0.40030785606713126, "learning_rate": 2.7439192578412038e-05, "loss": 1.354, "step": 1504 }, { "epoch": 0.38528, "grad_norm": 0.3982596970351547, "learning_rate": 2.742410383335166e-05, "loss": 1.3905, "step": 1505 }, { "epoch": 0.385536, "grad_norm": 0.40237759112183574, "learning_rate": 2.740901018576354e-05, "loss": 1.3959, "step": 1506 }, { "epoch": 0.385792, "grad_norm": 0.39582368540774776, "learning_rate": 2.7393911645614823e-05, "loss": 1.3791, "step": 1507 }, { "epoch": 0.386048, "grad_norm": 0.41353091741160364, "learning_rate": 2.7378808222875857e-05, "loss": 1.4195, "step": 1508 }, { "epoch": 0.386304, "grad_norm": 0.40337474613787083, "learning_rate": 2.7363699927520245e-05, "loss": 1.3876, "step": 1509 }, { "epoch": 0.38656, "grad_norm": 0.4262651794610066, "learning_rate": 2.7348586769524787e-05, "loss": 1.4114, "step": 1510 }, { "epoch": 0.386816, "grad_norm": 0.3973129693246807, "learning_rate": 2.733346875886949e-05, "loss": 1.4232, "step": 1511 }, { "epoch": 0.387072, "grad_norm": 0.4091811894812489, "learning_rate": 2.7318345905537586e-05, "loss": 1.3488, "step": 1512 }, { "epoch": 0.387328, "grad_norm": 0.42086499572736796, "learning_rate": 2.730321821951549e-05, "loss": 1.4452, "step": 1513 }, { "epoch": 0.387584, "grad_norm": 0.40117605061590655, "learning_rate": 2.7288085710792803e-05, "loss": 1.3529, "step": 1514 }, { "epoch": 0.38784, "grad_norm": 0.421129178260488, "learning_rate": 2.7272948389362336e-05, "loss": 1.3757, "step": 1515 }, { "epoch": 0.388096, "grad_norm": 0.39138122388578267, "learning_rate": 2.7257806265220054e-05, "loss": 1.3908, "step": 1516 }, { "epoch": 0.388352, "grad_norm": 0.4164644408626294, "learning_rate": 2.7242659348365095e-05, "loss": 1.3961, "step": 1517 }, { "epoch": 0.388608, "grad_norm": 0.5209449487008662, "learning_rate": 2.722750764879978e-05, "loss": 1.3795, "step": 1518 }, { "epoch": 0.388864, "grad_norm": 0.3911144110290774, "learning_rate": 2.721235117652956e-05, "loss": 1.3808, "step": 1519 }, { "epoch": 0.38912, "grad_norm": 0.39729576106363, "learning_rate": 2.7197189941563062e-05, "loss": 1.3983, "step": 1520 }, { "epoch": 0.389376, "grad_norm": 0.41272447874451607, "learning_rate": 2.7182023953912064e-05, "loss": 1.3985, "step": 1521 }, { "epoch": 0.389632, "grad_norm": 0.3865516565294949, "learning_rate": 2.7166853223591443e-05, "loss": 1.3647, "step": 1522 }, { "epoch": 0.389888, "grad_norm": 0.4021935699821488, "learning_rate": 2.715167776061925e-05, "loss": 1.3027, "step": 1523 }, { "epoch": 0.390144, "grad_norm": 0.39466405197335036, "learning_rate": 2.713649757501664e-05, "loss": 1.4065, "step": 1524 }, { "epoch": 0.3904, "grad_norm": 0.41187275749423646, "learning_rate": 2.712131267680789e-05, "loss": 1.4002, "step": 1525 }, { "epoch": 0.390656, "grad_norm": 0.3885017632820429, "learning_rate": 2.7106123076020395e-05, "loss": 1.4001, "step": 1526 }, { "epoch": 0.390912, "grad_norm": 0.40901946692529934, "learning_rate": 2.7090928782684645e-05, "loss": 1.3776, "step": 1527 }, { "epoch": 0.391168, "grad_norm": 0.3773922895454674, "learning_rate": 2.707572980683423e-05, "loss": 1.3806, "step": 1528 }, { "epoch": 0.391424, "grad_norm": 0.41086430448126915, "learning_rate": 2.7060526158505842e-05, "loss": 1.3759, "step": 1529 }, { "epoch": 0.39168, "grad_norm": 0.40622266435226445, "learning_rate": 2.7045317847739257e-05, "loss": 1.388, "step": 1530 }, { "epoch": 0.391936, "grad_norm": 0.39405916912369365, "learning_rate": 2.7030104884577318e-05, "loss": 1.3558, "step": 1531 }, { "epoch": 0.392192, "grad_norm": 0.39198447399118386, "learning_rate": 2.7014887279065957e-05, "loss": 1.3405, "step": 1532 }, { "epoch": 0.392448, "grad_norm": 0.4012359202522085, "learning_rate": 2.6999665041254153e-05, "loss": 1.3942, "step": 1533 }, { "epoch": 0.392704, "grad_norm": 0.42075057648329756, "learning_rate": 2.6984438181193953e-05, "loss": 1.3786, "step": 1534 }, { "epoch": 0.39296, "grad_norm": 0.3891041261713129, "learning_rate": 2.696920670894046e-05, "loss": 1.3364, "step": 1535 }, { "epoch": 0.393216, "grad_norm": 0.42206820076883245, "learning_rate": 2.6953970634551826e-05, "loss": 1.405, "step": 1536 }, { "epoch": 0.393472, "grad_norm": 0.3998697342408393, "learning_rate": 2.693872996808922e-05, "loss": 1.3948, "step": 1537 }, { "epoch": 0.393728, "grad_norm": 0.4276627844704088, "learning_rate": 2.692348471961687e-05, "loss": 1.4086, "step": 1538 }, { "epoch": 0.393984, "grad_norm": 0.4126012008033037, "learning_rate": 2.6908234899202022e-05, "loss": 1.4199, "step": 1539 }, { "epoch": 0.39424, "grad_norm": 0.4138237646983024, "learning_rate": 2.6892980516914928e-05, "loss": 1.4126, "step": 1540 }, { "epoch": 0.394496, "grad_norm": 0.39757716447399477, "learning_rate": 2.6877721582828866e-05, "loss": 1.3923, "step": 1541 }, { "epoch": 0.394752, "grad_norm": 0.3920675739350379, "learning_rate": 2.686245810702011e-05, "loss": 1.3315, "step": 1542 }, { "epoch": 0.395008, "grad_norm": 0.4025739810672015, "learning_rate": 2.684719009956795e-05, "loss": 1.4146, "step": 1543 }, { "epoch": 0.395264, "grad_norm": 0.39771850671881304, "learning_rate": 2.683191757055465e-05, "loss": 1.3387, "step": 1544 }, { "epoch": 0.39552, "grad_norm": 0.3990656693840695, "learning_rate": 2.6816640530065473e-05, "loss": 1.3829, "step": 1545 }, { "epoch": 0.395776, "grad_norm": 0.40071890774475416, "learning_rate": 2.680135898818865e-05, "loss": 1.3765, "step": 1546 }, { "epoch": 0.396032, "grad_norm": 0.39735506237863144, "learning_rate": 2.6786072955015385e-05, "loss": 1.3892, "step": 1547 }, { "epoch": 0.396288, "grad_norm": 0.4175223299259817, "learning_rate": 2.6770782440639863e-05, "loss": 1.3874, "step": 1548 }, { "epoch": 0.396544, "grad_norm": 0.433375401605186, "learning_rate": 2.675548745515922e-05, "loss": 1.3757, "step": 1549 }, { "epoch": 0.3968, "grad_norm": 0.3930721801250507, "learning_rate": 2.674018800867353e-05, "loss": 1.3744, "step": 1550 }, { "epoch": 0.397056, "grad_norm": 0.3954251438693906, "learning_rate": 2.6724884111285833e-05, "loss": 1.3824, "step": 1551 }, { "epoch": 0.397312, "grad_norm": 0.40441968467011125, "learning_rate": 2.67095757731021e-05, "loss": 1.3769, "step": 1552 }, { "epoch": 0.397568, "grad_norm": 0.4183542898345257, "learning_rate": 2.6694263004231233e-05, "loss": 1.3533, "step": 1553 }, { "epoch": 0.397824, "grad_norm": 0.40230419987606225, "learning_rate": 2.6678945814785064e-05, "loss": 1.4029, "step": 1554 }, { "epoch": 0.39808, "grad_norm": 0.38673022424732684, "learning_rate": 2.6663624214878346e-05, "loss": 1.3579, "step": 1555 }, { "epoch": 0.398336, "grad_norm": 0.4023746571208338, "learning_rate": 2.664829821462872e-05, "loss": 1.3966, "step": 1556 }, { "epoch": 0.398592, "grad_norm": 0.4124529070247121, "learning_rate": 2.6632967824156787e-05, "loss": 1.3776, "step": 1557 }, { "epoch": 0.398848, "grad_norm": 0.397544880264132, "learning_rate": 2.6617633053585982e-05, "loss": 1.3969, "step": 1558 }, { "epoch": 0.399104, "grad_norm": 0.38090972098310716, "learning_rate": 2.660229391304268e-05, "loss": 1.3499, "step": 1559 }, { "epoch": 0.39936, "grad_norm": 0.5782454723460566, "learning_rate": 2.6586950412656123e-05, "loss": 1.3468, "step": 1560 }, { "epoch": 0.399616, "grad_norm": 0.3993088875489005, "learning_rate": 2.6571602562558437e-05, "loss": 1.371, "step": 1561 }, { "epoch": 0.399872, "grad_norm": 0.40302494861911015, "learning_rate": 2.6556250372884607e-05, "loss": 1.3778, "step": 1562 }, { "epoch": 0.400128, "grad_norm": 0.4114994326643323, "learning_rate": 2.6540893853772503e-05, "loss": 1.3992, "step": 1563 }, { "epoch": 0.400384, "grad_norm": 0.40272367426831235, "learning_rate": 2.6525533015362842e-05, "loss": 1.4196, "step": 1564 }, { "epoch": 0.40064, "grad_norm": 0.427405695044878, "learning_rate": 2.65101678677992e-05, "loss": 1.4068, "step": 1565 }, { "epoch": 0.400896, "grad_norm": 0.3913728419721429, "learning_rate": 2.6494798421227998e-05, "loss": 1.3798, "step": 1566 }, { "epoch": 0.401152, "grad_norm": 0.4171393815183837, "learning_rate": 2.6479424685798486e-05, "loss": 1.3898, "step": 1567 }, { "epoch": 0.401408, "grad_norm": 0.42903904918849795, "learning_rate": 2.646404667166275e-05, "loss": 1.4504, "step": 1568 }, { "epoch": 0.401664, "grad_norm": 0.40441914554955816, "learning_rate": 2.6448664388975715e-05, "loss": 1.363, "step": 1569 }, { "epoch": 0.40192, "grad_norm": 0.4021049132089498, "learning_rate": 2.6433277847895112e-05, "loss": 1.3656, "step": 1570 }, { "epoch": 0.402176, "grad_norm": 0.4126885458524139, "learning_rate": 2.6417887058581477e-05, "loss": 1.4309, "step": 1571 }, { "epoch": 0.402432, "grad_norm": 0.40419344696231346, "learning_rate": 2.640249203119817e-05, "loss": 1.3955, "step": 1572 }, { "epoch": 0.402688, "grad_norm": 0.4298677193961361, "learning_rate": 2.6387092775911346e-05, "loss": 1.39, "step": 1573 }, { "epoch": 0.402944, "grad_norm": 0.39841193163686267, "learning_rate": 2.6371689302889932e-05, "loss": 1.3903, "step": 1574 }, { "epoch": 0.4032, "grad_norm": 0.43115914304598324, "learning_rate": 2.635628162230566e-05, "loss": 1.3853, "step": 1575 }, { "epoch": 0.403456, "grad_norm": 0.3949831876606797, "learning_rate": 2.634086974433304e-05, "loss": 1.3685, "step": 1576 }, { "epoch": 0.403712, "grad_norm": 0.5145054740614577, "learning_rate": 2.6325453679149345e-05, "loss": 1.3827, "step": 1577 }, { "epoch": 0.403968, "grad_norm": 0.40356041639044127, "learning_rate": 2.631003343693461e-05, "loss": 1.3764, "step": 1578 }, { "epoch": 0.404224, "grad_norm": 0.40962786395355416, "learning_rate": 2.629460902787165e-05, "loss": 1.3928, "step": 1579 }, { "epoch": 0.40448, "grad_norm": 0.3951557965433583, "learning_rate": 2.6279180462146007e-05, "loss": 1.3821, "step": 1580 }, { "epoch": 0.404736, "grad_norm": 0.42080773079936673, "learning_rate": 2.626374774994597e-05, "loss": 1.3775, "step": 1581 }, { "epoch": 0.404992, "grad_norm": 0.4019048141081693, "learning_rate": 2.624831090146259e-05, "loss": 1.3775, "step": 1582 }, { "epoch": 0.405248, "grad_norm": 0.42041255372460984, "learning_rate": 2.623286992688963e-05, "loss": 1.3865, "step": 1583 }, { "epoch": 0.405504, "grad_norm": 0.4171818585494644, "learning_rate": 2.6217424836423568e-05, "loss": 1.3765, "step": 1584 }, { "epoch": 0.40576, "grad_norm": 0.4206426386265979, "learning_rate": 2.620197564026363e-05, "loss": 1.3837, "step": 1585 }, { "epoch": 0.406016, "grad_norm": 0.40643713965187, "learning_rate": 2.618652234861172e-05, "loss": 1.3902, "step": 1586 }, { "epoch": 0.406272, "grad_norm": 0.4103606704571333, "learning_rate": 2.617106497167247e-05, "loss": 1.3846, "step": 1587 }, { "epoch": 0.406528, "grad_norm": 0.40351380650539226, "learning_rate": 2.6155603519653204e-05, "loss": 1.4024, "step": 1588 }, { "epoch": 0.406784, "grad_norm": 0.3935193501074164, "learning_rate": 2.6140138002763934e-05, "loss": 1.3798, "step": 1589 }, { "epoch": 0.40704, "grad_norm": 0.39594658998954835, "learning_rate": 2.6124668431217348e-05, "loss": 1.3504, "step": 1590 }, { "epoch": 0.407296, "grad_norm": 0.4263075340248201, "learning_rate": 2.610919481522884e-05, "loss": 1.4077, "step": 1591 }, { "epoch": 0.407552, "grad_norm": 0.4054166615334729, "learning_rate": 2.6093717165016435e-05, "loss": 1.3901, "step": 1592 }, { "epoch": 0.407808, "grad_norm": 0.40408189295138713, "learning_rate": 2.6078235490800854e-05, "loss": 1.3998, "step": 1593 }, { "epoch": 0.408064, "grad_norm": 0.39219033299624, "learning_rate": 2.606274980280547e-05, "loss": 1.3429, "step": 1594 }, { "epoch": 0.40832, "grad_norm": 0.4198503629754004, "learning_rate": 2.6047260111256285e-05, "loss": 1.3918, "step": 1595 }, { "epoch": 0.408576, "grad_norm": 0.3992637269410784, "learning_rate": 2.6031766426381966e-05, "loss": 1.3521, "step": 1596 }, { "epoch": 0.408832, "grad_norm": 0.420595935974799, "learning_rate": 2.601626875841382e-05, "loss": 1.3826, "step": 1597 }, { "epoch": 0.409088, "grad_norm": 0.4073796415180875, "learning_rate": 2.6000767117585757e-05, "loss": 1.3647, "step": 1598 }, { "epoch": 0.409344, "grad_norm": 0.41121208429459427, "learning_rate": 2.598526151413435e-05, "loss": 1.3888, "step": 1599 }, { "epoch": 0.4096, "grad_norm": 0.41195076366901184, "learning_rate": 2.5969751958298757e-05, "loss": 1.3756, "step": 1600 }, { "epoch": 0.409856, "grad_norm": 0.3948716051630853, "learning_rate": 2.595423846032075e-05, "loss": 1.3564, "step": 1601 }, { "epoch": 0.410112, "grad_norm": 0.4952150340783802, "learning_rate": 2.593872103044472e-05, "loss": 1.3446, "step": 1602 }, { "epoch": 0.410368, "grad_norm": 0.41606517793409226, "learning_rate": 2.592319967891765e-05, "loss": 1.4168, "step": 1603 }, { "epoch": 0.410624, "grad_norm": 0.3977857583909906, "learning_rate": 2.5907674415989093e-05, "loss": 1.4139, "step": 1604 }, { "epoch": 0.41088, "grad_norm": 0.4139785295481217, "learning_rate": 2.5892145251911212e-05, "loss": 1.3831, "step": 1605 }, { "epoch": 0.411136, "grad_norm": 0.4050706492706079, "learning_rate": 2.5876612196938733e-05, "loss": 1.3645, "step": 1606 }, { "epoch": 0.411392, "grad_norm": 0.3972742936957601, "learning_rate": 2.586107526132894e-05, "loss": 1.3982, "step": 1607 }, { "epoch": 0.411648, "grad_norm": 0.3963830822595578, "learning_rate": 2.584553445534171e-05, "loss": 1.3901, "step": 1608 }, { "epoch": 0.411904, "grad_norm": 0.39906050797707837, "learning_rate": 2.5829989789239444e-05, "loss": 1.4044, "step": 1609 }, { "epoch": 0.41216, "grad_norm": 0.4041355026275094, "learning_rate": 2.5814441273287107e-05, "loss": 1.3526, "step": 1610 }, { "epoch": 0.412416, "grad_norm": 0.3901220716060119, "learning_rate": 2.5798888917752214e-05, "loss": 1.3944, "step": 1611 }, { "epoch": 0.412672, "grad_norm": 0.3951631264314115, "learning_rate": 2.5783332732904804e-05, "loss": 1.374, "step": 1612 }, { "epoch": 0.412928, "grad_norm": 0.3969236325227861, "learning_rate": 2.576777272901744e-05, "loss": 1.3745, "step": 1613 }, { "epoch": 0.413184, "grad_norm": 0.399605500782134, "learning_rate": 2.5752208916365212e-05, "loss": 1.3849, "step": 1614 }, { "epoch": 0.41344, "grad_norm": 0.3933122149760464, "learning_rate": 2.5736641305225743e-05, "loss": 1.3861, "step": 1615 }, { "epoch": 0.413696, "grad_norm": 0.40477851704107515, "learning_rate": 2.5721069905879133e-05, "loss": 1.4244, "step": 1616 }, { "epoch": 0.413952, "grad_norm": 0.3954583940407715, "learning_rate": 2.570549472860801e-05, "loss": 1.3637, "step": 1617 }, { "epoch": 0.414208, "grad_norm": 0.3959632146371025, "learning_rate": 2.5689915783697482e-05, "loss": 1.3453, "step": 1618 }, { "epoch": 0.414464, "grad_norm": 0.3991134814273877, "learning_rate": 2.5674333081435143e-05, "loss": 1.37, "step": 1619 }, { "epoch": 0.41472, "grad_norm": 0.4051899206058478, "learning_rate": 2.5658746632111074e-05, "loss": 1.3795, "step": 1620 }, { "epoch": 0.414976, "grad_norm": 0.4185562541025897, "learning_rate": 2.5643156446017848e-05, "loss": 1.3535, "step": 1621 }, { "epoch": 0.415232, "grad_norm": 0.38986333830470227, "learning_rate": 2.5627562533450472e-05, "loss": 1.3604, "step": 1622 }, { "epoch": 0.415488, "grad_norm": 0.42337006686198586, "learning_rate": 2.5611964904706428e-05, "loss": 1.3832, "step": 1623 }, { "epoch": 0.415744, "grad_norm": 0.405324673797134, "learning_rate": 2.5596363570085662e-05, "loss": 1.4134, "step": 1624 }, { "epoch": 0.416, "grad_norm": 0.39206757474833637, "learning_rate": 2.5580758539890553e-05, "loss": 1.3489, "step": 1625 }, { "epoch": 0.416256, "grad_norm": 0.42518732036158596, "learning_rate": 2.5565149824425934e-05, "loss": 1.3886, "step": 1626 }, { "epoch": 0.416512, "grad_norm": 0.4115238669294899, "learning_rate": 2.5549537433999056e-05, "loss": 1.3808, "step": 1627 }, { "epoch": 0.416768, "grad_norm": 0.40265154562167854, "learning_rate": 2.5533921378919613e-05, "loss": 1.3681, "step": 1628 }, { "epoch": 0.417024, "grad_norm": 0.39696003699747384, "learning_rate": 2.55183016694997e-05, "loss": 1.384, "step": 1629 }, { "epoch": 0.41728, "grad_norm": 0.4235769321836882, "learning_rate": 2.5502678316053843e-05, "loss": 1.3844, "step": 1630 }, { "epoch": 0.417536, "grad_norm": 0.41409911866652294, "learning_rate": 2.548705132889896e-05, "loss": 1.3767, "step": 1631 }, { "epoch": 0.417792, "grad_norm": 0.48422399562662255, "learning_rate": 2.5471420718354388e-05, "loss": 1.38, "step": 1632 }, { "epoch": 0.418048, "grad_norm": 0.42036501796305853, "learning_rate": 2.5455786494741837e-05, "loss": 1.3975, "step": 1633 }, { "epoch": 0.418304, "grad_norm": 0.4195381697731602, "learning_rate": 2.544014866838541e-05, "loss": 1.4076, "step": 1634 }, { "epoch": 0.41856, "grad_norm": 0.4548925116959188, "learning_rate": 2.542450724961159e-05, "loss": 1.3902, "step": 1635 }, { "epoch": 0.418816, "grad_norm": 0.3940400315054187, "learning_rate": 2.540886224874923e-05, "loss": 1.3779, "step": 1636 }, { "epoch": 0.419072, "grad_norm": 0.43945606726245035, "learning_rate": 2.5393213676129554e-05, "loss": 1.3826, "step": 1637 }, { "epoch": 0.419328, "grad_norm": 0.4163662354538395, "learning_rate": 2.5377561542086137e-05, "loss": 1.4204, "step": 1638 }, { "epoch": 0.419584, "grad_norm": 0.4193152758276446, "learning_rate": 2.5361905856954914e-05, "loss": 1.4037, "step": 1639 }, { "epoch": 0.41984, "grad_norm": 0.44274764480832757, "learning_rate": 2.534624663107416e-05, "loss": 1.3718, "step": 1640 }, { "epoch": 0.420096, "grad_norm": 0.3876028697996953, "learning_rate": 2.5330583874784482e-05, "loss": 1.3503, "step": 1641 }, { "epoch": 0.420352, "grad_norm": 0.41644118695416305, "learning_rate": 2.5314917598428825e-05, "loss": 1.4129, "step": 1642 }, { "epoch": 0.420608, "grad_norm": 0.39810268710247143, "learning_rate": 2.5299247812352473e-05, "loss": 1.3932, "step": 1643 }, { "epoch": 0.420864, "grad_norm": 0.40326367954337183, "learning_rate": 2.5283574526902993e-05, "loss": 1.3481, "step": 1644 }, { "epoch": 0.42112, "grad_norm": 0.39892787672129604, "learning_rate": 2.5267897752430298e-05, "loss": 1.349, "step": 1645 }, { "epoch": 0.421376, "grad_norm": 0.396709095168283, "learning_rate": 2.5252217499286588e-05, "loss": 1.3434, "step": 1646 }, { "epoch": 0.421632, "grad_norm": 0.41170280773964757, "learning_rate": 2.5236533777826352e-05, "loss": 1.4296, "step": 1647 }, { "epoch": 0.421888, "grad_norm": 0.3943068969914347, "learning_rate": 2.522084659840638e-05, "loss": 1.4062, "step": 1648 }, { "epoch": 0.422144, "grad_norm": 0.41427815670803986, "learning_rate": 2.5205155971385767e-05, "loss": 1.3614, "step": 1649 }, { "epoch": 0.4224, "grad_norm": 0.47798552404873934, "learning_rate": 2.518946190712584e-05, "loss": 1.3762, "step": 1650 }, { "epoch": 0.422656, "grad_norm": 0.4035673682359989, "learning_rate": 2.517376441599022e-05, "loss": 1.3922, "step": 1651 }, { "epoch": 0.422912, "grad_norm": 0.4253600897756832, "learning_rate": 2.5158063508344795e-05, "loss": 1.3904, "step": 1652 }, { "epoch": 0.423168, "grad_norm": 0.4501247555429117, "learning_rate": 2.51423591945577e-05, "loss": 1.362, "step": 1653 }, { "epoch": 0.423424, "grad_norm": 0.40857521892542986, "learning_rate": 2.5126651484999326e-05, "loss": 1.3682, "step": 1654 }, { "epoch": 0.42368, "grad_norm": 0.3973805906450857, "learning_rate": 2.5110940390042307e-05, "loss": 1.3425, "step": 1655 }, { "epoch": 0.423936, "grad_norm": 0.39188616018937367, "learning_rate": 2.5095225920061497e-05, "loss": 1.3785, "step": 1656 }, { "epoch": 0.424192, "grad_norm": 0.4091780966827691, "learning_rate": 2.5079508085433997e-05, "loss": 1.3701, "step": 1657 }, { "epoch": 0.424448, "grad_norm": 0.3996626694571495, "learning_rate": 2.5063786896539127e-05, "loss": 1.404, "step": 1658 }, { "epoch": 0.424704, "grad_norm": 0.40939621792263153, "learning_rate": 2.5048062363758402e-05, "loss": 1.3714, "step": 1659 }, { "epoch": 0.42496, "grad_norm": 0.3971689720247293, "learning_rate": 2.503233449747558e-05, "loss": 1.3984, "step": 1660 }, { "epoch": 0.425216, "grad_norm": 0.3821679446009892, "learning_rate": 2.5016603308076595e-05, "loss": 1.3376, "step": 1661 }, { "epoch": 0.425472, "grad_norm": 0.4043898043823833, "learning_rate": 2.500086880594958e-05, "loss": 1.3666, "step": 1662 }, { "epoch": 0.425728, "grad_norm": 0.39778875096692295, "learning_rate": 2.4985131001484856e-05, "loss": 1.3714, "step": 1663 }, { "epoch": 0.425984, "grad_norm": 0.38665387374871996, "learning_rate": 2.4969389905074933e-05, "loss": 1.3628, "step": 1664 }, { "epoch": 0.42624, "grad_norm": 0.40061361960772834, "learning_rate": 2.495364552711448e-05, "loss": 1.4037, "step": 1665 }, { "epoch": 0.426496, "grad_norm": 0.4097538578443415, "learning_rate": 2.4937897878000342e-05, "loss": 1.3997, "step": 1666 }, { "epoch": 0.426752, "grad_norm": 0.4194364300436425, "learning_rate": 2.4922146968131532e-05, "loss": 1.4175, "step": 1667 }, { "epoch": 0.427008, "grad_norm": 0.3904163363390266, "learning_rate": 2.4906392807909197e-05, "loss": 1.3648, "step": 1668 }, { "epoch": 0.427264, "grad_norm": 0.38829791877734754, "learning_rate": 2.489063540773665e-05, "loss": 1.378, "step": 1669 }, { "epoch": 0.42752, "grad_norm": 0.39804854115858945, "learning_rate": 2.4874874778019325e-05, "loss": 1.3753, "step": 1670 }, { "epoch": 0.427776, "grad_norm": 0.40394219971487694, "learning_rate": 2.4859110929164804e-05, "loss": 1.3904, "step": 1671 }, { "epoch": 0.428032, "grad_norm": 0.40593103276439263, "learning_rate": 2.4843343871582782e-05, "loss": 1.442, "step": 1672 }, { "epoch": 0.428288, "grad_norm": 0.40883603705905375, "learning_rate": 2.4827573615685094e-05, "loss": 1.3966, "step": 1673 }, { "epoch": 0.428544, "grad_norm": 0.39298387029552795, "learning_rate": 2.4811800171885657e-05, "loss": 1.3697, "step": 1674 }, { "epoch": 0.4288, "grad_norm": 0.3975635468666579, "learning_rate": 2.4796023550600514e-05, "loss": 1.3686, "step": 1675 }, { "epoch": 0.429056, "grad_norm": 0.4039602951835481, "learning_rate": 2.47802437622478e-05, "loss": 1.3925, "step": 1676 }, { "epoch": 0.429312, "grad_norm": 0.3937224765480824, "learning_rate": 2.476446081724775e-05, "loss": 1.3974, "step": 1677 }, { "epoch": 0.429568, "grad_norm": 0.3962916721102795, "learning_rate": 2.4748674726022664e-05, "loss": 1.3221, "step": 1678 }, { "epoch": 0.429824, "grad_norm": 0.43345760168637193, "learning_rate": 2.4732885498996946e-05, "loss": 1.3095, "step": 1679 }, { "epoch": 0.43008, "grad_norm": 0.5311447891485607, "learning_rate": 2.471709314659704e-05, "loss": 1.388, "step": 1680 }, { "epoch": 0.430336, "grad_norm": 0.40774384231581445, "learning_rate": 2.4701297679251477e-05, "loss": 1.415, "step": 1681 }, { "epoch": 0.430592, "grad_norm": 0.3959012487119448, "learning_rate": 2.4685499107390845e-05, "loss": 1.3379, "step": 1682 }, { "epoch": 0.430848, "grad_norm": 0.39720729155074513, "learning_rate": 2.4669697441447765e-05, "loss": 1.3943, "step": 1683 }, { "epoch": 0.431104, "grad_norm": 0.4044871575064922, "learning_rate": 2.4653892691856917e-05, "loss": 1.3809, "step": 1684 }, { "epoch": 0.43136, "grad_norm": 0.4128011927443433, "learning_rate": 2.463808486905501e-05, "loss": 1.3692, "step": 1685 }, { "epoch": 0.431616, "grad_norm": 0.3945382851301297, "learning_rate": 2.4622273983480786e-05, "loss": 1.4103, "step": 1686 }, { "epoch": 0.431872, "grad_norm": 0.40348049733922964, "learning_rate": 2.4606460045575e-05, "loss": 1.4062, "step": 1687 }, { "epoch": 0.432128, "grad_norm": 0.39468307064477304, "learning_rate": 2.4590643065780442e-05, "loss": 1.3561, "step": 1688 }, { "epoch": 0.432384, "grad_norm": 0.38637036351251514, "learning_rate": 2.4574823054541888e-05, "loss": 1.376, "step": 1689 }, { "epoch": 0.43264, "grad_norm": 0.5706557146219178, "learning_rate": 2.4559000022306138e-05, "loss": 1.3618, "step": 1690 }, { "epoch": 0.432896, "grad_norm": 0.4014697464370375, "learning_rate": 2.4543173979521963e-05, "loss": 1.3738, "step": 1691 }, { "epoch": 0.433152, "grad_norm": 0.4234522871570587, "learning_rate": 2.4527344936640144e-05, "loss": 1.3997, "step": 1692 }, { "epoch": 0.433408, "grad_norm": 0.40070027803367486, "learning_rate": 2.451151290411342e-05, "loss": 1.3701, "step": 1693 }, { "epoch": 0.433664, "grad_norm": 0.4223395250073125, "learning_rate": 2.4495677892396536e-05, "loss": 1.3876, "step": 1694 }, { "epoch": 0.43392, "grad_norm": 0.39766966616935306, "learning_rate": 2.447983991194618e-05, "loss": 1.3846, "step": 1695 }, { "epoch": 0.434176, "grad_norm": 0.395694837718773, "learning_rate": 2.4463998973221008e-05, "loss": 1.3401, "step": 1696 }, { "epoch": 0.434432, "grad_norm": 0.43721143989623745, "learning_rate": 2.444815508668162e-05, "loss": 1.3799, "step": 1697 }, { "epoch": 0.434688, "grad_norm": 0.3977996446282918, "learning_rate": 2.4432308262790578e-05, "loss": 1.3811, "step": 1698 }, { "epoch": 0.434944, "grad_norm": 0.3898714921064679, "learning_rate": 2.441645851201237e-05, "loss": 1.3798, "step": 1699 }, { "epoch": 0.4352, "grad_norm": 0.4120512566566544, "learning_rate": 2.4400605844813435e-05, "loss": 1.4282, "step": 1700 }, { "epoch": 0.435456, "grad_norm": 0.3855122018243483, "learning_rate": 2.4384750271662115e-05, "loss": 1.358, "step": 1701 }, { "epoch": 0.435712, "grad_norm": 0.40242886120197624, "learning_rate": 2.4368891803028686e-05, "loss": 1.3855, "step": 1702 }, { "epoch": 0.435968, "grad_norm": 0.40703300960909283, "learning_rate": 2.435303044938533e-05, "loss": 1.402, "step": 1703 }, { "epoch": 0.436224, "grad_norm": 0.4092185102587738, "learning_rate": 2.433716622120615e-05, "loss": 1.3923, "step": 1704 }, { "epoch": 0.43648, "grad_norm": 0.3966975647335042, "learning_rate": 2.432129912896711e-05, "loss": 1.3442, "step": 1705 }, { "epoch": 0.436736, "grad_norm": 0.39348873043614585, "learning_rate": 2.4305429183146103e-05, "loss": 1.3597, "step": 1706 }, { "epoch": 0.436992, "grad_norm": 0.39246132168166764, "learning_rate": 2.4289556394222898e-05, "loss": 1.3902, "step": 1707 }, { "epoch": 0.437248, "grad_norm": 0.41949718832606925, "learning_rate": 2.4273680772679117e-05, "loss": 1.3581, "step": 1708 }, { "epoch": 0.437504, "grad_norm": 0.3847202589782453, "learning_rate": 2.4257802328998282e-05, "loss": 1.3445, "step": 1709 }, { "epoch": 0.43776, "grad_norm": 0.3980203822033651, "learning_rate": 2.4241921073665774e-05, "loss": 1.4048, "step": 1710 }, { "epoch": 0.438016, "grad_norm": 0.3998175690546011, "learning_rate": 2.422603701716881e-05, "loss": 1.3807, "step": 1711 }, { "epoch": 0.438272, "grad_norm": 0.4005580201490593, "learning_rate": 2.4210150169996488e-05, "loss": 1.3966, "step": 1712 }, { "epoch": 0.438528, "grad_norm": 0.3892653316982133, "learning_rate": 2.4194260542639718e-05, "loss": 1.4274, "step": 1713 }, { "epoch": 0.438784, "grad_norm": 0.4030769465151541, "learning_rate": 2.4178368145591263e-05, "loss": 1.4513, "step": 1714 }, { "epoch": 0.43904, "grad_norm": 0.3877759525628278, "learning_rate": 2.4162472989345712e-05, "loss": 1.3453, "step": 1715 }, { "epoch": 0.439296, "grad_norm": 0.3990219360361329, "learning_rate": 2.4146575084399486e-05, "loss": 1.389, "step": 1716 }, { "epoch": 0.439552, "grad_norm": 0.3918722994154548, "learning_rate": 2.4130674441250794e-05, "loss": 1.3979, "step": 1717 }, { "epoch": 0.439808, "grad_norm": 0.3959758657045125, "learning_rate": 2.4114771070399682e-05, "loss": 1.3684, "step": 1718 }, { "epoch": 0.440064, "grad_norm": 0.40699097011603336, "learning_rate": 2.4098864982347987e-05, "loss": 1.421, "step": 1719 }, { "epoch": 0.44032, "grad_norm": 0.405875905668041, "learning_rate": 2.408295618759932e-05, "loss": 1.4194, "step": 1720 }, { "epoch": 0.440576, "grad_norm": 0.3937131816620548, "learning_rate": 2.4067044696659123e-05, "loss": 1.3582, "step": 1721 }, { "epoch": 0.440832, "grad_norm": 0.40362599806045574, "learning_rate": 2.4051130520034583e-05, "loss": 1.4082, "step": 1722 }, { "epoch": 0.441088, "grad_norm": 0.40096338058688386, "learning_rate": 2.4035213668234665e-05, "loss": 1.3576, "step": 1723 }, { "epoch": 0.441344, "grad_norm": 0.392911513846458, "learning_rate": 2.4019294151770112e-05, "loss": 1.397, "step": 1724 }, { "epoch": 0.4416, "grad_norm": 0.41780738843330906, "learning_rate": 2.400337198115343e-05, "loss": 1.3646, "step": 1725 }, { "epoch": 0.441856, "grad_norm": 0.3866055700530504, "learning_rate": 2.3987447166898852e-05, "loss": 1.3027, "step": 1726 }, { "epoch": 0.442112, "grad_norm": 0.40076950230821623, "learning_rate": 2.397151971952238e-05, "loss": 1.382, "step": 1727 }, { "epoch": 0.442368, "grad_norm": 0.4365448033887497, "learning_rate": 2.395558964954176e-05, "loss": 1.411, "step": 1728 }, { "epoch": 0.442624, "grad_norm": 0.4848205715225887, "learning_rate": 2.3939656967476438e-05, "loss": 1.3677, "step": 1729 }, { "epoch": 0.44288, "grad_norm": 0.40848934176686613, "learning_rate": 2.3923721683847622e-05, "loss": 1.3604, "step": 1730 }, { "epoch": 0.443136, "grad_norm": 0.4086949613269775, "learning_rate": 2.3907783809178217e-05, "loss": 1.4061, "step": 1731 }, { "epoch": 0.443392, "grad_norm": 0.40735356106168075, "learning_rate": 2.3891843353992834e-05, "loss": 1.3478, "step": 1732 }, { "epoch": 0.443648, "grad_norm": 0.39930100878519625, "learning_rate": 2.3875900328817803e-05, "loss": 1.3767, "step": 1733 }, { "epoch": 0.443904, "grad_norm": 0.40953445784506254, "learning_rate": 2.3859954744181155e-05, "loss": 1.4088, "step": 1734 }, { "epoch": 0.44416, "grad_norm": 0.3996727107528165, "learning_rate": 2.384400661061259e-05, "loss": 1.4105, "step": 1735 }, { "epoch": 0.444416, "grad_norm": 0.3904639117082841, "learning_rate": 2.38280559386435e-05, "loss": 1.3837, "step": 1736 }, { "epoch": 0.444672, "grad_norm": 0.39470341174968043, "learning_rate": 2.3812102738806972e-05, "loss": 1.3887, "step": 1737 }, { "epoch": 0.444928, "grad_norm": 0.418702806625604, "learning_rate": 2.3796147021637727e-05, "loss": 1.3768, "step": 1738 }, { "epoch": 0.445184, "grad_norm": 0.4125066196253555, "learning_rate": 2.3780188797672174e-05, "loss": 1.3837, "step": 1739 }, { "epoch": 0.44544, "grad_norm": 0.3722167887884071, "learning_rate": 2.376422807744838e-05, "loss": 1.3211, "step": 1740 }, { "epoch": 0.445696, "grad_norm": 0.3858516208747193, "learning_rate": 2.3748264871506046e-05, "loss": 1.3819, "step": 1741 }, { "epoch": 0.445952, "grad_norm": 0.3950251437618387, "learning_rate": 2.373229919038651e-05, "loss": 1.3866, "step": 1742 }, { "epoch": 0.446208, "grad_norm": 0.3939520350075915, "learning_rate": 2.371633104463277e-05, "loss": 1.3869, "step": 1743 }, { "epoch": 0.446464, "grad_norm": 0.38946413317359496, "learning_rate": 2.370036044478942e-05, "loss": 1.3628, "step": 1744 }, { "epoch": 0.44672, "grad_norm": 0.3956520200769147, "learning_rate": 2.36843874014027e-05, "loss": 1.3705, "step": 1745 }, { "epoch": 0.446976, "grad_norm": 0.40353459534633557, "learning_rate": 2.3668411925020455e-05, "loss": 1.4091, "step": 1746 }, { "epoch": 0.447232, "grad_norm": 0.40007440247880177, "learning_rate": 2.3652434026192133e-05, "loss": 1.3655, "step": 1747 }, { "epoch": 0.447488, "grad_norm": 0.40150312507951885, "learning_rate": 2.363645371546878e-05, "loss": 1.4011, "step": 1748 }, { "epoch": 0.447744, "grad_norm": 0.39333764690121004, "learning_rate": 2.3620471003403042e-05, "loss": 1.3843, "step": 1749 }, { "epoch": 0.448, "grad_norm": 0.3926520464218505, "learning_rate": 2.360448590054915e-05, "loss": 1.3105, "step": 1750 }, { "epoch": 0.448256, "grad_norm": 0.4169859328134903, "learning_rate": 2.358849841746291e-05, "loss": 1.3721, "step": 1751 }, { "epoch": 0.448512, "grad_norm": 0.39648294512606946, "learning_rate": 2.35725085647017e-05, "loss": 1.4077, "step": 1752 }, { "epoch": 0.448768, "grad_norm": 0.3955064408061962, "learning_rate": 2.3556516352824463e-05, "loss": 1.4041, "step": 1753 }, { "epoch": 0.449024, "grad_norm": 0.38764943218850895, "learning_rate": 2.3540521792391702e-05, "loss": 1.3843, "step": 1754 }, { "epoch": 0.44928, "grad_norm": 0.382829642481448, "learning_rate": 2.352452489396547e-05, "loss": 1.3633, "step": 1755 }, { "epoch": 0.449536, "grad_norm": 0.38171491764487375, "learning_rate": 2.3508525668109372e-05, "loss": 1.3413, "step": 1756 }, { "epoch": 0.449792, "grad_norm": 0.407732971436391, "learning_rate": 2.3492524125388528e-05, "loss": 1.3797, "step": 1757 }, { "epoch": 0.450048, "grad_norm": 0.3979240541405608, "learning_rate": 2.3476520276369604e-05, "loss": 1.395, "step": 1758 }, { "epoch": 0.450304, "grad_norm": 0.41229659307524663, "learning_rate": 2.3460514131620794e-05, "loss": 1.3687, "step": 1759 }, { "epoch": 0.45056, "grad_norm": 0.3963502848257384, "learning_rate": 2.3444505701711797e-05, "loss": 1.3633, "step": 1760 }, { "epoch": 0.450816, "grad_norm": 0.4521114295100032, "learning_rate": 2.3428494997213815e-05, "loss": 1.4106, "step": 1761 }, { "epoch": 0.451072, "grad_norm": 0.3973039631210743, "learning_rate": 2.3412482028699578e-05, "loss": 1.37, "step": 1762 }, { "epoch": 0.451328, "grad_norm": 0.4096894893328068, "learning_rate": 2.339646680674329e-05, "loss": 1.393, "step": 1763 }, { "epoch": 0.451584, "grad_norm": 0.38696194505378206, "learning_rate": 2.3380449341920636e-05, "loss": 1.3641, "step": 1764 }, { "epoch": 0.45184, "grad_norm": 0.4168961251278173, "learning_rate": 2.33644296448088e-05, "loss": 1.408, "step": 1765 }, { "epoch": 0.452096, "grad_norm": 0.5591728900837221, "learning_rate": 2.334840772598644e-05, "loss": 1.4023, "step": 1766 }, { "epoch": 0.452352, "grad_norm": 0.3950170126233293, "learning_rate": 2.3332383596033667e-05, "loss": 1.3824, "step": 1767 }, { "epoch": 0.452608, "grad_norm": 0.41596636317532765, "learning_rate": 2.331635726553207e-05, "loss": 1.3809, "step": 1768 }, { "epoch": 0.452864, "grad_norm": 0.3843489452862653, "learning_rate": 2.3300328745064676e-05, "loss": 1.378, "step": 1769 }, { "epoch": 0.45312, "grad_norm": 0.41390966007592817, "learning_rate": 2.328429804521595e-05, "loss": 1.4255, "step": 1770 }, { "epoch": 0.453376, "grad_norm": 0.39683044682829405, "learning_rate": 2.326826517657183e-05, "loss": 1.3837, "step": 1771 }, { "epoch": 0.453632, "grad_norm": 0.3847065092201651, "learning_rate": 2.325223014971966e-05, "loss": 1.3953, "step": 1772 }, { "epoch": 0.453888, "grad_norm": 0.3842903954199142, "learning_rate": 2.3236192975248204e-05, "loss": 1.3593, "step": 1773 }, { "epoch": 0.454144, "grad_norm": 0.40201925252571474, "learning_rate": 2.3220153663747675e-05, "loss": 1.3864, "step": 1774 }, { "epoch": 0.4544, "grad_norm": 0.3777445785109359, "learning_rate": 2.3204112225809653e-05, "loss": 1.3559, "step": 1775 }, { "epoch": 0.454656, "grad_norm": 0.4033279428350374, "learning_rate": 2.3188068672027163e-05, "loss": 1.3656, "step": 1776 }, { "epoch": 0.454912, "grad_norm": 0.42131659789269554, "learning_rate": 2.3172023012994608e-05, "loss": 1.3833, "step": 1777 }, { "epoch": 0.455168, "grad_norm": 0.38228953431753593, "learning_rate": 2.3155975259307778e-05, "loss": 1.3408, "step": 1778 }, { "epoch": 0.455424, "grad_norm": 0.39570407255298823, "learning_rate": 2.3139925421563863e-05, "loss": 1.3417, "step": 1779 }, { "epoch": 0.45568, "grad_norm": 0.38359210009177375, "learning_rate": 2.312387351036141e-05, "loss": 1.3441, "step": 1780 }, { "epoch": 0.455936, "grad_norm": 0.38266597427093224, "learning_rate": 2.310781953630034e-05, "loss": 1.3752, "step": 1781 }, { "epoch": 0.456192, "grad_norm": 0.3955574821178628, "learning_rate": 2.3091763509981946e-05, "loss": 1.3558, "step": 1782 }, { "epoch": 0.456448, "grad_norm": 0.3831525546439454, "learning_rate": 2.3075705442008874e-05, "loss": 1.3719, "step": 1783 }, { "epoch": 0.456704, "grad_norm": 0.3869376162023207, "learning_rate": 2.30596453429851e-05, "loss": 1.3435, "step": 1784 }, { "epoch": 0.45696, "grad_norm": 0.4093846303352642, "learning_rate": 2.304358322351597e-05, "loss": 1.4077, "step": 1785 }, { "epoch": 0.457216, "grad_norm": 0.39645698038591165, "learning_rate": 2.3027519094208148e-05, "loss": 1.3475, "step": 1786 }, { "epoch": 0.457472, "grad_norm": 0.41232689714750037, "learning_rate": 2.301145296566961e-05, "loss": 1.4272, "step": 1787 }, { "epoch": 0.457728, "grad_norm": 0.39099450538922387, "learning_rate": 2.2995384848509684e-05, "loss": 1.3644, "step": 1788 }, { "epoch": 0.457984, "grad_norm": 0.38596895877610593, "learning_rate": 2.2979314753339e-05, "loss": 1.3725, "step": 1789 }, { "epoch": 0.45824, "grad_norm": 0.39215342491795824, "learning_rate": 2.296324269076948e-05, "loss": 1.3867, "step": 1790 }, { "epoch": 0.458496, "grad_norm": 0.4038879757570262, "learning_rate": 2.294716867141436e-05, "loss": 1.3811, "step": 1791 }, { "epoch": 0.458752, "grad_norm": 0.4037059234135371, "learning_rate": 2.2931092705888167e-05, "loss": 1.4255, "step": 1792 }, { "epoch": 0.459008, "grad_norm": 0.38314522450078486, "learning_rate": 2.2915014804806693e-05, "loss": 1.34, "step": 1793 }, { "epoch": 0.459264, "grad_norm": 0.38970609420407243, "learning_rate": 2.289893497878705e-05, "loss": 1.3865, "step": 1794 }, { "epoch": 0.45952, "grad_norm": 0.38437592094893025, "learning_rate": 2.2882853238447576e-05, "loss": 1.3564, "step": 1795 }, { "epoch": 0.459776, "grad_norm": 0.39797191113556035, "learning_rate": 2.28667695944079e-05, "loss": 1.3872, "step": 1796 }, { "epoch": 0.460032, "grad_norm": 0.38772174065389203, "learning_rate": 2.285068405728891e-05, "loss": 1.3826, "step": 1797 }, { "epoch": 0.460288, "grad_norm": 0.3862781126275468, "learning_rate": 2.2834596637712724e-05, "loss": 1.3999, "step": 1798 }, { "epoch": 0.460544, "grad_norm": 0.3932140463601221, "learning_rate": 2.281850734630272e-05, "loss": 1.3887, "step": 1799 }, { "epoch": 0.4608, "grad_norm": 0.39389600096501287, "learning_rate": 2.28024161936835e-05, "loss": 1.3663, "step": 1800 }, { "epoch": 0.461056, "grad_norm": 0.3723341401885224, "learning_rate": 2.2786323190480907e-05, "loss": 1.3218, "step": 1801 }, { "epoch": 0.461312, "grad_norm": 0.38596343457932986, "learning_rate": 2.2770228347322005e-05, "loss": 1.3694, "step": 1802 }, { "epoch": 0.461568, "grad_norm": 0.3882757892178642, "learning_rate": 2.2754131674835057e-05, "loss": 1.3591, "step": 1803 }, { "epoch": 0.461824, "grad_norm": 0.4018501524739478, "learning_rate": 2.273803318364956e-05, "loss": 1.4035, "step": 1804 }, { "epoch": 0.46208, "grad_norm": 0.3821632736044547, "learning_rate": 2.272193288439618e-05, "loss": 1.3387, "step": 1805 }, { "epoch": 0.462336, "grad_norm": 0.38408088795486967, "learning_rate": 2.270583078770681e-05, "loss": 1.3106, "step": 1806 }, { "epoch": 0.462592, "grad_norm": 0.39667015718044396, "learning_rate": 2.2689726904214514e-05, "loss": 1.3806, "step": 1807 }, { "epoch": 0.462848, "grad_norm": 0.39655850443471397, "learning_rate": 2.2673621244553526e-05, "loss": 1.3552, "step": 1808 }, { "epoch": 0.463104, "grad_norm": 0.3919611109646091, "learning_rate": 2.2657513819359273e-05, "loss": 1.3692, "step": 1809 }, { "epoch": 0.46336, "grad_norm": 0.3982351083472095, "learning_rate": 2.2641404639268333e-05, "loss": 1.397, "step": 1810 }, { "epoch": 0.463616, "grad_norm": 0.40006889799958345, "learning_rate": 2.262529371491845e-05, "loss": 1.3409, "step": 1811 }, { "epoch": 0.463872, "grad_norm": 0.39670665142998657, "learning_rate": 2.2609181056948514e-05, "loss": 1.3821, "step": 1812 }, { "epoch": 0.464128, "grad_norm": 0.38493550905875334, "learning_rate": 2.2593066675998573e-05, "loss": 1.3788, "step": 1813 }, { "epoch": 0.464384, "grad_norm": 0.3987303359163086, "learning_rate": 2.2576950582709798e-05, "loss": 1.3963, "step": 1814 }, { "epoch": 0.46464, "grad_norm": 0.39799467532596533, "learning_rate": 2.256083278772449e-05, "loss": 1.4016, "step": 1815 }, { "epoch": 0.464896, "grad_norm": 0.37947102091190804, "learning_rate": 2.2544713301686095e-05, "loss": 1.3973, "step": 1816 }, { "epoch": 0.465152, "grad_norm": 0.4059182442431278, "learning_rate": 2.252859213523915e-05, "loss": 1.3817, "step": 1817 }, { "epoch": 0.465408, "grad_norm": 0.39267172911681414, "learning_rate": 2.2512469299029305e-05, "loss": 1.3936, "step": 1818 }, { "epoch": 0.465664, "grad_norm": 0.39756579908049916, "learning_rate": 2.249634480370334e-05, "loss": 1.3946, "step": 1819 }, { "epoch": 0.46592, "grad_norm": 0.4008096913828974, "learning_rate": 2.24802186599091e-05, "loss": 1.3553, "step": 1820 }, { "epoch": 0.466176, "grad_norm": 0.3934787317972552, "learning_rate": 2.246409087829552e-05, "loss": 1.3999, "step": 1821 }, { "epoch": 0.466432, "grad_norm": 0.3868372313340382, "learning_rate": 2.244796146951264e-05, "loss": 1.3506, "step": 1822 }, { "epoch": 0.466688, "grad_norm": 0.39393581463126714, "learning_rate": 2.243183044421156e-05, "loss": 1.3772, "step": 1823 }, { "epoch": 0.466944, "grad_norm": 0.38846836998563544, "learning_rate": 2.2415697813044437e-05, "loss": 1.394, "step": 1824 }, { "epoch": 0.4672, "grad_norm": 0.39569930690283633, "learning_rate": 2.2399563586664506e-05, "loss": 1.3555, "step": 1825 }, { "epoch": 0.467456, "grad_norm": 0.39772708408470236, "learning_rate": 2.2383427775726048e-05, "loss": 1.3721, "step": 1826 }, { "epoch": 0.467712, "grad_norm": 0.3984931759651297, "learning_rate": 2.2367290390884388e-05, "loss": 1.3459, "step": 1827 }, { "epoch": 0.467968, "grad_norm": 0.4112939598437003, "learning_rate": 2.23511514427959e-05, "loss": 1.3884, "step": 1828 }, { "epoch": 0.468224, "grad_norm": 0.3924848225380971, "learning_rate": 2.2335010942117975e-05, "loss": 1.3986, "step": 1829 }, { "epoch": 0.46848, "grad_norm": 0.39693183356509976, "learning_rate": 2.2318868899509044e-05, "loss": 1.382, "step": 1830 }, { "epoch": 0.468736, "grad_norm": 0.3989722636859817, "learning_rate": 2.2302725325628546e-05, "loss": 1.4032, "step": 1831 }, { "epoch": 0.468992, "grad_norm": 0.3917379726246244, "learning_rate": 2.2286580231136944e-05, "loss": 1.3506, "step": 1832 }, { "epoch": 0.469248, "grad_norm": 0.39012441905950607, "learning_rate": 2.227043362669568e-05, "loss": 1.3476, "step": 1833 }, { "epoch": 0.469504, "grad_norm": 0.39173346519766833, "learning_rate": 2.2254285522967222e-05, "loss": 1.363, "step": 1834 }, { "epoch": 0.46976, "grad_norm": 0.3876010772993033, "learning_rate": 2.2238135930615013e-05, "loss": 1.3761, "step": 1835 }, { "epoch": 0.470016, "grad_norm": 0.3894214133316552, "learning_rate": 2.222198486030348e-05, "loss": 1.3248, "step": 1836 }, { "epoch": 0.470272, "grad_norm": 0.37941164908231445, "learning_rate": 2.2205832322698032e-05, "loss": 1.3613, "step": 1837 }, { "epoch": 0.470528, "grad_norm": 0.38428399869063484, "learning_rate": 2.2189678328465037e-05, "loss": 1.3583, "step": 1838 }, { "epoch": 0.470784, "grad_norm": 0.3884299629309424, "learning_rate": 2.2173522888271828e-05, "loss": 1.3427, "step": 1839 }, { "epoch": 0.47104, "grad_norm": 0.3967330061801342, "learning_rate": 2.2157366012786697e-05, "loss": 1.4158, "step": 1840 }, { "epoch": 0.471296, "grad_norm": 0.39378558378341266, "learning_rate": 2.2141207712678892e-05, "loss": 1.3519, "step": 1841 }, { "epoch": 0.471552, "grad_norm": 0.4060692858362815, "learning_rate": 2.2125047998618585e-05, "loss": 1.417, "step": 1842 }, { "epoch": 0.471808, "grad_norm": 0.3937915115932798, "learning_rate": 2.2108886881276876e-05, "loss": 1.4381, "step": 1843 }, { "epoch": 0.472064, "grad_norm": 0.40559843964990816, "learning_rate": 2.2092724371325822e-05, "loss": 1.3945, "step": 1844 }, { "epoch": 0.47232, "grad_norm": 0.39586516874472877, "learning_rate": 2.207656047943837e-05, "loss": 1.3791, "step": 1845 }, { "epoch": 0.472576, "grad_norm": 0.4038597628746731, "learning_rate": 2.20603952162884e-05, "loss": 1.4425, "step": 1846 }, { "epoch": 0.472832, "grad_norm": 0.3825390250815511, "learning_rate": 2.2044228592550692e-05, "loss": 1.3741, "step": 1847 }, { "epoch": 0.473088, "grad_norm": 0.3879539812390648, "learning_rate": 2.2028060618900904e-05, "loss": 1.3422, "step": 1848 }, { "epoch": 0.473344, "grad_norm": 0.40371587380346063, "learning_rate": 2.2011891306015613e-05, "loss": 1.3301, "step": 1849 }, { "epoch": 0.4736, "grad_norm": 0.39304235831918916, "learning_rate": 2.1995720664572274e-05, "loss": 1.4056, "step": 1850 }, { "epoch": 0.473856, "grad_norm": 0.3815801828760049, "learning_rate": 2.1979548705249207e-05, "loss": 1.337, "step": 1851 }, { "epoch": 0.474112, "grad_norm": 0.40892262823207376, "learning_rate": 2.1963375438725616e-05, "loss": 1.4087, "step": 1852 }, { "epoch": 0.474368, "grad_norm": 0.3759524716635272, "learning_rate": 2.1947200875681562e-05, "loss": 1.3276, "step": 1853 }, { "epoch": 0.474624, "grad_norm": 0.38996220056608855, "learning_rate": 2.193102502679796e-05, "loss": 1.3708, "step": 1854 }, { "epoch": 0.47488, "grad_norm": 0.38408014772660437, "learning_rate": 2.1914847902756576e-05, "loss": 1.3561, "step": 1855 }, { "epoch": 0.475136, "grad_norm": 0.490147038901984, "learning_rate": 2.1898669514240027e-05, "loss": 1.4004, "step": 1856 }, { "epoch": 0.475392, "grad_norm": 0.405561535558326, "learning_rate": 2.188248987193175e-05, "loss": 1.348, "step": 1857 }, { "epoch": 0.475648, "grad_norm": 0.3919274110593133, "learning_rate": 2.1866308986516024e-05, "loss": 1.4026, "step": 1858 }, { "epoch": 0.475904, "grad_norm": 0.3815444236706001, "learning_rate": 2.185012686867794e-05, "loss": 1.3524, "step": 1859 }, { "epoch": 0.47616, "grad_norm": 0.3751516282870969, "learning_rate": 2.183394352910339e-05, "loss": 1.3222, "step": 1860 }, { "epoch": 0.476416, "grad_norm": 0.43472332841938144, "learning_rate": 2.181775897847911e-05, "loss": 1.3663, "step": 1861 }, { "epoch": 0.476672, "grad_norm": 0.3949922058601869, "learning_rate": 2.18015732274926e-05, "loss": 1.3772, "step": 1862 }, { "epoch": 0.476928, "grad_norm": 0.39306434480459695, "learning_rate": 2.1785386286832168e-05, "loss": 1.3503, "step": 1863 }, { "epoch": 0.477184, "grad_norm": 0.38099835002121996, "learning_rate": 2.176919816718691e-05, "loss": 1.3254, "step": 1864 }, { "epoch": 0.47744, "grad_norm": 0.39097982013339694, "learning_rate": 2.175300887924669e-05, "loss": 1.374, "step": 1865 }, { "epoch": 0.477696, "grad_norm": 0.4237109011944741, "learning_rate": 2.1736818433702154e-05, "loss": 1.3544, "step": 1866 }, { "epoch": 0.477952, "grad_norm": 0.3866707953519438, "learning_rate": 2.17206268412447e-05, "loss": 1.3532, "step": 1867 }, { "epoch": 0.478208, "grad_norm": 0.3991120349847027, "learning_rate": 2.170443411256651e-05, "loss": 1.3726, "step": 1868 }, { "epoch": 0.478464, "grad_norm": 0.394147724593643, "learning_rate": 2.168824025836047e-05, "loss": 1.341, "step": 1869 }, { "epoch": 0.47872, "grad_norm": 0.38688708760088025, "learning_rate": 2.1672045289320266e-05, "loss": 1.367, "step": 1870 }, { "epoch": 0.478976, "grad_norm": 0.3883878610835278, "learning_rate": 2.1655849216140275e-05, "loss": 1.3279, "step": 1871 }, { "epoch": 0.479232, "grad_norm": 0.38419541851279776, "learning_rate": 2.1639652049515612e-05, "loss": 1.3552, "step": 1872 }, { "epoch": 0.479488, "grad_norm": 0.38071993014633887, "learning_rate": 2.1623453800142137e-05, "loss": 1.3455, "step": 1873 }, { "epoch": 0.479744, "grad_norm": 0.3883589661871264, "learning_rate": 2.1607254478716408e-05, "loss": 1.3495, "step": 1874 }, { "epoch": 0.48, "grad_norm": 0.38360991489015156, "learning_rate": 2.1591054095935685e-05, "loss": 1.3665, "step": 1875 }, { "epoch": 0.480256, "grad_norm": 0.38566315185836325, "learning_rate": 2.1574852662497934e-05, "loss": 1.3692, "step": 1876 }, { "epoch": 0.480512, "grad_norm": 0.39595454956347353, "learning_rate": 2.1558650189101827e-05, "loss": 1.3769, "step": 1877 }, { "epoch": 0.480768, "grad_norm": 0.3771882913414376, "learning_rate": 2.1542446686446703e-05, "loss": 1.3559, "step": 1878 }, { "epoch": 0.481024, "grad_norm": 0.3848277571542053, "learning_rate": 2.1526242165232593e-05, "loss": 1.3625, "step": 1879 }, { "epoch": 0.48128, "grad_norm": 0.3875537295192209, "learning_rate": 2.1510036636160203e-05, "loss": 1.3586, "step": 1880 }, { "epoch": 0.481536, "grad_norm": 0.3840800721837618, "learning_rate": 2.14938301099309e-05, "loss": 1.3681, "step": 1881 }, { "epoch": 0.481792, "grad_norm": 0.3829801938867898, "learning_rate": 2.1477622597246698e-05, "loss": 1.3809, "step": 1882 }, { "epoch": 0.482048, "grad_norm": 0.3814671971062877, "learning_rate": 2.1461414108810275e-05, "loss": 1.3692, "step": 1883 }, { "epoch": 0.482304, "grad_norm": 0.3904328847746494, "learning_rate": 2.1445204655324966e-05, "loss": 1.3847, "step": 1884 }, { "epoch": 0.48256, "grad_norm": 0.38612942551642143, "learning_rate": 2.1428994247494717e-05, "loss": 1.3132, "step": 1885 }, { "epoch": 0.482816, "grad_norm": 0.3967469284918172, "learning_rate": 2.1412782896024123e-05, "loss": 1.4053, "step": 1886 }, { "epoch": 0.483072, "grad_norm": 0.3832059171805036, "learning_rate": 2.139657061161839e-05, "loss": 1.412, "step": 1887 }, { "epoch": 0.483328, "grad_norm": 0.3838962416148976, "learning_rate": 2.1380357404983337e-05, "loss": 1.3454, "step": 1888 }, { "epoch": 0.483584, "grad_norm": 0.3827608827431334, "learning_rate": 2.1364143286825416e-05, "loss": 1.3567, "step": 1889 }, { "epoch": 0.48384, "grad_norm": 0.37557705371612216, "learning_rate": 2.134792826785166e-05, "loss": 1.3464, "step": 1890 }, { "epoch": 0.484096, "grad_norm": 0.39159301675637603, "learning_rate": 2.13317123587697e-05, "loss": 1.3741, "step": 1891 }, { "epoch": 0.484352, "grad_norm": 0.46588329434799575, "learning_rate": 2.1315495570287763e-05, "loss": 1.3658, "step": 1892 }, { "epoch": 0.484608, "grad_norm": 0.3835301814445272, "learning_rate": 2.1299277913114644e-05, "loss": 1.3823, "step": 1893 }, { "epoch": 0.484864, "grad_norm": 0.3804298421496852, "learning_rate": 2.1283059397959715e-05, "loss": 1.346, "step": 1894 }, { "epoch": 0.48512, "grad_norm": 0.39296518406531694, "learning_rate": 2.1266840035532924e-05, "loss": 1.3401, "step": 1895 }, { "epoch": 0.485376, "grad_norm": 0.3863026882979723, "learning_rate": 2.1250619836544777e-05, "loss": 1.3979, "step": 1896 }, { "epoch": 0.485632, "grad_norm": 0.39231799033018405, "learning_rate": 2.1234398811706316e-05, "loss": 1.3448, "step": 1897 }, { "epoch": 0.485888, "grad_norm": 0.3927200286700311, "learning_rate": 2.121817697172915e-05, "loss": 1.3527, "step": 1898 }, { "epoch": 0.486144, "grad_norm": 0.37090478976427615, "learning_rate": 2.120195432732542e-05, "loss": 1.3447, "step": 1899 }, { "epoch": 0.4864, "grad_norm": 0.3860129441087777, "learning_rate": 2.1185730889207776e-05, "loss": 1.3671, "step": 1900 }, { "epoch": 0.486656, "grad_norm": 0.3859971159967315, "learning_rate": 2.116950666808942e-05, "loss": 1.3707, "step": 1901 }, { "epoch": 0.486912, "grad_norm": 0.380245783494106, "learning_rate": 2.115328167468407e-05, "loss": 1.3536, "step": 1902 }, { "epoch": 0.487168, "grad_norm": 0.3705195318013785, "learning_rate": 2.1137055919705943e-05, "loss": 1.3556, "step": 1903 }, { "epoch": 0.487424, "grad_norm": 0.3848309242438573, "learning_rate": 2.1120829413869753e-05, "loss": 1.3176, "step": 1904 }, { "epoch": 0.48768, "grad_norm": 0.4671167042750147, "learning_rate": 2.110460216789073e-05, "loss": 1.3928, "step": 1905 }, { "epoch": 0.487936, "grad_norm": 0.4164478081380505, "learning_rate": 2.1088374192484563e-05, "loss": 1.3438, "step": 1906 }, { "epoch": 0.488192, "grad_norm": 0.43360540042775003, "learning_rate": 2.107214549836746e-05, "loss": 1.3727, "step": 1907 }, { "epoch": 0.488448, "grad_norm": 0.4014460329954866, "learning_rate": 2.105591609625608e-05, "loss": 1.3998, "step": 1908 }, { "epoch": 0.488704, "grad_norm": 0.39470932194340597, "learning_rate": 2.1039685996867553e-05, "loss": 1.3545, "step": 1909 }, { "epoch": 0.48896, "grad_norm": 0.3779309817406106, "learning_rate": 2.1023455210919464e-05, "loss": 1.3421, "step": 1910 }, { "epoch": 0.489216, "grad_norm": 0.42963191579012516, "learning_rate": 2.1007223749129868e-05, "loss": 1.39, "step": 1911 }, { "epoch": 0.489472, "grad_norm": 0.39017051602695113, "learning_rate": 2.0990991622217245e-05, "loss": 1.3524, "step": 1912 }, { "epoch": 0.489728, "grad_norm": 0.397072377704159, "learning_rate": 2.097475884090053e-05, "loss": 1.3887, "step": 1913 }, { "epoch": 0.489984, "grad_norm": 0.39216348539631113, "learning_rate": 2.0958525415899097e-05, "loss": 1.3522, "step": 1914 }, { "epoch": 0.49024, "grad_norm": 0.3912211902857766, "learning_rate": 2.094229135793272e-05, "loss": 1.3368, "step": 1915 }, { "epoch": 0.490496, "grad_norm": 0.3763842134676675, "learning_rate": 2.092605667772161e-05, "loss": 1.3398, "step": 1916 }, { "epoch": 0.490752, "grad_norm": 0.40522392708546073, "learning_rate": 2.090982138598638e-05, "loss": 1.4091, "step": 1917 }, { "epoch": 0.491008, "grad_norm": 0.3877709008886343, "learning_rate": 2.089358549344805e-05, "loss": 1.3868, "step": 1918 }, { "epoch": 0.491264, "grad_norm": 0.3823586201376526, "learning_rate": 2.0877349010828044e-05, "loss": 1.3416, "step": 1919 }, { "epoch": 0.49152, "grad_norm": 0.3937433305394222, "learning_rate": 2.0861111948848164e-05, "loss": 1.4201, "step": 1920 }, { "epoch": 0.491776, "grad_norm": 0.38459863882126133, "learning_rate": 2.08448743182306e-05, "loss": 1.3307, "step": 1921 }, { "epoch": 0.492032, "grad_norm": 0.38562441496805383, "learning_rate": 2.0828636129697913e-05, "loss": 1.3439, "step": 1922 }, { "epoch": 0.492288, "grad_norm": 0.4057156401896175, "learning_rate": 2.081239739397304e-05, "loss": 1.4366, "step": 1923 }, { "epoch": 0.492544, "grad_norm": 0.3756407061799462, "learning_rate": 2.0796158121779276e-05, "loss": 1.3363, "step": 1924 }, { "epoch": 0.4928, "grad_norm": 0.3913577138131945, "learning_rate": 2.0779918323840272e-05, "loss": 1.3487, "step": 1925 }, { "epoch": 0.493056, "grad_norm": 0.39703055525485664, "learning_rate": 2.0763678010880022e-05, "loss": 1.3578, "step": 1926 }, { "epoch": 0.493312, "grad_norm": 0.3855821699483612, "learning_rate": 2.0747437193622853e-05, "loss": 1.3834, "step": 1927 }, { "epoch": 0.493568, "grad_norm": 0.3976740420628351, "learning_rate": 2.0731195882793444e-05, "loss": 1.392, "step": 1928 }, { "epoch": 0.493824, "grad_norm": 0.38616943432626455, "learning_rate": 2.071495408911679e-05, "loss": 1.3432, "step": 1929 }, { "epoch": 0.49408, "grad_norm": 0.39878652095482303, "learning_rate": 2.0698711823318206e-05, "loss": 1.4333, "step": 1930 }, { "epoch": 0.494336, "grad_norm": 0.37809552547894043, "learning_rate": 2.068246909612331e-05, "loss": 1.3208, "step": 1931 }, { "epoch": 0.494592, "grad_norm": 0.40437408145789877, "learning_rate": 2.0666225918258044e-05, "loss": 1.3466, "step": 1932 }, { "epoch": 0.494848, "grad_norm": 0.3825355317614924, "learning_rate": 2.0649982300448622e-05, "loss": 1.3353, "step": 1933 }, { "epoch": 0.495104, "grad_norm": 0.3814665792747797, "learning_rate": 2.0633738253421568e-05, "loss": 1.4063, "step": 1934 }, { "epoch": 0.49536, "grad_norm": 0.4169007755233025, "learning_rate": 2.0617493787903693e-05, "loss": 1.3536, "step": 1935 }, { "epoch": 0.495616, "grad_norm": 0.38418396725842263, "learning_rate": 2.060124891462206e-05, "loss": 1.3588, "step": 1936 }, { "epoch": 0.495872, "grad_norm": 0.37767525766944776, "learning_rate": 2.058500364430403e-05, "loss": 1.3787, "step": 1937 }, { "epoch": 0.496128, "grad_norm": 0.3934807105107317, "learning_rate": 2.056875798767721e-05, "loss": 1.3801, "step": 1938 }, { "epoch": 0.496384, "grad_norm": 0.3951314838809165, "learning_rate": 2.055251195546945e-05, "loss": 1.3386, "step": 1939 }, { "epoch": 0.49664, "grad_norm": 0.38609768348745493, "learning_rate": 2.0536265558408882e-05, "loss": 1.3933, "step": 1940 }, { "epoch": 0.496896, "grad_norm": 0.392244717604293, "learning_rate": 2.052001880722385e-05, "loss": 1.4091, "step": 1941 }, { "epoch": 0.497152, "grad_norm": 0.37865861275173585, "learning_rate": 2.0503771712642943e-05, "loss": 1.3607, "step": 1942 }, { "epoch": 0.497408, "grad_norm": 0.3963807460907485, "learning_rate": 2.048752428539498e-05, "loss": 1.3971, "step": 1943 }, { "epoch": 0.497664, "grad_norm": 0.3750881740326447, "learning_rate": 2.0471276536208985e-05, "loss": 1.3329, "step": 1944 }, { "epoch": 0.49792, "grad_norm": 0.38133354834594463, "learning_rate": 2.045502847581422e-05, "loss": 1.3677, "step": 1945 }, { "epoch": 0.498176, "grad_norm": 0.3781436719748803, "learning_rate": 2.043878011494012e-05, "loss": 1.3767, "step": 1946 }, { "epoch": 0.498432, "grad_norm": 0.3823177173813485, "learning_rate": 2.0422531464316348e-05, "loss": 1.371, "step": 1947 }, { "epoch": 0.498688, "grad_norm": 0.3853247153477152, "learning_rate": 2.040628253467274e-05, "loss": 1.3701, "step": 1948 }, { "epoch": 0.498944, "grad_norm": 0.39146602670334707, "learning_rate": 2.0390033336739324e-05, "loss": 1.3573, "step": 1949 }, { "epoch": 0.4992, "grad_norm": 0.3809707236503774, "learning_rate": 2.0373783881246304e-05, "loss": 1.3735, "step": 1950 }, { "epoch": 0.499456, "grad_norm": 0.38373386962231826, "learning_rate": 2.035753417892405e-05, "loss": 1.3625, "step": 1951 }, { "epoch": 0.499712, "grad_norm": 0.43752232820186826, "learning_rate": 2.03412842405031e-05, "loss": 1.3675, "step": 1952 }, { "epoch": 0.499968, "grad_norm": 0.43061717408471223, "learning_rate": 2.0325034076714154e-05, "loss": 1.3578, "step": 1953 }, { "epoch": 0.500224, "grad_norm": 0.38957311795657534, "learning_rate": 2.030878369828804e-05, "loss": 1.3417, "step": 1954 }, { "epoch": 0.50048, "grad_norm": 0.38734693860595065, "learning_rate": 2.0292533115955747e-05, "loss": 1.3483, "step": 1955 }, { "epoch": 0.500736, "grad_norm": 0.4001206589592376, "learning_rate": 2.0276282340448388e-05, "loss": 1.3746, "step": 1956 }, { "epoch": 0.500992, "grad_norm": 0.3932650495410377, "learning_rate": 2.0260031382497223e-05, "loss": 1.3732, "step": 1957 }, { "epoch": 0.501248, "grad_norm": 0.40347394899495653, "learning_rate": 2.0243780252833595e-05, "loss": 1.3855, "step": 1958 }, { "epoch": 0.501504, "grad_norm": 0.39723513653260434, "learning_rate": 2.0227528962189007e-05, "loss": 1.3594, "step": 1959 }, { "epoch": 0.50176, "grad_norm": 0.3755346349346534, "learning_rate": 2.0211277521295028e-05, "loss": 1.3553, "step": 1960 }, { "epoch": 0.502016, "grad_norm": 0.3845739316487716, "learning_rate": 2.0195025940883345e-05, "loss": 1.3613, "step": 1961 }, { "epoch": 0.502272, "grad_norm": 0.3997167923209803, "learning_rate": 2.0178774231685737e-05, "loss": 1.4203, "step": 1962 }, { "epoch": 0.502528, "grad_norm": 0.39110812057016336, "learning_rate": 2.0162522404434064e-05, "loss": 1.3929, "step": 1963 }, { "epoch": 0.502784, "grad_norm": 0.3826156000320015, "learning_rate": 2.0146270469860267e-05, "loss": 1.3769, "step": 1964 }, { "epoch": 0.50304, "grad_norm": 0.39525167791832627, "learning_rate": 2.0130018438696356e-05, "loss": 1.3841, "step": 1965 }, { "epoch": 0.503296, "grad_norm": 0.46290825246172496, "learning_rate": 2.011376632167441e-05, "loss": 1.3184, "step": 1966 }, { "epoch": 0.503552, "grad_norm": 0.3902020720325329, "learning_rate": 2.009751412952655e-05, "loss": 1.3799, "step": 1967 }, { "epoch": 0.503808, "grad_norm": 0.37705753303203643, "learning_rate": 2.008126187298496e-05, "loss": 1.3499, "step": 1968 }, { "epoch": 0.504064, "grad_norm": 0.37233911352395976, "learning_rate": 2.006500956278187e-05, "loss": 1.2956, "step": 1969 }, { "epoch": 0.50432, "grad_norm": 0.39048293745012347, "learning_rate": 2.004875720964953e-05, "loss": 1.3707, "step": 1970 }, { "epoch": 0.504576, "grad_norm": 0.3816016600125312, "learning_rate": 2.0032504824320232e-05, "loss": 1.3477, "step": 1971 }, { "epoch": 0.504832, "grad_norm": 0.39998943464436226, "learning_rate": 2.0016252417526286e-05, "loss": 1.4148, "step": 1972 }, { "epoch": 0.505088, "grad_norm": 0.36275553689547413, "learning_rate": 2e-05, "loss": 1.3116, "step": 1973 }, { "epoch": 0.505344, "grad_norm": 0.3871694522161321, "learning_rate": 1.998374758247372e-05, "loss": 1.3867, "step": 1974 }, { "epoch": 0.5056, "grad_norm": 0.39756330652623084, "learning_rate": 1.9967495175679767e-05, "loss": 1.3371, "step": 1975 }, { "epoch": 0.505856, "grad_norm": 0.3811377904000282, "learning_rate": 1.9951242790350473e-05, "loss": 1.3702, "step": 1976 }, { "epoch": 0.506112, "grad_norm": 0.39480916686406853, "learning_rate": 1.993499043721814e-05, "loss": 1.4002, "step": 1977 }, { "epoch": 0.506368, "grad_norm": 0.3949932713288373, "learning_rate": 1.9918738127015044e-05, "loss": 1.4003, "step": 1978 }, { "epoch": 0.506624, "grad_norm": 0.40511077698438464, "learning_rate": 1.9902485870473456e-05, "loss": 1.3934, "step": 1979 }, { "epoch": 0.50688, "grad_norm": 0.37228094962321134, "learning_rate": 1.9886233678325593e-05, "loss": 1.323, "step": 1980 }, { "epoch": 0.507136, "grad_norm": 0.39072542349002837, "learning_rate": 1.9869981561303648e-05, "loss": 1.3769, "step": 1981 }, { "epoch": 0.507392, "grad_norm": 0.3874456384980606, "learning_rate": 1.9853729530139733e-05, "loss": 1.3545, "step": 1982 }, { "epoch": 0.507648, "grad_norm": 0.38616909369123936, "learning_rate": 1.9837477595565943e-05, "loss": 1.3723, "step": 1983 }, { "epoch": 0.507904, "grad_norm": 0.380014806887977, "learning_rate": 1.9821225768314273e-05, "loss": 1.3552, "step": 1984 }, { "epoch": 0.50816, "grad_norm": 0.3918810517682189, "learning_rate": 1.9804974059116662e-05, "loss": 1.4037, "step": 1985 }, { "epoch": 0.508416, "grad_norm": 0.38827740239059316, "learning_rate": 1.9788722478704976e-05, "loss": 1.3649, "step": 1986 }, { "epoch": 0.508672, "grad_norm": 0.6764152648389595, "learning_rate": 1.9772471037811e-05, "loss": 1.3329, "step": 1987 }, { "epoch": 0.508928, "grad_norm": 0.4038041178815253, "learning_rate": 1.9756219747166402e-05, "loss": 1.3828, "step": 1988 }, { "epoch": 0.509184, "grad_norm": 0.3939024982146074, "learning_rate": 1.973996861750279e-05, "loss": 1.4168, "step": 1989 }, { "epoch": 0.50944, "grad_norm": 0.3799535259187517, "learning_rate": 1.9723717659551615e-05, "loss": 1.3796, "step": 1990 }, { "epoch": 0.509696, "grad_norm": 0.38191301074008077, "learning_rate": 1.970746688404426e-05, "loss": 1.3685, "step": 1991 }, { "epoch": 0.509952, "grad_norm": 0.3786011550570985, "learning_rate": 1.9691216301711966e-05, "loss": 1.3765, "step": 1992 }, { "epoch": 0.510208, "grad_norm": 0.3825478153169017, "learning_rate": 1.9674965923285853e-05, "loss": 1.3655, "step": 1993 }, { "epoch": 0.510464, "grad_norm": 0.38829089445025744, "learning_rate": 1.96587157594969e-05, "loss": 1.3714, "step": 1994 }, { "epoch": 0.51072, "grad_norm": 0.3887355422106671, "learning_rate": 1.964246582107596e-05, "loss": 1.3364, "step": 1995 }, { "epoch": 0.510976, "grad_norm": 0.3739339344603865, "learning_rate": 1.9626216118753706e-05, "loss": 1.3001, "step": 1996 }, { "epoch": 0.511232, "grad_norm": 0.38535629182107584, "learning_rate": 1.960996666326068e-05, "loss": 1.3261, "step": 1997 }, { "epoch": 0.511488, "grad_norm": 0.3918910022989723, "learning_rate": 1.959371746532726e-05, "loss": 1.4068, "step": 1998 }, { "epoch": 0.511744, "grad_norm": 0.3941888414970392, "learning_rate": 1.9577468535683656e-05, "loss": 1.3977, "step": 1999 }, { "epoch": 0.512, "grad_norm": 0.3776578134866879, "learning_rate": 1.9561219885059882e-05, "loss": 1.3409, "step": 2000 }, { "epoch": 0.512256, "grad_norm": 0.3805173482554356, "learning_rate": 1.9544971524185788e-05, "loss": 1.3669, "step": 2001 }, { "epoch": 0.512512, "grad_norm": 0.4026985262759454, "learning_rate": 1.9528723463791018e-05, "loss": 1.3806, "step": 2002 }, { "epoch": 0.512768, "grad_norm": 0.3801088690882476, "learning_rate": 1.9512475714605026e-05, "loss": 1.3475, "step": 2003 }, { "epoch": 0.513024, "grad_norm": 0.3791935978374013, "learning_rate": 1.949622828735706e-05, "loss": 1.3496, "step": 2004 }, { "epoch": 0.51328, "grad_norm": 0.3875364802742619, "learning_rate": 1.9479981192776155e-05, "loss": 1.3804, "step": 2005 }, { "epoch": 0.513536, "grad_norm": 0.36535804881634, "learning_rate": 1.946373444159112e-05, "loss": 1.3265, "step": 2006 }, { "epoch": 0.513792, "grad_norm": 0.379400328996959, "learning_rate": 1.9447488044530562e-05, "loss": 1.3409, "step": 2007 }, { "epoch": 0.514048, "grad_norm": 0.3966491889236639, "learning_rate": 1.9431242012322804e-05, "loss": 1.356, "step": 2008 }, { "epoch": 0.514304, "grad_norm": 0.40753606751500926, "learning_rate": 1.9414996355695977e-05, "loss": 1.416, "step": 2009 }, { "epoch": 0.51456, "grad_norm": 0.3729933466235488, "learning_rate": 1.9398751085377947e-05, "loss": 1.3442, "step": 2010 }, { "epoch": 0.514816, "grad_norm": 0.46908178989442895, "learning_rate": 1.9382506212096314e-05, "loss": 1.3587, "step": 2011 }, { "epoch": 0.515072, "grad_norm": 0.3822963648272708, "learning_rate": 1.9366261746578436e-05, "loss": 1.3568, "step": 2012 }, { "epoch": 0.515328, "grad_norm": 0.4203938774143448, "learning_rate": 1.9350017699551388e-05, "loss": 1.3751, "step": 2013 }, { "epoch": 0.515584, "grad_norm": 0.3835878632140864, "learning_rate": 1.9333774081741966e-05, "loss": 1.3767, "step": 2014 }, { "epoch": 0.51584, "grad_norm": 0.3854311270816547, "learning_rate": 1.9317530903876693e-05, "loss": 1.3392, "step": 2015 }, { "epoch": 0.516096, "grad_norm": 0.37716617835807054, "learning_rate": 1.9301288176681804e-05, "loss": 1.3351, "step": 2016 }, { "epoch": 0.516352, "grad_norm": 0.37510208283474183, "learning_rate": 1.9285045910883215e-05, "loss": 1.36, "step": 2017 }, { "epoch": 0.516608, "grad_norm": 0.3889786974781347, "learning_rate": 1.926880411720656e-05, "loss": 1.3456, "step": 2018 }, { "epoch": 0.516864, "grad_norm": 0.38281895229950763, "learning_rate": 1.9252562806377157e-05, "loss": 1.3694, "step": 2019 }, { "epoch": 0.51712, "grad_norm": 0.3764444722565575, "learning_rate": 1.923632198911999e-05, "loss": 1.3526, "step": 2020 }, { "epoch": 0.517376, "grad_norm": 0.37803337524374225, "learning_rate": 1.9220081676159734e-05, "loss": 1.3277, "step": 2021 }, { "epoch": 0.517632, "grad_norm": 0.37486847839364956, "learning_rate": 1.920384187822073e-05, "loss": 1.3522, "step": 2022 }, { "epoch": 0.517888, "grad_norm": 0.3793818125835509, "learning_rate": 1.9187602606026962e-05, "loss": 1.35, "step": 2023 }, { "epoch": 0.518144, "grad_norm": 0.37553117272021497, "learning_rate": 1.917136387030209e-05, "loss": 1.305, "step": 2024 }, { "epoch": 0.5184, "grad_norm": 0.38402371778975825, "learning_rate": 1.915512568176941e-05, "loss": 1.3668, "step": 2025 }, { "epoch": 0.518656, "grad_norm": 0.3724125018094645, "learning_rate": 1.9138888051151843e-05, "loss": 1.3753, "step": 2026 }, { "epoch": 0.518912, "grad_norm": 0.3916883428772269, "learning_rate": 1.9122650989171963e-05, "loss": 1.3586, "step": 2027 }, { "epoch": 0.519168, "grad_norm": 0.38366679902568507, "learning_rate": 1.9106414506551953e-05, "loss": 1.3504, "step": 2028 }, { "epoch": 0.519424, "grad_norm": 0.3737928390889648, "learning_rate": 1.9090178614013623e-05, "loss": 1.3402, "step": 2029 }, { "epoch": 0.51968, "grad_norm": 0.37944351880829946, "learning_rate": 1.9073943322278392e-05, "loss": 1.3564, "step": 2030 }, { "epoch": 0.519936, "grad_norm": 0.38686519820469406, "learning_rate": 1.905770864206729e-05, "loss": 1.3531, "step": 2031 }, { "epoch": 0.520192, "grad_norm": 0.38161934980693907, "learning_rate": 1.9041474584100906e-05, "loss": 1.3843, "step": 2032 }, { "epoch": 0.520448, "grad_norm": 0.38096987600479093, "learning_rate": 1.9025241159099473e-05, "loss": 1.3479, "step": 2033 }, { "epoch": 0.520704, "grad_norm": 0.3799753033323743, "learning_rate": 1.900900837778276e-05, "loss": 1.3615, "step": 2034 }, { "epoch": 0.52096, "grad_norm": 0.3824694160095296, "learning_rate": 1.899277625087014e-05, "loss": 1.3461, "step": 2035 }, { "epoch": 0.521216, "grad_norm": 0.370765586732249, "learning_rate": 1.897654478908054e-05, "loss": 1.3601, "step": 2036 }, { "epoch": 0.521472, "grad_norm": 0.37434634624810453, "learning_rate": 1.8960314003132454e-05, "loss": 1.3343, "step": 2037 }, { "epoch": 0.521728, "grad_norm": 0.4090736874847413, "learning_rate": 1.8944083903743925e-05, "loss": 1.4035, "step": 2038 }, { "epoch": 0.521984, "grad_norm": 0.3761748606810081, "learning_rate": 1.8927854501632545e-05, "loss": 1.3427, "step": 2039 }, { "epoch": 0.52224, "grad_norm": 0.3914494264900448, "learning_rate": 1.891162580751544e-05, "loss": 1.3646, "step": 2040 }, { "epoch": 0.522496, "grad_norm": 0.36885682978584305, "learning_rate": 1.8895397832109278e-05, "loss": 1.3281, "step": 2041 }, { "epoch": 0.522752, "grad_norm": 0.38415728985535125, "learning_rate": 1.887917058613025e-05, "loss": 1.3353, "step": 2042 }, { "epoch": 0.523008, "grad_norm": 0.38406231515071804, "learning_rate": 1.8862944080294064e-05, "loss": 1.3523, "step": 2043 }, { "epoch": 0.523264, "grad_norm": 0.4364899047138978, "learning_rate": 1.8846718325315932e-05, "loss": 1.3524, "step": 2044 }, { "epoch": 0.52352, "grad_norm": 0.40585140573911505, "learning_rate": 1.8830493331910585e-05, "loss": 1.3624, "step": 2045 }, { "epoch": 0.523776, "grad_norm": 0.3866723892567866, "learning_rate": 1.881426911079223e-05, "loss": 1.3303, "step": 2046 }, { "epoch": 0.524032, "grad_norm": 0.37406192942239963, "learning_rate": 1.879804567267459e-05, "loss": 1.3459, "step": 2047 }, { "epoch": 0.524288, "grad_norm": 0.41742522714072416, "learning_rate": 1.878182302827085e-05, "loss": 1.4168, "step": 2048 }, { "epoch": 0.524544, "grad_norm": 0.372405924295808, "learning_rate": 1.8765601188293684e-05, "loss": 1.3732, "step": 2049 }, { "epoch": 0.5248, "grad_norm": 0.376264613982168, "learning_rate": 1.8749380163455233e-05, "loss": 1.3402, "step": 2050 }, { "epoch": 0.525056, "grad_norm": 0.42417981347771366, "learning_rate": 1.873315996446708e-05, "loss": 1.331, "step": 2051 }, { "epoch": 0.525312, "grad_norm": 0.37369764743074074, "learning_rate": 1.8716940602040288e-05, "loss": 1.3486, "step": 2052 }, { "epoch": 0.525568, "grad_norm": 0.38997774182167544, "learning_rate": 1.8700722086885362e-05, "loss": 1.3656, "step": 2053 }, { "epoch": 0.525824, "grad_norm": 0.37788192897730455, "learning_rate": 1.8684504429712243e-05, "loss": 1.3516, "step": 2054 }, { "epoch": 0.52608, "grad_norm": 0.3826998166530222, "learning_rate": 1.8668287641230297e-05, "loss": 1.3475, "step": 2055 }, { "epoch": 0.526336, "grad_norm": 0.38907979309832097, "learning_rate": 1.865207173214835e-05, "loss": 1.3993, "step": 2056 }, { "epoch": 0.526592, "grad_norm": 0.3958424846120189, "learning_rate": 1.863585671317459e-05, "loss": 1.3511, "step": 2057 }, { "epoch": 0.526848, "grad_norm": 0.39056588073092524, "learning_rate": 1.8619642595016666e-05, "loss": 1.4097, "step": 2058 }, { "epoch": 0.527104, "grad_norm": 0.3861322913089585, "learning_rate": 1.8603429388381616e-05, "loss": 1.3908, "step": 2059 }, { "epoch": 0.52736, "grad_norm": 0.37809193121786944, "learning_rate": 1.8587217103975883e-05, "loss": 1.3498, "step": 2060 }, { "epoch": 0.527616, "grad_norm": 0.4634022980405316, "learning_rate": 1.8571005752505283e-05, "loss": 1.3535, "step": 2061 }, { "epoch": 0.527872, "grad_norm": 0.3734249420568222, "learning_rate": 1.855479534467504e-05, "loss": 1.3504, "step": 2062 }, { "epoch": 0.528128, "grad_norm": 0.38513967648013914, "learning_rate": 1.8538585891189728e-05, "loss": 1.3613, "step": 2063 }, { "epoch": 0.528384, "grad_norm": 0.3952614760238835, "learning_rate": 1.852237740275331e-05, "loss": 1.3569, "step": 2064 }, { "epoch": 0.52864, "grad_norm": 0.3799787486364502, "learning_rate": 1.8506169890069107e-05, "loss": 1.3432, "step": 2065 }, { "epoch": 0.528896, "grad_norm": 0.3772255968606647, "learning_rate": 1.84899633638398e-05, "loss": 1.3722, "step": 2066 }, { "epoch": 0.529152, "grad_norm": 0.3952663829807274, "learning_rate": 1.8473757834767407e-05, "loss": 1.3679, "step": 2067 }, { "epoch": 0.529408, "grad_norm": 0.38699406164517774, "learning_rate": 1.8457553313553307e-05, "loss": 1.3502, "step": 2068 }, { "epoch": 0.529664, "grad_norm": 0.3747695467247886, "learning_rate": 1.8441349810898183e-05, "loss": 1.343, "step": 2069 }, { "epoch": 0.52992, "grad_norm": 0.3912642106608188, "learning_rate": 1.842514733750207e-05, "loss": 1.3621, "step": 2070 }, { "epoch": 0.530176, "grad_norm": 0.37617577873717256, "learning_rate": 1.8408945904064318e-05, "loss": 1.3442, "step": 2071 }, { "epoch": 0.530432, "grad_norm": 0.3822158921231808, "learning_rate": 1.8392745521283595e-05, "loss": 1.3835, "step": 2072 }, { "epoch": 0.530688, "grad_norm": 0.3829521686892734, "learning_rate": 1.8376546199857863e-05, "loss": 1.3116, "step": 2073 }, { "epoch": 0.530944, "grad_norm": 0.3912827432901779, "learning_rate": 1.8360347950484394e-05, "loss": 1.371, "step": 2074 }, { "epoch": 0.5312, "grad_norm": 0.38984717198741525, "learning_rate": 1.8344150783859735e-05, "loss": 1.4056, "step": 2075 }, { "epoch": 0.531456, "grad_norm": 0.3800195058843405, "learning_rate": 1.832795471067974e-05, "loss": 1.3839, "step": 2076 }, { "epoch": 0.531712, "grad_norm": 0.4058431189404512, "learning_rate": 1.8311759741639532e-05, "loss": 1.3924, "step": 2077 }, { "epoch": 0.531968, "grad_norm": 0.38818070189647086, "learning_rate": 1.82955658874335e-05, "loss": 1.3829, "step": 2078 }, { "epoch": 0.532224, "grad_norm": 0.3889718938267594, "learning_rate": 1.8279373158755302e-05, "loss": 1.3909, "step": 2079 }, { "epoch": 0.53248, "grad_norm": 0.38596348551667153, "learning_rate": 1.826318156629786e-05, "loss": 1.3399, "step": 2080 }, { "epoch": 0.532736, "grad_norm": 0.38073285372814036, "learning_rate": 1.824699112075332e-05, "loss": 1.3282, "step": 2081 }, { "epoch": 0.532992, "grad_norm": 0.3887279076016933, "learning_rate": 1.8230801832813098e-05, "loss": 1.3958, "step": 2082 }, { "epoch": 0.533248, "grad_norm": 0.38026456510914264, "learning_rate": 1.821461371316784e-05, "loss": 1.3397, "step": 2083 }, { "epoch": 0.533504, "grad_norm": 0.3840468334946173, "learning_rate": 1.8198426772507403e-05, "loss": 1.3969, "step": 2084 }, { "epoch": 0.53376, "grad_norm": 0.36735772950034606, "learning_rate": 1.8182241021520894e-05, "loss": 1.3497, "step": 2085 }, { "epoch": 0.534016, "grad_norm": 0.37727340891308475, "learning_rate": 1.816605647089662e-05, "loss": 1.3405, "step": 2086 }, { "epoch": 0.534272, "grad_norm": 0.3817167618739919, "learning_rate": 1.8149873131322074e-05, "loss": 1.3528, "step": 2087 }, { "epoch": 0.534528, "grad_norm": 0.37470028659453597, "learning_rate": 1.813369101348398e-05, "loss": 1.3704, "step": 2088 }, { "epoch": 0.534784, "grad_norm": 0.3815150268056056, "learning_rate": 1.8117510128068256e-05, "loss": 1.3744, "step": 2089 }, { "epoch": 0.53504, "grad_norm": 0.381605010560828, "learning_rate": 1.810133048575998e-05, "loss": 1.3815, "step": 2090 }, { "epoch": 0.535296, "grad_norm": 0.3719371477350257, "learning_rate": 1.8085152097243427e-05, "loss": 1.3317, "step": 2091 }, { "epoch": 0.535552, "grad_norm": 0.3775510754806728, "learning_rate": 1.8068974973202054e-05, "loss": 1.3553, "step": 2092 }, { "epoch": 0.535808, "grad_norm": 0.3856223560753888, "learning_rate": 1.805279912431845e-05, "loss": 1.3624, "step": 2093 }, { "epoch": 0.536064, "grad_norm": 0.37921495698941027, "learning_rate": 1.803662456127439e-05, "loss": 1.3477, "step": 2094 }, { "epoch": 0.53632, "grad_norm": 0.37422915813102103, "learning_rate": 1.8020451294750803e-05, "loss": 1.3367, "step": 2095 }, { "epoch": 0.536576, "grad_norm": 0.39208733182668565, "learning_rate": 1.8004279335427736e-05, "loss": 1.3699, "step": 2096 }, { "epoch": 0.536832, "grad_norm": 0.45200517766565956, "learning_rate": 1.798810869398439e-05, "loss": 1.3528, "step": 2097 }, { "epoch": 0.537088, "grad_norm": 0.38664350422940386, "learning_rate": 1.7971939381099096e-05, "loss": 1.3731, "step": 2098 }, { "epoch": 0.537344, "grad_norm": 0.3749868711865193, "learning_rate": 1.7955771407449318e-05, "loss": 1.3312, "step": 2099 }, { "epoch": 0.5376, "grad_norm": 0.3802562997281817, "learning_rate": 1.7939604783711604e-05, "loss": 1.3421, "step": 2100 }, { "epoch": 0.537856, "grad_norm": 0.3847721004167942, "learning_rate": 1.7923439520561633e-05, "loss": 1.4153, "step": 2101 }, { "epoch": 0.538112, "grad_norm": 0.38647196295576564, "learning_rate": 1.790727562867418e-05, "loss": 1.376, "step": 2102 }, { "epoch": 0.538368, "grad_norm": 0.3867842013063015, "learning_rate": 1.7891113118723124e-05, "loss": 1.3764, "step": 2103 }, { "epoch": 0.538624, "grad_norm": 0.3675020116716856, "learning_rate": 1.7874952001381422e-05, "loss": 1.3311, "step": 2104 }, { "epoch": 0.53888, "grad_norm": 0.3802479662147957, "learning_rate": 1.7858792287321115e-05, "loss": 1.3523, "step": 2105 }, { "epoch": 0.539136, "grad_norm": 0.3838485199168695, "learning_rate": 1.7842633987213306e-05, "loss": 1.3881, "step": 2106 }, { "epoch": 0.539392, "grad_norm": 0.38426644959950734, "learning_rate": 1.782647711172818e-05, "loss": 1.3757, "step": 2107 }, { "epoch": 0.539648, "grad_norm": 0.38371774530606056, "learning_rate": 1.781032167153497e-05, "loss": 1.3342, "step": 2108 }, { "epoch": 0.539904, "grad_norm": 0.3745116546620054, "learning_rate": 1.779416767730197e-05, "loss": 1.3521, "step": 2109 }, { "epoch": 0.54016, "grad_norm": 0.3855687090157187, "learning_rate": 1.7778015139696522e-05, "loss": 1.3714, "step": 2110 }, { "epoch": 0.540416, "grad_norm": 0.3675427258179148, "learning_rate": 1.776186406938499e-05, "loss": 1.3271, "step": 2111 }, { "epoch": 0.540672, "grad_norm": 0.38420824151141797, "learning_rate": 1.7745714477032785e-05, "loss": 1.3768, "step": 2112 }, { "epoch": 0.540928, "grad_norm": 0.3736316609285772, "learning_rate": 1.7729566373304324e-05, "loss": 1.3495, "step": 2113 }, { "epoch": 0.541184, "grad_norm": 0.5039960628789155, "learning_rate": 1.7713419768863062e-05, "loss": 1.3245, "step": 2114 }, { "epoch": 0.54144, "grad_norm": 0.3951841030218529, "learning_rate": 1.769727467437145e-05, "loss": 1.3773, "step": 2115 }, { "epoch": 0.541696, "grad_norm": 0.38575774498899407, "learning_rate": 1.768113110049096e-05, "loss": 1.3765, "step": 2116 }, { "epoch": 0.541952, "grad_norm": 0.4030279378238872, "learning_rate": 1.7664989057882028e-05, "loss": 1.3747, "step": 2117 }, { "epoch": 0.542208, "grad_norm": 0.3824938246922401, "learning_rate": 1.7648848557204108e-05, "loss": 1.36, "step": 2118 }, { "epoch": 0.542464, "grad_norm": 0.3813476261543493, "learning_rate": 1.7632709609115615e-05, "loss": 1.3599, "step": 2119 }, { "epoch": 0.54272, "grad_norm": 0.3854620647729533, "learning_rate": 1.7616572224273955e-05, "loss": 1.3774, "step": 2120 }, { "epoch": 0.542976, "grad_norm": 0.3872480031165972, "learning_rate": 1.7600436413335497e-05, "loss": 1.3385, "step": 2121 }, { "epoch": 0.543232, "grad_norm": 0.37459907478509025, "learning_rate": 1.7584302186955566e-05, "loss": 1.3637, "step": 2122 }, { "epoch": 0.543488, "grad_norm": 0.37003604400193363, "learning_rate": 1.7568169555788452e-05, "loss": 1.3277, "step": 2123 }, { "epoch": 0.543744, "grad_norm": 0.3897103268959548, "learning_rate": 1.7552038530487365e-05, "loss": 1.3606, "step": 2124 }, { "epoch": 0.544, "grad_norm": 0.3842180135214001, "learning_rate": 1.7535909121704483e-05, "loss": 1.3797, "step": 2125 }, { "epoch": 0.544256, "grad_norm": 0.3793962630856666, "learning_rate": 1.7519781340090907e-05, "loss": 1.3541, "step": 2126 }, { "epoch": 0.544512, "grad_norm": 0.38147543127416006, "learning_rate": 1.7503655196296663e-05, "loss": 1.3499, "step": 2127 }, { "epoch": 0.544768, "grad_norm": 0.37522914726155804, "learning_rate": 1.7487530700970695e-05, "loss": 1.3275, "step": 2128 }, { "epoch": 0.545024, "grad_norm": 0.3806746091578742, "learning_rate": 1.7471407864760865e-05, "loss": 1.3549, "step": 2129 }, { "epoch": 0.54528, "grad_norm": 0.3822651391506844, "learning_rate": 1.7455286698313915e-05, "loss": 1.336, "step": 2130 }, { "epoch": 0.545536, "grad_norm": 0.3748034070970487, "learning_rate": 1.7439167212275513e-05, "loss": 1.3661, "step": 2131 }, { "epoch": 0.545792, "grad_norm": 0.37888354545732184, "learning_rate": 1.742304941729021e-05, "loss": 1.369, "step": 2132 }, { "epoch": 0.546048, "grad_norm": 0.3856861662677533, "learning_rate": 1.7406933324001434e-05, "loss": 1.3936, "step": 2133 }, { "epoch": 0.546304, "grad_norm": 0.38421050317964134, "learning_rate": 1.739081894305149e-05, "loss": 1.3731, "step": 2134 }, { "epoch": 0.54656, "grad_norm": 0.38700673161803956, "learning_rate": 1.737470628508156e-05, "loss": 1.3835, "step": 2135 }, { "epoch": 0.546816, "grad_norm": 0.36366413651031415, "learning_rate": 1.7358595360731677e-05, "loss": 1.3355, "step": 2136 }, { "epoch": 0.547072, "grad_norm": 0.37453922051959276, "learning_rate": 1.7342486180640734e-05, "loss": 1.3516, "step": 2137 }, { "epoch": 0.547328, "grad_norm": 0.3830745867488307, "learning_rate": 1.7326378755446477e-05, "loss": 1.3693, "step": 2138 }, { "epoch": 0.547584, "grad_norm": 0.3662720842511759, "learning_rate": 1.7310273095785493e-05, "loss": 1.3627, "step": 2139 }, { "epoch": 0.54784, "grad_norm": 0.3743521462657609, "learning_rate": 1.729416921229319e-05, "loss": 1.3267, "step": 2140 }, { "epoch": 0.548096, "grad_norm": 0.37170806478825064, "learning_rate": 1.727806711560383e-05, "loss": 1.3315, "step": 2141 }, { "epoch": 0.548352, "grad_norm": 0.38371584915491314, "learning_rate": 1.726196681635045e-05, "loss": 1.356, "step": 2142 }, { "epoch": 0.548608, "grad_norm": 0.36881069578312703, "learning_rate": 1.724586832516495e-05, "loss": 1.3255, "step": 2143 }, { "epoch": 0.548864, "grad_norm": 0.36831457203626694, "learning_rate": 1.7229771652678e-05, "loss": 1.3442, "step": 2144 }, { "epoch": 0.54912, "grad_norm": 0.369309849625158, "learning_rate": 1.7213676809519097e-05, "loss": 1.391, "step": 2145 }, { "epoch": 0.549376, "grad_norm": 0.3818064674117364, "learning_rate": 1.7197583806316503e-05, "loss": 1.4232, "step": 2146 }, { "epoch": 0.549632, "grad_norm": 0.37144236216381116, "learning_rate": 1.7181492653697294e-05, "loss": 1.3693, "step": 2147 }, { "epoch": 0.549888, "grad_norm": 0.38241896266778236, "learning_rate": 1.7165403362287282e-05, "loss": 1.3803, "step": 2148 }, { "epoch": 0.550144, "grad_norm": 0.3789863922718266, "learning_rate": 1.7149315942711095e-05, "loss": 1.3494, "step": 2149 }, { "epoch": 0.5504, "grad_norm": 0.3825600834270111, "learning_rate": 1.7133230405592104e-05, "loss": 1.3613, "step": 2150 }, { "epoch": 0.550656, "grad_norm": 0.3816437222214102, "learning_rate": 1.7117146761552427e-05, "loss": 1.3612, "step": 2151 }, { "epoch": 0.550912, "grad_norm": 0.3805199233229356, "learning_rate": 1.7101065021212953e-05, "loss": 1.3333, "step": 2152 }, { "epoch": 0.551168, "grad_norm": 0.3653836458901507, "learning_rate": 1.7084985195193314e-05, "loss": 1.3175, "step": 2153 }, { "epoch": 0.551424, "grad_norm": 0.37791705846437934, "learning_rate": 1.7068907294111846e-05, "loss": 1.3723, "step": 2154 }, { "epoch": 0.55168, "grad_norm": 0.3690739756841646, "learning_rate": 1.7052831328585644e-05, "loss": 1.3116, "step": 2155 }, { "epoch": 0.551936, "grad_norm": 0.3670831997602546, "learning_rate": 1.7036757309230528e-05, "loss": 1.3333, "step": 2156 }, { "epoch": 0.552192, "grad_norm": 0.3853856160194879, "learning_rate": 1.7020685246661005e-05, "loss": 1.3806, "step": 2157 }, { "epoch": 0.552448, "grad_norm": 0.3802903542821904, "learning_rate": 1.7004615151490315e-05, "loss": 1.3389, "step": 2158 }, { "epoch": 0.552704, "grad_norm": 0.37936384609356444, "learning_rate": 1.6988547034330398e-05, "loss": 1.3684, "step": 2159 }, { "epoch": 0.55296, "grad_norm": 0.9491242525761396, "learning_rate": 1.6972480905791866e-05, "loss": 1.4162, "step": 2160 }, { "epoch": 0.553216, "grad_norm": 0.38475694090959206, "learning_rate": 1.6956416776484033e-05, "loss": 1.3494, "step": 2161 }, { "epoch": 0.553472, "grad_norm": 0.37109422392276603, "learning_rate": 1.6940354657014904e-05, "loss": 1.3229, "step": 2162 }, { "epoch": 0.553728, "grad_norm": 0.4284737805277542, "learning_rate": 1.6924294557991133e-05, "loss": 1.365, "step": 2163 }, { "epoch": 0.553984, "grad_norm": 0.39306648811881995, "learning_rate": 1.6908236490018057e-05, "loss": 1.3528, "step": 2164 }, { "epoch": 0.55424, "grad_norm": 0.3819950028296498, "learning_rate": 1.6892180463699662e-05, "loss": 1.3367, "step": 2165 }, { "epoch": 0.554496, "grad_norm": 0.3797595745228369, "learning_rate": 1.68761264896386e-05, "loss": 1.3526, "step": 2166 }, { "epoch": 0.554752, "grad_norm": 0.3810791741677055, "learning_rate": 1.6860074578436144e-05, "loss": 1.3682, "step": 2167 }, { "epoch": 0.555008, "grad_norm": 0.483730628140365, "learning_rate": 1.6844024740692225e-05, "loss": 1.3064, "step": 2168 }, { "epoch": 0.555264, "grad_norm": 0.37752333239113534, "learning_rate": 1.6827976987005396e-05, "loss": 1.3327, "step": 2169 }, { "epoch": 0.55552, "grad_norm": 0.40352985160311683, "learning_rate": 1.681193132797284e-05, "loss": 1.3625, "step": 2170 }, { "epoch": 0.555776, "grad_norm": 0.3748604787055534, "learning_rate": 1.6795887774190347e-05, "loss": 1.3689, "step": 2171 }, { "epoch": 0.556032, "grad_norm": 0.398637746166116, "learning_rate": 1.6779846336252338e-05, "loss": 1.401, "step": 2172 }, { "epoch": 0.556288, "grad_norm": 0.3760939184136376, "learning_rate": 1.6763807024751803e-05, "loss": 1.3743, "step": 2173 }, { "epoch": 0.556544, "grad_norm": 0.38519622883152477, "learning_rate": 1.6747769850280347e-05, "loss": 1.363, "step": 2174 }, { "epoch": 0.5568, "grad_norm": 0.3771531178584084, "learning_rate": 1.6731734823428173e-05, "loss": 1.3428, "step": 2175 }, { "epoch": 0.557056, "grad_norm": 0.37525144111512165, "learning_rate": 1.671570195478405e-05, "loss": 1.3512, "step": 2176 }, { "epoch": 0.557312, "grad_norm": 0.3801214309577439, "learning_rate": 1.6699671254935327e-05, "loss": 1.3465, "step": 2177 }, { "epoch": 0.557568, "grad_norm": 0.4186195347973017, "learning_rate": 1.668364273446794e-05, "loss": 1.3848, "step": 2178 }, { "epoch": 0.557824, "grad_norm": 0.37049428840564413, "learning_rate": 1.6667616403966336e-05, "loss": 1.3168, "step": 2179 }, { "epoch": 0.55808, "grad_norm": 0.3715164585563844, "learning_rate": 1.6651592274013566e-05, "loss": 1.3396, "step": 2180 }, { "epoch": 0.558336, "grad_norm": 0.3831083669912095, "learning_rate": 1.6635570355191203e-05, "loss": 1.3504, "step": 2181 }, { "epoch": 0.558592, "grad_norm": 0.36967405561187333, "learning_rate": 1.6619550658079367e-05, "loss": 1.3529, "step": 2182 }, { "epoch": 0.558848, "grad_norm": 0.3720040958031323, "learning_rate": 1.660353319325672e-05, "loss": 1.382, "step": 2183 }, { "epoch": 0.559104, "grad_norm": 0.37710264718784026, "learning_rate": 1.6587517971300426e-05, "loss": 1.3172, "step": 2184 }, { "epoch": 0.55936, "grad_norm": 0.3761260822686075, "learning_rate": 1.657150500278619e-05, "loss": 1.3763, "step": 2185 }, { "epoch": 0.559616, "grad_norm": 0.37354442154961, "learning_rate": 1.6555494298288214e-05, "loss": 1.3703, "step": 2186 }, { "epoch": 0.559872, "grad_norm": 0.38031763624367715, "learning_rate": 1.653948586837921e-05, "loss": 1.331, "step": 2187 }, { "epoch": 0.560128, "grad_norm": 0.3748668328803114, "learning_rate": 1.6523479723630395e-05, "loss": 1.3676, "step": 2188 }, { "epoch": 0.560384, "grad_norm": 0.36432902561148695, "learning_rate": 1.650747587461148e-05, "loss": 1.3251, "step": 2189 }, { "epoch": 0.56064, "grad_norm": 0.3813433918307206, "learning_rate": 1.6491474331890635e-05, "loss": 1.3768, "step": 2190 }, { "epoch": 0.560896, "grad_norm": 0.37870179057947584, "learning_rate": 1.6475475106034532e-05, "loss": 1.3708, "step": 2191 }, { "epoch": 0.561152, "grad_norm": 0.3712716619197617, "learning_rate": 1.64594782076083e-05, "loss": 1.3586, "step": 2192 }, { "epoch": 0.561408, "grad_norm": 0.37815430976426484, "learning_rate": 1.6443483647175543e-05, "loss": 1.367, "step": 2193 }, { "epoch": 0.561664, "grad_norm": 0.37732382749011745, "learning_rate": 1.6427491435298302e-05, "loss": 1.3716, "step": 2194 }, { "epoch": 0.56192, "grad_norm": 0.3701154665843065, "learning_rate": 1.6411501582537094e-05, "loss": 1.328, "step": 2195 }, { "epoch": 0.562176, "grad_norm": 0.37557034264861267, "learning_rate": 1.639551409945086e-05, "loss": 1.379, "step": 2196 }, { "epoch": 0.562432, "grad_norm": 0.3727337721669061, "learning_rate": 1.6379528996596968e-05, "loss": 1.3494, "step": 2197 }, { "epoch": 0.562688, "grad_norm": 0.3701846177575583, "learning_rate": 1.6363546284531228e-05, "loss": 1.3327, "step": 2198 }, { "epoch": 0.562944, "grad_norm": 0.38673245451596755, "learning_rate": 1.6347565973807874e-05, "loss": 1.3932, "step": 2199 }, { "epoch": 0.5632, "grad_norm": 0.38156398456502344, "learning_rate": 1.6331588074979548e-05, "loss": 1.3683, "step": 2200 }, { "epoch": 0.563456, "grad_norm": 0.3814057118745101, "learning_rate": 1.6315612598597303e-05, "loss": 1.321, "step": 2201 }, { "epoch": 0.563712, "grad_norm": 0.3906389233844145, "learning_rate": 1.629963955521059e-05, "loss": 1.4058, "step": 2202 }, { "epoch": 0.563968, "grad_norm": 0.39740377605693994, "learning_rate": 1.6283668955367242e-05, "loss": 1.3838, "step": 2203 }, { "epoch": 0.564224, "grad_norm": 0.4108933047385235, "learning_rate": 1.6267700809613495e-05, "loss": 1.3624, "step": 2204 }, { "epoch": 0.56448, "grad_norm": 0.38758479265767487, "learning_rate": 1.625173512849396e-05, "loss": 1.381, "step": 2205 }, { "epoch": 0.564736, "grad_norm": 0.36226549413256043, "learning_rate": 1.6235771922551625e-05, "loss": 1.3357, "step": 2206 }, { "epoch": 0.564992, "grad_norm": 0.4012347514162213, "learning_rate": 1.6219811202327823e-05, "loss": 1.3705, "step": 2207 }, { "epoch": 0.565248, "grad_norm": 0.4016027962429828, "learning_rate": 1.6203852978362283e-05, "loss": 1.3923, "step": 2208 }, { "epoch": 0.565504, "grad_norm": 0.38342917553150113, "learning_rate": 1.618789726119304e-05, "loss": 1.3149, "step": 2209 }, { "epoch": 0.56576, "grad_norm": 0.39146762325918033, "learning_rate": 1.6171944061356502e-05, "loss": 1.3817, "step": 2210 }, { "epoch": 0.566016, "grad_norm": 0.3876385019662338, "learning_rate": 1.6155993389387416e-05, "loss": 1.3732, "step": 2211 }, { "epoch": 0.566272, "grad_norm": 0.37806095845462007, "learning_rate": 1.6140045255818852e-05, "loss": 1.33, "step": 2212 }, { "epoch": 0.566528, "grad_norm": 0.3867062529623233, "learning_rate": 1.61240996711822e-05, "loss": 1.3505, "step": 2213 }, { "epoch": 0.566784, "grad_norm": 0.39085764650292765, "learning_rate": 1.610815664600718e-05, "loss": 1.3703, "step": 2214 }, { "epoch": 0.56704, "grad_norm": 0.3737346008548558, "learning_rate": 1.6092216190821797e-05, "loss": 1.3803, "step": 2215 }, { "epoch": 0.567296, "grad_norm": 0.393946507127276, "learning_rate": 1.6076278316152385e-05, "loss": 1.3786, "step": 2216 }, { "epoch": 0.567552, "grad_norm": 0.3792476393960428, "learning_rate": 1.6060343032523565e-05, "loss": 1.3512, "step": 2217 }, { "epoch": 0.567808, "grad_norm": 0.37555835051329145, "learning_rate": 1.604441035045825e-05, "loss": 1.3677, "step": 2218 }, { "epoch": 0.568064, "grad_norm": 0.3727881241973694, "learning_rate": 1.6028480280477623e-05, "loss": 1.3184, "step": 2219 }, { "epoch": 0.56832, "grad_norm": 0.38416225231854084, "learning_rate": 1.601255283310116e-05, "loss": 1.3418, "step": 2220 }, { "epoch": 0.568576, "grad_norm": 0.3728642396381784, "learning_rate": 1.5996628018846583e-05, "loss": 1.3196, "step": 2221 }, { "epoch": 0.568832, "grad_norm": 0.3853443838238744, "learning_rate": 1.598070584822989e-05, "loss": 1.3615, "step": 2222 }, { "epoch": 0.569088, "grad_norm": 0.3789759624321883, "learning_rate": 1.5964786331765338e-05, "loss": 1.3322, "step": 2223 }, { "epoch": 0.569344, "grad_norm": 0.3809667140495396, "learning_rate": 1.5948869479965427e-05, "loss": 1.3349, "step": 2224 }, { "epoch": 0.5696, "grad_norm": 0.36925905764877975, "learning_rate": 1.593295530334088e-05, "loss": 1.3141, "step": 2225 }, { "epoch": 0.569856, "grad_norm": 0.44187942570993816, "learning_rate": 1.5917043812400675e-05, "loss": 1.4244, "step": 2226 }, { "epoch": 0.570112, "grad_norm": 0.38114195125282335, "learning_rate": 1.5901135017652026e-05, "loss": 1.3505, "step": 2227 }, { "epoch": 0.570368, "grad_norm": 0.3936183489948876, "learning_rate": 1.588522892960032e-05, "loss": 1.3875, "step": 2228 }, { "epoch": 0.570624, "grad_norm": 0.3748487198771036, "learning_rate": 1.5869325558749213e-05, "loss": 1.3402, "step": 2229 }, { "epoch": 0.57088, "grad_norm": 0.37239379674809603, "learning_rate": 1.585342491560052e-05, "loss": 1.3343, "step": 2230 }, { "epoch": 0.571136, "grad_norm": 0.3713986366846414, "learning_rate": 1.5837527010654288e-05, "loss": 1.3379, "step": 2231 }, { "epoch": 0.571392, "grad_norm": 0.3889222732551767, "learning_rate": 1.5821631854408737e-05, "loss": 1.3892, "step": 2232 }, { "epoch": 0.571648, "grad_norm": 0.3781520062206382, "learning_rate": 1.580573945736029e-05, "loss": 1.3419, "step": 2233 }, { "epoch": 0.571904, "grad_norm": 0.43138610439627034, "learning_rate": 1.578984983000352e-05, "loss": 1.3713, "step": 2234 }, { "epoch": 0.57216, "grad_norm": 0.374183877191408, "learning_rate": 1.5773962982831195e-05, "loss": 1.3715, "step": 2235 }, { "epoch": 0.572416, "grad_norm": 0.37724382780510624, "learning_rate": 1.575807892633423e-05, "loss": 1.3236, "step": 2236 }, { "epoch": 0.572672, "grad_norm": 0.36951695134079643, "learning_rate": 1.5742197671001718e-05, "loss": 1.3278, "step": 2237 }, { "epoch": 0.572928, "grad_norm": 0.3730308173512109, "learning_rate": 1.5726319227320886e-05, "loss": 1.2887, "step": 2238 }, { "epoch": 0.573184, "grad_norm": 0.4138229444030839, "learning_rate": 1.5710443605777116e-05, "loss": 1.3717, "step": 2239 }, { "epoch": 0.57344, "grad_norm": 0.3682964854089962, "learning_rate": 1.56945708168539e-05, "loss": 1.3713, "step": 2240 }, { "epoch": 0.573696, "grad_norm": 0.3821482160361737, "learning_rate": 1.5678700871032897e-05, "loss": 1.3657, "step": 2241 }, { "epoch": 0.573952, "grad_norm": 0.3882239509569752, "learning_rate": 1.566283377879386e-05, "loss": 1.3797, "step": 2242 }, { "epoch": 0.574208, "grad_norm": 0.36504684582481467, "learning_rate": 1.5646969550614668e-05, "loss": 1.3173, "step": 2243 }, { "epoch": 0.574464, "grad_norm": 0.3715772912160699, "learning_rate": 1.5631108196971314e-05, "loss": 1.367, "step": 2244 }, { "epoch": 0.57472, "grad_norm": 0.5209604103847042, "learning_rate": 1.561524972833789e-05, "loss": 1.3432, "step": 2245 }, { "epoch": 0.574976, "grad_norm": 0.3734621425514182, "learning_rate": 1.5599394155186572e-05, "loss": 1.3482, "step": 2246 }, { "epoch": 0.575232, "grad_norm": 0.44078621941441853, "learning_rate": 1.5583541487987634e-05, "loss": 1.3893, "step": 2247 }, { "epoch": 0.575488, "grad_norm": 0.37823257441116975, "learning_rate": 1.5567691737209426e-05, "loss": 1.355, "step": 2248 }, { "epoch": 0.575744, "grad_norm": 0.36797122644153557, "learning_rate": 1.5551844913318384e-05, "loss": 1.3098, "step": 2249 }, { "epoch": 0.576, "grad_norm": 0.3737947544850818, "learning_rate": 1.5536001026778995e-05, "loss": 1.3581, "step": 2250 }, { "epoch": 0.576256, "grad_norm": 0.3806811792748175, "learning_rate": 1.5520160088053823e-05, "loss": 1.3517, "step": 2251 }, { "epoch": 0.576512, "grad_norm": 0.37623117171217324, "learning_rate": 1.5504322107603468e-05, "loss": 1.355, "step": 2252 }, { "epoch": 0.576768, "grad_norm": 0.3717659864975748, "learning_rate": 1.5488487095886583e-05, "loss": 1.3108, "step": 2253 }, { "epoch": 0.577024, "grad_norm": 0.3719871971216823, "learning_rate": 1.5472655063359866e-05, "loss": 1.3729, "step": 2254 }, { "epoch": 0.57728, "grad_norm": 0.36091153773724427, "learning_rate": 1.545682602047804e-05, "loss": 1.3094, "step": 2255 }, { "epoch": 0.577536, "grad_norm": 0.38697772679152986, "learning_rate": 1.544099997769387e-05, "loss": 1.3735, "step": 2256 }, { "epoch": 0.577792, "grad_norm": 0.3751724682900414, "learning_rate": 1.5425176945458115e-05, "loss": 1.3552, "step": 2257 }, { "epoch": 0.578048, "grad_norm": 0.3687940568201914, "learning_rate": 1.5409356934219565e-05, "loss": 1.3407, "step": 2258 }, { "epoch": 0.578304, "grad_norm": 0.3710590047489983, "learning_rate": 1.5393539954425003e-05, "loss": 1.3326, "step": 2259 }, { "epoch": 0.57856, "grad_norm": 0.3882085060289895, "learning_rate": 1.5377726016519217e-05, "loss": 1.3641, "step": 2260 }, { "epoch": 0.578816, "grad_norm": 0.3655867653954755, "learning_rate": 1.5361915130944992e-05, "loss": 1.3214, "step": 2261 }, { "epoch": 0.579072, "grad_norm": 0.3730432282349772, "learning_rate": 1.5346107308143086e-05, "loss": 1.3015, "step": 2262 }, { "epoch": 0.579328, "grad_norm": 0.38426697263394805, "learning_rate": 1.533030255855224e-05, "loss": 1.3588, "step": 2263 }, { "epoch": 0.579584, "grad_norm": 0.37662249909310863, "learning_rate": 1.5314500892609165e-05, "loss": 1.3524, "step": 2264 }, { "epoch": 0.57984, "grad_norm": 0.38510202692395645, "learning_rate": 1.529870232074853e-05, "loss": 1.359, "step": 2265 }, { "epoch": 0.580096, "grad_norm": 0.3804646416042581, "learning_rate": 1.528290685340297e-05, "loss": 1.3428, "step": 2266 }, { "epoch": 0.580352, "grad_norm": 0.3876124501784752, "learning_rate": 1.526711450100306e-05, "loss": 1.3668, "step": 2267 }, { "epoch": 0.580608, "grad_norm": 0.38287855453059777, "learning_rate": 1.525132527397734e-05, "loss": 1.3572, "step": 2268 }, { "epoch": 0.580864, "grad_norm": 0.375151612670057, "learning_rate": 1.523553918275226e-05, "loss": 1.3532, "step": 2269 }, { "epoch": 0.58112, "grad_norm": 0.3919514910840354, "learning_rate": 1.5219756237752208e-05, "loss": 1.3868, "step": 2270 }, { "epoch": 0.581376, "grad_norm": 0.3806021096952684, "learning_rate": 1.5203976449399496e-05, "loss": 1.3344, "step": 2271 }, { "epoch": 0.581632, "grad_norm": 0.44032029453162475, "learning_rate": 1.5188199828114351e-05, "loss": 1.3113, "step": 2272 }, { "epoch": 0.581888, "grad_norm": 0.3793833341445387, "learning_rate": 1.5172426384314913e-05, "loss": 1.3474, "step": 2273 }, { "epoch": 0.582144, "grad_norm": 0.36680365980370855, "learning_rate": 1.5156656128417222e-05, "loss": 1.2984, "step": 2274 }, { "epoch": 0.5824, "grad_norm": 0.3775094562078009, "learning_rate": 1.514088907083521e-05, "loss": 1.3228, "step": 2275 }, { "epoch": 0.582656, "grad_norm": 0.37919190454463625, "learning_rate": 1.5125125221980685e-05, "loss": 1.3492, "step": 2276 }, { "epoch": 0.582912, "grad_norm": 0.377088138974955, "learning_rate": 1.5109364592263358e-05, "loss": 1.3123, "step": 2277 }, { "epoch": 0.583168, "grad_norm": 0.37666893129225226, "learning_rate": 1.5093607192090804e-05, "loss": 1.3163, "step": 2278 }, { "epoch": 0.583424, "grad_norm": 0.37903665498721223, "learning_rate": 1.5077853031868474e-05, "loss": 1.3353, "step": 2279 }, { "epoch": 0.58368, "grad_norm": 0.37210419459138383, "learning_rate": 1.506210212199966e-05, "loss": 1.3698, "step": 2280 }, { "epoch": 0.583936, "grad_norm": 0.3811346156665869, "learning_rate": 1.5046354472885531e-05, "loss": 1.3625, "step": 2281 }, { "epoch": 0.584192, "grad_norm": 0.368968577575401, "learning_rate": 1.5030610094925075e-05, "loss": 1.3391, "step": 2282 }, { "epoch": 0.584448, "grad_norm": 0.36808515493658445, "learning_rate": 1.5014868998515149e-05, "loss": 1.3507, "step": 2283 }, { "epoch": 0.584704, "grad_norm": 0.3781432420689751, "learning_rate": 1.4999131194050422e-05, "loss": 1.369, "step": 2284 }, { "epoch": 0.58496, "grad_norm": 0.38214116892085476, "learning_rate": 1.4983396691923409e-05, "loss": 1.4044, "step": 2285 }, { "epoch": 0.585216, "grad_norm": 0.35929579555237, "learning_rate": 1.496766550252442e-05, "loss": 1.3208, "step": 2286 }, { "epoch": 0.585472, "grad_norm": 0.3777170341823041, "learning_rate": 1.4951937636241596e-05, "loss": 1.3933, "step": 2287 }, { "epoch": 0.585728, "grad_norm": 0.3619536341409582, "learning_rate": 1.4936213103460887e-05, "loss": 1.3382, "step": 2288 }, { "epoch": 0.585984, "grad_norm": 0.36899439216065494, "learning_rate": 1.4920491914566006e-05, "loss": 1.3235, "step": 2289 }, { "epoch": 0.58624, "grad_norm": 0.359079663981322, "learning_rate": 1.4904774079938506e-05, "loss": 1.2912, "step": 2290 }, { "epoch": 0.586496, "grad_norm": 0.3792872546558347, "learning_rate": 1.4889059609957701e-05, "loss": 1.3203, "step": 2291 }, { "epoch": 0.586752, "grad_norm": 0.39150188975039885, "learning_rate": 1.4873348515000674e-05, "loss": 1.3967, "step": 2292 }, { "epoch": 0.587008, "grad_norm": 0.3667972774714096, "learning_rate": 1.4857640805442299e-05, "loss": 1.3399, "step": 2293 }, { "epoch": 0.587264, "grad_norm": 0.3654649853645106, "learning_rate": 1.4841936491655214e-05, "loss": 1.3304, "step": 2294 }, { "epoch": 0.58752, "grad_norm": 0.37133836550508914, "learning_rate": 1.4826235584009787e-05, "loss": 1.3443, "step": 2295 }, { "epoch": 0.587776, "grad_norm": 0.3699980678136288, "learning_rate": 1.4810538092874166e-05, "loss": 1.3429, "step": 2296 }, { "epoch": 0.588032, "grad_norm": 0.3828017826190828, "learning_rate": 1.4794844028614241e-05, "loss": 1.3952, "step": 2297 }, { "epoch": 0.588288, "grad_norm": 0.3823678342606419, "learning_rate": 1.4779153401593615e-05, "loss": 1.4048, "step": 2298 }, { "epoch": 0.588544, "grad_norm": 0.3873418839487346, "learning_rate": 1.4763466222173651e-05, "loss": 1.3477, "step": 2299 }, { "epoch": 0.5888, "grad_norm": 0.42093981727887153, "learning_rate": 1.4747782500713424e-05, "loss": 1.3468, "step": 2300 }, { "epoch": 0.589056, "grad_norm": 0.37627946357442715, "learning_rate": 1.4732102247569707e-05, "loss": 1.3561, "step": 2301 }, { "epoch": 0.589312, "grad_norm": 0.3774846228628711, "learning_rate": 1.4716425473097014e-05, "loss": 1.3202, "step": 2302 }, { "epoch": 0.589568, "grad_norm": 0.39446706636088746, "learning_rate": 1.4700752187647536e-05, "loss": 1.3332, "step": 2303 }, { "epoch": 0.589824, "grad_norm": 0.4571459377747097, "learning_rate": 1.4685082401571175e-05, "loss": 1.3379, "step": 2304 }, { "epoch": 0.59008, "grad_norm": 0.3855564493166995, "learning_rate": 1.4669416125215521e-05, "loss": 1.3686, "step": 2305 }, { "epoch": 0.590336, "grad_norm": 0.3715568264605294, "learning_rate": 1.4653753368925849e-05, "loss": 1.3453, "step": 2306 }, { "epoch": 0.590592, "grad_norm": 0.37031663897396006, "learning_rate": 1.463809414304509e-05, "loss": 1.333, "step": 2307 }, { "epoch": 0.590848, "grad_norm": 0.37508863617146876, "learning_rate": 1.4622438457913868e-05, "loss": 1.347, "step": 2308 }, { "epoch": 0.591104, "grad_norm": 0.3733052979112978, "learning_rate": 1.4606786323870453e-05, "loss": 1.3266, "step": 2309 }, { "epoch": 0.59136, "grad_norm": 0.37788688941543863, "learning_rate": 1.4591137751250773e-05, "loss": 1.3701, "step": 2310 }, { "epoch": 0.591616, "grad_norm": 0.453462626404309, "learning_rate": 1.4575492750388414e-05, "loss": 1.355, "step": 2311 }, { "epoch": 0.591872, "grad_norm": 0.3795413553795468, "learning_rate": 1.4559851331614599e-05, "loss": 1.3478, "step": 2312 }, { "epoch": 0.592128, "grad_norm": 0.3823990407770366, "learning_rate": 1.454421350525817e-05, "loss": 1.3624, "step": 2313 }, { "epoch": 0.592384, "grad_norm": 0.39575781388846365, "learning_rate": 1.452857928164562e-05, "loss": 1.324, "step": 2314 }, { "epoch": 0.59264, "grad_norm": 0.3750310670682684, "learning_rate": 1.4512948671101044e-05, "loss": 1.3658, "step": 2315 }, { "epoch": 0.592896, "grad_norm": 0.3672455375013226, "learning_rate": 1.4497321683946164e-05, "loss": 1.3169, "step": 2316 }, { "epoch": 0.593152, "grad_norm": 0.3784045992827003, "learning_rate": 1.4481698330500305e-05, "loss": 1.3617, "step": 2317 }, { "epoch": 0.593408, "grad_norm": 0.38081347684495337, "learning_rate": 1.44660786210804e-05, "loss": 1.3367, "step": 2318 }, { "epoch": 0.593664, "grad_norm": 0.37537983218799875, "learning_rate": 1.4450462566000949e-05, "loss": 1.3634, "step": 2319 }, { "epoch": 0.59392, "grad_norm": 0.3710171373325578, "learning_rate": 1.4434850175574075e-05, "loss": 1.344, "step": 2320 }, { "epoch": 0.594176, "grad_norm": 0.37832574770650346, "learning_rate": 1.441924146010945e-05, "loss": 1.3282, "step": 2321 }, { "epoch": 0.594432, "grad_norm": 0.3665737755222365, "learning_rate": 1.4403636429914341e-05, "loss": 1.3412, "step": 2322 }, { "epoch": 0.594688, "grad_norm": 0.3739263205029084, "learning_rate": 1.4388035095293574e-05, "loss": 1.3116, "step": 2323 }, { "epoch": 0.594944, "grad_norm": 0.3705435601086545, "learning_rate": 1.4372437466549538e-05, "loss": 1.3449, "step": 2324 }, { "epoch": 0.5952, "grad_norm": 0.374173941420483, "learning_rate": 1.435684355398216e-05, "loss": 1.3284, "step": 2325 }, { "epoch": 0.595456, "grad_norm": 0.3902715212805494, "learning_rate": 1.4341253367888927e-05, "loss": 1.3818, "step": 2326 }, { "epoch": 0.595712, "grad_norm": 0.3787382983315016, "learning_rate": 1.4325666918564864e-05, "loss": 1.3511, "step": 2327 }, { "epoch": 0.595968, "grad_norm": 0.37324421790608675, "learning_rate": 1.4310084216302525e-05, "loss": 1.3277, "step": 2328 }, { "epoch": 0.596224, "grad_norm": 0.37216348688190976, "learning_rate": 1.4294505271391996e-05, "loss": 1.3383, "step": 2329 }, { "epoch": 0.59648, "grad_norm": 0.37608682408744293, "learning_rate": 1.4278930094120873e-05, "loss": 1.3407, "step": 2330 }, { "epoch": 0.596736, "grad_norm": 0.5292557615821755, "learning_rate": 1.4263358694774265e-05, "loss": 1.3526, "step": 2331 }, { "epoch": 0.596992, "grad_norm": 0.36832019383416703, "learning_rate": 1.4247791083634793e-05, "loss": 1.333, "step": 2332 }, { "epoch": 0.597248, "grad_norm": 0.37451138684195373, "learning_rate": 1.4232227270982568e-05, "loss": 1.3408, "step": 2333 }, { "epoch": 0.597504, "grad_norm": 0.37943289258009805, "learning_rate": 1.4216667267095201e-05, "loss": 1.4125, "step": 2334 }, { "epoch": 0.59776, "grad_norm": 0.36028885322216797, "learning_rate": 1.4201111082247789e-05, "loss": 1.2867, "step": 2335 }, { "epoch": 0.598016, "grad_norm": 0.6747125891034204, "learning_rate": 1.4185558726712897e-05, "loss": 1.326, "step": 2336 }, { "epoch": 0.598272, "grad_norm": 0.445399274655276, "learning_rate": 1.4170010210760565e-05, "loss": 1.3442, "step": 2337 }, { "epoch": 0.598528, "grad_norm": 0.3810315840204842, "learning_rate": 1.41544655446583e-05, "loss": 1.3752, "step": 2338 }, { "epoch": 0.598784, "grad_norm": 0.371717800301048, "learning_rate": 1.4138924738671062e-05, "loss": 1.3174, "step": 2339 }, { "epoch": 0.59904, "grad_norm": 0.37417685716696636, "learning_rate": 1.4123387803061272e-05, "loss": 1.344, "step": 2340 }, { "epoch": 0.599296, "grad_norm": 0.3795814689348757, "learning_rate": 1.4107854748088793e-05, "loss": 1.3694, "step": 2341 }, { "epoch": 0.599552, "grad_norm": 0.3762017395579445, "learning_rate": 1.409232558401091e-05, "loss": 1.3005, "step": 2342 }, { "epoch": 0.599808, "grad_norm": 0.37458072223306627, "learning_rate": 1.407680032108236e-05, "loss": 1.3493, "step": 2343 }, { "epoch": 0.600064, "grad_norm": 0.38458300759139086, "learning_rate": 1.4061278969555282e-05, "loss": 1.3569, "step": 2344 }, { "epoch": 0.60032, "grad_norm": 0.42039908284339145, "learning_rate": 1.4045761539679252e-05, "loss": 1.3337, "step": 2345 }, { "epoch": 0.600576, "grad_norm": 0.3791067291940985, "learning_rate": 1.403024804170125e-05, "loss": 1.3501, "step": 2346 }, { "epoch": 0.600832, "grad_norm": 0.38267077164447566, "learning_rate": 1.4014738485865653e-05, "loss": 1.338, "step": 2347 }, { "epoch": 0.601088, "grad_norm": 0.3792535507507403, "learning_rate": 1.3999232882414243e-05, "loss": 1.3361, "step": 2348 }, { "epoch": 0.601344, "grad_norm": 0.36958417253225245, "learning_rate": 1.3983731241586191e-05, "loss": 1.3227, "step": 2349 }, { "epoch": 0.6016, "grad_norm": 0.3774486735552734, "learning_rate": 1.3968233573618037e-05, "loss": 1.3499, "step": 2350 }, { "epoch": 0.601856, "grad_norm": 0.37685913758937145, "learning_rate": 1.395273988874372e-05, "loss": 1.3698, "step": 2351 }, { "epoch": 0.602112, "grad_norm": 0.374781454822194, "learning_rate": 1.3937250197194538e-05, "loss": 1.32, "step": 2352 }, { "epoch": 0.602368, "grad_norm": 0.37868228915622637, "learning_rate": 1.3921764509199144e-05, "loss": 1.3726, "step": 2353 }, { "epoch": 0.602624, "grad_norm": 0.3624110669232201, "learning_rate": 1.3906282834983565e-05, "loss": 1.3179, "step": 2354 }, { "epoch": 0.60288, "grad_norm": 0.3743659017767568, "learning_rate": 1.3890805184771172e-05, "loss": 1.3654, "step": 2355 }, { "epoch": 0.603136, "grad_norm": 0.37005102367137127, "learning_rate": 1.3875331568782655e-05, "loss": 1.3314, "step": 2356 }, { "epoch": 0.603392, "grad_norm": 0.3591237958684529, "learning_rate": 1.3859861997236072e-05, "loss": 1.3368, "step": 2357 }, { "epoch": 0.603648, "grad_norm": 0.37166373867628405, "learning_rate": 1.3844396480346802e-05, "loss": 1.3414, "step": 2358 }, { "epoch": 0.603904, "grad_norm": 0.37209451746878963, "learning_rate": 1.3828935028327534e-05, "loss": 1.3797, "step": 2359 }, { "epoch": 0.60416, "grad_norm": 0.37067600859803523, "learning_rate": 1.3813477651388284e-05, "loss": 1.3844, "step": 2360 }, { "epoch": 0.604416, "grad_norm": 0.3658287213180722, "learning_rate": 1.379802435973638e-05, "loss": 1.3276, "step": 2361 }, { "epoch": 0.604672, "grad_norm": 0.3752694937051555, "learning_rate": 1.3782575163576435e-05, "loss": 1.35, "step": 2362 }, { "epoch": 0.604928, "grad_norm": 0.3690764058793174, "learning_rate": 1.3767130073110376e-05, "loss": 1.3307, "step": 2363 }, { "epoch": 0.605184, "grad_norm": 0.3855250475312962, "learning_rate": 1.3751689098537414e-05, "loss": 1.3843, "step": 2364 }, { "epoch": 0.60544, "grad_norm": 0.3738038114002888, "learning_rate": 1.373625225005403e-05, "loss": 1.348, "step": 2365 }, { "epoch": 0.605696, "grad_norm": 0.37280230715610485, "learning_rate": 1.3720819537853998e-05, "loss": 1.3397, "step": 2366 }, { "epoch": 0.605952, "grad_norm": 0.37986374886976393, "learning_rate": 1.370539097212836e-05, "loss": 1.3603, "step": 2367 }, { "epoch": 0.606208, "grad_norm": 0.3721790725838548, "learning_rate": 1.3689966563065394e-05, "loss": 1.3604, "step": 2368 }, { "epoch": 0.606464, "grad_norm": 0.3728762171397826, "learning_rate": 1.3674546320850661e-05, "loss": 1.3065, "step": 2369 }, { "epoch": 0.60672, "grad_norm": 0.37633729879751454, "learning_rate": 1.3659130255666968e-05, "loss": 1.3772, "step": 2370 }, { "epoch": 0.606976, "grad_norm": 0.368260103901516, "learning_rate": 1.3643718377694341e-05, "loss": 1.322, "step": 2371 }, { "epoch": 0.607232, "grad_norm": 0.3776350696404196, "learning_rate": 1.3628310697110073e-05, "loss": 1.3018, "step": 2372 }, { "epoch": 0.607488, "grad_norm": 0.3657882477273965, "learning_rate": 1.3612907224088665e-05, "loss": 1.3251, "step": 2373 }, { "epoch": 0.607744, "grad_norm": 0.3629726611169219, "learning_rate": 1.3597507968801834e-05, "loss": 1.3011, "step": 2374 }, { "epoch": 0.608, "grad_norm": 0.37218339996723465, "learning_rate": 1.3582112941418531e-05, "loss": 1.3177, "step": 2375 }, { "epoch": 0.608256, "grad_norm": 0.36714667901070663, "learning_rate": 1.3566722152104897e-05, "loss": 1.3316, "step": 2376 }, { "epoch": 0.608512, "grad_norm": 0.3696568545783475, "learning_rate": 1.355133561102429e-05, "loss": 1.3337, "step": 2377 }, { "epoch": 0.608768, "grad_norm": 0.37907363729955995, "learning_rate": 1.353595332833725e-05, "loss": 1.3536, "step": 2378 }, { "epoch": 0.609024, "grad_norm": 0.40353173435338824, "learning_rate": 1.3520575314201524e-05, "loss": 1.3612, "step": 2379 }, { "epoch": 0.60928, "grad_norm": 0.3838257373005245, "learning_rate": 1.3505201578772009e-05, "loss": 1.3037, "step": 2380 }, { "epoch": 0.609536, "grad_norm": 0.3717807734805008, "learning_rate": 1.3489832132200804e-05, "loss": 1.3915, "step": 2381 }, { "epoch": 0.609792, "grad_norm": 0.36629125595214146, "learning_rate": 1.347446698463716e-05, "loss": 1.3088, "step": 2382 }, { "epoch": 0.610048, "grad_norm": 0.3735512860638466, "learning_rate": 1.34591061462275e-05, "loss": 1.3407, "step": 2383 }, { "epoch": 0.610304, "grad_norm": 0.3791904421149582, "learning_rate": 1.3443749627115397e-05, "loss": 1.3676, "step": 2384 }, { "epoch": 0.61056, "grad_norm": 0.44888286869136595, "learning_rate": 1.3428397437441573e-05, "loss": 1.3701, "step": 2385 }, { "epoch": 0.610816, "grad_norm": 0.4051327501559484, "learning_rate": 1.341304958734388e-05, "loss": 1.3546, "step": 2386 }, { "epoch": 0.611072, "grad_norm": 0.3692572990142681, "learning_rate": 1.3397706086957328e-05, "loss": 1.3407, "step": 2387 }, { "epoch": 0.611328, "grad_norm": 0.3709398849271903, "learning_rate": 1.3382366946414023e-05, "loss": 1.3157, "step": 2388 }, { "epoch": 0.611584, "grad_norm": 0.4380538631303332, "learning_rate": 1.336703217584322e-05, "loss": 1.3468, "step": 2389 }, { "epoch": 0.61184, "grad_norm": 0.37058405000496025, "learning_rate": 1.3351701785371275e-05, "loss": 1.3113, "step": 2390 }, { "epoch": 0.612096, "grad_norm": 0.3759052796556128, "learning_rate": 1.3336375785121667e-05, "loss": 1.3478, "step": 2391 }, { "epoch": 0.612352, "grad_norm": 0.6591295868015719, "learning_rate": 1.3321054185214941e-05, "loss": 1.4023, "step": 2392 }, { "epoch": 0.612608, "grad_norm": 0.366612370193042, "learning_rate": 1.3305736995768774e-05, "loss": 1.3113, "step": 2393 }, { "epoch": 0.612864, "grad_norm": 0.36778332470740066, "learning_rate": 1.3290424226897907e-05, "loss": 1.3419, "step": 2394 }, { "epoch": 0.61312, "grad_norm": 0.3816875684197744, "learning_rate": 1.327511588871417e-05, "loss": 1.3816, "step": 2395 }, { "epoch": 0.613376, "grad_norm": 0.37590865401895246, "learning_rate": 1.3259811991326472e-05, "loss": 1.3036, "step": 2396 }, { "epoch": 0.613632, "grad_norm": 0.3863213964953608, "learning_rate": 1.324451254484079e-05, "loss": 1.3585, "step": 2397 }, { "epoch": 0.613888, "grad_norm": 0.3997095797953172, "learning_rate": 1.3229217559360143e-05, "loss": 1.3528, "step": 2398 }, { "epoch": 0.614144, "grad_norm": 0.5150809974820241, "learning_rate": 1.3213927044984622e-05, "loss": 1.3494, "step": 2399 }, { "epoch": 0.6144, "grad_norm": 0.3812743969443645, "learning_rate": 1.319864101181136e-05, "loss": 1.364, "step": 2400 }, { "epoch": 0.614656, "grad_norm": 0.38039188150094044, "learning_rate": 1.318335946993453e-05, "loss": 1.3237, "step": 2401 }, { "epoch": 0.614912, "grad_norm": 0.3789602190113306, "learning_rate": 1.3168082429445348e-05, "loss": 1.356, "step": 2402 }, { "epoch": 0.615168, "grad_norm": 0.3814043160420131, "learning_rate": 1.3152809900432058e-05, "loss": 1.3067, "step": 2403 }, { "epoch": 0.615424, "grad_norm": 0.40827277514333093, "learning_rate": 1.3137541892979897e-05, "loss": 1.3554, "step": 2404 }, { "epoch": 0.61568, "grad_norm": 0.3757701933025881, "learning_rate": 1.3122278417171144e-05, "loss": 1.3321, "step": 2405 }, { "epoch": 0.615936, "grad_norm": 0.3750020847378091, "learning_rate": 1.3107019483085077e-05, "loss": 1.3395, "step": 2406 }, { "epoch": 0.616192, "grad_norm": 0.3978653529305366, "learning_rate": 1.3091765100797981e-05, "loss": 1.383, "step": 2407 }, { "epoch": 0.616448, "grad_norm": 0.37417484241798354, "learning_rate": 1.307651528038313e-05, "loss": 1.322, "step": 2408 }, { "epoch": 0.616704, "grad_norm": 0.387357799174085, "learning_rate": 1.3061270031910787e-05, "loss": 1.3924, "step": 2409 }, { "epoch": 0.61696, "grad_norm": 0.37612256373040276, "learning_rate": 1.3046029365448187e-05, "loss": 1.321, "step": 2410 }, { "epoch": 0.617216, "grad_norm": 0.38176111821312564, "learning_rate": 1.3030793291059545e-05, "loss": 1.3785, "step": 2411 }, { "epoch": 0.617472, "grad_norm": 0.39034776712012653, "learning_rate": 1.3015561818806055e-05, "loss": 1.3771, "step": 2412 }, { "epoch": 0.617728, "grad_norm": 0.38693605966312683, "learning_rate": 1.3000334958745856e-05, "loss": 1.3389, "step": 2413 }, { "epoch": 0.617984, "grad_norm": 0.37293516162162627, "learning_rate": 1.298511272093405e-05, "loss": 1.3434, "step": 2414 }, { "epoch": 0.61824, "grad_norm": 0.37014206659438237, "learning_rate": 1.2969895115422682e-05, "loss": 1.3783, "step": 2415 }, { "epoch": 0.618496, "grad_norm": 0.40638163500763214, "learning_rate": 1.2954682152260751e-05, "loss": 1.3487, "step": 2416 }, { "epoch": 0.618752, "grad_norm": 0.38528768815849423, "learning_rate": 1.2939473841494163e-05, "loss": 1.3774, "step": 2417 }, { "epoch": 0.619008, "grad_norm": 0.3645233473274598, "learning_rate": 1.2924270193165778e-05, "loss": 1.3145, "step": 2418 }, { "epoch": 0.619264, "grad_norm": 0.3813598552265583, "learning_rate": 1.2909071217315363e-05, "loss": 1.3623, "step": 2419 }, { "epoch": 0.61952, "grad_norm": 0.3715443213762051, "learning_rate": 1.2893876923979614e-05, "loss": 1.3497, "step": 2420 }, { "epoch": 0.619776, "grad_norm": 0.3684889192753593, "learning_rate": 1.2878687323192111e-05, "loss": 1.325, "step": 2421 }, { "epoch": 0.620032, "grad_norm": 0.3626529713744613, "learning_rate": 1.2863502424983369e-05, "loss": 1.3325, "step": 2422 }, { "epoch": 0.620288, "grad_norm": 0.3744268581448939, "learning_rate": 1.2848322239380758e-05, "loss": 1.3326, "step": 2423 }, { "epoch": 0.620544, "grad_norm": 0.3714715977246351, "learning_rate": 1.2833146776408563e-05, "loss": 1.3554, "step": 2424 }, { "epoch": 0.6208, "grad_norm": 0.3669863015274799, "learning_rate": 1.2817976046087943e-05, "loss": 1.339, "step": 2425 }, { "epoch": 0.621056, "grad_norm": 0.3845944202617204, "learning_rate": 1.2802810058436938e-05, "loss": 1.3947, "step": 2426 }, { "epoch": 0.621312, "grad_norm": 0.38195003261765537, "learning_rate": 1.2787648823470443e-05, "loss": 1.3138, "step": 2427 }, { "epoch": 0.621568, "grad_norm": 0.4801487254183432, "learning_rate": 1.2772492351200234e-05, "loss": 1.3308, "step": 2428 }, { "epoch": 0.621824, "grad_norm": 0.36456717822357065, "learning_rate": 1.2757340651634911e-05, "loss": 1.3421, "step": 2429 }, { "epoch": 0.62208, "grad_norm": 0.36694776482222324, "learning_rate": 1.274219373477995e-05, "loss": 1.3473, "step": 2430 }, { "epoch": 0.622336, "grad_norm": 0.35975667076551615, "learning_rate": 1.2727051610637666e-05, "loss": 1.3024, "step": 2431 }, { "epoch": 0.622592, "grad_norm": 0.36701367134315854, "learning_rate": 1.2711914289207195e-05, "loss": 1.3302, "step": 2432 }, { "epoch": 0.622848, "grad_norm": 0.3621491665612143, "learning_rate": 1.2696781780484513e-05, "loss": 1.343, "step": 2433 }, { "epoch": 0.623104, "grad_norm": 0.3709943712731596, "learning_rate": 1.268165409446242e-05, "loss": 1.3832, "step": 2434 }, { "epoch": 0.62336, "grad_norm": 0.3683479178811537, "learning_rate": 1.2666531241130513e-05, "loss": 1.3692, "step": 2435 }, { "epoch": 0.623616, "grad_norm": 0.374270580812587, "learning_rate": 1.265141323047522e-05, "loss": 1.3404, "step": 2436 }, { "epoch": 0.623872, "grad_norm": 0.36712350819584494, "learning_rate": 1.2636300072479758e-05, "loss": 1.3256, "step": 2437 }, { "epoch": 0.624128, "grad_norm": 0.370980076661176, "learning_rate": 1.2621191777124144e-05, "loss": 1.39, "step": 2438 }, { "epoch": 0.624384, "grad_norm": 0.37362924252463325, "learning_rate": 1.260608835438518e-05, "loss": 1.3666, "step": 2439 }, { "epoch": 0.62464, "grad_norm": 0.37500140491616984, "learning_rate": 1.2590989814236467e-05, "loss": 1.3466, "step": 2440 }, { "epoch": 0.624896, "grad_norm": 0.3694879866305653, "learning_rate": 1.2575896166648349e-05, "loss": 1.3569, "step": 2441 }, { "epoch": 0.625152, "grad_norm": 0.3588799700443844, "learning_rate": 1.2560807421587967e-05, "loss": 1.3271, "step": 2442 }, { "epoch": 0.625408, "grad_norm": 0.3699283775171232, "learning_rate": 1.254572358901922e-05, "loss": 1.3551, "step": 2443 }, { "epoch": 0.625664, "grad_norm": 0.3750788075269564, "learning_rate": 1.2530644678902752e-05, "loss": 1.3554, "step": 2444 }, { "epoch": 0.62592, "grad_norm": 0.37021240652505016, "learning_rate": 1.251557070119597e-05, "loss": 1.3747, "step": 2445 }, { "epoch": 0.626176, "grad_norm": 0.37318762849645426, "learning_rate": 1.2500501665853016e-05, "loss": 1.3243, "step": 2446 }, { "epoch": 0.626432, "grad_norm": 0.3774553720827331, "learning_rate": 1.2485437582824764e-05, "loss": 1.3807, "step": 2447 }, { "epoch": 0.626688, "grad_norm": 0.36914543877060885, "learning_rate": 1.2470378462058826e-05, "loss": 1.3534, "step": 2448 }, { "epoch": 0.626944, "grad_norm": 0.3659265243042741, "learning_rate": 1.245532431349954e-05, "loss": 1.3517, "step": 2449 }, { "epoch": 0.6272, "grad_norm": 0.37556825176104697, "learning_rate": 1.2440275147087947e-05, "loss": 1.3744, "step": 2450 }, { "epoch": 0.627456, "grad_norm": 0.3780268240331811, "learning_rate": 1.2425230972761808e-05, "loss": 1.3666, "step": 2451 }, { "epoch": 0.627712, "grad_norm": 0.35378470476343993, "learning_rate": 1.24101918004556e-05, "loss": 1.3044, "step": 2452 }, { "epoch": 0.627968, "grad_norm": 0.35438232422028915, "learning_rate": 1.2395157640100459e-05, "loss": 1.2613, "step": 2453 }, { "epoch": 0.628224, "grad_norm": 0.48253293345715553, "learning_rate": 1.2380128501624248e-05, "loss": 1.3316, "step": 2454 }, { "epoch": 0.62848, "grad_norm": 0.35982381292033405, "learning_rate": 1.2365104394951498e-05, "loss": 1.3177, "step": 2455 }, { "epoch": 0.628736, "grad_norm": 0.36793445905793276, "learning_rate": 1.2350085330003419e-05, "loss": 1.3284, "step": 2456 }, { "epoch": 0.628992, "grad_norm": 0.37712446631785285, "learning_rate": 1.2335071316697895e-05, "loss": 1.3464, "step": 2457 }, { "epoch": 0.629248, "grad_norm": 0.36849438037885773, "learning_rate": 1.2320062364949478e-05, "loss": 1.3737, "step": 2458 }, { "epoch": 0.629504, "grad_norm": 0.3691787225149286, "learning_rate": 1.2305058484669356e-05, "loss": 1.3795, "step": 2459 }, { "epoch": 0.62976, "grad_norm": 0.37579884824642285, "learning_rate": 1.2290059685765395e-05, "loss": 1.3317, "step": 2460 }, { "epoch": 0.630016, "grad_norm": 0.3676521605019993, "learning_rate": 1.2275065978142089e-05, "loss": 1.3408, "step": 2461 }, { "epoch": 0.630272, "grad_norm": 0.373238026670823, "learning_rate": 1.2260077371700572e-05, "loss": 1.3484, "step": 2462 }, { "epoch": 0.630528, "grad_norm": 0.3779606872881511, "learning_rate": 1.2245093876338618e-05, "loss": 1.3448, "step": 2463 }, { "epoch": 0.630784, "grad_norm": 0.36882533051104166, "learning_rate": 1.2230115501950626e-05, "loss": 1.3559, "step": 2464 }, { "epoch": 0.63104, "grad_norm": 0.38219081221027207, "learning_rate": 1.221514225842759e-05, "loss": 1.3727, "step": 2465 }, { "epoch": 0.631296, "grad_norm": 0.36692717807232483, "learning_rate": 1.2200174155657147e-05, "loss": 1.2875, "step": 2466 }, { "epoch": 0.631552, "grad_norm": 0.36256590798081884, "learning_rate": 1.2185211203523521e-05, "loss": 1.3075, "step": 2467 }, { "epoch": 0.631808, "grad_norm": 0.37238053244652786, "learning_rate": 1.2170253411907536e-05, "loss": 1.3216, "step": 2468 }, { "epoch": 0.632064, "grad_norm": 0.3803700367181227, "learning_rate": 1.2155300790686617e-05, "loss": 1.3568, "step": 2469 }, { "epoch": 0.63232, "grad_norm": 0.3699879598789386, "learning_rate": 1.2140353349734776e-05, "loss": 1.3665, "step": 2470 }, { "epoch": 0.632576, "grad_norm": 0.3665872620915169, "learning_rate": 1.212541109892258e-05, "loss": 1.3672, "step": 2471 }, { "epoch": 0.632832, "grad_norm": 0.6248408615273903, "learning_rate": 1.2110474048117198e-05, "loss": 1.4082, "step": 2472 }, { "epoch": 0.633088, "grad_norm": 0.35987766608041954, "learning_rate": 1.2095542207182348e-05, "loss": 1.3281, "step": 2473 }, { "epoch": 0.633344, "grad_norm": 0.4225789733224598, "learning_rate": 1.2080615585978315e-05, "loss": 1.4126, "step": 2474 }, { "epoch": 0.6336, "grad_norm": 0.37744910494769535, "learning_rate": 1.2065694194361934e-05, "loss": 1.3688, "step": 2475 }, { "epoch": 0.633856, "grad_norm": 0.36496449774197853, "learning_rate": 1.2050778042186594e-05, "loss": 1.3437, "step": 2476 }, { "epoch": 0.634112, "grad_norm": 0.36376727878783244, "learning_rate": 1.2035867139302218e-05, "loss": 1.3301, "step": 2477 }, { "epoch": 0.634368, "grad_norm": 0.3705950905186145, "learning_rate": 1.2020961495555247e-05, "loss": 1.3528, "step": 2478 }, { "epoch": 0.634624, "grad_norm": 0.36750560593131604, "learning_rate": 1.2006061120788677e-05, "loss": 1.3343, "step": 2479 }, { "epoch": 0.63488, "grad_norm": 0.3701806907697241, "learning_rate": 1.199116602484201e-05, "loss": 1.3635, "step": 2480 }, { "epoch": 0.635136, "grad_norm": 0.4250064284134545, "learning_rate": 1.1976276217551268e-05, "loss": 1.3371, "step": 2481 }, { "epoch": 0.635392, "grad_norm": 0.37521588071779033, "learning_rate": 1.1961391708748966e-05, "loss": 1.353, "step": 2482 }, { "epoch": 0.635648, "grad_norm": 0.366321107588632, "learning_rate": 1.1946512508264152e-05, "loss": 1.3123, "step": 2483 }, { "epoch": 0.635904, "grad_norm": 0.36953132595669375, "learning_rate": 1.1931638625922322e-05, "loss": 1.3345, "step": 2484 }, { "epoch": 0.63616, "grad_norm": 0.38256769973430976, "learning_rate": 1.1916770071545499e-05, "loss": 1.3771, "step": 2485 }, { "epoch": 0.636416, "grad_norm": 0.3795423361075958, "learning_rate": 1.1901906854952172e-05, "loss": 1.3453, "step": 2486 }, { "epoch": 0.636672, "grad_norm": 0.3718747294220241, "learning_rate": 1.1887048985957312e-05, "loss": 1.3418, "step": 2487 }, { "epoch": 0.636928, "grad_norm": 0.3687617976520658, "learning_rate": 1.187219647437235e-05, "loss": 1.3237, "step": 2488 }, { "epoch": 0.637184, "grad_norm": 0.37306731208951827, "learning_rate": 1.1857349330005193e-05, "loss": 1.3491, "step": 2489 }, { "epoch": 0.63744, "grad_norm": 0.36446125738551793, "learning_rate": 1.1842507562660176e-05, "loss": 1.3229, "step": 2490 }, { "epoch": 0.637696, "grad_norm": 0.38058161988826456, "learning_rate": 1.1827671182138114e-05, "loss": 1.3966, "step": 2491 }, { "epoch": 0.637952, "grad_norm": 0.3615258209781036, "learning_rate": 1.1812840198236248e-05, "loss": 1.3174, "step": 2492 }, { "epoch": 0.638208, "grad_norm": 0.3660253994651249, "learning_rate": 1.1798014620748266e-05, "loss": 1.3457, "step": 2493 }, { "epoch": 0.638464, "grad_norm": 0.38180918419106485, "learning_rate": 1.1783194459464272e-05, "loss": 1.3484, "step": 2494 }, { "epoch": 0.63872, "grad_norm": 0.36764697921499084, "learning_rate": 1.1768379724170811e-05, "loss": 1.333, "step": 2495 }, { "epoch": 0.638976, "grad_norm": 0.37414000131425107, "learning_rate": 1.1753570424650822e-05, "loss": 1.382, "step": 2496 }, { "epoch": 0.639232, "grad_norm": 0.3757510701645589, "learning_rate": 1.1738766570683673e-05, "loss": 1.3089, "step": 2497 }, { "epoch": 0.639488, "grad_norm": 0.36368832775713555, "learning_rate": 1.172396817204513e-05, "loss": 1.3409, "step": 2498 }, { "epoch": 0.639744, "grad_norm": 0.38513835531869484, "learning_rate": 1.1709175238507364e-05, "loss": 1.3399, "step": 2499 }, { "epoch": 0.64, "grad_norm": 0.3632332495589021, "learning_rate": 1.169438777983892e-05, "loss": 1.2926, "step": 2500 }, { "epoch": 0.640256, "grad_norm": 0.37079684532784696, "learning_rate": 1.1679605805804748e-05, "loss": 1.3431, "step": 2501 }, { "epoch": 0.640512, "grad_norm": 0.38943015771695316, "learning_rate": 1.1664829326166154e-05, "loss": 1.3706, "step": 2502 }, { "epoch": 0.640768, "grad_norm": 0.39794729038396065, "learning_rate": 1.1650058350680831e-05, "loss": 1.3487, "step": 2503 }, { "epoch": 0.641024, "grad_norm": 0.37976486635730594, "learning_rate": 1.1635292889102834e-05, "loss": 1.3599, "step": 2504 }, { "epoch": 0.64128, "grad_norm": 0.3697016444976321, "learning_rate": 1.1620532951182584e-05, "loss": 1.3526, "step": 2505 }, { "epoch": 0.641536, "grad_norm": 0.3610187394753097, "learning_rate": 1.1605778546666846e-05, "loss": 1.287, "step": 2506 }, { "epoch": 0.641792, "grad_norm": 0.3663719272650304, "learning_rate": 1.1591029685298725e-05, "loss": 1.351, "step": 2507 }, { "epoch": 0.642048, "grad_norm": 0.3721606211988694, "learning_rate": 1.1576286376817682e-05, "loss": 1.3599, "step": 2508 }, { "epoch": 0.642304, "grad_norm": 0.36786691504653046, "learning_rate": 1.156154863095949e-05, "loss": 1.3467, "step": 2509 }, { "epoch": 0.64256, "grad_norm": 0.3698288625287084, "learning_rate": 1.1546816457456265e-05, "loss": 1.3285, "step": 2510 }, { "epoch": 0.642816, "grad_norm": 0.36945551749245975, "learning_rate": 1.1532089866036442e-05, "loss": 1.3281, "step": 2511 }, { "epoch": 0.643072, "grad_norm": 0.3741585770325421, "learning_rate": 1.1517368866424767e-05, "loss": 1.3665, "step": 2512 }, { "epoch": 0.643328, "grad_norm": 0.36202612951993735, "learning_rate": 1.15026534683423e-05, "loss": 1.3007, "step": 2513 }, { "epoch": 0.643584, "grad_norm": 0.37860722875961206, "learning_rate": 1.1487943681506382e-05, "loss": 1.3333, "step": 2514 }, { "epoch": 0.64384, "grad_norm": 0.36328215072216624, "learning_rate": 1.1473239515630663e-05, "loss": 1.2957, "step": 2515 }, { "epoch": 0.644096, "grad_norm": 0.36936753269892786, "learning_rate": 1.1458540980425088e-05, "loss": 1.3152, "step": 2516 }, { "epoch": 0.644352, "grad_norm": 0.3618079974800423, "learning_rate": 1.144384808559587e-05, "loss": 1.3456, "step": 2517 }, { "epoch": 0.644608, "grad_norm": 0.36632031994452485, "learning_rate": 1.1429160840845515e-05, "loss": 1.354, "step": 2518 }, { "epoch": 0.644864, "grad_norm": 0.3632247198297989, "learning_rate": 1.141447925587277e-05, "loss": 1.3367, "step": 2519 }, { "epoch": 0.64512, "grad_norm": 0.36619419224537886, "learning_rate": 1.1399803340372672e-05, "loss": 1.334, "step": 2520 }, { "epoch": 0.645376, "grad_norm": 0.3739425129979195, "learning_rate": 1.1385133104036497e-05, "loss": 1.3497, "step": 2521 }, { "epoch": 0.645632, "grad_norm": 0.37499884738984035, "learning_rate": 1.1370468556551773e-05, "loss": 1.3819, "step": 2522 }, { "epoch": 0.645888, "grad_norm": 0.4727524029447252, "learning_rate": 1.1355809707602278e-05, "loss": 1.3388, "step": 2523 }, { "epoch": 0.646144, "grad_norm": 0.3662282274608861, "learning_rate": 1.1341156566868032e-05, "loss": 1.3192, "step": 2524 }, { "epoch": 0.6464, "grad_norm": 0.3686639498506288, "learning_rate": 1.1326509144025275e-05, "loss": 1.3473, "step": 2525 }, { "epoch": 0.646656, "grad_norm": 0.36795463671361095, "learning_rate": 1.1311867448746464e-05, "loss": 1.3234, "step": 2526 }, { "epoch": 0.646912, "grad_norm": 0.37243344052142013, "learning_rate": 1.1297231490700287e-05, "loss": 1.3501, "step": 2527 }, { "epoch": 0.647168, "grad_norm": 0.37367443842724596, "learning_rate": 1.1282601279551644e-05, "loss": 1.3368, "step": 2528 }, { "epoch": 0.647424, "grad_norm": 0.36441216204962235, "learning_rate": 1.1267976824961644e-05, "loss": 1.3246, "step": 2529 }, { "epoch": 0.64768, "grad_norm": 0.37130674618671683, "learning_rate": 1.1253358136587566e-05, "loss": 1.3336, "step": 2530 }, { "epoch": 0.647936, "grad_norm": 0.3625541384602983, "learning_rate": 1.1238745224082921e-05, "loss": 1.3144, "step": 2531 }, { "epoch": 0.648192, "grad_norm": 0.3631736161580889, "learning_rate": 1.1224138097097371e-05, "loss": 1.3599, "step": 2532 }, { "epoch": 0.648448, "grad_norm": 0.3700356782965741, "learning_rate": 1.120953676527678e-05, "loss": 1.3565, "step": 2533 }, { "epoch": 0.648704, "grad_norm": 0.37945403154520446, "learning_rate": 1.1194941238263177e-05, "loss": 1.2984, "step": 2534 }, { "epoch": 0.64896, "grad_norm": 0.36933115391430277, "learning_rate": 1.1180351525694761e-05, "loss": 1.3585, "step": 2535 }, { "epoch": 0.649216, "grad_norm": 0.36729710913029534, "learning_rate": 1.1165767637205894e-05, "loss": 1.3458, "step": 2536 }, { "epoch": 0.649472, "grad_norm": 0.3720520281340042, "learning_rate": 1.1151189582427077e-05, "loss": 1.3554, "step": 2537 }, { "epoch": 0.649728, "grad_norm": 0.38327392000773447, "learning_rate": 1.1136617370984984e-05, "loss": 1.3424, "step": 2538 }, { "epoch": 0.649984, "grad_norm": 0.3650362121886063, "learning_rate": 1.1122051012502402e-05, "loss": 1.3282, "step": 2539 }, { "epoch": 0.65024, "grad_norm": 0.371719709182339, "learning_rate": 1.1107490516598267e-05, "loss": 1.3516, "step": 2540 }, { "epoch": 0.650496, "grad_norm": 0.3586628438388398, "learning_rate": 1.1092935892887654e-05, "loss": 1.3298, "step": 2541 }, { "epoch": 0.650752, "grad_norm": 0.3719561341281398, "learning_rate": 1.1078387150981742e-05, "loss": 1.3313, "step": 2542 }, { "epoch": 0.651008, "grad_norm": 0.3727239166934467, "learning_rate": 1.106384430048783e-05, "loss": 1.3521, "step": 2543 }, { "epoch": 0.651264, "grad_norm": 0.3785858866751082, "learning_rate": 1.1049307351009342e-05, "loss": 1.3529, "step": 2544 }, { "epoch": 0.65152, "grad_norm": 0.3699255170251369, "learning_rate": 1.1034776312145779e-05, "loss": 1.3264, "step": 2545 }, { "epoch": 0.651776, "grad_norm": 0.370841786556642, "learning_rate": 1.1020251193492757e-05, "loss": 1.3455, "step": 2546 }, { "epoch": 0.652032, "grad_norm": 0.38493894578772453, "learning_rate": 1.1005732004641984e-05, "loss": 1.3635, "step": 2547 }, { "epoch": 0.652288, "grad_norm": 0.3688671495603611, "learning_rate": 1.0991218755181243e-05, "loss": 1.3832, "step": 2548 }, { "epoch": 0.652544, "grad_norm": 0.37302506509268296, "learning_rate": 1.0976711454694402e-05, "loss": 1.3361, "step": 2549 }, { "epoch": 0.6528, "grad_norm": 0.37125101625358176, "learning_rate": 1.0962210112761402e-05, "loss": 1.3177, "step": 2550 }, { "epoch": 0.653056, "grad_norm": 0.3688831866811569, "learning_rate": 1.0947714738958233e-05, "loss": 1.3671, "step": 2551 }, { "epoch": 0.653312, "grad_norm": 0.3680758547705766, "learning_rate": 1.0933225342856966e-05, "loss": 1.3123, "step": 2552 }, { "epoch": 0.653568, "grad_norm": 0.3580660580460009, "learning_rate": 1.0918741934025702e-05, "loss": 1.3228, "step": 2553 }, { "epoch": 0.653824, "grad_norm": 0.36853007839695645, "learning_rate": 1.090426452202861e-05, "loss": 1.329, "step": 2554 }, { "epoch": 0.65408, "grad_norm": 0.3774580085406426, "learning_rate": 1.0889793116425884e-05, "loss": 1.3878, "step": 2555 }, { "epoch": 0.654336, "grad_norm": 0.3633518154575558, "learning_rate": 1.0875327726773772e-05, "loss": 1.3181, "step": 2556 }, { "epoch": 0.654592, "grad_norm": 0.37751466255702537, "learning_rate": 1.0860868362624516e-05, "loss": 1.3469, "step": 2557 }, { "epoch": 0.654848, "grad_norm": 0.3788976851452432, "learning_rate": 1.08464150335264e-05, "loss": 1.3505, "step": 2558 }, { "epoch": 0.655104, "grad_norm": 0.38121911947790077, "learning_rate": 1.083196774902373e-05, "loss": 1.3783, "step": 2559 }, { "epoch": 0.65536, "grad_norm": 0.3670954629671705, "learning_rate": 1.0817526518656802e-05, "loss": 1.3488, "step": 2560 }, { "epoch": 0.655616, "grad_norm": 0.37010883596481103, "learning_rate": 1.0803091351961927e-05, "loss": 1.3305, "step": 2561 }, { "epoch": 0.655872, "grad_norm": 0.3713332390391647, "learning_rate": 1.0788662258471418e-05, "loss": 1.3636, "step": 2562 }, { "epoch": 0.656128, "grad_norm": 0.37981586637855286, "learning_rate": 1.0774239247713546e-05, "loss": 1.3667, "step": 2563 }, { "epoch": 0.656384, "grad_norm": 0.3711320649142627, "learning_rate": 1.0759822329212608e-05, "loss": 1.3573, "step": 2564 }, { "epoch": 0.65664, "grad_norm": 0.4434392626812667, "learning_rate": 1.0745411512488835e-05, "loss": 1.3154, "step": 2565 }, { "epoch": 0.656896, "grad_norm": 0.36822617722850787, "learning_rate": 1.0731006807058461e-05, "loss": 1.3204, "step": 2566 }, { "epoch": 0.657152, "grad_norm": 0.3735435377092979, "learning_rate": 1.0716608222433673e-05, "loss": 1.3833, "step": 2567 }, { "epoch": 0.657408, "grad_norm": 0.3687736873412092, "learning_rate": 1.0702215768122626e-05, "loss": 1.3484, "step": 2568 }, { "epoch": 0.657664, "grad_norm": 0.3634430428751392, "learning_rate": 1.0687829453629402e-05, "loss": 1.3347, "step": 2569 }, { "epoch": 0.65792, "grad_norm": 0.36575876073850627, "learning_rate": 1.0673449288454047e-05, "loss": 1.3377, "step": 2570 }, { "epoch": 0.658176, "grad_norm": 0.36138016967844705, "learning_rate": 1.0659075282092549e-05, "loss": 1.3255, "step": 2571 }, { "epoch": 0.658432, "grad_norm": 0.5883643275874262, "learning_rate": 1.064470744403682e-05, "loss": 1.3528, "step": 2572 }, { "epoch": 0.658688, "grad_norm": 0.3781053920212659, "learning_rate": 1.0630345783774703e-05, "loss": 1.3638, "step": 2573 }, { "epoch": 0.658944, "grad_norm": 0.35916298018797127, "learning_rate": 1.061599031078997e-05, "loss": 1.3198, "step": 2574 }, { "epoch": 0.6592, "grad_norm": 0.3887816985229547, "learning_rate": 1.0601641034562284e-05, "loss": 1.3598, "step": 2575 }, { "epoch": 0.659456, "grad_norm": 0.3748928241617236, "learning_rate": 1.0587297964567228e-05, "loss": 1.3647, "step": 2576 }, { "epoch": 0.659712, "grad_norm": 0.3738669618812053, "learning_rate": 1.0572961110276299e-05, "loss": 1.3661, "step": 2577 }, { "epoch": 0.659968, "grad_norm": 0.46500249195164156, "learning_rate": 1.055863048115687e-05, "loss": 1.3853, "step": 2578 }, { "epoch": 0.660224, "grad_norm": 0.3776039888451268, "learning_rate": 1.0544306086672216e-05, "loss": 1.3343, "step": 2579 }, { "epoch": 0.66048, "grad_norm": 0.3807101545844821, "learning_rate": 1.05299879362815e-05, "loss": 1.3605, "step": 2580 }, { "epoch": 0.660736, "grad_norm": 0.3694774074729018, "learning_rate": 1.0515676039439737e-05, "loss": 1.3275, "step": 2581 }, { "epoch": 0.660992, "grad_norm": 0.537277442177915, "learning_rate": 1.0501370405597834e-05, "loss": 1.287, "step": 2582 }, { "epoch": 0.661248, "grad_norm": 0.36570689840417997, "learning_rate": 1.0487071044202559e-05, "loss": 1.3191, "step": 2583 }, { "epoch": 0.661504, "grad_norm": 0.3600435539908686, "learning_rate": 1.0472777964696533e-05, "loss": 1.3324, "step": 2584 }, { "epoch": 0.66176, "grad_norm": 0.37070217471806244, "learning_rate": 1.0458491176518238e-05, "loss": 1.3046, "step": 2585 }, { "epoch": 0.662016, "grad_norm": 0.3700480681943844, "learning_rate": 1.0444210689101982e-05, "loss": 1.3538, "step": 2586 }, { "epoch": 0.662272, "grad_norm": 0.3761803678188354, "learning_rate": 1.0429936511877936e-05, "loss": 1.364, "step": 2587 }, { "epoch": 0.662528, "grad_norm": 0.370711766744739, "learning_rate": 1.0415668654272084e-05, "loss": 1.369, "step": 2588 }, { "epoch": 0.662784, "grad_norm": 0.37494912916105905, "learning_rate": 1.0401407125706248e-05, "loss": 1.3679, "step": 2589 }, { "epoch": 0.66304, "grad_norm": 0.3807586583252639, "learning_rate": 1.038715193559807e-05, "loss": 1.3647, "step": 2590 }, { "epoch": 0.663296, "grad_norm": 0.36888881639562676, "learning_rate": 1.0372903093361005e-05, "loss": 1.3758, "step": 2591 }, { "epoch": 0.663552, "grad_norm": 0.3802422282877783, "learning_rate": 1.0358660608404323e-05, "loss": 1.3602, "step": 2592 }, { "epoch": 0.663808, "grad_norm": 0.3667329159317049, "learning_rate": 1.0344424490133073e-05, "loss": 1.3277, "step": 2593 }, { "epoch": 0.664064, "grad_norm": 0.3704556516556962, "learning_rate": 1.0330194747948124e-05, "loss": 1.3443, "step": 2594 }, { "epoch": 0.66432, "grad_norm": 0.35781349805789997, "learning_rate": 1.0315971391246124e-05, "loss": 1.2863, "step": 2595 }, { "epoch": 0.664576, "grad_norm": 0.3733009183878282, "learning_rate": 1.030175442941951e-05, "loss": 1.366, "step": 2596 }, { "epoch": 0.664832, "grad_norm": 0.3714855036540785, "learning_rate": 1.02875438718565e-05, "loss": 1.3282, "step": 2597 }, { "epoch": 0.665088, "grad_norm": 0.37577403707192186, "learning_rate": 1.0273339727941059e-05, "loss": 1.3676, "step": 2598 }, { "epoch": 0.665344, "grad_norm": 0.5424670131785854, "learning_rate": 1.0259142007052945e-05, "loss": 1.3357, "step": 2599 }, { "epoch": 0.6656, "grad_norm": 0.36799579590693093, "learning_rate": 1.024495071856765e-05, "loss": 1.3207, "step": 2600 }, { "epoch": 0.665856, "grad_norm": 0.3834979434969095, "learning_rate": 1.023076587185644e-05, "loss": 1.3836, "step": 2601 }, { "epoch": 0.666112, "grad_norm": 0.3883966855141535, "learning_rate": 1.0216587476286319e-05, "loss": 1.3939, "step": 2602 }, { "epoch": 0.666368, "grad_norm": 0.3703229154264701, "learning_rate": 1.0202415541220025e-05, "loss": 1.3663, "step": 2603 }, { "epoch": 0.666624, "grad_norm": 0.3694268353212482, "learning_rate": 1.0188250076016042e-05, "loss": 1.3276, "step": 2604 }, { "epoch": 0.66688, "grad_norm": 0.379663923230283, "learning_rate": 1.0174091090028577e-05, "loss": 1.3145, "step": 2605 }, { "epoch": 0.667136, "grad_norm": 0.3689204488906091, "learning_rate": 1.0159938592607543e-05, "loss": 1.3508, "step": 2606 }, { "epoch": 0.667392, "grad_norm": 0.3647690934389834, "learning_rate": 1.0145792593098589e-05, "loss": 1.3776, "step": 2607 }, { "epoch": 0.667648, "grad_norm": 0.37532380234612484, "learning_rate": 1.013165310084307e-05, "loss": 1.3116, "step": 2608 }, { "epoch": 0.667904, "grad_norm": 0.3703790134183054, "learning_rate": 1.0117520125178028e-05, "loss": 1.2854, "step": 2609 }, { "epoch": 0.66816, "grad_norm": 0.36367138986957337, "learning_rate": 1.010339367543622e-05, "loss": 1.3395, "step": 2610 }, { "epoch": 0.668416, "grad_norm": 0.3682736714356572, "learning_rate": 1.008927376094609e-05, "loss": 1.3446, "step": 2611 }, { "epoch": 0.668672, "grad_norm": 0.3822665533177629, "learning_rate": 1.0075160391031752e-05, "loss": 1.3465, "step": 2612 }, { "epoch": 0.668928, "grad_norm": 0.37983676001982464, "learning_rate": 1.0061053575013018e-05, "loss": 1.3687, "step": 2613 }, { "epoch": 0.669184, "grad_norm": 0.36198830719917674, "learning_rate": 1.004695332220536e-05, "loss": 1.321, "step": 2614 }, { "epoch": 0.66944, "grad_norm": 0.977659132777435, "learning_rate": 1.0032859641919919e-05, "loss": 1.3528, "step": 2615 }, { "epoch": 0.669696, "grad_norm": 0.365790615774564, "learning_rate": 1.00187725434635e-05, "loss": 1.3051, "step": 2616 }, { "epoch": 0.669952, "grad_norm": 0.3786600766705962, "learning_rate": 1.0004692036138563e-05, "loss": 1.3577, "step": 2617 }, { "epoch": 0.670208, "grad_norm": 0.39380255388215823, "learning_rate": 9.990618129243196e-06, "loss": 1.3066, "step": 2618 }, { "epoch": 0.670464, "grad_norm": 0.3859151004351654, "learning_rate": 9.976550832071147e-06, "loss": 1.3609, "step": 2619 }, { "epoch": 0.67072, "grad_norm": 0.3840296411750152, "learning_rate": 9.962490153911808e-06, "loss": 1.3455, "step": 2620 }, { "epoch": 0.670976, "grad_norm": 0.3756080929744528, "learning_rate": 9.94843610405017e-06, "loss": 1.293, "step": 2621 }, { "epoch": 0.671232, "grad_norm": 0.37502739976127286, "learning_rate": 9.934388691766871e-06, "loss": 1.3299, "step": 2622 }, { "epoch": 0.671488, "grad_norm": 0.3857547508060106, "learning_rate": 9.920347926338168e-06, "loss": 1.3686, "step": 2623 }, { "epoch": 0.671744, "grad_norm": 0.3798305117183366, "learning_rate": 9.906313817035901e-06, "loss": 1.3142, "step": 2624 }, { "epoch": 0.672, "grad_norm": 0.3800915820206502, "learning_rate": 9.892286373127544e-06, "loss": 1.3679, "step": 2625 }, { "epoch": 0.672256, "grad_norm": 0.3861312320713112, "learning_rate": 9.87826560387616e-06, "loss": 1.3243, "step": 2626 }, { "epoch": 0.672512, "grad_norm": 0.36582924559078434, "learning_rate": 9.8642515185404e-06, "loss": 1.3056, "step": 2627 }, { "epoch": 0.672768, "grad_norm": 0.40242192935836535, "learning_rate": 9.850244126374507e-06, "loss": 1.3158, "step": 2628 }, { "epoch": 0.673024, "grad_norm": 0.37892670999798633, "learning_rate": 9.836243436628308e-06, "loss": 1.378, "step": 2629 }, { "epoch": 0.67328, "grad_norm": 0.36650655072840554, "learning_rate": 9.822249458547181e-06, "loss": 1.3186, "step": 2630 }, { "epoch": 0.673536, "grad_norm": 0.38076781390993314, "learning_rate": 9.808262201372105e-06, "loss": 1.3783, "step": 2631 }, { "epoch": 0.673792, "grad_norm": 0.366740434741912, "learning_rate": 9.794281674339592e-06, "loss": 1.3345, "step": 2632 }, { "epoch": 0.674048, "grad_norm": 0.37138330520365054, "learning_rate": 9.780307886681725e-06, "loss": 1.3704, "step": 2633 }, { "epoch": 0.674304, "grad_norm": 0.37385656318772026, "learning_rate": 9.766340847626137e-06, "loss": 1.371, "step": 2634 }, { "epoch": 0.67456, "grad_norm": 0.3598827409244142, "learning_rate": 9.752380566396009e-06, "loss": 1.2962, "step": 2635 }, { "epoch": 0.674816, "grad_norm": 0.35804557056735475, "learning_rate": 9.738427052210034e-06, "loss": 1.284, "step": 2636 }, { "epoch": 0.675072, "grad_norm": 0.36629201743992723, "learning_rate": 9.724480314282463e-06, "loss": 1.3249, "step": 2637 }, { "epoch": 0.675328, "grad_norm": 0.36613584428443136, "learning_rate": 9.710540361823063e-06, "loss": 1.3601, "step": 2638 }, { "epoch": 0.675584, "grad_norm": 0.3708551405826036, "learning_rate": 9.696607204037124e-06, "loss": 1.3774, "step": 2639 }, { "epoch": 0.67584, "grad_norm": 0.36797737809950704, "learning_rate": 9.68268085012544e-06, "loss": 1.3419, "step": 2640 }, { "epoch": 0.676096, "grad_norm": 0.37093191446741586, "learning_rate": 9.668761309284331e-06, "loss": 1.3574, "step": 2641 }, { "epoch": 0.676352, "grad_norm": 0.3597602873692287, "learning_rate": 9.654848590705586e-06, "loss": 1.324, "step": 2642 }, { "epoch": 0.676608, "grad_norm": 0.35802403622133666, "learning_rate": 9.640942703576523e-06, "loss": 1.3318, "step": 2643 }, { "epoch": 0.676864, "grad_norm": 0.3677302768290871, "learning_rate": 9.627043657079919e-06, "loss": 1.3332, "step": 2644 }, { "epoch": 0.67712, "grad_norm": 0.37406146287412306, "learning_rate": 9.613151460394056e-06, "loss": 1.3698, "step": 2645 }, { "epoch": 0.677376, "grad_norm": 0.38207001485102654, "learning_rate": 9.599266122692685e-06, "loss": 1.3434, "step": 2646 }, { "epoch": 0.677632, "grad_norm": 0.3577688961063157, "learning_rate": 9.585387653145035e-06, "loss": 1.3355, "step": 2647 }, { "epoch": 0.677888, "grad_norm": 0.36934086676171113, "learning_rate": 9.571516060915777e-06, "loss": 1.36, "step": 2648 }, { "epoch": 0.678144, "grad_norm": 0.36254847639041815, "learning_rate": 9.557651355165063e-06, "loss": 1.3247, "step": 2649 }, { "epoch": 0.6784, "grad_norm": 0.3618082848720538, "learning_rate": 9.543793545048492e-06, "loss": 1.3149, "step": 2650 }, { "epoch": 0.678656, "grad_norm": 0.36334611485521706, "learning_rate": 9.529942639717105e-06, "loss": 1.3294, "step": 2651 }, { "epoch": 0.678912, "grad_norm": 0.37409551708703354, "learning_rate": 9.516098648317386e-06, "loss": 1.3414, "step": 2652 }, { "epoch": 0.679168, "grad_norm": 0.3664262978619659, "learning_rate": 9.502261579991266e-06, "loss": 1.3293, "step": 2653 }, { "epoch": 0.679424, "grad_norm": 0.3631581741889457, "learning_rate": 9.488431443876076e-06, "loss": 1.3151, "step": 2654 }, { "epoch": 0.67968, "grad_norm": 0.3706366330549185, "learning_rate": 9.474608249104584e-06, "loss": 1.3381, "step": 2655 }, { "epoch": 0.679936, "grad_norm": 0.36464991204874425, "learning_rate": 9.460792004804981e-06, "loss": 1.3672, "step": 2656 }, { "epoch": 0.680192, "grad_norm": 0.36127269295840575, "learning_rate": 9.446982720100864e-06, "loss": 1.3086, "step": 2657 }, { "epoch": 0.680448, "grad_norm": 0.3595141547582816, "learning_rate": 9.433180404111228e-06, "loss": 1.3204, "step": 2658 }, { "epoch": 0.680704, "grad_norm": 0.36304361675869173, "learning_rate": 9.419385065950477e-06, "loss": 1.3462, "step": 2659 }, { "epoch": 0.68096, "grad_norm": 0.3643391855609854, "learning_rate": 9.405596714728404e-06, "loss": 1.3254, "step": 2660 }, { "epoch": 0.681216, "grad_norm": 0.3670255704769003, "learning_rate": 9.391815359550173e-06, "loss": 1.3325, "step": 2661 }, { "epoch": 0.681472, "grad_norm": 0.357269760668828, "learning_rate": 9.37804100951635e-06, "loss": 1.3154, "step": 2662 }, { "epoch": 0.681728, "grad_norm": 0.364239896412167, "learning_rate": 9.36427367372286e-06, "loss": 1.341, "step": 2663 }, { "epoch": 0.681984, "grad_norm": 0.3693497742785979, "learning_rate": 9.35051336126101e-06, "loss": 1.3318, "step": 2664 }, { "epoch": 0.68224, "grad_norm": 0.3710250050560966, "learning_rate": 9.336760081217452e-06, "loss": 1.3181, "step": 2665 }, { "epoch": 0.682496, "grad_norm": 0.3653262761697295, "learning_rate": 9.323013842674212e-06, "loss": 1.3712, "step": 2666 }, { "epoch": 0.682752, "grad_norm": 0.38611736819797404, "learning_rate": 9.309274654708643e-06, "loss": 1.3694, "step": 2667 }, { "epoch": 0.683008, "grad_norm": 0.3744647647875601, "learning_rate": 9.295542526393464e-06, "loss": 1.3163, "step": 2668 }, { "epoch": 0.683264, "grad_norm": 0.3669198424090417, "learning_rate": 9.281817466796728e-06, "loss": 1.3465, "step": 2669 }, { "epoch": 0.68352, "grad_norm": 0.36070003418527274, "learning_rate": 9.26809948498181e-06, "loss": 1.3255, "step": 2670 }, { "epoch": 0.683776, "grad_norm": 0.3634309404692573, "learning_rate": 9.254388590007424e-06, "loss": 1.3433, "step": 2671 }, { "epoch": 0.684032, "grad_norm": 0.36699180197385417, "learning_rate": 9.240684790927598e-06, "loss": 1.3324, "step": 2672 }, { "epoch": 0.684288, "grad_norm": 0.36280058134141696, "learning_rate": 9.22698809679167e-06, "loss": 1.3217, "step": 2673 }, { "epoch": 0.684544, "grad_norm": 0.36000615128526053, "learning_rate": 9.213298516644287e-06, "loss": 1.2957, "step": 2674 }, { "epoch": 0.6848, "grad_norm": 0.3712449515860552, "learning_rate": 9.199616059525404e-06, "loss": 1.3526, "step": 2675 }, { "epoch": 0.685056, "grad_norm": 0.3732568886667032, "learning_rate": 9.185940734470282e-06, "loss": 1.3235, "step": 2676 }, { "epoch": 0.685312, "grad_norm": 0.35894064878767934, "learning_rate": 9.172272550509442e-06, "loss": 1.3477, "step": 2677 }, { "epoch": 0.685568, "grad_norm": 0.36645436831610567, "learning_rate": 9.158611516668723e-06, "loss": 1.3157, "step": 2678 }, { "epoch": 0.685824, "grad_norm": 0.3712963788261121, "learning_rate": 9.144957641969212e-06, "loss": 1.3927, "step": 2679 }, { "epoch": 0.68608, "grad_norm": 0.3612558927210817, "learning_rate": 9.13131093542729e-06, "loss": 1.3106, "step": 2680 }, { "epoch": 0.686336, "grad_norm": 0.36323600308068715, "learning_rate": 9.117671406054598e-06, "loss": 1.3176, "step": 2681 }, { "epoch": 0.686592, "grad_norm": 0.3619876173914447, "learning_rate": 9.104039062858038e-06, "loss": 1.3247, "step": 2682 }, { "epoch": 0.686848, "grad_norm": 0.37555033906580426, "learning_rate": 9.090413914839767e-06, "loss": 1.3311, "step": 2683 }, { "epoch": 0.687104, "grad_norm": 0.35821336698172096, "learning_rate": 9.076795970997194e-06, "loss": 1.315, "step": 2684 }, { "epoch": 0.68736, "grad_norm": 0.3591643943124592, "learning_rate": 9.063185240322956e-06, "loss": 1.3312, "step": 2685 }, { "epoch": 0.687616, "grad_norm": 0.3718901452981283, "learning_rate": 9.04958173180494e-06, "loss": 1.346, "step": 2686 }, { "epoch": 0.687872, "grad_norm": 0.35944733077925384, "learning_rate": 9.03598545442627e-06, "loss": 1.3504, "step": 2687 }, { "epoch": 0.688128, "grad_norm": 0.36914490488082985, "learning_rate": 9.02239641716527e-06, "loss": 1.3406, "step": 2688 }, { "epoch": 0.688384, "grad_norm": 0.37147969084760707, "learning_rate": 9.00881462899551e-06, "loss": 1.3704, "step": 2689 }, { "epoch": 0.68864, "grad_norm": 0.3679369775193561, "learning_rate": 8.995240098885765e-06, "loss": 1.3604, "step": 2690 }, { "epoch": 0.688896, "grad_norm": 0.37102353818815614, "learning_rate": 8.981672835800002e-06, "loss": 1.3806, "step": 2691 }, { "epoch": 0.689152, "grad_norm": 0.3708482987972002, "learning_rate": 8.968112848697405e-06, "loss": 1.3537, "step": 2692 }, { "epoch": 0.689408, "grad_norm": 0.3644548553130399, "learning_rate": 8.954560146532352e-06, "loss": 1.3458, "step": 2693 }, { "epoch": 0.689664, "grad_norm": 0.3680747224598955, "learning_rate": 8.941014738254408e-06, "loss": 1.3123, "step": 2694 }, { "epoch": 0.68992, "grad_norm": 0.3667592434036538, "learning_rate": 8.927476632808321e-06, "loss": 1.3119, "step": 2695 }, { "epoch": 0.690176, "grad_norm": 0.3684561443081247, "learning_rate": 8.913945839134024e-06, "loss": 1.3607, "step": 2696 }, { "epoch": 0.690432, "grad_norm": 0.36274277056443466, "learning_rate": 8.900422366166597e-06, "loss": 1.3173, "step": 2697 }, { "epoch": 0.690688, "grad_norm": 0.36917944741693287, "learning_rate": 8.886906222836316e-06, "loss": 1.3453, "step": 2698 }, { "epoch": 0.690944, "grad_norm": 0.37373778877944863, "learning_rate": 8.873397418068607e-06, "loss": 1.3717, "step": 2699 }, { "epoch": 0.6912, "grad_norm": 0.37887534752226204, "learning_rate": 8.859895960784035e-06, "loss": 1.3662, "step": 2700 }, { "epoch": 0.691456, "grad_norm": 0.37010635786786256, "learning_rate": 8.84640185989833e-06, "loss": 1.387, "step": 2701 }, { "epoch": 0.691712, "grad_norm": 0.3642657035372625, "learning_rate": 8.832915124322367e-06, "loss": 1.3494, "step": 2702 }, { "epoch": 0.691968, "grad_norm": 0.36593322220417407, "learning_rate": 8.819435762962139e-06, "loss": 1.3331, "step": 2703 }, { "epoch": 0.692224, "grad_norm": 0.3725780521511076, "learning_rate": 8.805963784718784e-06, "loss": 1.3326, "step": 2704 }, { "epoch": 0.69248, "grad_norm": 0.358535989937182, "learning_rate": 8.792499198488558e-06, "loss": 1.298, "step": 2705 }, { "epoch": 0.692736, "grad_norm": 0.3783157675522095, "learning_rate": 8.77904201316284e-06, "loss": 1.3582, "step": 2706 }, { "epoch": 0.692992, "grad_norm": 0.37385377141941734, "learning_rate": 8.765592237628122e-06, "loss": 1.3801, "step": 2707 }, { "epoch": 0.693248, "grad_norm": 0.36088150334117597, "learning_rate": 8.752149880766008e-06, "loss": 1.3029, "step": 2708 }, { "epoch": 0.693504, "grad_norm": 0.3622307663711269, "learning_rate": 8.73871495145318e-06, "loss": 1.3265, "step": 2709 }, { "epoch": 0.69376, "grad_norm": 0.3608388450352028, "learning_rate": 8.725287458561447e-06, "loss": 1.3093, "step": 2710 }, { "epoch": 0.694016, "grad_norm": 0.35895239323077144, "learning_rate": 8.711867410957675e-06, "loss": 1.3291, "step": 2711 }, { "epoch": 0.694272, "grad_norm": 0.3724069974085379, "learning_rate": 8.698454817503838e-06, "loss": 1.3255, "step": 2712 }, { "epoch": 0.694528, "grad_norm": 0.3756068886340145, "learning_rate": 8.685049687056978e-06, "loss": 1.344, "step": 2713 }, { "epoch": 0.694784, "grad_norm": 0.3583311020199094, "learning_rate": 8.671652028469224e-06, "loss": 1.3085, "step": 2714 }, { "epoch": 0.69504, "grad_norm": 0.36166882521852034, "learning_rate": 8.658261850587734e-06, "loss": 1.3275, "step": 2715 }, { "epoch": 0.695296, "grad_norm": 0.35960985376933485, "learning_rate": 8.644879162254765e-06, "loss": 1.3634, "step": 2716 }, { "epoch": 0.695552, "grad_norm": 0.36245227571916905, "learning_rate": 8.631503972307606e-06, "loss": 1.3154, "step": 2717 }, { "epoch": 0.695808, "grad_norm": 0.3723271206685477, "learning_rate": 8.618136289578604e-06, "loss": 1.3653, "step": 2718 }, { "epoch": 0.696064, "grad_norm": 0.36436910230196, "learning_rate": 8.604776122895147e-06, "loss": 1.3349, "step": 2719 }, { "epoch": 0.69632, "grad_norm": 0.3642398946562998, "learning_rate": 8.591423481079664e-06, "loss": 1.3454, "step": 2720 }, { "epoch": 0.696576, "grad_norm": 0.36850049668145113, "learning_rate": 8.578078372949594e-06, "loss": 1.3406, "step": 2721 }, { "epoch": 0.696832, "grad_norm": 0.37386734346523903, "learning_rate": 8.564740807317433e-06, "loss": 1.3553, "step": 2722 }, { "epoch": 0.697088, "grad_norm": 0.36606994545663074, "learning_rate": 8.551410792990664e-06, "loss": 1.3306, "step": 2723 }, { "epoch": 0.697344, "grad_norm": 0.3695826996197134, "learning_rate": 8.538088338771806e-06, "loss": 1.3406, "step": 2724 }, { "epoch": 0.6976, "grad_norm": 0.36798594266626083, "learning_rate": 8.524773453458381e-06, "loss": 1.3355, "step": 2725 }, { "epoch": 0.697856, "grad_norm": 0.3727504509789047, "learning_rate": 8.511466145842909e-06, "loss": 1.3769, "step": 2726 }, { "epoch": 0.698112, "grad_norm": 0.36389633466409566, "learning_rate": 8.498166424712914e-06, "loss": 1.3475, "step": 2727 }, { "epoch": 0.698368, "grad_norm": 0.3608045830256791, "learning_rate": 8.484874298850894e-06, "loss": 1.2943, "step": 2728 }, { "epoch": 0.698624, "grad_norm": 0.3666100568208091, "learning_rate": 8.471589777034343e-06, "loss": 1.3152, "step": 2729 }, { "epoch": 0.69888, "grad_norm": 0.3935670412789717, "learning_rate": 8.458312868035739e-06, "loss": 1.3224, "step": 2730 }, { "epoch": 0.699136, "grad_norm": 0.3620247065282502, "learning_rate": 8.445043580622527e-06, "loss": 1.3244, "step": 2731 }, { "epoch": 0.699392, "grad_norm": 0.358201525530314, "learning_rate": 8.431781923557105e-06, "loss": 1.3301, "step": 2732 }, { "epoch": 0.699648, "grad_norm": 0.372965858495583, "learning_rate": 8.418527905596862e-06, "loss": 1.3515, "step": 2733 }, { "epoch": 0.699904, "grad_norm": 0.37757065670463225, "learning_rate": 8.40528153549411e-06, "loss": 1.3714, "step": 2734 }, { "epoch": 0.70016, "grad_norm": 0.3622097062107426, "learning_rate": 8.392042821996134e-06, "loss": 1.3389, "step": 2735 }, { "epoch": 0.700416, "grad_norm": 0.36302224598366645, "learning_rate": 8.378811773845155e-06, "loss": 1.3382, "step": 2736 }, { "epoch": 0.700672, "grad_norm": 0.3763654356691901, "learning_rate": 8.365588399778333e-06, "loss": 1.351, "step": 2737 }, { "epoch": 0.700928, "grad_norm": 0.3641028098842834, "learning_rate": 8.352372708527758e-06, "loss": 1.3369, "step": 2738 }, { "epoch": 0.701184, "grad_norm": 0.3591910992755397, "learning_rate": 8.33916470882046e-06, "loss": 1.3207, "step": 2739 }, { "epoch": 0.70144, "grad_norm": 0.3584194985287829, "learning_rate": 8.325964409378362e-06, "loss": 1.3244, "step": 2740 }, { "epoch": 0.701696, "grad_norm": 0.37103968026485573, "learning_rate": 8.312771818918326e-06, "loss": 1.3366, "step": 2741 }, { "epoch": 0.701952, "grad_norm": 0.3640122897082763, "learning_rate": 8.299586946152117e-06, "loss": 1.326, "step": 2742 }, { "epoch": 0.702208, "grad_norm": 0.36560827498088616, "learning_rate": 8.286409799786409e-06, "loss": 1.3507, "step": 2743 }, { "epoch": 0.702464, "grad_norm": 0.3634600113894542, "learning_rate": 8.273240388522749e-06, "loss": 1.3353, "step": 2744 }, { "epoch": 0.70272, "grad_norm": 0.36797789897040883, "learning_rate": 8.260078721057615e-06, "loss": 1.3441, "step": 2745 }, { "epoch": 0.702976, "grad_norm": 0.3652575457221787, "learning_rate": 8.246924806082338e-06, "loss": 1.363, "step": 2746 }, { "epoch": 0.703232, "grad_norm": 0.3633835172109707, "learning_rate": 8.233778652283143e-06, "loss": 1.3252, "step": 2747 }, { "epoch": 0.703488, "grad_norm": 0.3622707077283908, "learning_rate": 8.220640268341132e-06, "loss": 1.341, "step": 2748 }, { "epoch": 0.703744, "grad_norm": 0.3620740280694645, "learning_rate": 8.207509662932276e-06, "loss": 1.3187, "step": 2749 }, { "epoch": 0.704, "grad_norm": 0.45984208038303825, "learning_rate": 8.194386844727402e-06, "loss": 1.3236, "step": 2750 }, { "epoch": 0.704256, "grad_norm": 0.36700097689721134, "learning_rate": 8.181271822392213e-06, "loss": 1.3664, "step": 2751 }, { "epoch": 0.704512, "grad_norm": 0.36531681607576694, "learning_rate": 8.168164604587234e-06, "loss": 1.3405, "step": 2752 }, { "epoch": 0.704768, "grad_norm": 0.3721593833582203, "learning_rate": 8.155065199967857e-06, "loss": 1.3646, "step": 2753 }, { "epoch": 0.705024, "grad_norm": 0.36140264020776575, "learning_rate": 8.141973617184322e-06, "loss": 1.3334, "step": 2754 }, { "epoch": 0.70528, "grad_norm": 0.35267597267187806, "learning_rate": 8.128889864881676e-06, "loss": 1.3118, "step": 2755 }, { "epoch": 0.705536, "grad_norm": 0.3628531758055449, "learning_rate": 8.115813951699818e-06, "loss": 1.3489, "step": 2756 }, { "epoch": 0.705792, "grad_norm": 0.3566190851198683, "learning_rate": 8.102745886273472e-06, "loss": 1.2909, "step": 2757 }, { "epoch": 0.706048, "grad_norm": 0.4251694084038126, "learning_rate": 8.089685677232158e-06, "loss": 1.3154, "step": 2758 }, { "epoch": 0.706304, "grad_norm": 0.36430182823945445, "learning_rate": 8.076633333200225e-06, "loss": 1.329, "step": 2759 }, { "epoch": 0.70656, "grad_norm": 0.38807206857161175, "learning_rate": 8.063588862796832e-06, "loss": 1.3412, "step": 2760 }, { "epoch": 0.706816, "grad_norm": 0.3614892915017986, "learning_rate": 8.050552274635923e-06, "loss": 1.3435, "step": 2761 }, { "epoch": 0.707072, "grad_norm": 0.3827429608484736, "learning_rate": 8.037523577326254e-06, "loss": 1.3233, "step": 2762 }, { "epoch": 0.707328, "grad_norm": 0.36540664272157325, "learning_rate": 8.024502779471361e-06, "loss": 1.3626, "step": 2763 }, { "epoch": 0.707584, "grad_norm": 0.3644753721133332, "learning_rate": 8.011489889669554e-06, "loss": 1.3351, "step": 2764 }, { "epoch": 0.70784, "grad_norm": 0.35542273495093196, "learning_rate": 7.99848491651394e-06, "loss": 1.3004, "step": 2765 }, { "epoch": 0.708096, "grad_norm": 0.36107881848533613, "learning_rate": 7.985487868592392e-06, "loss": 1.3152, "step": 2766 }, { "epoch": 0.708352, "grad_norm": 0.4079202799746153, "learning_rate": 7.972498754487537e-06, "loss": 1.301, "step": 2767 }, { "epoch": 0.708608, "grad_norm": 0.3550897685212053, "learning_rate": 7.959517582776776e-06, "loss": 1.3318, "step": 2768 }, { "epoch": 0.708864, "grad_norm": 0.36217723870521634, "learning_rate": 7.946544362032274e-06, "loss": 1.3411, "step": 2769 }, { "epoch": 0.70912, "grad_norm": 0.3753939546296843, "learning_rate": 7.933579100820914e-06, "loss": 1.3805, "step": 2770 }, { "epoch": 0.709376, "grad_norm": 0.36143791329258396, "learning_rate": 7.920621807704355e-06, "loss": 1.3198, "step": 2771 }, { "epoch": 0.709632, "grad_norm": 0.36903241967884287, "learning_rate": 7.907672491238976e-06, "loss": 1.3438, "step": 2772 }, { "epoch": 0.709888, "grad_norm": 0.3634585958784751, "learning_rate": 7.894731159975896e-06, "loss": 1.3276, "step": 2773 }, { "epoch": 0.710144, "grad_norm": 0.36197502182941976, "learning_rate": 7.881797822460965e-06, "loss": 1.3291, "step": 2774 }, { "epoch": 0.7104, "grad_norm": 0.3750991396589606, "learning_rate": 7.86887248723475e-06, "loss": 1.3218, "step": 2775 }, { "epoch": 0.710656, "grad_norm": 0.3682203494174357, "learning_rate": 7.855955162832519e-06, "loss": 1.3594, "step": 2776 }, { "epoch": 0.710912, "grad_norm": 0.3659267671016427, "learning_rate": 7.843045857784278e-06, "loss": 1.3595, "step": 2777 }, { "epoch": 0.711168, "grad_norm": 0.3578802813680482, "learning_rate": 7.830144580614709e-06, "loss": 1.3338, "step": 2778 }, { "epoch": 0.711424, "grad_norm": 0.371244997667776, "learning_rate": 7.817251339843213e-06, "loss": 1.2991, "step": 2779 }, { "epoch": 0.71168, "grad_norm": 0.36245530073378024, "learning_rate": 7.804366143983878e-06, "loss": 1.358, "step": 2780 }, { "epoch": 0.711936, "grad_norm": 0.3598542358759934, "learning_rate": 7.791489001545483e-06, "loss": 1.3379, "step": 2781 }, { "epoch": 0.712192, "grad_norm": 0.37305002680947785, "learning_rate": 7.778619921031476e-06, "loss": 1.3059, "step": 2782 }, { "epoch": 0.712448, "grad_norm": 0.37256635303135505, "learning_rate": 7.765758910939995e-06, "loss": 1.3244, "step": 2783 }, { "epoch": 0.712704, "grad_norm": 0.36312275140892275, "learning_rate": 7.752905979763846e-06, "loss": 1.3299, "step": 2784 }, { "epoch": 0.71296, "grad_norm": 0.36263784012530204, "learning_rate": 7.740061135990493e-06, "loss": 1.3683, "step": 2785 }, { "epoch": 0.713216, "grad_norm": 0.3683660394471804, "learning_rate": 7.727224388102069e-06, "loss": 1.3565, "step": 2786 }, { "epoch": 0.713472, "grad_norm": 0.37191035633220004, "learning_rate": 7.714395744575362e-06, "loss": 1.3046, "step": 2787 }, { "epoch": 0.713728, "grad_norm": 0.3610043726950672, "learning_rate": 7.701575213881788e-06, "loss": 1.3341, "step": 2788 }, { "epoch": 0.713984, "grad_norm": 0.3575970011151364, "learning_rate": 7.688762804487437e-06, "loss": 1.3023, "step": 2789 }, { "epoch": 0.71424, "grad_norm": 0.36240701882029364, "learning_rate": 7.675958524853003e-06, "loss": 1.3017, "step": 2790 }, { "epoch": 0.714496, "grad_norm": 0.3829393418781689, "learning_rate": 7.663162383433834e-06, "loss": 1.3743, "step": 2791 }, { "epoch": 0.714752, "grad_norm": 0.3722489821194553, "learning_rate": 7.6503743886799e-06, "loss": 1.3718, "step": 2792 }, { "epoch": 0.715008, "grad_norm": 0.36808697856222033, "learning_rate": 7.637594549035787e-06, "loss": 1.3181, "step": 2793 }, { "epoch": 0.715264, "grad_norm": 0.3623251315224491, "learning_rate": 7.624822872940707e-06, "loss": 1.3074, "step": 2794 }, { "epoch": 0.71552, "grad_norm": 0.3606671799287942, "learning_rate": 7.612059368828457e-06, "loss": 1.3287, "step": 2795 }, { "epoch": 0.715776, "grad_norm": 0.36263257851615077, "learning_rate": 7.59930404512746e-06, "loss": 1.3188, "step": 2796 }, { "epoch": 0.716032, "grad_norm": 0.37161468820475085, "learning_rate": 7.5865569102607295e-06, "loss": 1.3312, "step": 2797 }, { "epoch": 0.716288, "grad_norm": 0.410201410714656, "learning_rate": 7.573817972645872e-06, "loss": 1.326, "step": 2798 }, { "epoch": 0.716544, "grad_norm": 0.36860411138989085, "learning_rate": 7.561087240695086e-06, "loss": 1.3233, "step": 2799 }, { "epoch": 0.7168, "grad_norm": 0.37456948624033176, "learning_rate": 7.548364722815142e-06, "loss": 1.3487, "step": 2800 }, { "epoch": 0.717056, "grad_norm": 0.3645861327314719, "learning_rate": 7.535650427407379e-06, "loss": 1.3108, "step": 2801 }, { "epoch": 0.717312, "grad_norm": 0.36824676013177354, "learning_rate": 7.52294436286773e-06, "loss": 1.3419, "step": 2802 }, { "epoch": 0.717568, "grad_norm": 0.3677961332399227, "learning_rate": 7.5102465375866765e-06, "loss": 1.331, "step": 2803 }, { "epoch": 0.717824, "grad_norm": 0.36064888460405026, "learning_rate": 7.497556959949262e-06, "loss": 1.3132, "step": 2804 }, { "epoch": 0.71808, "grad_norm": 0.3654641040746755, "learning_rate": 7.484875638335087e-06, "loss": 1.3489, "step": 2805 }, { "epoch": 0.718336, "grad_norm": 0.36333436096603144, "learning_rate": 7.472202581118304e-06, "loss": 1.3331, "step": 2806 }, { "epoch": 0.718592, "grad_norm": 0.3643158040582172, "learning_rate": 7.459537796667589e-06, "loss": 1.3097, "step": 2807 }, { "epoch": 0.718848, "grad_norm": 0.35816073504176726, "learning_rate": 7.446881293346171e-06, "loss": 1.2853, "step": 2808 }, { "epoch": 0.719104, "grad_norm": 0.3672466278772919, "learning_rate": 7.434233079511812e-06, "loss": 1.3585, "step": 2809 }, { "epoch": 0.71936, "grad_norm": 0.3673194788782525, "learning_rate": 7.4215931635168e-06, "loss": 1.3099, "step": 2810 }, { "epoch": 0.719616, "grad_norm": 0.36050103145502316, "learning_rate": 7.408961553707925e-06, "loss": 1.3425, "step": 2811 }, { "epoch": 0.719872, "grad_norm": 0.35633560634428346, "learning_rate": 7.396338258426521e-06, "loss": 1.3069, "step": 2812 }, { "epoch": 0.720128, "grad_norm": 0.36993081620145385, "learning_rate": 7.383723286008402e-06, "loss": 1.3661, "step": 2813 }, { "epoch": 0.720384, "grad_norm": 0.36454955138759243, "learning_rate": 7.371116644783905e-06, "loss": 1.3135, "step": 2814 }, { "epoch": 0.72064, "grad_norm": 0.3567921988220691, "learning_rate": 7.358518343077863e-06, "loss": 1.3066, "step": 2815 }, { "epoch": 0.720896, "grad_norm": 0.3563686964617256, "learning_rate": 7.3459283892096e-06, "loss": 1.2991, "step": 2816 }, { "epoch": 0.721152, "grad_norm": 0.3659090530769971, "learning_rate": 7.333346791492928e-06, "loss": 1.3247, "step": 2817 }, { "epoch": 0.721408, "grad_norm": 0.36442974143067813, "learning_rate": 7.320773558236145e-06, "loss": 1.3523, "step": 2818 }, { "epoch": 0.721664, "grad_norm": 0.3608091835990466, "learning_rate": 7.308208697742005e-06, "loss": 1.2936, "step": 2819 }, { "epoch": 0.72192, "grad_norm": 0.36703596126098786, "learning_rate": 7.295652218307763e-06, "loss": 1.3391, "step": 2820 }, { "epoch": 0.722176, "grad_norm": 0.3629516574988406, "learning_rate": 7.283104128225118e-06, "loss": 1.3389, "step": 2821 }, { "epoch": 0.722432, "grad_norm": 0.36207248603407655, "learning_rate": 7.270564435780247e-06, "loss": 1.3178, "step": 2822 }, { "epoch": 0.722688, "grad_norm": 0.3736835078519365, "learning_rate": 7.2580331492537555e-06, "loss": 1.3309, "step": 2823 }, { "epoch": 0.722944, "grad_norm": 0.3575569422064091, "learning_rate": 7.245510276920729e-06, "loss": 1.3333, "step": 2824 }, { "epoch": 0.7232, "grad_norm": 0.3659348719471312, "learning_rate": 7.232995827050666e-06, "loss": 1.3041, "step": 2825 }, { "epoch": 0.723456, "grad_norm": 0.3571507760616756, "learning_rate": 7.220489807907527e-06, "loss": 1.3441, "step": 2826 }, { "epoch": 0.723712, "grad_norm": 0.36173909812408583, "learning_rate": 7.207992227749694e-06, "loss": 1.3322, "step": 2827 }, { "epoch": 0.723968, "grad_norm": 0.36476573571224014, "learning_rate": 7.195503094829983e-06, "loss": 1.3529, "step": 2828 }, { "epoch": 0.724224, "grad_norm": 0.47874967975608623, "learning_rate": 7.1830224173956245e-06, "loss": 1.3253, "step": 2829 }, { "epoch": 0.72448, "grad_norm": 0.36674333850932334, "learning_rate": 7.170550203688278e-06, "loss": 1.3274, "step": 2830 }, { "epoch": 0.724736, "grad_norm": 0.3632950048806807, "learning_rate": 7.158086461943987e-06, "loss": 1.3257, "step": 2831 }, { "epoch": 0.724992, "grad_norm": 0.36140070591146417, "learning_rate": 7.14563120039323e-06, "loss": 1.318, "step": 2832 }, { "epoch": 0.725248, "grad_norm": 0.3674532997097461, "learning_rate": 7.1331844272608795e-06, "loss": 1.3397, "step": 2833 }, { "epoch": 0.725504, "grad_norm": 0.36770102503611196, "learning_rate": 7.1207461507661805e-06, "loss": 1.3155, "step": 2834 }, { "epoch": 0.72576, "grad_norm": 0.3572166504144376, "learning_rate": 7.108316379122793e-06, "loss": 1.3311, "step": 2835 }, { "epoch": 0.726016, "grad_norm": 0.47435280272680036, "learning_rate": 7.095895120538758e-06, "loss": 1.358, "step": 2836 }, { "epoch": 0.726272, "grad_norm": 0.3936589216050705, "learning_rate": 7.083482383216473e-06, "loss": 1.349, "step": 2837 }, { "epoch": 0.726528, "grad_norm": 0.3652460774782325, "learning_rate": 7.0710781753527325e-06, "loss": 1.3498, "step": 2838 }, { "epoch": 0.726784, "grad_norm": 0.3590831536592986, "learning_rate": 7.05868250513869e-06, "loss": 1.3381, "step": 2839 }, { "epoch": 0.72704, "grad_norm": 0.37046210410925323, "learning_rate": 7.04629538075986e-06, "loss": 1.3779, "step": 2840 }, { "epoch": 0.727296, "grad_norm": 0.3643138457888806, "learning_rate": 7.033916810396115e-06, "loss": 1.3314, "step": 2841 }, { "epoch": 0.727552, "grad_norm": 0.3632987223574396, "learning_rate": 7.021546802221686e-06, "loss": 1.303, "step": 2842 }, { "epoch": 0.727808, "grad_norm": 0.36910389445971414, "learning_rate": 7.009185364405129e-06, "loss": 1.3039, "step": 2843 }, { "epoch": 0.728064, "grad_norm": 0.35925845938019696, "learning_rate": 6.996832505109359e-06, "loss": 1.2821, "step": 2844 }, { "epoch": 0.72832, "grad_norm": 0.3659988405102717, "learning_rate": 6.984488232491628e-06, "loss": 1.3253, "step": 2845 }, { "epoch": 0.728576, "grad_norm": 0.37413426362838, "learning_rate": 6.972152554703499e-06, "loss": 1.3692, "step": 2846 }, { "epoch": 0.728832, "grad_norm": 0.40473669481767854, "learning_rate": 6.95982547989088e-06, "loss": 1.3242, "step": 2847 }, { "epoch": 0.729088, "grad_norm": 0.35701231562340285, "learning_rate": 6.947507016193986e-06, "loss": 1.3551, "step": 2848 }, { "epoch": 0.729344, "grad_norm": 0.36385206734505193, "learning_rate": 6.935197171747357e-06, "loss": 1.2984, "step": 2849 }, { "epoch": 0.7296, "grad_norm": 0.3640112070358793, "learning_rate": 6.922895954679818e-06, "loss": 1.3006, "step": 2850 }, { "epoch": 0.729856, "grad_norm": 0.3613111516553966, "learning_rate": 6.910603373114522e-06, "loss": 1.29, "step": 2851 }, { "epoch": 0.730112, "grad_norm": 0.35288833759229754, "learning_rate": 6.898319435168905e-06, "loss": 1.2919, "step": 2852 }, { "epoch": 0.730368, "grad_norm": 0.3933725513873792, "learning_rate": 6.886044148954707e-06, "loss": 1.3342, "step": 2853 }, { "epoch": 0.730624, "grad_norm": 0.36008652081657955, "learning_rate": 6.8737775225779405e-06, "loss": 1.307, "step": 2854 }, { "epoch": 0.73088, "grad_norm": 0.3695936770786274, "learning_rate": 6.8615195641389165e-06, "loss": 1.3487, "step": 2855 }, { "epoch": 0.731136, "grad_norm": 0.36239648938541164, "learning_rate": 6.849270281732206e-06, "loss": 1.3184, "step": 2856 }, { "epoch": 0.731392, "grad_norm": 0.3660138449744174, "learning_rate": 6.837029683446647e-06, "loss": 1.3433, "step": 2857 }, { "epoch": 0.731648, "grad_norm": 0.35969062572953525, "learning_rate": 6.824797777365364e-06, "loss": 1.3206, "step": 2858 }, { "epoch": 0.731904, "grad_norm": 0.35773110872044217, "learning_rate": 6.812574571565729e-06, "loss": 1.3208, "step": 2859 }, { "epoch": 0.73216, "grad_norm": 0.361529121509267, "learning_rate": 6.800360074119367e-06, "loss": 1.3335, "step": 2860 }, { "epoch": 0.732416, "grad_norm": 0.3749156458300707, "learning_rate": 6.788154293092166e-06, "loss": 1.3689, "step": 2861 }, { "epoch": 0.732672, "grad_norm": 0.37801284819404646, "learning_rate": 6.775957236544231e-06, "loss": 1.3548, "step": 2862 }, { "epoch": 0.732928, "grad_norm": 0.37054189757157663, "learning_rate": 6.763768912529934e-06, "loss": 1.3654, "step": 2863 }, { "epoch": 0.733184, "grad_norm": 0.3779285328064393, "learning_rate": 6.751589329097863e-06, "loss": 1.371, "step": 2864 }, { "epoch": 0.73344, "grad_norm": 0.3631957486230213, "learning_rate": 6.739418494290844e-06, "loss": 1.3323, "step": 2865 }, { "epoch": 0.733696, "grad_norm": 0.371760107273983, "learning_rate": 6.727256416145926e-06, "loss": 1.3802, "step": 2866 }, { "epoch": 0.733952, "grad_norm": 0.38476313850643973, "learning_rate": 6.71510310269436e-06, "loss": 1.3649, "step": 2867 }, { "epoch": 0.734208, "grad_norm": 0.3685614627211531, "learning_rate": 6.702958561961635e-06, "loss": 1.3407, "step": 2868 }, { "epoch": 0.734464, "grad_norm": 0.3606807338826802, "learning_rate": 6.690822801967414e-06, "loss": 1.2999, "step": 2869 }, { "epoch": 0.73472, "grad_norm": 0.36831944718808013, "learning_rate": 6.678695830725593e-06, "loss": 1.362, "step": 2870 }, { "epoch": 0.734976, "grad_norm": 0.47357312152165876, "learning_rate": 6.666577656244246e-06, "loss": 1.3113, "step": 2871 }, { "epoch": 0.735232, "grad_norm": 0.3654274251928827, "learning_rate": 6.654468286525646e-06, "loss": 1.3518, "step": 2872 }, { "epoch": 0.735488, "grad_norm": 0.3546914140566232, "learning_rate": 6.642367729566257e-06, "loss": 1.3479, "step": 2873 }, { "epoch": 0.735744, "grad_norm": 0.36387467639163257, "learning_rate": 6.6302759933567e-06, "loss": 1.3208, "step": 2874 }, { "epoch": 0.736, "grad_norm": 0.37587832089033957, "learning_rate": 6.618193085881794e-06, "loss": 1.3988, "step": 2875 }, { "epoch": 0.736256, "grad_norm": 0.37431798671056304, "learning_rate": 6.606119015120522e-06, "loss": 1.3577, "step": 2876 }, { "epoch": 0.736512, "grad_norm": 0.36748530063422613, "learning_rate": 6.594053789046031e-06, "loss": 1.3452, "step": 2877 }, { "epoch": 0.736768, "grad_norm": 0.3608372617898391, "learning_rate": 6.58199741562563e-06, "loss": 1.2782, "step": 2878 }, { "epoch": 0.737024, "grad_norm": 0.3723507905354946, "learning_rate": 6.569949902820776e-06, "loss": 1.3597, "step": 2879 }, { "epoch": 0.73728, "grad_norm": 0.36049699614161146, "learning_rate": 6.557911258587069e-06, "loss": 1.3059, "step": 2880 }, { "epoch": 0.737536, "grad_norm": 0.3712984095532251, "learning_rate": 6.545881490874267e-06, "loss": 1.3375, "step": 2881 }, { "epoch": 0.737792, "grad_norm": 0.36773145249240613, "learning_rate": 6.533860607626263e-06, "loss": 1.3383, "step": 2882 }, { "epoch": 0.738048, "grad_norm": 0.3602790819426775, "learning_rate": 6.521848616781079e-06, "loss": 1.3168, "step": 2883 }, { "epoch": 0.738304, "grad_norm": 0.3663960669176364, "learning_rate": 6.50984552627087e-06, "loss": 1.31, "step": 2884 }, { "epoch": 0.73856, "grad_norm": 0.3653924975761182, "learning_rate": 6.4978513440219125e-06, "loss": 1.3455, "step": 2885 }, { "epoch": 0.738816, "grad_norm": 0.35453178324529455, "learning_rate": 6.48586607795459e-06, "loss": 1.2849, "step": 2886 }, { "epoch": 0.739072, "grad_norm": 0.3533894471528382, "learning_rate": 6.47388973598341e-06, "loss": 1.2776, "step": 2887 }, { "epoch": 0.739328, "grad_norm": 0.3738095578280008, "learning_rate": 6.461922326016983e-06, "loss": 1.3508, "step": 2888 }, { "epoch": 0.739584, "grad_norm": 0.36506522560978083, "learning_rate": 6.449963855958031e-06, "loss": 1.3544, "step": 2889 }, { "epoch": 0.73984, "grad_norm": 0.36474066235915004, "learning_rate": 6.438014333703346e-06, "loss": 1.3264, "step": 2890 }, { "epoch": 0.740096, "grad_norm": 0.362413765001432, "learning_rate": 6.426073767143845e-06, "loss": 1.3366, "step": 2891 }, { "epoch": 0.740352, "grad_norm": 0.37652237871704547, "learning_rate": 6.414142164164501e-06, "loss": 1.3598, "step": 2892 }, { "epoch": 0.740608, "grad_norm": 0.36708740171109727, "learning_rate": 6.402219532644385e-06, "loss": 1.3666, "step": 2893 }, { "epoch": 0.740864, "grad_norm": 0.3574138690855728, "learning_rate": 6.3903058804566445e-06, "loss": 1.3346, "step": 2894 }, { "epoch": 0.74112, "grad_norm": 0.35911040744824196, "learning_rate": 6.3784012154684885e-06, "loss": 1.2751, "step": 2895 }, { "epoch": 0.741376, "grad_norm": 0.3570029046643827, "learning_rate": 6.3665055455412e-06, "loss": 1.2907, "step": 2896 }, { "epoch": 0.741632, "grad_norm": 0.365321453743712, "learning_rate": 6.354618878530123e-06, "loss": 1.3358, "step": 2897 }, { "epoch": 0.741888, "grad_norm": 0.3667263140885956, "learning_rate": 6.342741222284636e-06, "loss": 1.3622, "step": 2898 }, { "epoch": 0.742144, "grad_norm": 0.3618842106095794, "learning_rate": 6.330872584648193e-06, "loss": 1.3202, "step": 2899 }, { "epoch": 0.7424, "grad_norm": 0.35834299798938146, "learning_rate": 6.319012973458278e-06, "loss": 1.3469, "step": 2900 }, { "epoch": 0.742656, "grad_norm": 0.3778759831944958, "learning_rate": 6.307162396546429e-06, "loss": 1.3392, "step": 2901 }, { "epoch": 0.742912, "grad_norm": 0.3702861655149208, "learning_rate": 6.29532086173819e-06, "loss": 1.3629, "step": 2902 }, { "epoch": 0.743168, "grad_norm": 0.36856202080908806, "learning_rate": 6.283488376853168e-06, "loss": 1.345, "step": 2903 }, { "epoch": 0.743424, "grad_norm": 0.3558241031055548, "learning_rate": 6.2716649497049654e-06, "loss": 1.2677, "step": 2904 }, { "epoch": 0.74368, "grad_norm": 0.3654000406464242, "learning_rate": 6.2598505881012195e-06, "loss": 1.3086, "step": 2905 }, { "epoch": 0.743936, "grad_norm": 0.4211504297314756, "learning_rate": 6.248045299843577e-06, "loss": 1.3277, "step": 2906 }, { "epoch": 0.744192, "grad_norm": 0.590067654321583, "learning_rate": 6.236249092727693e-06, "loss": 1.3242, "step": 2907 }, { "epoch": 0.744448, "grad_norm": 0.36751186365142957, "learning_rate": 6.224461974543227e-06, "loss": 1.3118, "step": 2908 }, { "epoch": 0.744704, "grad_norm": 0.36372208342595935, "learning_rate": 6.212683953073837e-06, "loss": 1.2856, "step": 2909 }, { "epoch": 0.74496, "grad_norm": 0.36884207500523186, "learning_rate": 6.200915036097177e-06, "loss": 1.3546, "step": 2910 }, { "epoch": 0.745216, "grad_norm": 0.3641748520713031, "learning_rate": 6.189155231384869e-06, "loss": 1.3364, "step": 2911 }, { "epoch": 0.745472, "grad_norm": 0.389141505359334, "learning_rate": 6.17740454670255e-06, "loss": 1.364, "step": 2912 }, { "epoch": 0.745728, "grad_norm": 0.36840891706486234, "learning_rate": 6.165662989809802e-06, "loss": 1.3252, "step": 2913 }, { "epoch": 0.745984, "grad_norm": 0.4772291668007155, "learning_rate": 6.1539305684602e-06, "loss": 1.3209, "step": 2914 }, { "epoch": 0.74624, "grad_norm": 0.3596862934891284, "learning_rate": 6.1422072904012855e-06, "loss": 1.3303, "step": 2915 }, { "epoch": 0.746496, "grad_norm": 0.3658366689634869, "learning_rate": 6.130493163374562e-06, "loss": 1.3529, "step": 2916 }, { "epoch": 0.746752, "grad_norm": 0.3701227665095814, "learning_rate": 6.1187881951154745e-06, "loss": 1.3091, "step": 2917 }, { "epoch": 0.747008, "grad_norm": 0.3592989037539962, "learning_rate": 6.107092393353436e-06, "loss": 1.3253, "step": 2918 }, { "epoch": 0.747264, "grad_norm": 0.3570047933982698, "learning_rate": 6.095405765811804e-06, "loss": 1.345, "step": 2919 }, { "epoch": 0.74752, "grad_norm": 0.356744672855946, "learning_rate": 6.083728320207873e-06, "loss": 1.3315, "step": 2920 }, { "epoch": 0.747776, "grad_norm": 0.3666243470858031, "learning_rate": 6.072060064252878e-06, "loss": 1.3365, "step": 2921 }, { "epoch": 0.748032, "grad_norm": 0.36308971719881444, "learning_rate": 6.060401005651992e-06, "loss": 1.3555, "step": 2922 }, { "epoch": 0.748288, "grad_norm": 0.3874423844897525, "learning_rate": 6.0487511521042954e-06, "loss": 1.3359, "step": 2923 }, { "epoch": 0.748544, "grad_norm": 0.3673286927353204, "learning_rate": 6.037110511302811e-06, "loss": 1.3475, "step": 2924 }, { "epoch": 0.7488, "grad_norm": 0.3751307398897808, "learning_rate": 6.025479090934456e-06, "loss": 1.3532, "step": 2925 }, { "epoch": 0.749056, "grad_norm": 0.35812236855672913, "learning_rate": 6.013856898680081e-06, "loss": 1.2774, "step": 2926 }, { "epoch": 0.749312, "grad_norm": 0.36090308788878, "learning_rate": 6.00224394221443e-06, "loss": 1.2854, "step": 2927 }, { "epoch": 0.749568, "grad_norm": 0.36338299449641465, "learning_rate": 5.990640229206159e-06, "loss": 1.325, "step": 2928 }, { "epoch": 0.749824, "grad_norm": 0.36376627624726776, "learning_rate": 5.9790457673177995e-06, "loss": 1.3271, "step": 2929 }, { "epoch": 0.75008, "grad_norm": 0.36133410891253326, "learning_rate": 5.9674605642057914e-06, "loss": 1.3238, "step": 2930 }, { "epoch": 0.750336, "grad_norm": 0.3606896741827976, "learning_rate": 5.9558846275204605e-06, "loss": 1.3031, "step": 2931 }, { "epoch": 0.750592, "grad_norm": 0.3576954878141839, "learning_rate": 5.944317964906004e-06, "loss": 1.3183, "step": 2932 }, { "epoch": 0.750848, "grad_norm": 0.36007885955600133, "learning_rate": 5.932760584000509e-06, "loss": 1.3381, "step": 2933 }, { "epoch": 0.751104, "grad_norm": 0.3611404133984041, "learning_rate": 5.921212492435913e-06, "loss": 1.3308, "step": 2934 }, { "epoch": 0.75136, "grad_norm": 0.36027785631795367, "learning_rate": 5.9096736978380384e-06, "loss": 1.2989, "step": 2935 }, { "epoch": 0.751616, "grad_norm": 0.36571115154734557, "learning_rate": 5.898144207826553e-06, "loss": 1.333, "step": 2936 }, { "epoch": 0.751872, "grad_norm": 0.369863640491649, "learning_rate": 5.886624030014989e-06, "loss": 1.3047, "step": 2937 }, { "epoch": 0.752128, "grad_norm": 0.35356179798573417, "learning_rate": 5.875113172010733e-06, "loss": 1.3057, "step": 2938 }, { "epoch": 0.752384, "grad_norm": 0.36632536065659166, "learning_rate": 5.86361164141501e-06, "loss": 1.3205, "step": 2939 }, { "epoch": 0.75264, "grad_norm": 0.35460903004533356, "learning_rate": 5.852119445822895e-06, "loss": 1.3417, "step": 2940 }, { "epoch": 0.752896, "grad_norm": 0.3612903572922312, "learning_rate": 5.8406365928232764e-06, "loss": 1.3426, "step": 2941 }, { "epoch": 0.753152, "grad_norm": 0.35598890963150864, "learning_rate": 5.8291630899988995e-06, "loss": 1.3225, "step": 2942 }, { "epoch": 0.753408, "grad_norm": 0.3606739649879641, "learning_rate": 5.8176989449263176e-06, "loss": 1.3255, "step": 2943 }, { "epoch": 0.753664, "grad_norm": 0.3598927715313153, "learning_rate": 5.8062441651759165e-06, "loss": 1.303, "step": 2944 }, { "epoch": 0.75392, "grad_norm": 0.3714596240047786, "learning_rate": 5.794798758311894e-06, "loss": 1.3658, "step": 2945 }, { "epoch": 0.754176, "grad_norm": 0.3535270033350636, "learning_rate": 5.783362731892248e-06, "loss": 1.2817, "step": 2946 }, { "epoch": 0.754432, "grad_norm": 0.3542279010133975, "learning_rate": 5.7719360934688015e-06, "loss": 1.2832, "step": 2947 }, { "epoch": 0.754688, "grad_norm": 0.3589351354008305, "learning_rate": 5.760518850587154e-06, "loss": 1.2969, "step": 2948 }, { "epoch": 0.754944, "grad_norm": 0.3586419025829876, "learning_rate": 5.749111010786721e-06, "loss": 1.3182, "step": 2949 }, { "epoch": 0.7552, "grad_norm": 0.36149707658334906, "learning_rate": 5.7377125816007005e-06, "loss": 1.3072, "step": 2950 }, { "epoch": 0.755456, "grad_norm": 0.3631350656592278, "learning_rate": 5.726323570556076e-06, "loss": 1.3345, "step": 2951 }, { "epoch": 0.755712, "grad_norm": 0.3630057715266046, "learning_rate": 5.714943985173622e-06, "loss": 1.3368, "step": 2952 }, { "epoch": 0.755968, "grad_norm": 0.3674003743456213, "learning_rate": 5.7035738329678654e-06, "loss": 1.3475, "step": 2953 }, { "epoch": 0.756224, "grad_norm": 0.4908129732031185, "learning_rate": 5.692213121447126e-06, "loss": 1.3553, "step": 2954 }, { "epoch": 0.75648, "grad_norm": 0.36788077649295653, "learning_rate": 5.680861858113476e-06, "loss": 1.3499, "step": 2955 }, { "epoch": 0.756736, "grad_norm": 0.35557312197856145, "learning_rate": 5.6695200504627665e-06, "loss": 1.2952, "step": 2956 }, { "epoch": 0.756992, "grad_norm": 0.357811886157127, "learning_rate": 5.658187705984577e-06, "loss": 1.294, "step": 2957 }, { "epoch": 0.757248, "grad_norm": 0.3618713217685472, "learning_rate": 5.646864832162264e-06, "loss": 1.294, "step": 2958 }, { "epoch": 0.757504, "grad_norm": 0.3689907926411865, "learning_rate": 5.635551436472908e-06, "loss": 1.3435, "step": 2959 }, { "epoch": 0.75776, "grad_norm": 0.3607877470701937, "learning_rate": 5.62424752638735e-06, "loss": 1.2987, "step": 2960 }, { "epoch": 0.758016, "grad_norm": 0.35904133610852673, "learning_rate": 5.612953109370154e-06, "loss": 1.3131, "step": 2961 }, { "epoch": 0.758272, "grad_norm": 0.3592936221385696, "learning_rate": 5.601668192879622e-06, "loss": 1.3319, "step": 2962 }, { "epoch": 0.758528, "grad_norm": 0.37066696377475744, "learning_rate": 5.59039278436778e-06, "loss": 1.3344, "step": 2963 }, { "epoch": 0.758784, "grad_norm": 0.36033752899405347, "learning_rate": 5.579126891280382e-06, "loss": 1.3441, "step": 2964 }, { "epoch": 0.75904, "grad_norm": 0.36039021829793527, "learning_rate": 5.56787052105688e-06, "loss": 1.3562, "step": 2965 }, { "epoch": 0.759296, "grad_norm": 0.3600361299524374, "learning_rate": 5.556623681130453e-06, "loss": 1.3199, "step": 2966 }, { "epoch": 0.759552, "grad_norm": 0.35254370864844703, "learning_rate": 5.545386378927988e-06, "loss": 1.2876, "step": 2967 }, { "epoch": 0.759808, "grad_norm": 0.40244468564546243, "learning_rate": 5.534158621870071e-06, "loss": 1.3081, "step": 2968 }, { "epoch": 0.760064, "grad_norm": 0.38579074021309034, "learning_rate": 5.522940417370975e-06, "loss": 1.3377, "step": 2969 }, { "epoch": 0.76032, "grad_norm": 0.3670230014473168, "learning_rate": 5.51173177283868e-06, "loss": 1.3504, "step": 2970 }, { "epoch": 0.760576, "grad_norm": 0.37103367905305157, "learning_rate": 5.500532695674836e-06, "loss": 1.2842, "step": 2971 }, { "epoch": 0.760832, "grad_norm": 0.3531156073938096, "learning_rate": 5.48934319327479e-06, "loss": 1.2919, "step": 2972 }, { "epoch": 0.761088, "grad_norm": 0.3604569565213273, "learning_rate": 5.478163273027561e-06, "loss": 1.3262, "step": 2973 }, { "epoch": 0.761344, "grad_norm": 0.35510970486958565, "learning_rate": 5.466992942315843e-06, "loss": 1.317, "step": 2974 }, { "epoch": 0.7616, "grad_norm": 0.36493346935104093, "learning_rate": 5.455832208515994e-06, "loss": 1.3428, "step": 2975 }, { "epoch": 0.761856, "grad_norm": 0.36089640883655555, "learning_rate": 5.444681078998035e-06, "loss": 1.3137, "step": 2976 }, { "epoch": 0.762112, "grad_norm": 0.3513323364892846, "learning_rate": 5.433539561125652e-06, "loss": 1.2841, "step": 2977 }, { "epoch": 0.762368, "grad_norm": 0.5213081392785177, "learning_rate": 5.4224076622561665e-06, "loss": 1.3131, "step": 2978 }, { "epoch": 0.762624, "grad_norm": 0.36112417976360284, "learning_rate": 5.411285389740568e-06, "loss": 1.2842, "step": 2979 }, { "epoch": 0.76288, "grad_norm": 0.36975607279547196, "learning_rate": 5.400172750923472e-06, "loss": 1.3382, "step": 2980 }, { "epoch": 0.763136, "grad_norm": 0.35979062094475533, "learning_rate": 5.389069753143143e-06, "loss": 1.3465, "step": 2981 }, { "epoch": 0.763392, "grad_norm": 0.35835454438140196, "learning_rate": 5.37797640373148e-06, "loss": 1.2973, "step": 2982 }, { "epoch": 0.763648, "grad_norm": 0.3593876419585883, "learning_rate": 5.3668927100140136e-06, "loss": 1.3099, "step": 2983 }, { "epoch": 0.763904, "grad_norm": 0.3575624526026183, "learning_rate": 5.3558186793098744e-06, "loss": 1.2857, "step": 2984 }, { "epoch": 0.76416, "grad_norm": 0.3687036099013438, "learning_rate": 5.344754318931842e-06, "loss": 1.3453, "step": 2985 }, { "epoch": 0.764416, "grad_norm": 0.37214103097873885, "learning_rate": 5.333699636186296e-06, "loss": 1.3811, "step": 2986 }, { "epoch": 0.764672, "grad_norm": 0.4013738907885611, "learning_rate": 5.322654638373224e-06, "loss": 1.3639, "step": 2987 }, { "epoch": 0.764928, "grad_norm": 0.3525476526160913, "learning_rate": 5.311619332786224e-06, "loss": 1.3056, "step": 2988 }, { "epoch": 0.765184, "grad_norm": 0.35760920276407876, "learning_rate": 5.300593726712497e-06, "loss": 1.3149, "step": 2989 }, { "epoch": 0.76544, "grad_norm": 0.4102311154254802, "learning_rate": 5.289577827432819e-06, "loss": 1.3621, "step": 2990 }, { "epoch": 0.765696, "grad_norm": 0.35748288581948084, "learning_rate": 5.278571642221584e-06, "loss": 1.3005, "step": 2991 }, { "epoch": 0.765952, "grad_norm": 0.3540493531722397, "learning_rate": 5.2675751783467445e-06, "loss": 1.2995, "step": 2992 }, { "epoch": 0.766208, "grad_norm": 0.3683871517837391, "learning_rate": 5.256588443069853e-06, "loss": 1.3318, "step": 2993 }, { "epoch": 0.766464, "grad_norm": 0.35562426147612786, "learning_rate": 5.245611443646028e-06, "loss": 1.3032, "step": 2994 }, { "epoch": 0.76672, "grad_norm": 0.3610374896117163, "learning_rate": 5.23464418732397e-06, "loss": 1.3595, "step": 2995 }, { "epoch": 0.766976, "grad_norm": 0.3534251717563875, "learning_rate": 5.223686681345926e-06, "loss": 1.3228, "step": 2996 }, { "epoch": 0.767232, "grad_norm": 0.3628044160204886, "learning_rate": 5.212738932947718e-06, "loss": 1.3188, "step": 2997 }, { "epoch": 0.767488, "grad_norm": 0.3652190338148601, "learning_rate": 5.2018009493587285e-06, "loss": 1.3328, "step": 2998 }, { "epoch": 0.767744, "grad_norm": 0.3621398562741921, "learning_rate": 5.1908727378018796e-06, "loss": 1.3365, "step": 2999 }, { "epoch": 0.768, "grad_norm": 1.7120380965373587, "learning_rate": 5.179954305493651e-06, "loss": 1.3356, "step": 3000 }, { "epoch": 0.768256, "grad_norm": 0.35834378169321823, "learning_rate": 5.169045659644061e-06, "loss": 1.3285, "step": 3001 }, { "epoch": 0.768512, "grad_norm": 0.3610209931988971, "learning_rate": 5.158146807456663e-06, "loss": 1.2865, "step": 3002 }, { "epoch": 0.768768, "grad_norm": 0.3732257662595727, "learning_rate": 5.147257756128538e-06, "loss": 1.3799, "step": 3003 }, { "epoch": 0.769024, "grad_norm": 0.37134882217526066, "learning_rate": 5.136378512850304e-06, "loss": 1.315, "step": 3004 }, { "epoch": 0.76928, "grad_norm": 0.3705311725333239, "learning_rate": 5.125509084806104e-06, "loss": 1.3327, "step": 3005 }, { "epoch": 0.769536, "grad_norm": 0.3637577177691028, "learning_rate": 5.114649479173592e-06, "loss": 1.3448, "step": 3006 }, { "epoch": 0.769792, "grad_norm": 0.3620461827635551, "learning_rate": 5.1037997031239485e-06, "loss": 1.336, "step": 3007 }, { "epoch": 0.770048, "grad_norm": 0.3593466735035229, "learning_rate": 5.092959763821836e-06, "loss": 1.3295, "step": 3008 }, { "epoch": 0.770304, "grad_norm": 0.36375987192045367, "learning_rate": 5.082129668425451e-06, "loss": 1.3387, "step": 3009 }, { "epoch": 0.77056, "grad_norm": 0.3534892536722086, "learning_rate": 5.071309424086472e-06, "loss": 1.3161, "step": 3010 }, { "epoch": 0.770816, "grad_norm": 0.35490045714028706, "learning_rate": 5.0604990379500795e-06, "loss": 1.3109, "step": 3011 }, { "epoch": 0.771072, "grad_norm": 0.35703471750054855, "learning_rate": 5.049698517154951e-06, "loss": 1.3107, "step": 3012 }, { "epoch": 0.771328, "grad_norm": 0.3575705202311698, "learning_rate": 5.038907868833225e-06, "loss": 1.3066, "step": 3013 }, { "epoch": 0.771584, "grad_norm": 0.36708405680932127, "learning_rate": 5.0281271001105495e-06, "loss": 1.3391, "step": 3014 }, { "epoch": 0.77184, "grad_norm": 0.36610052132503385, "learning_rate": 5.017356218106022e-06, "loss": 1.369, "step": 3015 }, { "epoch": 0.772096, "grad_norm": 0.35423563760381266, "learning_rate": 5.006595229932234e-06, "loss": 1.2638, "step": 3016 }, { "epoch": 0.772352, "grad_norm": 0.3745075190053957, "learning_rate": 4.99584414269523e-06, "loss": 1.3633, "step": 3017 }, { "epoch": 0.772608, "grad_norm": 0.377354794170555, "learning_rate": 4.985102963494524e-06, "loss": 1.35, "step": 3018 }, { "epoch": 0.772864, "grad_norm": 0.3607129342072355, "learning_rate": 4.974371699423088e-06, "loss": 1.3005, "step": 3019 }, { "epoch": 0.77312, "grad_norm": 0.36155354105682896, "learning_rate": 4.9636503575673315e-06, "loss": 1.3372, "step": 3020 }, { "epoch": 0.773376, "grad_norm": 0.36597648814147365, "learning_rate": 4.952938945007127e-06, "loss": 1.3266, "step": 3021 }, { "epoch": 0.773632, "grad_norm": 0.3663782796535604, "learning_rate": 4.942237468815785e-06, "loss": 1.3768, "step": 3022 }, { "epoch": 0.773888, "grad_norm": 0.353994081689147, "learning_rate": 4.931545936060058e-06, "loss": 1.2683, "step": 3023 }, { "epoch": 0.774144, "grad_norm": 0.3790368190041, "learning_rate": 4.920864353800135e-06, "loss": 1.3706, "step": 3024 }, { "epoch": 0.7744, "grad_norm": 0.656109240984305, "learning_rate": 4.91019272908962e-06, "loss": 1.3788, "step": 3025 }, { "epoch": 0.774656, "grad_norm": 0.3599626949606184, "learning_rate": 4.899531068975547e-06, "loss": 1.3111, "step": 3026 }, { "epoch": 0.774912, "grad_norm": 0.35429384979386275, "learning_rate": 4.888879380498377e-06, "loss": 1.2693, "step": 3027 }, { "epoch": 0.775168, "grad_norm": 0.3578535066663329, "learning_rate": 4.878237670691985e-06, "loss": 1.3241, "step": 3028 }, { "epoch": 0.775424, "grad_norm": 0.3668962234222316, "learning_rate": 4.867605946583653e-06, "loss": 1.3518, "step": 3029 }, { "epoch": 0.77568, "grad_norm": 0.36596079299208684, "learning_rate": 4.856984215194067e-06, "loss": 1.3239, "step": 3030 }, { "epoch": 0.775936, "grad_norm": 0.36986631828555855, "learning_rate": 4.846372483537327e-06, "loss": 1.3706, "step": 3031 }, { "epoch": 0.776192, "grad_norm": 0.35252381816918543, "learning_rate": 4.835770758620906e-06, "loss": 1.2767, "step": 3032 }, { "epoch": 0.776448, "grad_norm": 0.36031430417787896, "learning_rate": 4.8251790474456875e-06, "loss": 1.3344, "step": 3033 }, { "epoch": 0.776704, "grad_norm": 0.36004813424433374, "learning_rate": 4.814597357005939e-06, "loss": 1.3104, "step": 3034 }, { "epoch": 0.77696, "grad_norm": 0.36729935517547574, "learning_rate": 4.804025694289316e-06, "loss": 1.3297, "step": 3035 }, { "epoch": 0.777216, "grad_norm": 0.3560090275587045, "learning_rate": 4.793464066276831e-06, "loss": 1.3167, "step": 3036 }, { "epoch": 0.777472, "grad_norm": 0.36207999379669126, "learning_rate": 4.782912479942894e-06, "loss": 1.3114, "step": 3037 }, { "epoch": 0.777728, "grad_norm": 0.3591899433769941, "learning_rate": 4.7723709422552755e-06, "loss": 1.2958, "step": 3038 }, { "epoch": 0.777984, "grad_norm": 0.3977382515063168, "learning_rate": 4.761839460175104e-06, "loss": 1.3696, "step": 3039 }, { "epoch": 0.77824, "grad_norm": 0.35620292489734334, "learning_rate": 4.751318040656874e-06, "loss": 1.3346, "step": 3040 }, { "epoch": 0.778496, "grad_norm": 0.3650217523570914, "learning_rate": 4.740806690648438e-06, "loss": 1.3353, "step": 3041 }, { "epoch": 0.778752, "grad_norm": 0.3684425763049362, "learning_rate": 4.730305417090992e-06, "loss": 1.3276, "step": 3042 }, { "epoch": 0.779008, "grad_norm": 0.3712144903317603, "learning_rate": 4.719814226919084e-06, "loss": 1.3491, "step": 3043 }, { "epoch": 0.779264, "grad_norm": 0.3561238382584264, "learning_rate": 4.709333127060605e-06, "loss": 1.2943, "step": 3044 }, { "epoch": 0.77952, "grad_norm": 0.37205110042317996, "learning_rate": 4.698862124436767e-06, "loss": 1.3687, "step": 3045 }, { "epoch": 0.779776, "grad_norm": 0.3695584912763628, "learning_rate": 4.6884012259621316e-06, "loss": 1.3684, "step": 3046 }, { "epoch": 0.780032, "grad_norm": 0.36329097685625406, "learning_rate": 4.67795043854459e-06, "loss": 1.3214, "step": 3047 }, { "epoch": 0.780288, "grad_norm": 0.3798693015592709, "learning_rate": 4.667509769085334e-06, "loss": 1.3809, "step": 3048 }, { "epoch": 0.780544, "grad_norm": 0.3601097934242932, "learning_rate": 4.6570792244789e-06, "loss": 1.3306, "step": 3049 }, { "epoch": 0.7808, "grad_norm": 0.35446125225039316, "learning_rate": 4.646658811613127e-06, "loss": 1.3347, "step": 3050 }, { "epoch": 0.781056, "grad_norm": 0.36405550438257583, "learning_rate": 4.636248537369156e-06, "loss": 1.3372, "step": 3051 }, { "epoch": 0.781312, "grad_norm": 0.3617185366756447, "learning_rate": 4.625848408621447e-06, "loss": 1.3214, "step": 3052 }, { "epoch": 0.781568, "grad_norm": 0.3584437977949366, "learning_rate": 4.615458432237751e-06, "loss": 1.3416, "step": 3053 }, { "epoch": 0.781824, "grad_norm": 0.37686068517129695, "learning_rate": 4.6050786150791216e-06, "loss": 1.3802, "step": 3054 }, { "epoch": 0.78208, "grad_norm": 0.3557512741068298, "learning_rate": 4.594708963999897e-06, "loss": 1.3275, "step": 3055 }, { "epoch": 0.782336, "grad_norm": 0.35954325587381625, "learning_rate": 4.58434948584771e-06, "loss": 1.33, "step": 3056 }, { "epoch": 0.782592, "grad_norm": 0.3599903432148184, "learning_rate": 4.574000187463466e-06, "loss": 1.3064, "step": 3057 }, { "epoch": 0.782848, "grad_norm": 0.35780778909053024, "learning_rate": 4.563661075681356e-06, "loss": 1.3003, "step": 3058 }, { "epoch": 0.783104, "grad_norm": 0.360807357309411, "learning_rate": 4.553332157328836e-06, "loss": 1.3272, "step": 3059 }, { "epoch": 0.78336, "grad_norm": 0.3691923761012712, "learning_rate": 4.54301343922664e-06, "loss": 1.3378, "step": 3060 }, { "epoch": 0.783616, "grad_norm": 0.408705525371534, "learning_rate": 4.532704928188763e-06, "loss": 1.3532, "step": 3061 }, { "epoch": 0.783872, "grad_norm": 0.36928712972583044, "learning_rate": 4.522406631022464e-06, "loss": 1.3937, "step": 3062 }, { "epoch": 0.784128, "grad_norm": 0.3805677952592156, "learning_rate": 4.512118554528242e-06, "loss": 1.3025, "step": 3063 }, { "epoch": 0.784384, "grad_norm": 0.369907891553738, "learning_rate": 4.50184070549986e-06, "loss": 1.3244, "step": 3064 }, { "epoch": 0.78464, "grad_norm": 0.356138064912598, "learning_rate": 4.491573090724328e-06, "loss": 1.2956, "step": 3065 }, { "epoch": 0.784896, "grad_norm": 0.3586200227620091, "learning_rate": 4.481315716981891e-06, "loss": 1.3312, "step": 3066 }, { "epoch": 0.785152, "grad_norm": 0.4061509126427212, "learning_rate": 4.4710685910460375e-06, "loss": 1.3655, "step": 3067 }, { "epoch": 0.785408, "grad_norm": 0.35601729097642537, "learning_rate": 4.46083171968349e-06, "loss": 1.2744, "step": 3068 }, { "epoch": 0.785664, "grad_norm": 0.3655100115357683, "learning_rate": 4.450605109654182e-06, "loss": 1.317, "step": 3069 }, { "epoch": 0.78592, "grad_norm": 0.3761978358436829, "learning_rate": 4.440388767711299e-06, "loss": 1.3388, "step": 3070 }, { "epoch": 0.786176, "grad_norm": 0.3554722523987874, "learning_rate": 4.430182700601218e-06, "loss": 1.314, "step": 3071 }, { "epoch": 0.786432, "grad_norm": 0.3575984379723292, "learning_rate": 4.419986915063548e-06, "loss": 1.3274, "step": 3072 }, { "epoch": 0.786688, "grad_norm": 0.3565129851797361, "learning_rate": 4.409801417831105e-06, "loss": 1.3432, "step": 3073 }, { "epoch": 0.786944, "grad_norm": 0.45537671516641026, "learning_rate": 4.399626215629917e-06, "loss": 1.3382, "step": 3074 }, { "epoch": 0.7872, "grad_norm": 0.35988155865265975, "learning_rate": 4.389461315179198e-06, "loss": 1.3515, "step": 3075 }, { "epoch": 0.787456, "grad_norm": 0.36114324128440006, "learning_rate": 4.379306723191372e-06, "loss": 1.3569, "step": 3076 }, { "epoch": 0.787712, "grad_norm": 0.3673284889231303, "learning_rate": 4.369162446372052e-06, "loss": 1.3031, "step": 3077 }, { "epoch": 0.787968, "grad_norm": 0.35692768806112357, "learning_rate": 4.359028491420039e-06, "loss": 1.3032, "step": 3078 }, { "epoch": 0.788224, "grad_norm": 0.35945720879929977, "learning_rate": 4.348904865027321e-06, "loss": 1.3131, "step": 3079 }, { "epoch": 0.78848, "grad_norm": 0.35461340813246045, "learning_rate": 4.3387915738790666e-06, "loss": 1.3094, "step": 3080 }, { "epoch": 0.788736, "grad_norm": 0.36165300799149924, "learning_rate": 4.3286886246536096e-06, "loss": 1.3376, "step": 3081 }, { "epoch": 0.788992, "grad_norm": 0.38868977657211395, "learning_rate": 4.318596024022457e-06, "loss": 1.2953, "step": 3082 }, { "epoch": 0.789248, "grad_norm": 0.35858624252787263, "learning_rate": 4.308513778650292e-06, "loss": 1.3337, "step": 3083 }, { "epoch": 0.789504, "grad_norm": 0.35453334381185514, "learning_rate": 4.298441895194952e-06, "loss": 1.3027, "step": 3084 }, { "epoch": 0.78976, "grad_norm": 0.3505993486544977, "learning_rate": 4.288380380307433e-06, "loss": 1.2952, "step": 3085 }, { "epoch": 0.790016, "grad_norm": 0.3594786774181754, "learning_rate": 4.278329240631891e-06, "loss": 1.2943, "step": 3086 }, { "epoch": 0.790272, "grad_norm": 0.35702731393577514, "learning_rate": 4.268288482805614e-06, "loss": 1.3134, "step": 3087 }, { "epoch": 0.790528, "grad_norm": 0.36179036682440446, "learning_rate": 4.2582581134590465e-06, "loss": 1.3464, "step": 3088 }, { "epoch": 0.790784, "grad_norm": 0.37112645772937886, "learning_rate": 4.248238139215775e-06, "loss": 1.3394, "step": 3089 }, { "epoch": 0.79104, "grad_norm": 0.3524229502227345, "learning_rate": 4.238228566692517e-06, "loss": 1.3004, "step": 3090 }, { "epoch": 0.791296, "grad_norm": 0.3700762586081203, "learning_rate": 4.2282294024991245e-06, "loss": 1.3474, "step": 3091 }, { "epoch": 0.791552, "grad_norm": 0.3707112495739379, "learning_rate": 4.218240653238566e-06, "loss": 1.3368, "step": 3092 }, { "epoch": 0.791808, "grad_norm": 0.3731556688911466, "learning_rate": 4.208262325506946e-06, "loss": 1.3325, "step": 3093 }, { "epoch": 0.792064, "grad_norm": 0.6725157802416758, "learning_rate": 4.198294425893476e-06, "loss": 1.3339, "step": 3094 }, { "epoch": 0.79232, "grad_norm": 0.3592065906070425, "learning_rate": 4.18833696098049e-06, "loss": 1.3136, "step": 3095 }, { "epoch": 0.792576, "grad_norm": 0.36647542390567395, "learning_rate": 4.178389937343425e-06, "loss": 1.3224, "step": 3096 }, { "epoch": 0.792832, "grad_norm": 0.36374453954357416, "learning_rate": 4.16845336155083e-06, "loss": 1.3099, "step": 3097 }, { "epoch": 0.793088, "grad_norm": 0.36455629125768607, "learning_rate": 4.158527240164347e-06, "loss": 1.3394, "step": 3098 }, { "epoch": 0.793344, "grad_norm": 0.36534109842783513, "learning_rate": 4.148611579738726e-06, "loss": 1.3744, "step": 3099 }, { "epoch": 0.7936, "grad_norm": 0.3721046078308637, "learning_rate": 4.138706386821789e-06, "loss": 1.3546, "step": 3100 }, { "epoch": 0.793856, "grad_norm": 0.35627732268861445, "learning_rate": 4.128811667954464e-06, "loss": 1.3114, "step": 3101 }, { "epoch": 0.794112, "grad_norm": 0.3681762149827043, "learning_rate": 4.118927429670756e-06, "loss": 1.3422, "step": 3102 }, { "epoch": 0.794368, "grad_norm": 0.3993286289146559, "learning_rate": 4.109053678497754e-06, "loss": 1.3146, "step": 3103 }, { "epoch": 0.794624, "grad_norm": 0.36074878999159404, "learning_rate": 4.099190420955607e-06, "loss": 1.3379, "step": 3104 }, { "epoch": 0.79488, "grad_norm": 0.3568809961976823, "learning_rate": 4.089337663557558e-06, "loss": 1.3019, "step": 3105 }, { "epoch": 0.795136, "grad_norm": 0.3769418321764292, "learning_rate": 4.079495412809886e-06, "loss": 1.3586, "step": 3106 }, { "epoch": 0.795392, "grad_norm": 0.36594227241105376, "learning_rate": 4.069663675211961e-06, "loss": 1.3327, "step": 3107 }, { "epoch": 0.795648, "grad_norm": 0.4714751645666523, "learning_rate": 4.059842457256193e-06, "loss": 1.3036, "step": 3108 }, { "epoch": 0.795904, "grad_norm": 0.3664049636985469, "learning_rate": 4.050031765428055e-06, "loss": 1.3418, "step": 3109 }, { "epoch": 0.79616, "grad_norm": 0.360443563845632, "learning_rate": 4.040231606206062e-06, "loss": 1.3159, "step": 3110 }, { "epoch": 0.796416, "grad_norm": 0.363910078885372, "learning_rate": 4.0304419860617835e-06, "loss": 1.347, "step": 3111 }, { "epoch": 0.796672, "grad_norm": 0.3670346736568895, "learning_rate": 4.020662911459812e-06, "loss": 1.3349, "step": 3112 }, { "epoch": 0.796928, "grad_norm": 0.3841657840535063, "learning_rate": 4.010894388857789e-06, "loss": 1.3613, "step": 3113 }, { "epoch": 0.797184, "grad_norm": 0.3593177124527256, "learning_rate": 4.0011364247063955e-06, "loss": 1.2885, "step": 3114 }, { "epoch": 0.79744, "grad_norm": 0.3525040279886287, "learning_rate": 3.991389025449317e-06, "loss": 1.3016, "step": 3115 }, { "epoch": 0.797696, "grad_norm": 0.37989940121636073, "learning_rate": 3.9816521975232805e-06, "loss": 1.3253, "step": 3116 }, { "epoch": 0.797952, "grad_norm": 0.3729689130394653, "learning_rate": 3.971925947358035e-06, "loss": 1.371, "step": 3117 }, { "epoch": 0.798208, "grad_norm": 0.3589802435167401, "learning_rate": 3.962210281376326e-06, "loss": 1.3222, "step": 3118 }, { "epoch": 0.798464, "grad_norm": 0.35248666608324913, "learning_rate": 3.952505205993926e-06, "loss": 1.301, "step": 3119 }, { "epoch": 0.79872, "grad_norm": 0.3552866384274521, "learning_rate": 3.942810727619608e-06, "loss": 1.3053, "step": 3120 }, { "epoch": 0.798976, "grad_norm": 0.3654536331927668, "learning_rate": 3.933126852655147e-06, "loss": 1.3536, "step": 3121 }, { "epoch": 0.799232, "grad_norm": 0.3540552016708653, "learning_rate": 3.923453587495319e-06, "loss": 1.3106, "step": 3122 }, { "epoch": 0.799488, "grad_norm": 0.3645784152942407, "learning_rate": 3.913790938527897e-06, "loss": 1.3593, "step": 3123 }, { "epoch": 0.799744, "grad_norm": 1.0262478738957521, "learning_rate": 3.9041389121336255e-06, "loss": 1.325, "step": 3124 }, { "epoch": 0.8, "grad_norm": 0.36324752042476083, "learning_rate": 3.894497514686255e-06, "loss": 1.3634, "step": 3125 }, { "epoch": 0.800256, "grad_norm": 0.35404200570858796, "learning_rate": 3.8848667525525115e-06, "loss": 1.3083, "step": 3126 }, { "epoch": 0.800512, "grad_norm": 0.3594512835939081, "learning_rate": 3.875246632092089e-06, "loss": 1.3372, "step": 3127 }, { "epoch": 0.800768, "grad_norm": 0.3592508194905306, "learning_rate": 3.865637159657662e-06, "loss": 1.3265, "step": 3128 }, { "epoch": 0.801024, "grad_norm": 0.3584998620531033, "learning_rate": 3.856038341594883e-06, "loss": 1.3534, "step": 3129 }, { "epoch": 0.80128, "grad_norm": 0.3655001505769903, "learning_rate": 3.846450184242343e-06, "loss": 1.3045, "step": 3130 }, { "epoch": 0.801536, "grad_norm": 0.35884368539448896, "learning_rate": 3.836872693931615e-06, "loss": 1.359, "step": 3131 }, { "epoch": 0.801792, "grad_norm": 0.35540195677178904, "learning_rate": 3.827305876987224e-06, "loss": 1.3314, "step": 3132 }, { "epoch": 0.802048, "grad_norm": 0.9087730178002286, "learning_rate": 3.817749739726642e-06, "loss": 1.371, "step": 3133 }, { "epoch": 0.802304, "grad_norm": 0.3710648127870917, "learning_rate": 3.808204288460289e-06, "loss": 1.3496, "step": 3134 }, { "epoch": 0.80256, "grad_norm": 0.36483582362133865, "learning_rate": 3.798669529491541e-06, "loss": 1.2893, "step": 3135 }, { "epoch": 0.802816, "grad_norm": 0.3629681329191201, "learning_rate": 3.7891454691166885e-06, "loss": 1.3251, "step": 3136 }, { "epoch": 0.803072, "grad_norm": 0.35627904565012886, "learning_rate": 3.7796321136249825e-06, "loss": 1.3355, "step": 3137 }, { "epoch": 0.803328, "grad_norm": 0.36556979995829236, "learning_rate": 3.7701294692985844e-06, "loss": 1.3245, "step": 3138 }, { "epoch": 0.803584, "grad_norm": 0.35579056682486027, "learning_rate": 3.7606375424125953e-06, "loss": 1.3199, "step": 3139 }, { "epoch": 0.80384, "grad_norm": 0.35310407482070594, "learning_rate": 3.751156339235038e-06, "loss": 1.3081, "step": 3140 }, { "epoch": 0.804096, "grad_norm": 0.35767516665581484, "learning_rate": 3.7416858660268563e-06, "loss": 1.2938, "step": 3141 }, { "epoch": 0.804352, "grad_norm": 0.36217721165714883, "learning_rate": 3.7322261290418915e-06, "loss": 1.3282, "step": 3142 }, { "epoch": 0.804608, "grad_norm": 0.3619076136991956, "learning_rate": 3.722777134526914e-06, "loss": 1.3362, "step": 3143 }, { "epoch": 0.804864, "grad_norm": 0.35390781125231585, "learning_rate": 3.713338888721594e-06, "loss": 1.3034, "step": 3144 }, { "epoch": 0.80512, "grad_norm": 0.3972857130932456, "learning_rate": 3.7039113978585017e-06, "loss": 1.3285, "step": 3145 }, { "epoch": 0.805376, "grad_norm": 0.3710543134383761, "learning_rate": 3.694494668163109e-06, "loss": 1.3393, "step": 3146 }, { "epoch": 0.805632, "grad_norm": 0.3549013195571846, "learning_rate": 3.685088705853783e-06, "loss": 1.3023, "step": 3147 }, { "epoch": 0.805888, "grad_norm": 0.35597383801760296, "learning_rate": 3.6756935171417695e-06, "loss": 1.3135, "step": 3148 }, { "epoch": 0.806144, "grad_norm": 0.3618699620579227, "learning_rate": 3.6663091082312183e-06, "loss": 1.3352, "step": 3149 }, { "epoch": 0.8064, "grad_norm": 0.36112201626608265, "learning_rate": 3.6569354853191353e-06, "loss": 1.3084, "step": 3150 }, { "epoch": 0.806656, "grad_norm": 0.3639824258678471, "learning_rate": 3.6475726545954283e-06, "loss": 1.3489, "step": 3151 }, { "epoch": 0.806912, "grad_norm": 0.3524328259369818, "learning_rate": 3.6382206222428674e-06, "loss": 1.305, "step": 3152 }, { "epoch": 0.807168, "grad_norm": 0.3582988998964472, "learning_rate": 3.628879394437095e-06, "loss": 1.3129, "step": 3153 }, { "epoch": 0.807424, "grad_norm": 0.3492270683518275, "learning_rate": 3.6195489773466117e-06, "loss": 1.2985, "step": 3154 }, { "epoch": 0.80768, "grad_norm": 0.35918442726578187, "learning_rate": 3.610229377132788e-06, "loss": 1.3109, "step": 3155 }, { "epoch": 0.807936, "grad_norm": 0.3483311741768826, "learning_rate": 3.6009205999498488e-06, "loss": 1.2635, "step": 3156 }, { "epoch": 0.808192, "grad_norm": 0.3576191488662313, "learning_rate": 3.5916226519448684e-06, "loss": 1.3471, "step": 3157 }, { "epoch": 0.808448, "grad_norm": 0.35773411100695934, "learning_rate": 3.5823355392577795e-06, "loss": 1.3014, "step": 3158 }, { "epoch": 0.808704, "grad_norm": 0.3523857178861155, "learning_rate": 3.5730592680213437e-06, "loss": 1.2739, "step": 3159 }, { "epoch": 0.80896, "grad_norm": 0.3547318520331463, "learning_rate": 3.563793844361183e-06, "loss": 1.3088, "step": 3160 }, { "epoch": 0.809216, "grad_norm": 0.35638832452938773, "learning_rate": 3.554539274395734e-06, "loss": 1.3276, "step": 3161 }, { "epoch": 0.809472, "grad_norm": 0.3700067930580701, "learning_rate": 3.5452955642362817e-06, "loss": 1.2674, "step": 3162 }, { "epoch": 0.809728, "grad_norm": 0.35053562894613155, "learning_rate": 3.536062719986939e-06, "loss": 1.2571, "step": 3163 }, { "epoch": 0.809984, "grad_norm": 0.360168570437748, "learning_rate": 3.5268407477446397e-06, "loss": 1.3479, "step": 3164 }, { "epoch": 0.81024, "grad_norm": 0.3601469099028122, "learning_rate": 3.5176296535991393e-06, "loss": 1.3302, "step": 3165 }, { "epoch": 0.810496, "grad_norm": 0.3505621329869148, "learning_rate": 3.508429443633012e-06, "loss": 1.2728, "step": 3166 }, { "epoch": 0.810752, "grad_norm": 0.35654691884651646, "learning_rate": 3.4992401239216344e-06, "loss": 1.3426, "step": 3167 }, { "epoch": 0.811008, "grad_norm": 0.3481711831110968, "learning_rate": 3.490061700533205e-06, "loss": 1.3078, "step": 3168 }, { "epoch": 0.811264, "grad_norm": 0.3575555633068931, "learning_rate": 3.48089417952872e-06, "loss": 1.321, "step": 3169 }, { "epoch": 0.81152, "grad_norm": 0.3563789857495105, "learning_rate": 3.4717375669619836e-06, "loss": 1.3228, "step": 3170 }, { "epoch": 0.811776, "grad_norm": 0.353567446602589, "learning_rate": 3.462591868879579e-06, "loss": 1.3118, "step": 3171 }, { "epoch": 0.812032, "grad_norm": 0.364570510846547, "learning_rate": 3.4534570913209064e-06, "loss": 1.3662, "step": 3172 }, { "epoch": 0.812288, "grad_norm": 0.36135528057612365, "learning_rate": 3.4443332403181274e-06, "loss": 1.3331, "step": 3173 }, { "epoch": 0.812544, "grad_norm": 0.36456315571620285, "learning_rate": 3.435220321896211e-06, "loss": 1.3717, "step": 3174 }, { "epoch": 0.8128, "grad_norm": 0.36902814769745346, "learning_rate": 3.4261183420728973e-06, "loss": 1.3518, "step": 3175 }, { "epoch": 0.813056, "grad_norm": 0.3662245019399536, "learning_rate": 3.4170273068587044e-06, "loss": 1.3101, "step": 3176 }, { "epoch": 0.813312, "grad_norm": 0.3712202445845513, "learning_rate": 3.407947222256922e-06, "loss": 1.3775, "step": 3177 }, { "epoch": 0.813568, "grad_norm": 0.3621948131073107, "learning_rate": 3.398878094263616e-06, "loss": 1.3331, "step": 3178 }, { "epoch": 0.813824, "grad_norm": 0.3548347908253831, "learning_rate": 3.3898199288675972e-06, "loss": 1.3433, "step": 3179 }, { "epoch": 0.81408, "grad_norm": 0.35195107350278804, "learning_rate": 3.380772732050459e-06, "loss": 1.3076, "step": 3180 }, { "epoch": 0.814336, "grad_norm": 0.357501334363723, "learning_rate": 3.371736509786547e-06, "loss": 1.3289, "step": 3181 }, { "epoch": 0.814592, "grad_norm": 0.36166807012869606, "learning_rate": 3.362711268042944e-06, "loss": 1.3547, "step": 3182 }, { "epoch": 0.814848, "grad_norm": 0.3548140991611999, "learning_rate": 3.353697012779502e-06, "loss": 1.3021, "step": 3183 }, { "epoch": 0.815104, "grad_norm": 0.3568677865210114, "learning_rate": 3.344693749948813e-06, "loss": 1.3237, "step": 3184 }, { "epoch": 0.81536, "grad_norm": 0.35965158273244024, "learning_rate": 3.335701485496197e-06, "loss": 1.3507, "step": 3185 }, { "epoch": 0.815616, "grad_norm": 0.371103105303262, "learning_rate": 3.326720225359723e-06, "loss": 1.3272, "step": 3186 }, { "epoch": 0.815872, "grad_norm": 0.352526575851749, "learning_rate": 3.317749975470197e-06, "loss": 1.2738, "step": 3187 }, { "epoch": 0.816128, "grad_norm": 0.36596250426269034, "learning_rate": 3.3087907417511443e-06, "loss": 1.3588, "step": 3188 }, { "epoch": 0.816384, "grad_norm": 0.35611919763538197, "learning_rate": 3.299842530118822e-06, "loss": 1.2851, "step": 3189 }, { "epoch": 0.81664, "grad_norm": 0.35591105014991437, "learning_rate": 3.2909053464822093e-06, "loss": 1.31, "step": 3190 }, { "epoch": 0.816896, "grad_norm": 0.3597566203194, "learning_rate": 3.281979196742995e-06, "loss": 1.304, "step": 3191 }, { "epoch": 0.817152, "grad_norm": 0.3491135942161179, "learning_rate": 3.2730640867955854e-06, "loss": 1.3015, "step": 3192 }, { "epoch": 0.817408, "grad_norm": 0.3587440743190293, "learning_rate": 3.26416002252711e-06, "loss": 1.3187, "step": 3193 }, { "epoch": 0.817664, "grad_norm": 0.35309370850293453, "learning_rate": 3.255267009817378e-06, "loss": 1.3266, "step": 3194 }, { "epoch": 0.81792, "grad_norm": 0.35979789067375306, "learning_rate": 3.246385054538923e-06, "loss": 1.3274, "step": 3195 }, { "epoch": 0.818176, "grad_norm": 0.3592782212990604, "learning_rate": 3.237514162556972e-06, "loss": 1.3567, "step": 3196 }, { "epoch": 0.818432, "grad_norm": 0.3511406056643651, "learning_rate": 3.2286543397294336e-06, "loss": 1.274, "step": 3197 }, { "epoch": 0.818688, "grad_norm": 0.3490984920523951, "learning_rate": 3.2198055919069217e-06, "loss": 1.3127, "step": 3198 }, { "epoch": 0.818944, "grad_norm": 0.3652287514432127, "learning_rate": 3.210967924932733e-06, "loss": 1.4128, "step": 3199 }, { "epoch": 0.8192, "grad_norm": 0.3637268186353478, "learning_rate": 3.202141344642844e-06, "loss": 1.3575, "step": 3200 }, { "epoch": 0.819456, "grad_norm": 0.3567280680340106, "learning_rate": 3.1933258568659143e-06, "loss": 1.306, "step": 3201 }, { "epoch": 0.819712, "grad_norm": 0.35438668700471676, "learning_rate": 3.184521467423276e-06, "loss": 1.3308, "step": 3202 }, { "epoch": 0.819968, "grad_norm": 0.36006515188234234, "learning_rate": 3.1757281821289276e-06, "loss": 1.3404, "step": 3203 }, { "epoch": 0.820224, "grad_norm": 0.35606437645870986, "learning_rate": 3.1669460067895464e-06, "loss": 1.3096, "step": 3204 }, { "epoch": 0.82048, "grad_norm": 0.35469815719372927, "learning_rate": 3.1581749472044576e-06, "loss": 1.3188, "step": 3205 }, { "epoch": 0.820736, "grad_norm": 0.35744934754609076, "learning_rate": 3.149415009165662e-06, "loss": 1.3282, "step": 3206 }, { "epoch": 0.820992, "grad_norm": 0.35466137067872394, "learning_rate": 3.1406661984578046e-06, "loss": 1.2772, "step": 3207 }, { "epoch": 0.821248, "grad_norm": 0.36291999134771713, "learning_rate": 3.131928520858194e-06, "loss": 1.3448, "step": 3208 }, { "epoch": 0.821504, "grad_norm": 0.3492149689626682, "learning_rate": 3.1232019821367698e-06, "loss": 1.299, "step": 3209 }, { "epoch": 0.82176, "grad_norm": 0.37018793803544103, "learning_rate": 3.1144865880561338e-06, "loss": 1.3334, "step": 3210 }, { "epoch": 0.822016, "grad_norm": 0.35952726397129353, "learning_rate": 3.1057823443715174e-06, "loss": 1.3188, "step": 3211 }, { "epoch": 0.822272, "grad_norm": 0.3596188946658317, "learning_rate": 3.0970892568307965e-06, "loss": 1.3695, "step": 3212 }, { "epoch": 0.822528, "grad_norm": 0.3584933500309711, "learning_rate": 3.08840733117447e-06, "loss": 1.3159, "step": 3213 }, { "epoch": 0.822784, "grad_norm": 0.3512487325159134, "learning_rate": 3.079736573135681e-06, "loss": 1.3182, "step": 3214 }, { "epoch": 0.82304, "grad_norm": 0.35855855893905003, "learning_rate": 3.071076988440178e-06, "loss": 1.3226, "step": 3215 }, { "epoch": 0.823296, "grad_norm": 0.3523029347121198, "learning_rate": 3.0624285828063515e-06, "loss": 1.3054, "step": 3216 }, { "epoch": 0.823552, "grad_norm": 0.3737446954204356, "learning_rate": 3.0537913619451907e-06, "loss": 1.3107, "step": 3217 }, { "epoch": 0.823808, "grad_norm": 0.3507053391681704, "learning_rate": 3.0451653315603138e-06, "loss": 1.2848, "step": 3218 }, { "epoch": 0.824064, "grad_norm": 0.3620545220183772, "learning_rate": 3.036550497347943e-06, "loss": 1.332, "step": 3219 }, { "epoch": 0.82432, "grad_norm": 0.34687524630785277, "learning_rate": 3.0279468649969133e-06, "loss": 1.291, "step": 3220 }, { "epoch": 0.824576, "grad_norm": 0.36213902925500585, "learning_rate": 3.019354440188649e-06, "loss": 1.3015, "step": 3221 }, { "epoch": 0.824832, "grad_norm": 0.3520260335127815, "learning_rate": 3.0107732285971835e-06, "loss": 1.2859, "step": 3222 }, { "epoch": 0.825088, "grad_norm": 0.35233972463483415, "learning_rate": 3.0022032358891493e-06, "loss": 1.333, "step": 3223 }, { "epoch": 0.825344, "grad_norm": 0.352629779806838, "learning_rate": 2.9936444677237597e-06, "loss": 1.3167, "step": 3224 }, { "epoch": 0.8256, "grad_norm": 0.360336855323851, "learning_rate": 2.9850969297528266e-06, "loss": 1.3369, "step": 3225 }, { "epoch": 0.825856, "grad_norm": 0.3548136748548445, "learning_rate": 2.9765606276207392e-06, "loss": 1.35, "step": 3226 }, { "epoch": 0.826112, "grad_norm": 0.3750484356225634, "learning_rate": 2.9680355669644667e-06, "loss": 1.3403, "step": 3227 }, { "epoch": 0.826368, "grad_norm": 0.3586284762084083, "learning_rate": 2.9595217534135543e-06, "loss": 1.3691, "step": 3228 }, { "epoch": 0.826624, "grad_norm": 0.36579343700420064, "learning_rate": 2.951019192590123e-06, "loss": 1.3722, "step": 3229 }, { "epoch": 0.82688, "grad_norm": 0.36688932585244266, "learning_rate": 2.942527890108866e-06, "loss": 1.3749, "step": 3230 }, { "epoch": 0.827136, "grad_norm": 0.35766663016787575, "learning_rate": 2.9340478515770376e-06, "loss": 1.3549, "step": 3231 }, { "epoch": 0.827392, "grad_norm": 0.35735930166466434, "learning_rate": 2.925579082594454e-06, "loss": 1.3213, "step": 3232 }, { "epoch": 0.827648, "grad_norm": 0.3565379385668704, "learning_rate": 2.9171215887534955e-06, "loss": 1.3248, "step": 3233 }, { "epoch": 0.827904, "grad_norm": 0.354843011897833, "learning_rate": 2.908675375639085e-06, "loss": 1.3177, "step": 3234 }, { "epoch": 0.82816, "grad_norm": 0.35439225713215344, "learning_rate": 2.900240448828704e-06, "loss": 1.2733, "step": 3235 }, { "epoch": 0.828416, "grad_norm": 0.3543610190322335, "learning_rate": 2.891816813892381e-06, "loss": 1.3261, "step": 3236 }, { "epoch": 0.828672, "grad_norm": 0.3616501518274734, "learning_rate": 2.883404476392693e-06, "loss": 1.361, "step": 3237 }, { "epoch": 0.828928, "grad_norm": 0.3618171538856441, "learning_rate": 2.8750034418847404e-06, "loss": 1.3252, "step": 3238 }, { "epoch": 0.829184, "grad_norm": 0.3505232809998268, "learning_rate": 2.8666137159161776e-06, "loss": 1.2903, "step": 3239 }, { "epoch": 0.82944, "grad_norm": 0.3591926211742982, "learning_rate": 2.8582353040271772e-06, "loss": 1.3346, "step": 3240 }, { "epoch": 0.829696, "grad_norm": 0.3679269499774998, "learning_rate": 2.8498682117504504e-06, "loss": 1.3529, "step": 3241 }, { "epoch": 0.829952, "grad_norm": 0.3592837410241286, "learning_rate": 2.8415124446112296e-06, "loss": 1.3394, "step": 3242 }, { "epoch": 0.830208, "grad_norm": 0.3569505682825715, "learning_rate": 2.8331680081272695e-06, "loss": 1.3537, "step": 3243 }, { "epoch": 0.830464, "grad_norm": 0.3567746997930505, "learning_rate": 2.824834907808842e-06, "loss": 1.3207, "step": 3244 }, { "epoch": 0.83072, "grad_norm": 0.36051559209834994, "learning_rate": 2.816513149158737e-06, "loss": 1.3228, "step": 3245 }, { "epoch": 0.830976, "grad_norm": 0.3637180484884654, "learning_rate": 2.808202737672243e-06, "loss": 1.3583, "step": 3246 }, { "epoch": 0.831232, "grad_norm": 0.36040810056694006, "learning_rate": 2.799903678837168e-06, "loss": 1.3253, "step": 3247 }, { "epoch": 0.831488, "grad_norm": 0.3551052398695939, "learning_rate": 2.791615978133817e-06, "loss": 1.2968, "step": 3248 }, { "epoch": 0.831744, "grad_norm": 0.353241238582077, "learning_rate": 2.7833396410350034e-06, "loss": 1.2907, "step": 3249 }, { "epoch": 0.832, "grad_norm": 0.3593897105491773, "learning_rate": 2.7750746730060176e-06, "loss": 1.36, "step": 3250 }, { "epoch": 0.832256, "grad_norm": 0.3529382850734224, "learning_rate": 2.7668210795046667e-06, "loss": 1.3315, "step": 3251 }, { "epoch": 0.832512, "grad_norm": 0.36022402819842014, "learning_rate": 2.758578865981223e-06, "loss": 1.3218, "step": 3252 }, { "epoch": 0.832768, "grad_norm": 0.3553246019573726, "learning_rate": 2.7503480378784607e-06, "loss": 1.306, "step": 3253 }, { "epoch": 0.833024, "grad_norm": 0.35552399012855007, "learning_rate": 2.7421286006316307e-06, "loss": 1.345, "step": 3254 }, { "epoch": 0.83328, "grad_norm": 0.3588214718251527, "learning_rate": 2.7339205596684593e-06, "loss": 1.3494, "step": 3255 }, { "epoch": 0.833536, "grad_norm": 0.3547076684771563, "learning_rate": 2.725723920409149e-06, "loss": 1.3339, "step": 3256 }, { "epoch": 0.833792, "grad_norm": 0.36393544108983333, "learning_rate": 2.7175386882663812e-06, "loss": 1.3159, "step": 3257 }, { "epoch": 0.834048, "grad_norm": 0.35325059456831887, "learning_rate": 2.709364868645288e-06, "loss": 1.2842, "step": 3258 }, { "epoch": 0.834304, "grad_norm": 0.3515229610426091, "learning_rate": 2.7012024669434756e-06, "loss": 1.2873, "step": 3259 }, { "epoch": 0.83456, "grad_norm": 0.3608482040674047, "learning_rate": 2.693051488551013e-06, "loss": 1.3397, "step": 3260 }, { "epoch": 0.834816, "grad_norm": 0.356432821428022, "learning_rate": 2.684911938850414e-06, "loss": 1.2818, "step": 3261 }, { "epoch": 0.835072, "grad_norm": 0.3586012965977556, "learning_rate": 2.67678382321666e-06, "loss": 1.3088, "step": 3262 }, { "epoch": 0.835328, "grad_norm": 0.3601238076750384, "learning_rate": 2.6686671470171743e-06, "loss": 1.3498, "step": 3263 }, { "epoch": 0.835584, "grad_norm": 0.35789518414771604, "learning_rate": 2.6605619156118212e-06, "loss": 1.3096, "step": 3264 }, { "epoch": 0.83584, "grad_norm": 0.37219448831863466, "learning_rate": 2.652468134352917e-06, "loss": 1.3208, "step": 3265 }, { "epoch": 0.836096, "grad_norm": 0.37966498918813824, "learning_rate": 2.6443858085852128e-06, "loss": 1.3274, "step": 3266 }, { "epoch": 0.836352, "grad_norm": 0.35852627846317614, "learning_rate": 2.6363149436458924e-06, "loss": 1.3334, "step": 3267 }, { "epoch": 0.836608, "grad_norm": 0.34967803948392395, "learning_rate": 2.6282555448645796e-06, "loss": 1.296, "step": 3268 }, { "epoch": 0.836864, "grad_norm": 0.3537289409537512, "learning_rate": 2.6202076175633196e-06, "loss": 1.3433, "step": 3269 }, { "epoch": 0.83712, "grad_norm": 0.3572828079681934, "learning_rate": 2.6121711670565787e-06, "loss": 1.3136, "step": 3270 }, { "epoch": 0.837376, "grad_norm": 0.352368919160701, "learning_rate": 2.6041461986512517e-06, "loss": 1.2942, "step": 3271 }, { "epoch": 0.837632, "grad_norm": 0.35728433147215205, "learning_rate": 2.5961327176466533e-06, "loss": 1.2995, "step": 3272 }, { "epoch": 0.837888, "grad_norm": 0.35438995005259727, "learning_rate": 2.5881307293345016e-06, "loss": 1.2887, "step": 3273 }, { "epoch": 0.838144, "grad_norm": 0.35941236032423135, "learning_rate": 2.580140238998934e-06, "loss": 1.367, "step": 3274 }, { "epoch": 0.8384, "grad_norm": 0.3697952834906845, "learning_rate": 2.5721612519164984e-06, "loss": 1.3503, "step": 3275 }, { "epoch": 0.838656, "grad_norm": 0.3930174076858665, "learning_rate": 2.564193773356134e-06, "loss": 1.3145, "step": 3276 }, { "epoch": 0.838912, "grad_norm": 0.3521793233615387, "learning_rate": 2.5562378085791873e-06, "loss": 1.2878, "step": 3277 }, { "epoch": 0.839168, "grad_norm": 0.3562823524706217, "learning_rate": 2.5482933628394068e-06, "loss": 1.3428, "step": 3278 }, { "epoch": 0.839424, "grad_norm": 0.36349685126140996, "learning_rate": 2.54036044138293e-06, "loss": 1.3493, "step": 3279 }, { "epoch": 0.83968, "grad_norm": 0.35344029169121743, "learning_rate": 2.532439049448279e-06, "loss": 1.3353, "step": 3280 }, { "epoch": 0.839936, "grad_norm": 0.3580095830177698, "learning_rate": 2.524529192266374e-06, "loss": 1.3308, "step": 3281 }, { "epoch": 0.840192, "grad_norm": 0.3543119028077944, "learning_rate": 2.516630875060502e-06, "loss": 1.3134, "step": 3282 }, { "epoch": 0.840448, "grad_norm": 0.3562750741047484, "learning_rate": 2.50874410304635e-06, "loss": 1.31, "step": 3283 }, { "epoch": 0.840704, "grad_norm": 0.3519229372077795, "learning_rate": 2.500868881431957e-06, "loss": 1.3085, "step": 3284 }, { "epoch": 0.84096, "grad_norm": 0.3585854371805405, "learning_rate": 2.4930052154177563e-06, "loss": 1.3322, "step": 3285 }, { "epoch": 0.841216, "grad_norm": 0.4852777688319952, "learning_rate": 2.4851531101965408e-06, "loss": 1.348, "step": 3286 }, { "epoch": 0.841472, "grad_norm": 0.354126007467709, "learning_rate": 2.4773125709534673e-06, "loss": 1.3231, "step": 3287 }, { "epoch": 0.841728, "grad_norm": 0.4195825594615701, "learning_rate": 2.469483602866063e-06, "loss": 1.297, "step": 3288 }, { "epoch": 0.841984, "grad_norm": 0.35274347666532474, "learning_rate": 2.4616662111042033e-06, "loss": 1.3157, "step": 3289 }, { "epoch": 0.84224, "grad_norm": 0.39144282841089934, "learning_rate": 2.453860400830126e-06, "loss": 1.3402, "step": 3290 }, { "epoch": 0.842496, "grad_norm": 0.3517713709042496, "learning_rate": 2.4460661771984227e-06, "loss": 1.2923, "step": 3291 }, { "epoch": 0.842752, "grad_norm": 0.43123755438975486, "learning_rate": 2.438283545356026e-06, "loss": 1.3013, "step": 3292 }, { "epoch": 0.843008, "grad_norm": 0.3517854347493126, "learning_rate": 2.4305125104422267e-06, "loss": 1.342, "step": 3293 }, { "epoch": 0.843264, "grad_norm": 0.3630344744429917, "learning_rate": 2.422753077588642e-06, "loss": 1.3318, "step": 3294 }, { "epoch": 0.84352, "grad_norm": 0.3524454342412477, "learning_rate": 2.415005251919238e-06, "loss": 1.3435, "step": 3295 }, { "epoch": 0.843776, "grad_norm": 0.35841122485576776, "learning_rate": 2.4072690385503105e-06, "loss": 1.314, "step": 3296 }, { "epoch": 0.844032, "grad_norm": 0.3576542543848138, "learning_rate": 2.3995444425904914e-06, "loss": 1.3459, "step": 3297 }, { "epoch": 0.844288, "grad_norm": 0.35719328704951125, "learning_rate": 2.3918314691407373e-06, "loss": 1.3145, "step": 3298 }, { "epoch": 0.844544, "grad_norm": 0.3633786460398092, "learning_rate": 2.384130123294337e-06, "loss": 1.3444, "step": 3299 }, { "epoch": 0.8448, "grad_norm": 0.35512217140606295, "learning_rate": 2.3764404101368954e-06, "loss": 1.3224, "step": 3300 }, { "epoch": 0.845056, "grad_norm": 0.3595177169862895, "learning_rate": 2.3687623347463284e-06, "loss": 1.3421, "step": 3301 }, { "epoch": 0.845312, "grad_norm": 0.35613220541053237, "learning_rate": 2.3610959021928803e-06, "loss": 1.3272, "step": 3302 }, { "epoch": 0.845568, "grad_norm": 0.35548634555637093, "learning_rate": 2.3534411175390994e-06, "loss": 1.2932, "step": 3303 }, { "epoch": 0.845824, "grad_norm": 0.3604640644085452, "learning_rate": 2.345797985839846e-06, "loss": 1.3243, "step": 3304 }, { "epoch": 0.84608, "grad_norm": 0.40654114497705796, "learning_rate": 2.338166512142286e-06, "loss": 1.3079, "step": 3305 }, { "epoch": 0.846336, "grad_norm": 0.3565981329165396, "learning_rate": 2.330546701485883e-06, "loss": 1.3189, "step": 3306 }, { "epoch": 0.846592, "grad_norm": 0.35645165068430334, "learning_rate": 2.322938558902392e-06, "loss": 1.343, "step": 3307 }, { "epoch": 0.846848, "grad_norm": 0.34664805759763895, "learning_rate": 2.3153420894158794e-06, "loss": 1.2661, "step": 3308 }, { "epoch": 0.847104, "grad_norm": 0.3956037562638198, "learning_rate": 2.3077572980426922e-06, "loss": 1.3481, "step": 3309 }, { "epoch": 0.84736, "grad_norm": 0.35143504470878856, "learning_rate": 2.30018418979147e-06, "loss": 1.2837, "step": 3310 }, { "epoch": 0.847616, "grad_norm": 0.35159127460323447, "learning_rate": 2.2926227696631376e-06, "loss": 1.334, "step": 3311 }, { "epoch": 0.847872, "grad_norm": 0.3625689890323539, "learning_rate": 2.2850730426509005e-06, "loss": 1.3284, "step": 3312 }, { "epoch": 0.848128, "grad_norm": 0.35997068830307666, "learning_rate": 2.2775350137402353e-06, "loss": 1.3244, "step": 3313 }, { "epoch": 0.848384, "grad_norm": 0.363924826846468, "learning_rate": 2.2700086879089046e-06, "loss": 1.3386, "step": 3314 }, { "epoch": 0.84864, "grad_norm": 0.36148199677564, "learning_rate": 2.262494070126939e-06, "loss": 1.3276, "step": 3315 }, { "epoch": 0.848896, "grad_norm": 0.3526181905708771, "learning_rate": 2.2549911653566416e-06, "loss": 1.2908, "step": 3316 }, { "epoch": 0.849152, "grad_norm": 0.3584663992368235, "learning_rate": 2.2474999785525698e-06, "loss": 1.3391, "step": 3317 }, { "epoch": 0.849408, "grad_norm": 0.3474946229767795, "learning_rate": 2.2400205146615537e-06, "loss": 1.2792, "step": 3318 }, { "epoch": 0.849664, "grad_norm": 0.34599832423622284, "learning_rate": 2.2325527786226743e-06, "loss": 1.2805, "step": 3319 }, { "epoch": 0.84992, "grad_norm": 0.3595009114769627, "learning_rate": 2.2250967753672747e-06, "loss": 1.3166, "step": 3320 }, { "epoch": 0.850176, "grad_norm": 0.3597749993349547, "learning_rate": 2.217652509818946e-06, "loss": 1.3243, "step": 3321 }, { "epoch": 0.850432, "grad_norm": 0.3519757595317033, "learning_rate": 2.2102199868935316e-06, "loss": 1.2978, "step": 3322 }, { "epoch": 0.850688, "grad_norm": 0.3525089962860643, "learning_rate": 2.202799211499118e-06, "loss": 1.3273, "step": 3323 }, { "epoch": 0.850944, "grad_norm": 0.36404519626503606, "learning_rate": 2.1953901885360395e-06, "loss": 1.3545, "step": 3324 }, { "epoch": 0.8512, "grad_norm": 0.549722139858535, "learning_rate": 2.1879929228968554e-06, "loss": 1.3217, "step": 3325 }, { "epoch": 0.851456, "grad_norm": 0.3582505849168342, "learning_rate": 2.1806074194663783e-06, "loss": 1.3416, "step": 3326 }, { "epoch": 0.851712, "grad_norm": 0.3486846766638767, "learning_rate": 2.173233683121643e-06, "loss": 1.3244, "step": 3327 }, { "epoch": 0.851968, "grad_norm": 0.36564168075447623, "learning_rate": 2.1658717187319224e-06, "loss": 1.3189, "step": 3328 }, { "epoch": 0.852224, "grad_norm": 0.3514161506865785, "learning_rate": 2.1585215311587014e-06, "loss": 1.3081, "step": 3329 }, { "epoch": 0.85248, "grad_norm": 0.3540921564763548, "learning_rate": 2.1511831252557048e-06, "loss": 1.3048, "step": 3330 }, { "epoch": 0.852736, "grad_norm": 0.3501199097110855, "learning_rate": 2.1438565058688623e-06, "loss": 1.3054, "step": 3331 }, { "epoch": 0.852992, "grad_norm": 0.35815357873398707, "learning_rate": 2.1365416778363325e-06, "loss": 1.3367, "step": 3332 }, { "epoch": 0.853248, "grad_norm": 0.354454076503121, "learning_rate": 2.1292386459884804e-06, "loss": 1.3135, "step": 3333 }, { "epoch": 0.853504, "grad_norm": 0.3508782898263423, "learning_rate": 2.1219474151478823e-06, "loss": 1.3177, "step": 3334 }, { "epoch": 0.85376, "grad_norm": 0.34883249728020327, "learning_rate": 2.1146679901293267e-06, "loss": 1.2836, "step": 3335 }, { "epoch": 0.854016, "grad_norm": 0.35817804489055227, "learning_rate": 2.1074003757398055e-06, "loss": 1.3393, "step": 3336 }, { "epoch": 0.854272, "grad_norm": 0.35322042753110866, "learning_rate": 2.1001445767784978e-06, "loss": 1.2869, "step": 3337 }, { "epoch": 0.854528, "grad_norm": 0.35888638623148406, "learning_rate": 2.0929005980367957e-06, "loss": 1.3572, "step": 3338 }, { "epoch": 0.854784, "grad_norm": 0.3509383155618107, "learning_rate": 2.085668444298288e-06, "loss": 1.2898, "step": 3339 }, { "epoch": 0.85504, "grad_norm": 0.35369281115667717, "learning_rate": 2.078448120338734e-06, "loss": 1.2957, "step": 3340 }, { "epoch": 0.855296, "grad_norm": 0.3483203959099041, "learning_rate": 2.0712396309261028e-06, "loss": 1.2841, "step": 3341 }, { "epoch": 0.855552, "grad_norm": 0.3449622697279141, "learning_rate": 2.0640429808205442e-06, "loss": 1.2864, "step": 3342 }, { "epoch": 0.855808, "grad_norm": 0.3609443953755346, "learning_rate": 2.0568581747743764e-06, "loss": 1.3091, "step": 3343 }, { "epoch": 0.856064, "grad_norm": 0.35530059622567073, "learning_rate": 2.0496852175321113e-06, "loss": 1.3229, "step": 3344 }, { "epoch": 0.85632, "grad_norm": 0.3631972632891438, "learning_rate": 2.0425241138304307e-06, "loss": 1.3325, "step": 3345 }, { "epoch": 0.856576, "grad_norm": 0.35550296364796985, "learning_rate": 2.0353748683981922e-06, "loss": 1.2978, "step": 3346 }, { "epoch": 0.856832, "grad_norm": 0.3508494337988298, "learning_rate": 2.028237485956417e-06, "loss": 1.308, "step": 3347 }, { "epoch": 0.857088, "grad_norm": 0.3522478921437514, "learning_rate": 2.0211119712182947e-06, "loss": 1.3455, "step": 3348 }, { "epoch": 0.857344, "grad_norm": 0.350390970968294, "learning_rate": 2.0139983288891864e-06, "loss": 1.2977, "step": 3349 }, { "epoch": 0.8576, "grad_norm": 0.3539239884214114, "learning_rate": 2.006896563666596e-06, "loss": 1.3096, "step": 3350 }, { "epoch": 0.857856, "grad_norm": 0.3716172380676982, "learning_rate": 1.9998066802402016e-06, "loss": 1.3136, "step": 3351 }, { "epoch": 0.858112, "grad_norm": 0.3548652937590851, "learning_rate": 1.992728683291818e-06, "loss": 1.3218, "step": 3352 }, { "epoch": 0.858368, "grad_norm": 0.4524341497519749, "learning_rate": 1.9856625774954287e-06, "loss": 1.3292, "step": 3353 }, { "epoch": 0.858624, "grad_norm": 0.34730402883355477, "learning_rate": 1.9786083675171542e-06, "loss": 1.3152, "step": 3354 }, { "epoch": 0.85888, "grad_norm": 0.3533815715889272, "learning_rate": 1.971566058015264e-06, "loss": 1.3164, "step": 3355 }, { "epoch": 0.859136, "grad_norm": 0.3782844915418903, "learning_rate": 1.9645356536401604e-06, "loss": 1.28, "step": 3356 }, { "epoch": 0.859392, "grad_norm": 0.3504076570542578, "learning_rate": 1.9575171590343967e-06, "loss": 1.2863, "step": 3357 }, { "epoch": 0.859648, "grad_norm": 0.35037007333160175, "learning_rate": 1.950510578832652e-06, "loss": 1.2866, "step": 3358 }, { "epoch": 0.859904, "grad_norm": 0.3570274592821321, "learning_rate": 1.9435159176617404e-06, "loss": 1.3024, "step": 3359 }, { "epoch": 0.86016, "grad_norm": 0.3527714563477848, "learning_rate": 1.936533180140614e-06, "loss": 1.329, "step": 3360 }, { "epoch": 0.860416, "grad_norm": 0.3535506934545577, "learning_rate": 1.9295623708803334e-06, "loss": 1.3078, "step": 3361 }, { "epoch": 0.860672, "grad_norm": 0.3487638513465772, "learning_rate": 1.922603494484101e-06, "loss": 1.3127, "step": 3362 }, { "epoch": 0.860928, "grad_norm": 0.3442575734944706, "learning_rate": 1.915656555547218e-06, "loss": 1.2731, "step": 3363 }, { "epoch": 0.861184, "grad_norm": 0.35095124260675203, "learning_rate": 1.908721558657125e-06, "loss": 1.3159, "step": 3364 }, { "epoch": 0.86144, "grad_norm": 0.34965537091352855, "learning_rate": 1.9017985083933621e-06, "loss": 1.2864, "step": 3365 }, { "epoch": 0.861696, "grad_norm": 0.36213743542148036, "learning_rate": 1.894887409327586e-06, "loss": 1.3423, "step": 3366 }, { "epoch": 0.861952, "grad_norm": 0.3478151133571996, "learning_rate": 1.8879882660235638e-06, "loss": 1.2867, "step": 3367 }, { "epoch": 0.862208, "grad_norm": 0.3469720716160055, "learning_rate": 1.88110108303716e-06, "loss": 1.2801, "step": 3368 }, { "epoch": 0.862464, "grad_norm": 0.3520254234451795, "learning_rate": 1.8742258649163435e-06, "loss": 1.3283, "step": 3369 }, { "epoch": 0.86272, "grad_norm": 0.3609139388670736, "learning_rate": 1.867362616201187e-06, "loss": 1.2954, "step": 3370 }, { "epoch": 0.862976, "grad_norm": 0.3543433865946269, "learning_rate": 1.8605113414238563e-06, "loss": 1.3006, "step": 3371 }, { "epoch": 0.863232, "grad_norm": 0.35541616886921124, "learning_rate": 1.8536720451086121e-06, "loss": 1.3502, "step": 3372 }, { "epoch": 0.863488, "grad_norm": 0.35902328104214193, "learning_rate": 1.8468447317717974e-06, "loss": 1.3506, "step": 3373 }, { "epoch": 0.863744, "grad_norm": 0.3587515537533156, "learning_rate": 1.8400294059218526e-06, "loss": 1.3675, "step": 3374 }, { "epoch": 0.864, "grad_norm": 0.35790808505538046, "learning_rate": 1.8332260720592908e-06, "loss": 1.3337, "step": 3375 }, { "epoch": 0.864256, "grad_norm": 0.3519865353025258, "learning_rate": 1.8264347346767164e-06, "loss": 1.296, "step": 3376 }, { "epoch": 0.864512, "grad_norm": 0.35808651838077504, "learning_rate": 1.8196553982588083e-06, "loss": 1.3464, "step": 3377 }, { "epoch": 0.864768, "grad_norm": 0.3774158199850852, "learning_rate": 1.812888067282319e-06, "loss": 1.297, "step": 3378 }, { "epoch": 0.865024, "grad_norm": 0.4149531386033982, "learning_rate": 1.8061327462160804e-06, "loss": 1.3575, "step": 3379 }, { "epoch": 0.86528, "grad_norm": 0.36080464349652713, "learning_rate": 1.7993894395209776e-06, "loss": 1.3266, "step": 3380 }, { "epoch": 0.865536, "grad_norm": 0.3506050539591894, "learning_rate": 1.7926581516499774e-06, "loss": 1.2938, "step": 3381 }, { "epoch": 0.865792, "grad_norm": 0.34821621782363205, "learning_rate": 1.7859388870481064e-06, "loss": 1.2853, "step": 3382 }, { "epoch": 0.866048, "grad_norm": 0.35201314250050414, "learning_rate": 1.7792316501524487e-06, "loss": 1.3226, "step": 3383 }, { "epoch": 0.866304, "grad_norm": 0.35219982656942955, "learning_rate": 1.7725364453921434e-06, "loss": 1.3361, "step": 3384 }, { "epoch": 0.86656, "grad_norm": 0.3501884147900269, "learning_rate": 1.765853277188394e-06, "loss": 1.3112, "step": 3385 }, { "epoch": 0.866816, "grad_norm": 0.35453291242813045, "learning_rate": 1.7591821499544416e-06, "loss": 1.3416, "step": 3386 }, { "epoch": 0.867072, "grad_norm": 0.3561478272005234, "learning_rate": 1.7525230680955885e-06, "loss": 1.3418, "step": 3387 }, { "epoch": 0.867328, "grad_norm": 0.35387075475204915, "learning_rate": 1.7458760360091753e-06, "loss": 1.3066, "step": 3388 }, { "epoch": 0.867584, "grad_norm": 0.35448304826178906, "learning_rate": 1.739241058084593e-06, "loss": 1.3342, "step": 3389 }, { "epoch": 0.86784, "grad_norm": 0.3557989622097373, "learning_rate": 1.7326181387032636e-06, "loss": 1.3161, "step": 3390 }, { "epoch": 0.868096, "grad_norm": 0.3465008176436588, "learning_rate": 1.7260072822386554e-06, "loss": 1.3007, "step": 3391 }, { "epoch": 0.868352, "grad_norm": 0.3543988996800462, "learning_rate": 1.7194084930562582e-06, "loss": 1.3093, "step": 3392 }, { "epoch": 0.868608, "grad_norm": 0.3439772431098518, "learning_rate": 1.7128217755136046e-06, "loss": 1.2488, "step": 3393 }, { "epoch": 0.868864, "grad_norm": 0.3777012223225288, "learning_rate": 1.7062471339602548e-06, "loss": 1.384, "step": 3394 }, { "epoch": 0.86912, "grad_norm": 0.3508376286445271, "learning_rate": 1.6996845727377898e-06, "loss": 1.3094, "step": 3395 }, { "epoch": 0.869376, "grad_norm": 0.3640175118148113, "learning_rate": 1.6931340961798138e-06, "loss": 1.3132, "step": 3396 }, { "epoch": 0.869632, "grad_norm": 0.35221238382802705, "learning_rate": 1.6865957086119554e-06, "loss": 1.3138, "step": 3397 }, { "epoch": 0.869888, "grad_norm": 0.3555176980437666, "learning_rate": 1.6800694143518526e-06, "loss": 1.3283, "step": 3398 }, { "epoch": 0.870144, "grad_norm": 0.3513765928802746, "learning_rate": 1.6735552177091642e-06, "loss": 1.3259, "step": 3399 }, { "epoch": 0.8704, "grad_norm": 0.3524966551283487, "learning_rate": 1.667053122985558e-06, "loss": 1.2875, "step": 3400 }, { "epoch": 0.870656, "grad_norm": 0.35283646774716976, "learning_rate": 1.660563134474713e-06, "loss": 1.2975, "step": 3401 }, { "epoch": 0.870912, "grad_norm": 0.3631507666526599, "learning_rate": 1.6540852564623077e-06, "loss": 1.3618, "step": 3402 }, { "epoch": 0.871168, "grad_norm": 0.3510657752296378, "learning_rate": 1.6476194932260314e-06, "loss": 1.3385, "step": 3403 }, { "epoch": 0.871424, "grad_norm": 0.36544537424144063, "learning_rate": 1.6411658490355641e-06, "loss": 1.3706, "step": 3404 }, { "epoch": 0.87168, "grad_norm": 0.37749156847370197, "learning_rate": 1.6347243281525883e-06, "loss": 1.3532, "step": 3405 }, { "epoch": 0.871936, "grad_norm": 0.35478994049377266, "learning_rate": 1.6282949348307854e-06, "loss": 1.3404, "step": 3406 }, { "epoch": 0.872192, "grad_norm": 0.35580653294159154, "learning_rate": 1.621877673315817e-06, "loss": 1.3134, "step": 3407 }, { "epoch": 0.872448, "grad_norm": 0.35388355003394034, "learning_rate": 1.6154725478453426e-06, "loss": 1.3454, "step": 3408 }, { "epoch": 0.872704, "grad_norm": 0.350271549425445, "learning_rate": 1.609079562649003e-06, "loss": 1.2876, "step": 3409 }, { "epoch": 0.87296, "grad_norm": 0.3514018406402214, "learning_rate": 1.6026987219484302e-06, "loss": 1.3055, "step": 3410 }, { "epoch": 0.873216, "grad_norm": 0.3521687235854348, "learning_rate": 1.5963300299572227e-06, "loss": 1.3029, "step": 3411 }, { "epoch": 0.873472, "grad_norm": 0.3571864252499274, "learning_rate": 1.5899734908809649e-06, "loss": 1.3374, "step": 3412 }, { "epoch": 0.873728, "grad_norm": 0.3574598414001649, "learning_rate": 1.5836291089172173e-06, "loss": 1.3551, "step": 3413 }, { "epoch": 0.873984, "grad_norm": 0.37100542161055794, "learning_rate": 1.5772968882555107e-06, "loss": 1.335, "step": 3414 }, { "epoch": 0.87424, "grad_norm": 0.3484951446054928, "learning_rate": 1.5709768330773424e-06, "loss": 1.3177, "step": 3415 }, { "epoch": 0.874496, "grad_norm": 0.35659941910606907, "learning_rate": 1.5646689475561848e-06, "loss": 1.3128, "step": 3416 }, { "epoch": 0.874752, "grad_norm": 0.35250098450503703, "learning_rate": 1.55837323585746e-06, "loss": 1.3171, "step": 3417 }, { "epoch": 0.875008, "grad_norm": 0.35672941750811266, "learning_rate": 1.5520897021385638e-06, "loss": 1.3556, "step": 3418 }, { "epoch": 0.875264, "grad_norm": 0.3499371614761826, "learning_rate": 1.5458183505488421e-06, "loss": 1.3049, "step": 3419 }, { "epoch": 0.87552, "grad_norm": 0.35814180202520735, "learning_rate": 1.5395591852295998e-06, "loss": 1.3256, "step": 3420 }, { "epoch": 0.875776, "grad_norm": 0.35749015614497737, "learning_rate": 1.533312210314095e-06, "loss": 1.3341, "step": 3421 }, { "epoch": 0.876032, "grad_norm": 0.35388091171335445, "learning_rate": 1.527077429927537e-06, "loss": 1.306, "step": 3422 }, { "epoch": 0.876288, "grad_norm": 0.3512853982188562, "learning_rate": 1.5208548481870766e-06, "loss": 1.2815, "step": 3423 }, { "epoch": 0.876544, "grad_norm": 0.3526493537506339, "learning_rate": 1.514644469201816e-06, "loss": 1.3462, "step": 3424 }, { "epoch": 0.8768, "grad_norm": 0.3458383240363241, "learning_rate": 1.5084462970727942e-06, "loss": 1.2847, "step": 3425 }, { "epoch": 0.877056, "grad_norm": 0.3679948521400727, "learning_rate": 1.5022603358929955e-06, "loss": 1.3965, "step": 3426 }, { "epoch": 0.877312, "grad_norm": 0.3548233021387248, "learning_rate": 1.496086589747332e-06, "loss": 1.3496, "step": 3427 }, { "epoch": 0.877568, "grad_norm": 0.34709181633769964, "learning_rate": 1.4899250627126581e-06, "loss": 1.2917, "step": 3428 }, { "epoch": 0.877824, "grad_norm": 0.35053424485936585, "learning_rate": 1.4837757588577551e-06, "loss": 1.2921, "step": 3429 }, { "epoch": 0.87808, "grad_norm": 0.3561158155018892, "learning_rate": 1.4776386822433276e-06, "loss": 1.3583, "step": 3430 }, { "epoch": 0.878336, "grad_norm": 0.3599502920977389, "learning_rate": 1.4715138369220161e-06, "loss": 1.3745, "step": 3431 }, { "epoch": 0.878592, "grad_norm": 0.3577276318473569, "learning_rate": 1.4654012269383765e-06, "loss": 1.3412, "step": 3432 }, { "epoch": 0.878848, "grad_norm": 0.35579293790253486, "learning_rate": 1.4593008563288913e-06, "loss": 1.3444, "step": 3433 }, { "epoch": 0.879104, "grad_norm": 0.3536808643328128, "learning_rate": 1.45321272912196e-06, "loss": 1.301, "step": 3434 }, { "epoch": 0.87936, "grad_norm": 0.3552324607426098, "learning_rate": 1.4471368493378847e-06, "loss": 1.3511, "step": 3435 }, { "epoch": 0.879616, "grad_norm": 0.3554412733248568, "learning_rate": 1.4410732209888977e-06, "loss": 1.3372, "step": 3436 }, { "epoch": 0.879872, "grad_norm": 0.35600689966840865, "learning_rate": 1.4350218480791278e-06, "loss": 1.316, "step": 3437 }, { "epoch": 0.880128, "grad_norm": 0.36349969720593567, "learning_rate": 1.4289827346046204e-06, "loss": 1.3339, "step": 3438 }, { "epoch": 0.880384, "grad_norm": 0.35477088842985516, "learning_rate": 1.422955884553321e-06, "loss": 1.3223, "step": 3439 }, { "epoch": 0.88064, "grad_norm": 0.35489013165648886, "learning_rate": 1.4169413019050726e-06, "loss": 1.3035, "step": 3440 }, { "epoch": 0.880896, "grad_norm": 0.35423741149009724, "learning_rate": 1.4109389906316273e-06, "loss": 1.3314, "step": 3441 }, { "epoch": 0.881152, "grad_norm": 0.37710625674022186, "learning_rate": 1.404948954696621e-06, "loss": 1.3654, "step": 3442 }, { "epoch": 0.881408, "grad_norm": 0.355029291981972, "learning_rate": 1.3989711980555965e-06, "loss": 1.3552, "step": 3443 }, { "epoch": 0.881664, "grad_norm": 0.3560379635516498, "learning_rate": 1.3930057246559782e-06, "loss": 1.302, "step": 3444 }, { "epoch": 0.88192, "grad_norm": 0.35617641852467496, "learning_rate": 1.387052538437086e-06, "loss": 1.3425, "step": 3445 }, { "epoch": 0.882176, "grad_norm": 0.35361458321814293, "learning_rate": 1.3811116433301264e-06, "loss": 1.3145, "step": 3446 }, { "epoch": 0.882432, "grad_norm": 0.36181102700263085, "learning_rate": 1.3751830432581792e-06, "loss": 1.3132, "step": 3447 }, { "epoch": 0.882688, "grad_norm": 0.3506588798767743, "learning_rate": 1.3692667421362127e-06, "loss": 1.2871, "step": 3448 }, { "epoch": 0.882944, "grad_norm": 0.35932189409160614, "learning_rate": 1.3633627438710772e-06, "loss": 1.3428, "step": 3449 }, { "epoch": 0.8832, "grad_norm": 0.3522959915674311, "learning_rate": 1.357471052361492e-06, "loss": 1.3509, "step": 3450 }, { "epoch": 0.883456, "grad_norm": 0.3567314172173001, "learning_rate": 1.3515916714980538e-06, "loss": 1.3272, "step": 3451 }, { "epoch": 0.883712, "grad_norm": 0.3566599208871027, "learning_rate": 1.3457246051632278e-06, "loss": 1.3133, "step": 3452 }, { "epoch": 0.883968, "grad_norm": 0.36496200773794146, "learning_rate": 1.339869857231344e-06, "loss": 1.3008, "step": 3453 }, { "epoch": 0.884224, "grad_norm": 0.35228289608772456, "learning_rate": 1.3340274315686053e-06, "loss": 1.3367, "step": 3454 }, { "epoch": 0.88448, "grad_norm": 0.36480212002618434, "learning_rate": 1.32819733203307e-06, "loss": 1.296, "step": 3455 }, { "epoch": 0.884736, "grad_norm": 0.3527501222248405, "learning_rate": 1.322379562474665e-06, "loss": 1.3284, "step": 3456 }, { "epoch": 0.884992, "grad_norm": 0.35695834230723705, "learning_rate": 1.3165741267351706e-06, "loss": 1.3454, "step": 3457 }, { "epoch": 0.885248, "grad_norm": 0.3542935006544548, "learning_rate": 1.3107810286482225e-06, "loss": 1.3216, "step": 3458 }, { "epoch": 0.885504, "grad_norm": 0.35760333775556563, "learning_rate": 1.3050002720393052e-06, "loss": 1.3466, "step": 3459 }, { "epoch": 0.88576, "grad_norm": 0.34621530263958666, "learning_rate": 1.299231860725758e-06, "loss": 1.303, "step": 3460 }, { "epoch": 0.886016, "grad_norm": 0.36148076206005114, "learning_rate": 1.2934757985167723e-06, "loss": 1.3329, "step": 3461 }, { "epoch": 0.886272, "grad_norm": 0.35451738049496223, "learning_rate": 1.2877320892133782e-06, "loss": 1.3228, "step": 3462 }, { "epoch": 0.886528, "grad_norm": 0.36493224752850867, "learning_rate": 1.2820007366084486e-06, "loss": 1.3978, "step": 3463 }, { "epoch": 0.886784, "grad_norm": 0.3595312926926017, "learning_rate": 1.2762817444867005e-06, "loss": 1.3392, "step": 3464 }, { "epoch": 0.88704, "grad_norm": 0.3547136029895536, "learning_rate": 1.2705751166246816e-06, "loss": 1.3441, "step": 3465 }, { "epoch": 0.887296, "grad_norm": 0.3501021071214016, "learning_rate": 1.2648808567907823e-06, "loss": 1.2929, "step": 3466 }, { "epoch": 0.887552, "grad_norm": 0.3592554190689579, "learning_rate": 1.2591989687452232e-06, "loss": 1.3417, "step": 3467 }, { "epoch": 0.887808, "grad_norm": 0.3572522939389974, "learning_rate": 1.2535294562400547e-06, "loss": 1.3234, "step": 3468 }, { "epoch": 0.888064, "grad_norm": 0.3443744180496664, "learning_rate": 1.247872323019157e-06, "loss": 1.2644, "step": 3469 }, { "epoch": 0.88832, "grad_norm": 0.3557885961084128, "learning_rate": 1.2422275728182309e-06, "loss": 1.3299, "step": 3470 }, { "epoch": 0.888576, "grad_norm": 0.35207523291523296, "learning_rate": 1.2365952093648082e-06, "loss": 1.3339, "step": 3471 }, { "epoch": 0.888832, "grad_norm": 0.3536742792919578, "learning_rate": 1.2309752363782291e-06, "loss": 1.324, "step": 3472 }, { "epoch": 0.889088, "grad_norm": 0.3543059238673609, "learning_rate": 1.2253676575696627e-06, "loss": 1.3354, "step": 3473 }, { "epoch": 0.889344, "grad_norm": 0.34887546065042163, "learning_rate": 1.2197724766420894e-06, "loss": 1.2935, "step": 3474 }, { "epoch": 0.8896, "grad_norm": 0.3599923005825391, "learning_rate": 1.2141896972903e-06, "loss": 1.3192, "step": 3475 }, { "epoch": 0.889856, "grad_norm": 0.36664296668228047, "learning_rate": 1.2086193232008991e-06, "loss": 1.344, "step": 3476 }, { "epoch": 0.890112, "grad_norm": 0.3524892410936937, "learning_rate": 1.2030613580523021e-06, "loss": 1.3337, "step": 3477 }, { "epoch": 0.890368, "grad_norm": 0.35854730886610753, "learning_rate": 1.1975158055147218e-06, "loss": 1.3511, "step": 3478 }, { "epoch": 0.890624, "grad_norm": 0.35524778794202605, "learning_rate": 1.1919826692501823e-06, "loss": 1.3188, "step": 3479 }, { "epoch": 0.89088, "grad_norm": 0.35027076909184174, "learning_rate": 1.1864619529125055e-06, "loss": 1.3161, "step": 3480 }, { "epoch": 0.891136, "grad_norm": 0.5203621963323755, "learning_rate": 1.1809536601473103e-06, "loss": 1.3436, "step": 3481 }, { "epoch": 0.891392, "grad_norm": 0.35342807060355685, "learning_rate": 1.1754577945920142e-06, "loss": 1.3279, "step": 3482 }, { "epoch": 0.891648, "grad_norm": 0.35474242161684677, "learning_rate": 1.1699743598758317e-06, "loss": 1.3329, "step": 3483 }, { "epoch": 0.891904, "grad_norm": 0.35012064175757707, "learning_rate": 1.1645033596197575e-06, "loss": 1.3232, "step": 3484 }, { "epoch": 0.89216, "grad_norm": 0.3472711342171822, "learning_rate": 1.1590447974365881e-06, "loss": 1.3141, "step": 3485 }, { "epoch": 0.892416, "grad_norm": 0.35601948751968426, "learning_rate": 1.1535986769308915e-06, "loss": 1.3232, "step": 3486 }, { "epoch": 0.892672, "grad_norm": 0.3612691157127042, "learning_rate": 1.1481650016990375e-06, "loss": 1.3123, "step": 3487 }, { "epoch": 0.892928, "grad_norm": 0.3541352802209042, "learning_rate": 1.1427437753291627e-06, "loss": 1.3307, "step": 3488 }, { "epoch": 0.893184, "grad_norm": 0.35188969741657383, "learning_rate": 1.137335001401194e-06, "loss": 1.2972, "step": 3489 }, { "epoch": 0.89344, "grad_norm": 0.34468543933807433, "learning_rate": 1.1319386834868262e-06, "loss": 1.2802, "step": 3490 }, { "epoch": 0.893696, "grad_norm": 0.35079358249798087, "learning_rate": 1.126554825149535e-06, "loss": 1.299, "step": 3491 }, { "epoch": 0.893952, "grad_norm": 0.35276788466905523, "learning_rate": 1.1211834299445678e-06, "loss": 1.2909, "step": 3492 }, { "epoch": 0.894208, "grad_norm": 0.3550493056736705, "learning_rate": 1.1158245014189362e-06, "loss": 1.3214, "step": 3493 }, { "epoch": 0.894464, "grad_norm": 0.3493956441552366, "learning_rate": 1.110478043111427e-06, "loss": 1.3228, "step": 3494 }, { "epoch": 0.89472, "grad_norm": 0.36103139580512994, "learning_rate": 1.1051440585525896e-06, "loss": 1.2946, "step": 3495 }, { "epoch": 0.894976, "grad_norm": 0.3616838839935567, "learning_rate": 1.0998225512647332e-06, "loss": 1.3427, "step": 3496 }, { "epoch": 0.895232, "grad_norm": 0.352782625671208, "learning_rate": 1.0945135247619332e-06, "loss": 1.3407, "step": 3497 }, { "epoch": 0.895488, "grad_norm": 0.35310119156486997, "learning_rate": 1.0892169825500144e-06, "loss": 1.2895, "step": 3498 }, { "epoch": 0.895744, "grad_norm": 0.3565934642982159, "learning_rate": 1.0839329281265676e-06, "loss": 1.3144, "step": 3499 }, { "epoch": 0.896, "grad_norm": 0.35175896072594764, "learning_rate": 1.0786613649809308e-06, "loss": 1.323, "step": 3500 }, { "epoch": 0.896256, "grad_norm": 0.3794033558081013, "learning_rate": 1.0734022965942015e-06, "loss": 1.3436, "step": 3501 }, { "epoch": 0.896512, "grad_norm": 0.3604002157148276, "learning_rate": 1.0681557264392106e-06, "loss": 1.3459, "step": 3502 }, { "epoch": 0.896768, "grad_norm": 0.3518174571483309, "learning_rate": 1.0629216579805513e-06, "loss": 1.3243, "step": 3503 }, { "epoch": 0.897024, "grad_norm": 0.35416143127004995, "learning_rate": 1.0577000946745541e-06, "loss": 1.3263, "step": 3504 }, { "epoch": 0.89728, "grad_norm": 0.36308939075673613, "learning_rate": 1.0524910399692923e-06, "loss": 1.3179, "step": 3505 }, { "epoch": 0.897536, "grad_norm": 0.35620362365960795, "learning_rate": 1.0472944973045807e-06, "loss": 1.3421, "step": 3506 }, { "epoch": 0.897792, "grad_norm": 0.3459673500036439, "learning_rate": 1.0421104701119744e-06, "loss": 1.2885, "step": 3507 }, { "epoch": 0.898048, "grad_norm": 0.35460709434956694, "learning_rate": 1.0369389618147552e-06, "loss": 1.3387, "step": 3508 }, { "epoch": 0.898304, "grad_norm": 0.3487721503862132, "learning_rate": 1.0317799758279424e-06, "loss": 1.3073, "step": 3509 }, { "epoch": 0.89856, "grad_norm": 0.3511084869259205, "learning_rate": 1.0266335155582885e-06, "loss": 1.3183, "step": 3510 }, { "epoch": 0.898816, "grad_norm": 0.35523758126788113, "learning_rate": 1.021499584404273e-06, "loss": 1.3473, "step": 3511 }, { "epoch": 0.899072, "grad_norm": 0.3518944436803949, "learning_rate": 1.0163781857561017e-06, "loss": 1.3141, "step": 3512 }, { "epoch": 0.899328, "grad_norm": 0.35002477327491416, "learning_rate": 1.0112693229957093e-06, "loss": 1.3106, "step": 3513 }, { "epoch": 0.899584, "grad_norm": 0.35655654637448386, "learning_rate": 1.0061729994967374e-06, "loss": 1.3492, "step": 3514 }, { "epoch": 0.89984, "grad_norm": 0.34876359046269984, "learning_rate": 1.0010892186245647e-06, "loss": 1.3007, "step": 3515 }, { "epoch": 0.900096, "grad_norm": 0.3576543471055604, "learning_rate": 9.960179837362793e-07, "loss": 1.3546, "step": 3516 }, { "epoch": 0.900352, "grad_norm": 0.35304341875635104, "learning_rate": 9.90959298180685e-07, "loss": 1.3116, "step": 3517 }, { "epoch": 0.900608, "grad_norm": 0.34324990360572544, "learning_rate": 9.859131652982979e-07, "loss": 1.2684, "step": 3518 }, { "epoch": 0.900864, "grad_norm": 0.3495723058377081, "learning_rate": 9.808795884213462e-07, "loss": 1.3085, "step": 3519 }, { "epoch": 0.90112, "grad_norm": 0.3572073056979552, "learning_rate": 9.758585708737711e-07, "loss": 1.3594, "step": 3520 }, { "epoch": 0.901376, "grad_norm": 0.35194856241012956, "learning_rate": 9.70850115971207e-07, "loss": 1.3033, "step": 3521 }, { "epoch": 0.901632, "grad_norm": 0.349492515088723, "learning_rate": 9.658542270210058e-07, "loss": 1.3096, "step": 3522 }, { "epoch": 0.901888, "grad_norm": 0.35469688635601465, "learning_rate": 9.608709073222156e-07, "loss": 1.3029, "step": 3523 }, { "epoch": 0.902144, "grad_norm": 0.35312500280288117, "learning_rate": 9.55900160165586e-07, "loss": 1.348, "step": 3524 }, { "epoch": 0.9024, "grad_norm": 0.35275133993837204, "learning_rate": 9.509419888335692e-07, "loss": 1.3416, "step": 3525 }, { "epoch": 0.902656, "grad_norm": 0.37030172652101323, "learning_rate": 9.459963966002972e-07, "loss": 1.3434, "step": 3526 }, { "epoch": 0.902912, "grad_norm": 0.3548582675380162, "learning_rate": 9.410633867316132e-07, "loss": 1.3227, "step": 3527 }, { "epoch": 0.903168, "grad_norm": 0.3602397241880957, "learning_rate": 9.361429624850404e-07, "loss": 1.3169, "step": 3528 }, { "epoch": 0.903424, "grad_norm": 0.3537612699934746, "learning_rate": 9.312351271097953e-07, "loss": 1.3172, "step": 3529 }, { "epoch": 0.90368, "grad_norm": 0.3536799565779114, "learning_rate": 9.263398838467852e-07, "loss": 1.328, "step": 3530 }, { "epoch": 0.903936, "grad_norm": 0.3498399404302602, "learning_rate": 9.214572359285934e-07, "loss": 1.3076, "step": 3531 }, { "epoch": 0.904192, "grad_norm": 0.34493355855011776, "learning_rate": 9.165871865794895e-07, "loss": 1.2807, "step": 3532 }, { "epoch": 0.904448, "grad_norm": 0.35369266746038835, "learning_rate": 9.117297390154234e-07, "loss": 1.3179, "step": 3533 }, { "epoch": 0.904704, "grad_norm": 0.3582844075899968, "learning_rate": 9.068848964440291e-07, "loss": 1.347, "step": 3534 }, { "epoch": 0.90496, "grad_norm": 0.35132462241849305, "learning_rate": 9.020526620646075e-07, "loss": 1.3379, "step": 3535 }, { "epoch": 0.905216, "grad_norm": 0.3577202957237167, "learning_rate": 8.972330390681394e-07, "loss": 1.3148, "step": 3536 }, { "epoch": 0.905472, "grad_norm": 0.35662835303231133, "learning_rate": 8.924260306372746e-07, "loss": 1.3208, "step": 3537 }, { "epoch": 0.905728, "grad_norm": 0.35413662216924896, "learning_rate": 8.876316399463425e-07, "loss": 1.3589, "step": 3538 }, { "epoch": 0.905984, "grad_norm": 0.3552808857259192, "learning_rate": 8.82849870161322e-07, "loss": 1.3442, "step": 3539 }, { "epoch": 0.90624, "grad_norm": 0.36181244543573926, "learning_rate": 8.780807244398737e-07, "loss": 1.3979, "step": 3540 }, { "epoch": 0.906496, "grad_norm": 0.35560535607807114, "learning_rate": 8.733242059313163e-07, "loss": 1.3436, "step": 3541 }, { "epoch": 0.906752, "grad_norm": 0.3624070040495191, "learning_rate": 8.685803177766283e-07, "loss": 1.3387, "step": 3542 }, { "epoch": 0.907008, "grad_norm": 0.3468424947599529, "learning_rate": 8.638490631084484e-07, "loss": 1.3332, "step": 3543 }, { "epoch": 0.907264, "grad_norm": 0.3473556233891367, "learning_rate": 8.591304450510795e-07, "loss": 1.3046, "step": 3544 }, { "epoch": 0.90752, "grad_norm": 0.35274617851864754, "learning_rate": 8.544244667204671e-07, "loss": 1.3253, "step": 3545 }, { "epoch": 0.907776, "grad_norm": 0.35377422193487745, "learning_rate": 8.497311312242207e-07, "loss": 1.2953, "step": 3546 }, { "epoch": 0.908032, "grad_norm": 0.3534469150335654, "learning_rate": 8.45050441661599e-07, "loss": 1.3303, "step": 3547 }, { "epoch": 0.908288, "grad_norm": 0.3530571345460178, "learning_rate": 8.403824011235051e-07, "loss": 1.3261, "step": 3548 }, { "epoch": 0.908544, "grad_norm": 0.3541815933467247, "learning_rate": 8.357270126924932e-07, "loss": 1.2923, "step": 3549 }, { "epoch": 0.9088, "grad_norm": 0.3543523099932284, "learning_rate": 8.310842794427665e-07, "loss": 1.3631, "step": 3550 }, { "epoch": 0.909056, "grad_norm": 0.35075062904742343, "learning_rate": 8.264542044401614e-07, "loss": 1.3317, "step": 3551 }, { "epoch": 0.909312, "grad_norm": 0.34901006617657604, "learning_rate": 8.218367907421631e-07, "loss": 1.3034, "step": 3552 }, { "epoch": 0.909568, "grad_norm": 0.35269704887541753, "learning_rate": 8.172320413978974e-07, "loss": 1.3037, "step": 3553 }, { "epoch": 0.909824, "grad_norm": 0.34530473409550927, "learning_rate": 8.126399594481161e-07, "loss": 1.266, "step": 3554 }, { "epoch": 0.91008, "grad_norm": 0.36272862896057384, "learning_rate": 8.080605479252158e-07, "loss": 1.3365, "step": 3555 }, { "epoch": 0.910336, "grad_norm": 0.34708744921729207, "learning_rate": 8.034938098532286e-07, "loss": 1.3029, "step": 3556 }, { "epoch": 0.910592, "grad_norm": 0.3577031768206211, "learning_rate": 7.989397482478045e-07, "loss": 1.3458, "step": 3557 }, { "epoch": 0.910848, "grad_norm": 0.3523283772959642, "learning_rate": 7.94398366116238e-07, "loss": 1.3472, "step": 3558 }, { "epoch": 0.911104, "grad_norm": 0.3492356768066558, "learning_rate": 7.89869666457439e-07, "loss": 1.3004, "step": 3559 }, { "epoch": 0.91136, "grad_norm": 0.35548566578995267, "learning_rate": 7.853536522619487e-07, "loss": 1.3341, "step": 3560 }, { "epoch": 0.911616, "grad_norm": 0.3397966662926343, "learning_rate": 7.808503265119305e-07, "loss": 1.2506, "step": 3561 }, { "epoch": 0.911872, "grad_norm": 0.35766724906151354, "learning_rate": 7.763596921811701e-07, "loss": 1.3526, "step": 3562 }, { "epoch": 0.912128, "grad_norm": 0.36615505784750024, "learning_rate": 7.718817522350641e-07, "loss": 1.3585, "step": 3563 }, { "epoch": 0.912384, "grad_norm": 0.3609716457036324, "learning_rate": 7.674165096306385e-07, "loss": 1.3286, "step": 3564 }, { "epoch": 0.91264, "grad_norm": 0.35884694355978014, "learning_rate": 7.629639673165257e-07, "loss": 1.3681, "step": 3565 }, { "epoch": 0.912896, "grad_norm": 0.3493205127783696, "learning_rate": 7.585241282329759e-07, "loss": 1.3097, "step": 3566 }, { "epoch": 0.913152, "grad_norm": 0.35234067483955706, "learning_rate": 7.54096995311846e-07, "loss": 1.3075, "step": 3567 }, { "epoch": 0.913408, "grad_norm": 0.3422673158689805, "learning_rate": 7.496825714766131e-07, "loss": 1.3031, "step": 3568 }, { "epoch": 0.913664, "grad_norm": 0.3487649561865556, "learning_rate": 7.452808596423455e-07, "loss": 1.305, "step": 3569 }, { "epoch": 0.91392, "grad_norm": 0.3495500009257064, "learning_rate": 7.408918627157313e-07, "loss": 1.3191, "step": 3570 }, { "epoch": 0.914176, "grad_norm": 0.35515631178923585, "learning_rate": 7.365155835950544e-07, "loss": 1.3391, "step": 3571 }, { "epoch": 0.914432, "grad_norm": 0.35636128106871723, "learning_rate": 7.321520251702052e-07, "loss": 1.3668, "step": 3572 }, { "epoch": 0.914688, "grad_norm": 0.3568889919829385, "learning_rate": 7.2780119032267e-07, "loss": 1.3627, "step": 3573 }, { "epoch": 0.914944, "grad_norm": 0.34801472271629263, "learning_rate": 7.234630819255373e-07, "loss": 1.3187, "step": 3574 }, { "epoch": 0.9152, "grad_norm": 0.3500874463141885, "learning_rate": 7.191377028434865e-07, "loss": 1.3115, "step": 3575 }, { "epoch": 0.915456, "grad_norm": 0.3501867814308275, "learning_rate": 7.148250559327952e-07, "loss": 1.3007, "step": 3576 }, { "epoch": 0.915712, "grad_norm": 0.3473709029393759, "learning_rate": 7.105251440413297e-07, "loss": 1.2907, "step": 3577 }, { "epoch": 0.915968, "grad_norm": 0.35195192466079234, "learning_rate": 7.062379700085497e-07, "loss": 1.3329, "step": 3578 }, { "epoch": 0.916224, "grad_norm": 0.3498404380329524, "learning_rate": 7.019635366655042e-07, "loss": 1.2875, "step": 3579 }, { "epoch": 0.91648, "grad_norm": 0.3432855288126404, "learning_rate": 6.977018468348262e-07, "loss": 1.3058, "step": 3580 }, { "epoch": 0.916736, "grad_norm": 0.35190461485939584, "learning_rate": 6.934529033307336e-07, "loss": 1.3191, "step": 3581 }, { "epoch": 0.916992, "grad_norm": 0.36128314755081226, "learning_rate": 6.892167089590307e-07, "loss": 1.3425, "step": 3582 }, { "epoch": 0.917248, "grad_norm": 0.35179188915227366, "learning_rate": 6.849932665170955e-07, "loss": 1.3287, "step": 3583 }, { "epoch": 0.917504, "grad_norm": 0.3555656532667669, "learning_rate": 6.807825787938949e-07, "loss": 1.3421, "step": 3584 }, { "epoch": 0.91776, "grad_norm": 0.35981232075237485, "learning_rate": 6.76584648569969e-07, "loss": 1.3524, "step": 3585 }, { "epoch": 0.918016, "grad_norm": 0.34986271659257606, "learning_rate": 6.72399478617427e-07, "loss": 1.3197, "step": 3586 }, { "epoch": 0.918272, "grad_norm": 0.3495191257761887, "learning_rate": 6.68227071699965e-07, "loss": 1.335, "step": 3587 }, { "epoch": 0.918528, "grad_norm": 0.35421725254568864, "learning_rate": 6.640674305728368e-07, "loss": 1.3649, "step": 3588 }, { "epoch": 0.918784, "grad_norm": 0.349382237458626, "learning_rate": 6.599205579828738e-07, "loss": 1.3059, "step": 3589 }, { "epoch": 0.91904, "grad_norm": 0.352900572864467, "learning_rate": 6.557864566684791e-07, "loss": 1.3199, "step": 3590 }, { "epoch": 0.919296, "grad_norm": 0.3519799639777463, "learning_rate": 6.516651293596154e-07, "loss": 1.3236, "step": 3591 }, { "epoch": 0.919552, "grad_norm": 0.3402143955230197, "learning_rate": 6.475565787778149e-07, "loss": 1.2663, "step": 3592 }, { "epoch": 0.919808, "grad_norm": 0.3508149155039848, "learning_rate": 6.434608076361648e-07, "loss": 1.3068, "step": 3593 }, { "epoch": 0.920064, "grad_norm": 0.34897341981899427, "learning_rate": 6.393778186393263e-07, "loss": 1.3138, "step": 3594 }, { "epoch": 0.92032, "grad_norm": 0.35628121902740284, "learning_rate": 6.353076144835069e-07, "loss": 1.3098, "step": 3595 }, { "epoch": 0.920576, "grad_norm": 0.3488804125423955, "learning_rate": 6.312501978564811e-07, "loss": 1.2959, "step": 3596 }, { "epoch": 0.920832, "grad_norm": 0.3558162811999687, "learning_rate": 6.272055714375747e-07, "loss": 1.3137, "step": 3597 }, { "epoch": 0.921088, "grad_norm": 0.34914395992482733, "learning_rate": 6.231737378976665e-07, "loss": 1.2808, "step": 3598 }, { "epoch": 0.921344, "grad_norm": 0.3493222158036344, "learning_rate": 6.191546998991916e-07, "loss": 1.3073, "step": 3599 }, { "epoch": 0.9216, "grad_norm": 0.4128103526246106, "learning_rate": 6.151484600961266e-07, "loss": 1.375, "step": 3600 }, { "epoch": 0.921856, "grad_norm": 0.35118707265119375, "learning_rate": 6.111550211340112e-07, "loss": 1.3123, "step": 3601 }, { "epoch": 0.922112, "grad_norm": 0.3436505578914053, "learning_rate": 6.071743856499179e-07, "loss": 1.2843, "step": 3602 }, { "epoch": 0.922368, "grad_norm": 0.3594562982182878, "learning_rate": 6.032065562724754e-07, "loss": 1.3143, "step": 3603 }, { "epoch": 0.922624, "grad_norm": 0.37647005001464884, "learning_rate": 5.992515356218475e-07, "loss": 1.3222, "step": 3604 }, { "epoch": 0.92288, "grad_norm": 0.3456273548822862, "learning_rate": 5.953093263097476e-07, "loss": 1.2809, "step": 3605 }, { "epoch": 0.923136, "grad_norm": 0.3592822235877073, "learning_rate": 5.913799309394175e-07, "loss": 1.3254, "step": 3606 }, { "epoch": 0.923392, "grad_norm": 0.36279905948464897, "learning_rate": 5.874633521056483e-07, "loss": 1.3213, "step": 3607 }, { "epoch": 0.923648, "grad_norm": 0.34706910163143667, "learning_rate": 5.835595923947668e-07, "loss": 1.2785, "step": 3608 }, { "epoch": 0.923904, "grad_norm": 0.36184464960500284, "learning_rate": 5.796686543846264e-07, "loss": 1.2989, "step": 3609 }, { "epoch": 0.92416, "grad_norm": 0.4666822630572504, "learning_rate": 5.757905406446229e-07, "loss": 1.3156, "step": 3610 }, { "epoch": 0.924416, "grad_norm": 0.35158112123945845, "learning_rate": 5.719252537356768e-07, "loss": 1.3223, "step": 3611 }, { "epoch": 0.924672, "grad_norm": 0.35901118914065366, "learning_rate": 5.680727962102417e-07, "loss": 1.3346, "step": 3612 }, { "epoch": 0.924928, "grad_norm": 0.3507316866462203, "learning_rate": 5.642331706122983e-07, "loss": 1.3187, "step": 3613 }, { "epoch": 0.925184, "grad_norm": 0.3522079565993651, "learning_rate": 5.604063794773562e-07, "loss": 1.3196, "step": 3614 }, { "epoch": 0.92544, "grad_norm": 0.35484688663032976, "learning_rate": 5.56592425332445e-07, "loss": 1.3253, "step": 3615 }, { "epoch": 0.925696, "grad_norm": 0.5376993171619279, "learning_rate": 5.527913106961214e-07, "loss": 1.3195, "step": 3616 }, { "epoch": 0.925952, "grad_norm": 0.3523097804600051, "learning_rate": 5.490030380784661e-07, "loss": 1.3123, "step": 3617 }, { "epoch": 0.926208, "grad_norm": 0.3393998364806952, "learning_rate": 5.452276099810672e-07, "loss": 1.2768, "step": 3618 }, { "epoch": 0.926464, "grad_norm": 0.7972038959607683, "learning_rate": 5.414650288970436e-07, "loss": 1.3008, "step": 3619 }, { "epoch": 0.92672, "grad_norm": 0.3513779759490281, "learning_rate": 5.3771529731103e-07, "loss": 1.3265, "step": 3620 }, { "epoch": 0.926976, "grad_norm": 0.35427787038276953, "learning_rate": 5.339784176991636e-07, "loss": 1.3465, "step": 3621 }, { "epoch": 0.927232, "grad_norm": 0.3525359612351788, "learning_rate": 5.302543925291103e-07, "loss": 1.335, "step": 3622 }, { "epoch": 0.927488, "grad_norm": 0.3540421323396622, "learning_rate": 5.265432242600387e-07, "loss": 1.3197, "step": 3623 }, { "epoch": 0.927744, "grad_norm": 0.3391691953596326, "learning_rate": 5.228449153426263e-07, "loss": 1.2752, "step": 3624 }, { "epoch": 0.928, "grad_norm": 0.45866546455341267, "learning_rate": 5.191594682190659e-07, "loss": 1.3489, "step": 3625 }, { "epoch": 0.928256, "grad_norm": 0.34174606361996795, "learning_rate": 5.154868853230488e-07, "loss": 1.2799, "step": 3626 }, { "epoch": 0.928512, "grad_norm": 0.35743804729337525, "learning_rate": 5.118271690797794e-07, "loss": 1.3501, "step": 3627 }, { "epoch": 0.928768, "grad_norm": 0.35012140001483644, "learning_rate": 5.0818032190596e-07, "loss": 1.3074, "step": 3628 }, { "epoch": 0.929024, "grad_norm": 0.34905979854782504, "learning_rate": 5.045463462097954e-07, "loss": 1.293, "step": 3629 }, { "epoch": 0.92928, "grad_norm": 0.36264173673598943, "learning_rate": 5.009252443909929e-07, "loss": 1.2885, "step": 3630 }, { "epoch": 0.929536, "grad_norm": 0.3436746431518763, "learning_rate": 4.973170188407573e-07, "loss": 1.2724, "step": 3631 }, { "epoch": 0.929792, "grad_norm": 0.34797870538608733, "learning_rate": 4.937216719417892e-07, "loss": 1.3166, "step": 3632 }, { "epoch": 0.930048, "grad_norm": 0.3523741839584143, "learning_rate": 4.901392060682853e-07, "loss": 1.3251, "step": 3633 }, { "epoch": 0.930304, "grad_norm": 0.34944842459917086, "learning_rate": 4.865696235859374e-07, "loss": 1.3032, "step": 3634 }, { "epoch": 0.93056, "grad_norm": 0.34102312564289916, "learning_rate": 4.830129268519334e-07, "loss": 1.2889, "step": 3635 }, { "epoch": 0.930816, "grad_norm": 0.3471565159973024, "learning_rate": 4.794691182149435e-07, "loss": 1.2901, "step": 3636 }, { "epoch": 0.931072, "grad_norm": 0.35192889952675166, "learning_rate": 4.7593820001513134e-07, "loss": 1.3194, "step": 3637 }, { "epoch": 0.931328, "grad_norm": 0.47049123091772027, "learning_rate": 4.724201745841495e-07, "loss": 1.3062, "step": 3638 }, { "epoch": 0.931584, "grad_norm": 0.3442210390393018, "learning_rate": 4.6891504424513733e-07, "loss": 1.2754, "step": 3639 }, { "epoch": 0.93184, "grad_norm": 0.34912641921433657, "learning_rate": 4.654228113127168e-07, "loss": 1.3301, "step": 3640 }, { "epoch": 0.932096, "grad_norm": 0.3472264652945926, "learning_rate": 4.619434780929966e-07, "loss": 1.3311, "step": 3641 }, { "epoch": 0.932352, "grad_norm": 0.3487894673998158, "learning_rate": 4.584770468835564e-07, "loss": 1.2905, "step": 3642 }, { "epoch": 0.932608, "grad_norm": 0.35570886866130924, "learning_rate": 4.5502351997347206e-07, "loss": 1.3149, "step": 3643 }, { "epoch": 0.932864, "grad_norm": 0.35757777786650197, "learning_rate": 4.515828996432836e-07, "loss": 1.3469, "step": 3644 }, { "epoch": 0.93312, "grad_norm": 0.34538228718386255, "learning_rate": 4.481551881650159e-07, "loss": 1.2926, "step": 3645 }, { "epoch": 0.933376, "grad_norm": 0.3542692201576496, "learning_rate": 4.4474038780216945e-07, "loss": 1.3555, "step": 3646 }, { "epoch": 0.933632, "grad_norm": 0.34896297061900816, "learning_rate": 4.413385008097204e-07, "loss": 1.291, "step": 3647 }, { "epoch": 0.933888, "grad_norm": 0.3582983596512249, "learning_rate": 4.379495294341052e-07, "loss": 1.3675, "step": 3648 }, { "epoch": 0.934144, "grad_norm": 0.3482172403823663, "learning_rate": 4.3457347591324695e-07, "loss": 1.3215, "step": 3649 }, { "epoch": 0.9344, "grad_norm": 0.3500905197844345, "learning_rate": 4.3121034247653124e-07, "loss": 1.3148, "step": 3650 }, { "epoch": 0.934656, "grad_norm": 0.35163533140821657, "learning_rate": 4.2786013134481273e-07, "loss": 1.3311, "step": 3651 }, { "epoch": 0.934912, "grad_norm": 0.35989463782872383, "learning_rate": 4.2452284473041285e-07, "loss": 1.2948, "step": 3652 }, { "epoch": 0.935168, "grad_norm": 0.34757994884300936, "learning_rate": 4.2119848483711757e-07, "loss": 1.3399, "step": 3653 }, { "epoch": 0.935424, "grad_norm": 0.3430665382796469, "learning_rate": 4.1788705386017757e-07, "loss": 1.2626, "step": 3654 }, { "epoch": 0.93568, "grad_norm": 0.3514060996218677, "learning_rate": 4.145885539863037e-07, "loss": 1.3337, "step": 3655 }, { "epoch": 0.935936, "grad_norm": 0.3627494674882456, "learning_rate": 4.1130298739367134e-07, "loss": 1.3405, "step": 3656 }, { "epoch": 0.936192, "grad_norm": 0.5677440041935232, "learning_rate": 4.080303562519117e-07, "loss": 1.3641, "step": 3657 }, { "epoch": 0.936448, "grad_norm": 0.35395362858250634, "learning_rate": 4.047706627221204e-07, "loss": 1.3414, "step": 3658 }, { "epoch": 0.936704, "grad_norm": 0.352600069479312, "learning_rate": 4.0152390895684013e-07, "loss": 1.3462, "step": 3659 }, { "epoch": 0.93696, "grad_norm": 0.3468000525896169, "learning_rate": 3.9829009710007804e-07, "loss": 1.2832, "step": 3660 }, { "epoch": 0.937216, "grad_norm": 0.3451692647891084, "learning_rate": 3.950692292872882e-07, "loss": 1.2733, "step": 3661 }, { "epoch": 0.937472, "grad_norm": 0.34673806818050334, "learning_rate": 3.918613076453781e-07, "loss": 1.2583, "step": 3662 }, { "epoch": 0.937728, "grad_norm": 0.3497431210258776, "learning_rate": 3.886663342927133e-07, "loss": 1.3481, "step": 3663 }, { "epoch": 0.937984, "grad_norm": 0.34610731095162506, "learning_rate": 3.8548431133909937e-07, "loss": 1.3053, "step": 3664 }, { "epoch": 0.93824, "grad_norm": 0.3473184463062814, "learning_rate": 3.823152408857933e-07, "loss": 1.3146, "step": 3665 }, { "epoch": 0.938496, "grad_norm": 0.34628643743692206, "learning_rate": 3.7915912502550333e-07, "loss": 1.3011, "step": 3666 }, { "epoch": 0.938752, "grad_norm": 0.34658919177669306, "learning_rate": 3.7601596584237567e-07, "loss": 1.2981, "step": 3667 }, { "epoch": 0.939008, "grad_norm": 0.3524424921528069, "learning_rate": 3.728857654120055e-07, "loss": 1.2861, "step": 3668 }, { "epoch": 0.939264, "grad_norm": 0.34919207181733214, "learning_rate": 3.6976852580142833e-07, "loss": 1.3512, "step": 3669 }, { "epoch": 0.93952, "grad_norm": 0.35094282376697344, "learning_rate": 3.666642490691241e-07, "loss": 1.3297, "step": 3670 }, { "epoch": 0.939776, "grad_norm": 0.35574517695435776, "learning_rate": 3.6357293726501097e-07, "loss": 1.3437, "step": 3671 }, { "epoch": 0.940032, "grad_norm": 0.35741602173527104, "learning_rate": 3.6049459243044483e-07, "loss": 1.3731, "step": 3672 }, { "epoch": 0.940288, "grad_norm": 0.3478913311254024, "learning_rate": 3.574292165982196e-07, "loss": 1.3146, "step": 3673 }, { "epoch": 0.940544, "grad_norm": 0.34489897805438563, "learning_rate": 3.543768117925606e-07, "loss": 1.3128, "step": 3674 }, { "epoch": 0.9408, "grad_norm": 0.35047016884341775, "learning_rate": 3.5133738002913756e-07, "loss": 1.3227, "step": 3675 }, { "epoch": 0.941056, "grad_norm": 0.34769112544260977, "learning_rate": 3.483109233150472e-07, "loss": 1.3085, "step": 3676 }, { "epoch": 0.941312, "grad_norm": 0.34327564729510157, "learning_rate": 3.4529744364881325e-07, "loss": 1.2747, "step": 3677 }, { "epoch": 0.941568, "grad_norm": 0.34683755489292145, "learning_rate": 3.422969430204015e-07, "loss": 1.3138, "step": 3678 }, { "epoch": 0.941824, "grad_norm": 0.34695239091406455, "learning_rate": 3.393094234111982e-07, "loss": 1.3127, "step": 3679 }, { "epoch": 0.94208, "grad_norm": 0.49883208371500354, "learning_rate": 3.3633488679402084e-07, "loss": 1.3649, "step": 3680 }, { "epoch": 0.942336, "grad_norm": 0.35123787382563937, "learning_rate": 3.3337333513311144e-07, "loss": 1.3135, "step": 3681 }, { "epoch": 0.942592, "grad_norm": 0.3510292588493751, "learning_rate": 3.304247703841412e-07, "loss": 1.3012, "step": 3682 }, { "epoch": 0.942848, "grad_norm": 0.3511237754765743, "learning_rate": 3.274891944942016e-07, "loss": 1.3405, "step": 3683 }, { "epoch": 0.943104, "grad_norm": 0.3418045164664127, "learning_rate": 3.245666094018107e-07, "loss": 1.3059, "step": 3684 }, { "epoch": 0.94336, "grad_norm": 0.3592500676442756, "learning_rate": 3.216570170369004e-07, "loss": 1.33, "step": 3685 }, { "epoch": 0.943616, "grad_norm": 0.38227780782334864, "learning_rate": 3.187604193208338e-07, "loss": 1.3226, "step": 3686 }, { "epoch": 0.943872, "grad_norm": 0.34467305191341785, "learning_rate": 3.158768181663874e-07, "loss": 1.2867, "step": 3687 }, { "epoch": 0.944128, "grad_norm": 0.34753212688026563, "learning_rate": 3.1300621547775136e-07, "loss": 1.3276, "step": 3688 }, { "epoch": 0.944384, "grad_norm": 0.34974246363777745, "learning_rate": 3.1014861315053825e-07, "loss": 1.2963, "step": 3689 }, { "epoch": 0.94464, "grad_norm": 0.35571572153488806, "learning_rate": 3.0730401307177414e-07, "loss": 1.2788, "step": 3690 }, { "epoch": 0.944896, "grad_norm": 0.36202426303980256, "learning_rate": 3.044724171198965e-07, "loss": 1.3217, "step": 3691 }, { "epoch": 0.945152, "grad_norm": 0.3570628538372465, "learning_rate": 3.0165382716475846e-07, "loss": 1.363, "step": 3692 }, { "epoch": 0.945408, "grad_norm": 0.358890884545062, "learning_rate": 2.988482450676267e-07, "loss": 1.3436, "step": 3693 }, { "epoch": 0.945664, "grad_norm": 0.35378478010708747, "learning_rate": 2.960556726811703e-07, "loss": 1.3533, "step": 3694 }, { "epoch": 0.94592, "grad_norm": 0.34934337285706285, "learning_rate": 2.9327611184947644e-07, "loss": 1.3084, "step": 3695 }, { "epoch": 0.946176, "grad_norm": 0.3457964973613574, "learning_rate": 2.905095644080369e-07, "loss": 1.3064, "step": 3696 }, { "epoch": 0.946432, "grad_norm": 0.34617646920800554, "learning_rate": 2.877560321837436e-07, "loss": 1.3112, "step": 3697 }, { "epoch": 0.946688, "grad_norm": 0.3443321129515638, "learning_rate": 2.8501551699490206e-07, "loss": 1.2978, "step": 3698 }, { "epoch": 0.946944, "grad_norm": 0.357534570546533, "learning_rate": 2.822880206512224e-07, "loss": 1.3371, "step": 3699 }, { "epoch": 0.9472, "grad_norm": 0.3464414449377667, "learning_rate": 2.795735449538084e-07, "loss": 1.286, "step": 3700 }, { "epoch": 0.947456, "grad_norm": 0.36388462021714646, "learning_rate": 2.7687209169517506e-07, "loss": 1.3667, "step": 3701 }, { "epoch": 0.947712, "grad_norm": 0.3468309764742381, "learning_rate": 2.7418366265923535e-07, "loss": 1.2784, "step": 3702 }, { "epoch": 0.947968, "grad_norm": 0.3478519166322828, "learning_rate": 2.715082596212981e-07, "loss": 1.3151, "step": 3703 }, { "epoch": 0.948224, "grad_norm": 0.3535066419612759, "learning_rate": 2.6884588434807456e-07, "loss": 1.3372, "step": 3704 }, { "epoch": 0.94848, "grad_norm": 0.3516442038861694, "learning_rate": 2.661965385976739e-07, "loss": 1.3391, "step": 3705 }, { "epoch": 0.948736, "grad_norm": 0.349091325807776, "learning_rate": 2.6356022411959447e-07, "loss": 1.3025, "step": 3706 }, { "epoch": 0.948992, "grad_norm": 0.3482138134701709, "learning_rate": 2.60936942654737e-07, "loss": 1.3055, "step": 3707 }, { "epoch": 0.949248, "grad_norm": 0.3586782407513873, "learning_rate": 2.5832669593539583e-07, "loss": 1.3103, "step": 3708 }, { "epoch": 0.949504, "grad_norm": 0.3424976332019905, "learning_rate": 2.557294856852477e-07, "loss": 1.2735, "step": 3709 }, { "epoch": 0.94976, "grad_norm": 0.3440927238857434, "learning_rate": 2.531453136193718e-07, "loss": 1.2836, "step": 3710 }, { "epoch": 0.950016, "grad_norm": 0.34619957050631434, "learning_rate": 2.5057418144423196e-07, "loss": 1.3102, "step": 3711 }, { "epoch": 0.950272, "grad_norm": 0.34480422347330414, "learning_rate": 2.480160908576834e-07, "loss": 1.3185, "step": 3712 }, { "epoch": 0.950528, "grad_norm": 0.3527175693088532, "learning_rate": 2.454710435489682e-07, "loss": 1.345, "step": 3713 }, { "epoch": 0.950784, "grad_norm": 0.3494360040131702, "learning_rate": 2.4293904119871535e-07, "loss": 1.3118, "step": 3714 }, { "epoch": 0.95104, "grad_norm": 0.3508094221365896, "learning_rate": 2.4042008547894067e-07, "loss": 1.3038, "step": 3715 }, { "epoch": 0.951296, "grad_norm": 0.36291145023559584, "learning_rate": 2.3791417805304028e-07, "loss": 1.3189, "step": 3716 }, { "epoch": 0.951552, "grad_norm": 0.3522821537551188, "learning_rate": 2.3542132057580159e-07, "loss": 1.3144, "step": 3717 }, { "epoch": 0.951808, "grad_norm": 0.35677326081905264, "learning_rate": 2.3294151469338777e-07, "loss": 1.3296, "step": 3718 }, { "epoch": 0.952064, "grad_norm": 0.3492062684946765, "learning_rate": 2.3047476204334452e-07, "loss": 1.309, "step": 3719 }, { "epoch": 0.95232, "grad_norm": 0.3519135669562277, "learning_rate": 2.2802106425460214e-07, "loss": 1.3367, "step": 3720 }, { "epoch": 0.952576, "grad_norm": 0.3543611436031582, "learning_rate": 2.2558042294746451e-07, "loss": 1.2959, "step": 3721 }, { "epoch": 0.952832, "grad_norm": 0.35237920779317133, "learning_rate": 2.2315283973361578e-07, "loss": 1.3345, "step": 3722 }, { "epoch": 0.953088, "grad_norm": 0.35645770761222434, "learning_rate": 2.2073831621611808e-07, "loss": 1.3609, "step": 3723 }, { "epoch": 0.953344, "grad_norm": 0.3592486549506144, "learning_rate": 2.1833685398940706e-07, "loss": 1.3794, "step": 3724 }, { "epoch": 0.9536, "grad_norm": 0.3546579554828751, "learning_rate": 2.1594845463929648e-07, "loss": 1.3185, "step": 3725 }, { "epoch": 0.953856, "grad_norm": 0.35656780226398344, "learning_rate": 2.1357311974297356e-07, "loss": 1.3147, "step": 3726 }, { "epoch": 0.954112, "grad_norm": 0.3432444771848927, "learning_rate": 2.1121085086899694e-07, "loss": 1.2913, "step": 3727 }, { "epoch": 0.954368, "grad_norm": 0.3519864137637237, "learning_rate": 2.088616495772966e-07, "loss": 1.2992, "step": 3728 }, { "epoch": 0.954624, "grad_norm": 0.3524566513481728, "learning_rate": 2.065255174191716e-07, "loss": 1.3428, "step": 3729 }, { "epoch": 0.95488, "grad_norm": 0.35300664407009874, "learning_rate": 2.0420245593729903e-07, "loss": 1.328, "step": 3730 }, { "epoch": 0.955136, "grad_norm": 0.3486016719590103, "learning_rate": 2.0189246666571405e-07, "loss": 1.2997, "step": 3731 }, { "epoch": 0.955392, "grad_norm": 0.355178532421379, "learning_rate": 1.9959555112982976e-07, "loss": 1.2968, "step": 3732 }, { "epoch": 0.955648, "grad_norm": 0.3465684809567164, "learning_rate": 1.973117108464173e-07, "loss": 1.3177, "step": 3733 }, { "epoch": 0.955904, "grad_norm": 0.3465817343905661, "learning_rate": 1.9504094732361479e-07, "loss": 1.2841, "step": 3734 }, { "epoch": 0.95616, "grad_norm": 0.35698292501992435, "learning_rate": 1.9278326206093156e-07, "loss": 1.3239, "step": 3735 }, { "epoch": 0.956416, "grad_norm": 0.5456809682709015, "learning_rate": 1.9053865654923287e-07, "loss": 1.3278, "step": 3736 }, { "epoch": 0.956672, "grad_norm": 0.3544702143320293, "learning_rate": 1.8830713227075305e-07, "loss": 1.3301, "step": 3737 }, { "epoch": 0.956928, "grad_norm": 0.3601081575765337, "learning_rate": 1.860886906990844e-07, "loss": 1.3762, "step": 3738 }, { "epoch": 0.957184, "grad_norm": 0.345714280447751, "learning_rate": 1.8388333329918185e-07, "loss": 1.2802, "step": 3739 }, { "epoch": 0.95744, "grad_norm": 0.34768500981504163, "learning_rate": 1.8169106152735815e-07, "loss": 1.3022, "step": 3740 }, { "epoch": 0.957696, "grad_norm": 0.35618702694685117, "learning_rate": 1.7951187683128645e-07, "loss": 1.361, "step": 3741 }, { "epoch": 0.957952, "grad_norm": 0.3465077292678786, "learning_rate": 1.7734578064999564e-07, "loss": 1.2947, "step": 3742 }, { "epoch": 0.958208, "grad_norm": 0.3493504939649855, "learning_rate": 1.7519277441387705e-07, "loss": 1.2827, "step": 3743 }, { "epoch": 0.958464, "grad_norm": 0.35354899600410666, "learning_rate": 1.7305285954467345e-07, "loss": 1.3258, "step": 3744 }, { "epoch": 0.95872, "grad_norm": 0.3438226209638275, "learning_rate": 1.709260374554811e-07, "loss": 1.2919, "step": 3745 }, { "epoch": 0.958976, "grad_norm": 0.3445092186975909, "learning_rate": 1.6881230955075435e-07, "loss": 1.2886, "step": 3746 }, { "epoch": 0.959232, "grad_norm": 0.3520236721544524, "learning_rate": 1.667116772262989e-07, "loss": 1.3493, "step": 3747 }, { "epoch": 0.959488, "grad_norm": 0.35481108577562315, "learning_rate": 1.64624141869274e-07, "loss": 1.3398, "step": 3748 }, { "epoch": 0.959744, "grad_norm": 0.3609011756511318, "learning_rate": 1.6254970485819032e-07, "loss": 1.3687, "step": 3749 }, { "epoch": 0.96, "grad_norm": 0.3589404262752977, "learning_rate": 1.6048836756290542e-07, "loss": 1.3725, "step": 3750 }, { "epoch": 0.960256, "grad_norm": 0.3588016450402552, "learning_rate": 1.5844013134463265e-07, "loss": 1.3681, "step": 3751 }, { "epoch": 0.960512, "grad_norm": 0.34706286850723644, "learning_rate": 1.5640499755592786e-07, "loss": 1.319, "step": 3752 }, { "epoch": 0.960768, "grad_norm": 0.3507443944698336, "learning_rate": 1.5438296754069827e-07, "loss": 1.3018, "step": 3753 }, { "epoch": 0.961024, "grad_norm": 0.43183290494199866, "learning_rate": 1.5237404263419575e-07, "loss": 1.2673, "step": 3754 }, { "epoch": 0.96128, "grad_norm": 0.40656043549809767, "learning_rate": 1.503782241630236e-07, "loss": 1.3371, "step": 3755 }, { "epoch": 0.961536, "grad_norm": 0.34710465562296583, "learning_rate": 1.4839551344512093e-07, "loss": 1.309, "step": 3756 }, { "epoch": 0.961792, "grad_norm": 0.3524167642896055, "learning_rate": 1.464259117897804e-07, "loss": 1.2886, "step": 3757 }, { "epoch": 0.962048, "grad_norm": 0.3515247920397948, "learning_rate": 1.4446942049762824e-07, "loss": 1.31, "step": 3758 }, { "epoch": 0.962304, "grad_norm": 0.348908550499184, "learning_rate": 1.425260408606466e-07, "loss": 1.3251, "step": 3759 }, { "epoch": 0.96256, "grad_norm": 0.34624626425083055, "learning_rate": 1.4059577416214442e-07, "loss": 1.2766, "step": 3760 }, { "epoch": 0.962816, "grad_norm": 0.3531943801194355, "learning_rate": 1.3867862167678438e-07, "loss": 1.3247, "step": 3761 }, { "epoch": 0.963072, "grad_norm": 0.353600873387186, "learning_rate": 1.3677458467056258e-07, "loss": 1.3404, "step": 3762 }, { "epoch": 0.963328, "grad_norm": 0.35528309808096603, "learning_rate": 1.3488366440081112e-07, "loss": 1.3426, "step": 3763 }, { "epoch": 0.963584, "grad_norm": 0.3635077162095375, "learning_rate": 1.3300586211620892e-07, "loss": 1.333, "step": 3764 }, { "epoch": 0.96384, "grad_norm": 0.36589848227862065, "learning_rate": 1.3114117905676625e-07, "loss": 1.3554, "step": 3765 }, { "epoch": 0.964096, "grad_norm": 0.37992993333715097, "learning_rate": 1.2928961645383154e-07, "loss": 1.3734, "step": 3766 }, { "epoch": 0.964352, "grad_norm": 0.35435015291180033, "learning_rate": 1.2745117553009113e-07, "loss": 1.3592, "step": 3767 }, { "epoch": 0.964608, "grad_norm": 0.35166604687861014, "learning_rate": 1.2562585749956058e-07, "loss": 1.2974, "step": 3768 }, { "epoch": 0.964864, "grad_norm": 0.35372962632464877, "learning_rate": 1.2381366356759794e-07, "loss": 1.3719, "step": 3769 }, { "epoch": 0.96512, "grad_norm": 0.35725880146078065, "learning_rate": 1.2201459493088818e-07, "loss": 1.3179, "step": 3770 }, { "epoch": 0.965376, "grad_norm": 0.35200565173623977, "learning_rate": 1.202286527774521e-07, "loss": 1.3383, "step": 3771 }, { "epoch": 0.965632, "grad_norm": 0.3455199299990574, "learning_rate": 1.1845583828664187e-07, "loss": 1.2903, "step": 3772 }, { "epoch": 0.965888, "grad_norm": 0.34786125419147174, "learning_rate": 1.1669615262914102e-07, "loss": 1.2915, "step": 3773 }, { "epoch": 0.966144, "grad_norm": 0.3447054310299593, "learning_rate": 1.1494959696696006e-07, "loss": 1.3032, "step": 3774 }, { "epoch": 0.9664, "grad_norm": 0.3465532618193849, "learning_rate": 1.132161724534453e-07, "loss": 1.3015, "step": 3775 }, { "epoch": 0.966656, "grad_norm": 0.34990752761191285, "learning_rate": 1.1149588023326773e-07, "loss": 1.3193, "step": 3776 }, { "epoch": 0.966912, "grad_norm": 0.3590234148770652, "learning_rate": 1.0978872144242536e-07, "loss": 1.336, "step": 3777 }, { "epoch": 0.967168, "grad_norm": 0.3397683826863262, "learning_rate": 1.080946972082475e-07, "loss": 1.285, "step": 3778 }, { "epoch": 0.967424, "grad_norm": 0.35458614593671955, "learning_rate": 1.06413808649386e-07, "loss": 1.3381, "step": 3779 }, { "epoch": 0.96768, "grad_norm": 0.3453849687078781, "learning_rate": 1.0474605687581962e-07, "loss": 1.2928, "step": 3780 }, { "epoch": 0.967936, "grad_norm": 0.35192355638990197, "learning_rate": 1.0309144298885631e-07, "loss": 1.3334, "step": 3781 }, { "epoch": 0.968192, "grad_norm": 0.35295276683997634, "learning_rate": 1.0144996808112207e-07, "loss": 1.3172, "step": 3782 }, { "epoch": 0.968448, "grad_norm": 0.35374365132960894, "learning_rate": 9.982163323656979e-08, "loss": 1.3449, "step": 3783 }, { "epoch": 0.968704, "grad_norm": 0.3572162078738479, "learning_rate": 9.820643953047715e-08, "loss": 1.3314, "step": 3784 }, { "epoch": 0.96896, "grad_norm": 0.3560131219232579, "learning_rate": 9.660438802943761e-08, "loss": 1.3432, "step": 3785 }, { "epoch": 0.969216, "grad_norm": 0.3543923590989018, "learning_rate": 9.501547979137383e-08, "loss": 1.3357, "step": 3786 }, { "epoch": 0.969472, "grad_norm": 0.3754716752990853, "learning_rate": 9.343971586552648e-08, "loss": 1.3661, "step": 3787 }, { "epoch": 0.969728, "grad_norm": 0.3530671159274057, "learning_rate": 9.187709729245432e-08, "loss": 1.3161, "step": 3788 }, { "epoch": 0.969984, "grad_norm": 0.36079833164501074, "learning_rate": 9.032762510403636e-08, "loss": 1.3572, "step": 3789 }, { "epoch": 0.97024, "grad_norm": 0.34745292722408605, "learning_rate": 8.879130032346973e-08, "loss": 1.307, "step": 3790 }, { "epoch": 0.970496, "grad_norm": 0.353658918345041, "learning_rate": 8.726812396527174e-08, "loss": 1.3273, "step": 3791 }, { "epoch": 0.970752, "grad_norm": 0.3477556545406439, "learning_rate": 8.575809703527782e-08, "loss": 1.3038, "step": 3792 }, { "epoch": 0.971008, "grad_norm": 0.35208745353302107, "learning_rate": 8.4261220530637e-08, "loss": 1.334, "step": 3793 }, { "epoch": 0.971264, "grad_norm": 0.3439914711689724, "learning_rate": 8.277749543981861e-08, "loss": 1.3121, "step": 3794 }, { "epoch": 0.97152, "grad_norm": 0.3650107700926055, "learning_rate": 8.130692274259888e-08, "loss": 1.3579, "step": 3795 }, { "epoch": 0.971776, "grad_norm": 0.359581419702592, "learning_rate": 7.98495034100788e-08, "loss": 1.3002, "step": 3796 }, { "epoch": 0.972032, "grad_norm": 0.3437992474925989, "learning_rate": 7.840523840467074e-08, "loss": 1.2895, "step": 3797 }, { "epoch": 0.972288, "grad_norm": 0.3457647055287586, "learning_rate": 7.697412868009402e-08, "loss": 1.3022, "step": 3798 }, { "epoch": 0.972544, "grad_norm": 0.35333293078351485, "learning_rate": 7.555617518139047e-08, "loss": 1.3607, "step": 3799 }, { "epoch": 0.9728, "grad_norm": 0.34217617369114206, "learning_rate": 7.415137884490886e-08, "loss": 1.2955, "step": 3800 }, { "epoch": 0.973056, "grad_norm": 0.3839225444299931, "learning_rate": 7.275974059830714e-08, "loss": 1.3439, "step": 3801 }, { "epoch": 0.973312, "grad_norm": 0.34351615172085204, "learning_rate": 7.138126136056134e-08, "loss": 1.2672, "step": 3802 }, { "epoch": 0.973568, "grad_norm": 0.35270898812469614, "learning_rate": 7.001594204195216e-08, "loss": 1.3505, "step": 3803 }, { "epoch": 0.973824, "grad_norm": 0.3528885161914668, "learning_rate": 6.866378354407399e-08, "loss": 1.3299, "step": 3804 }, { "epoch": 0.97408, "grad_norm": 0.3460153308469529, "learning_rate": 6.73247867598259e-08, "loss": 1.2987, "step": 3805 }, { "epoch": 0.974336, "grad_norm": 0.3463713192196872, "learning_rate": 6.599895257341837e-08, "loss": 1.2844, "step": 3806 }, { "epoch": 0.974592, "grad_norm": 0.3491302687297275, "learning_rate": 6.468628186037107e-08, "loss": 1.3157, "step": 3807 }, { "epoch": 0.974848, "grad_norm": 0.3500896335344266, "learning_rate": 6.33867754875106e-08, "loss": 1.319, "step": 3808 }, { "epoch": 0.975104, "grad_norm": 0.34294023316509276, "learning_rate": 6.210043431296608e-08, "loss": 1.2966, "step": 3809 }, { "epoch": 0.97536, "grad_norm": 0.358061324604674, "learning_rate": 6.082725918618248e-08, "loss": 1.3331, "step": 3810 }, { "epoch": 0.975616, "grad_norm": 0.35589401139098437, "learning_rate": 5.956725094789839e-08, "loss": 1.3371, "step": 3811 }, { "epoch": 0.975872, "grad_norm": 0.3516566823584661, "learning_rate": 5.832041043016601e-08, "loss": 1.3356, "step": 3812 }, { "epoch": 0.976128, "grad_norm": 0.36194903329381856, "learning_rate": 5.708673845634005e-08, "loss": 1.3476, "step": 3813 }, { "epoch": 0.976384, "grad_norm": 0.3578975714692887, "learning_rate": 5.586623584107998e-08, "loss": 1.3454, "step": 3814 }, { "epoch": 0.97664, "grad_norm": 0.35674902074061354, "learning_rate": 5.465890339034774e-08, "loss": 1.3614, "step": 3815 }, { "epoch": 0.976896, "grad_norm": 0.3424596876888329, "learning_rate": 5.3464741901407826e-08, "loss": 1.3084, "step": 3816 }, { "epoch": 0.977152, "grad_norm": 0.35434995759530646, "learning_rate": 5.228375216282944e-08, "loss": 1.3251, "step": 3817 }, { "epoch": 0.977408, "grad_norm": 0.34634332649603466, "learning_rate": 5.1115934954482086e-08, "loss": 1.3115, "step": 3818 }, { "epoch": 0.977664, "grad_norm": 0.3496581868508856, "learning_rate": 4.996129104753555e-08, "loss": 1.3228, "step": 3819 }, { "epoch": 0.97792, "grad_norm": 0.36266905873655914, "learning_rate": 4.881982120446438e-08, "loss": 1.3732, "step": 3820 }, { "epoch": 0.978176, "grad_norm": 0.3495836678056541, "learning_rate": 4.769152617904116e-08, "loss": 1.2845, "step": 3821 }, { "epoch": 0.978432, "grad_norm": 0.34404354553081207, "learning_rate": 4.657640671633656e-08, "loss": 1.2761, "step": 3822 }, { "epoch": 0.978688, "grad_norm": 0.35880344722926893, "learning_rate": 4.547446355272378e-08, "loss": 1.3567, "step": 3823 }, { "epoch": 0.978944, "grad_norm": 0.3548435710041313, "learning_rate": 4.438569741587628e-08, "loss": 1.3442, "step": 3824 }, { "epoch": 0.9792, "grad_norm": 0.3517953042632612, "learning_rate": 4.331010902476118e-08, "loss": 1.3246, "step": 3825 }, { "epoch": 0.979456, "grad_norm": 0.35696292875956626, "learning_rate": 4.2247699089648097e-08, "loss": 1.3278, "step": 3826 }, { "epoch": 0.979712, "grad_norm": 0.34590955714027705, "learning_rate": 4.119846831210028e-08, "loss": 1.2393, "step": 3827 }, { "epoch": 0.979968, "grad_norm": 0.3749090380556617, "learning_rate": 4.0162417384981275e-08, "loss": 1.3397, "step": 3828 }, { "epoch": 0.980224, "grad_norm": 0.3543619764178797, "learning_rate": 3.9139546992450484e-08, "loss": 1.3276, "step": 3829 }, { "epoch": 0.98048, "grad_norm": 0.3400678089862932, "learning_rate": 3.8129857809965365e-08, "loss": 1.2707, "step": 3830 }, { "epoch": 0.980736, "grad_norm": 0.35319035096323276, "learning_rate": 3.713335050427258e-08, "loss": 1.3485, "step": 3831 }, { "epoch": 0.980992, "grad_norm": 0.34793421781368084, "learning_rate": 3.61500257334213e-08, "loss": 1.3093, "step": 3832 }, { "epoch": 0.981248, "grad_norm": 0.33948996610897936, "learning_rate": 3.517988414675211e-08, "loss": 1.2442, "step": 3833 }, { "epoch": 0.981504, "grad_norm": 0.3546913139508053, "learning_rate": 3.4222926384899214e-08, "loss": 1.3515, "step": 3834 }, { "epoch": 0.98176, "grad_norm": 0.3502936772356789, "learning_rate": 3.32791530797949e-08, "loss": 1.3225, "step": 3835 }, { "epoch": 0.982016, "grad_norm": 0.3469309752003446, "learning_rate": 3.234856485466287e-08, "loss": 1.3245, "step": 3836 }, { "epoch": 0.982272, "grad_norm": 0.3514363370840732, "learning_rate": 3.1431162324015994e-08, "loss": 1.3348, "step": 3837 }, { "epoch": 0.982528, "grad_norm": 0.3474835208051609, "learning_rate": 3.0526946093667466e-08, "loss": 1.2925, "step": 3838 }, { "epoch": 0.982784, "grad_norm": 0.3514853656160855, "learning_rate": 2.963591676071742e-08, "loss": 1.3171, "step": 3839 }, { "epoch": 0.98304, "grad_norm": 0.3516273643810689, "learning_rate": 2.8758074913559642e-08, "loss": 1.3069, "step": 3840 }, { "epoch": 0.983296, "grad_norm": 0.3444475398946807, "learning_rate": 2.7893421131877096e-08, "loss": 1.3142, "step": 3841 }, { "epoch": 0.983552, "grad_norm": 0.35634014024484667, "learning_rate": 2.7041955986650824e-08, "loss": 1.3741, "step": 3842 }, { "epoch": 0.983808, "grad_norm": 0.34768763336693664, "learning_rate": 2.6203680040146617e-08, "loss": 1.3068, "step": 3843 }, { "epoch": 0.984064, "grad_norm": 0.35525404903128205, "learning_rate": 2.5378593845919454e-08, "loss": 1.3354, "step": 3844 }, { "epoch": 0.98432, "grad_norm": 0.3626379386770382, "learning_rate": 2.4566697948822382e-08, "loss": 1.3531, "step": 3845 }, { "epoch": 0.984576, "grad_norm": 0.347284377269125, "learning_rate": 2.3767992884988767e-08, "loss": 1.2826, "step": 3846 }, { "epoch": 0.984832, "grad_norm": 0.3480190393510757, "learning_rate": 2.2982479181847817e-08, "loss": 1.2948, "step": 3847 }, { "epoch": 0.985088, "grad_norm": 0.40031189243474147, "learning_rate": 2.2210157358113492e-08, "loss": 1.297, "step": 3848 }, { "epoch": 0.985344, "grad_norm": 0.3461403164240625, "learning_rate": 2.145102792379339e-08, "loss": 1.2863, "step": 3849 }, { "epoch": 0.9856, "grad_norm": 0.3520763451840515, "learning_rate": 2.0705091380182065e-08, "loss": 1.3367, "step": 3850 }, { "epoch": 0.985856, "grad_norm": 0.3539777080019041, "learning_rate": 1.9972348219854386e-08, "loss": 1.3638, "step": 3851 }, { "epoch": 0.986112, "grad_norm": 0.3594904642845286, "learning_rate": 1.9252798926685522e-08, "loss": 1.3619, "step": 3852 }, { "epoch": 0.986368, "grad_norm": 0.35024791118469184, "learning_rate": 1.8546443975830943e-08, "loss": 1.3545, "step": 3853 }, { "epoch": 0.986624, "grad_norm": 0.3470396670229881, "learning_rate": 1.7853283833730863e-08, "loss": 1.2893, "step": 3854 }, { "epoch": 0.98688, "grad_norm": 0.34403460218880644, "learning_rate": 1.7173318958119134e-08, "loss": 1.3023, "step": 3855 }, { "epoch": 0.987136, "grad_norm": 0.3544374524748081, "learning_rate": 1.6506549798009918e-08, "loss": 1.3251, "step": 3856 }, { "epoch": 0.987392, "grad_norm": 0.35555715661586185, "learning_rate": 1.5852976793708784e-08, "loss": 1.3082, "step": 3857 }, { "epoch": 0.987648, "grad_norm": 0.3511935107519599, "learning_rate": 1.521260037680161e-08, "loss": 1.3498, "step": 3858 }, { "epoch": 0.987904, "grad_norm": 0.34334834143606763, "learning_rate": 1.4585420970163466e-08, "loss": 1.2909, "step": 3859 }, { "epoch": 0.98816, "grad_norm": 0.3572379175600008, "learning_rate": 1.3971438987954167e-08, "loss": 1.3418, "step": 3860 }, { "epoch": 0.988416, "grad_norm": 0.35440490915641165, "learning_rate": 1.33706548356205e-08, "loss": 1.3511, "step": 3861 }, { "epoch": 0.988672, "grad_norm": 0.3580339355955322, "learning_rate": 1.2783068909889563e-08, "loss": 1.3147, "step": 3862 }, { "epoch": 0.988928, "grad_norm": 0.35739545462064787, "learning_rate": 1.2208681598775418e-08, "loss": 1.3625, "step": 3863 }, { "epoch": 0.989184, "grad_norm": 0.3472079378979475, "learning_rate": 1.1647493281576883e-08, "loss": 1.2752, "step": 3864 }, { "epoch": 0.98944, "grad_norm": 0.35260321075238293, "learning_rate": 1.109950432887752e-08, "loss": 1.336, "step": 3865 }, { "epoch": 0.989696, "grad_norm": 0.3535175802036694, "learning_rate": 1.05647151025412e-08, "loss": 1.3145, "step": 3866 }, { "epoch": 0.989952, "grad_norm": 0.34943144852472, "learning_rate": 1.0043125955718768e-08, "loss": 1.371, "step": 3867 }, { "epoch": 0.990208, "grad_norm": 0.33723738252045143, "learning_rate": 9.534737232843595e-09, "loss": 1.2792, "step": 3868 }, { "epoch": 0.990464, "grad_norm": 0.35362043984295866, "learning_rate": 9.039549269629355e-09, "loss": 1.3615, "step": 3869 }, { "epoch": 0.99072, "grad_norm": 0.3520687631289754, "learning_rate": 8.557562393076701e-09, "loss": 1.3399, "step": 3870 }, { "epoch": 0.990976, "grad_norm": 0.35390613023195366, "learning_rate": 8.08877692146659e-09, "loss": 1.3467, "step": 3871 }, { "epoch": 0.991232, "grad_norm": 0.34448707278488316, "learning_rate": 7.633193164364727e-09, "loss": 1.3042, "step": 3872 }, { "epoch": 0.991488, "grad_norm": 0.35563086254485854, "learning_rate": 7.190811422612687e-09, "loss": 1.3461, "step": 3873 }, { "epoch": 0.991744, "grad_norm": 0.3509166264142058, "learning_rate": 6.761631988341233e-09, "loss": 1.3177, "step": 3874 }, { "epoch": 0.992, "grad_norm": 0.3630141134148411, "learning_rate": 6.345655144961438e-09, "loss": 1.3002, "step": 3875 }, { "epoch": 0.992256, "grad_norm": 0.3541541011937014, "learning_rate": 5.9428811671602415e-09, "loss": 1.3336, "step": 3876 }, { "epoch": 0.992512, "grad_norm": 0.35104907283775955, "learning_rate": 5.5533103209159945e-09, "loss": 1.3427, "step": 3877 }, { "epoch": 0.992768, "grad_norm": 0.3366049178999771, "learning_rate": 5.176942863480694e-09, "loss": 1.2664, "step": 3878 }, { "epoch": 0.993024, "grad_norm": 0.3501018580938512, "learning_rate": 4.8137790433888665e-09, "loss": 1.3132, "step": 3879 }, { "epoch": 0.99328, "grad_norm": 0.3738265291377611, "learning_rate": 4.463819100457567e-09, "loss": 1.3084, "step": 3880 }, { "epoch": 0.993536, "grad_norm": 0.3443566307145647, "learning_rate": 4.127063265781938e-09, "loss": 1.2984, "step": 3881 }, { "epoch": 0.993792, "grad_norm": 0.34915148412781555, "learning_rate": 3.803511761744094e-09, "loss": 1.311, "step": 3882 }, { "epoch": 0.994048, "grad_norm": 0.3650515319126288, "learning_rate": 3.4931648019975728e-09, "loss": 1.3433, "step": 3883 }, { "epoch": 0.994304, "grad_norm": 0.3542045592400462, "learning_rate": 3.1960225914828834e-09, "loss": 1.346, "step": 3884 }, { "epoch": 0.99456, "grad_norm": 0.34564596416562104, "learning_rate": 2.9120853264186232e-09, "loss": 1.3124, "step": 3885 }, { "epoch": 0.994816, "grad_norm": 0.33972248263531846, "learning_rate": 2.6413531943036976e-09, "loss": 1.2934, "step": 3886 }, { "epoch": 0.995072, "grad_norm": 0.3467753361555826, "learning_rate": 2.383826373915099e-09, "loss": 1.3308, "step": 3887 }, { "epoch": 0.995328, "grad_norm": 0.3690600203431167, "learning_rate": 2.1395050353145707e-09, "loss": 1.3299, "step": 3888 }, { "epoch": 0.995584, "grad_norm": 0.36844653127531746, "learning_rate": 1.908389339837502e-09, "loss": 1.3553, "step": 3889 }, { "epoch": 0.99584, "grad_norm": 0.34891045298644996, "learning_rate": 1.6904794401040313e-09, "loss": 1.3165, "step": 3890 }, { "epoch": 0.996096, "grad_norm": 0.34309651033192795, "learning_rate": 1.4857754800101654e-09, "loss": 1.2882, "step": 3891 }, { "epoch": 0.996352, "grad_norm": 0.3640368935262463, "learning_rate": 1.2942775947322184e-09, "loss": 1.3411, "step": 3892 }, { "epoch": 0.996608, "grad_norm": 0.3646503582741394, "learning_rate": 1.1159859107290337e-09, "loss": 1.3399, "step": 3893 }, { "epoch": 0.996864, "grad_norm": 0.3524402467497575, "learning_rate": 9.509005457331022e-10, "loss": 1.3675, "step": 3894 }, { "epoch": 0.99712, "grad_norm": 0.34898679795961723, "learning_rate": 7.990216087594426e-10, "loss": 1.322, "step": 3895 }, { "epoch": 0.997376, "grad_norm": 0.3487734891703071, "learning_rate": 6.603492001033828e-10, "loss": 1.301, "step": 3896 }, { "epoch": 0.997632, "grad_norm": 0.34534764107641575, "learning_rate": 5.348834113361179e-10, "loss": 1.2745, "step": 3897 }, { "epoch": 0.997888, "grad_norm": 0.3524019403509108, "learning_rate": 4.226243253091511e-10, "loss": 1.3533, "step": 3898 }, { "epoch": 0.998144, "grad_norm": 0.3489700492235033, "learning_rate": 3.2357201615429434e-10, "loss": 1.326, "step": 3899 }, { "epoch": 0.9984, "grad_norm": 0.3415355486302458, "learning_rate": 2.3772654928144733e-10, "loss": 1.3004, "step": 3900 }, { "epoch": 0.998656, "grad_norm": 0.35418162815142945, "learning_rate": 1.6508798137637727e-10, "loss": 1.3324, "step": 3901 }, { "epoch": 0.998912, "grad_norm": 0.3463959582994453, "learning_rate": 1.0565636040960059e-10, "loss": 1.3021, "step": 3902 }, { "epoch": 0.999168, "grad_norm": 0.34322502965955426, "learning_rate": 5.943172562306032e-11, "loss": 1.2871, "step": 3903 }, { "epoch": 0.999424, "grad_norm": 0.3523476709598277, "learning_rate": 2.64141075456692e-11, "loss": 1.3023, "step": 3904 }, { "epoch": 0.99968, "grad_norm": 0.34721163075993766, "learning_rate": 6.6035279755460865e-12, "loss": 1.3415, "step": 3905 }, { "epoch": 0.999936, "grad_norm": 0.3528875622081801, "learning_rate": 0.0, "loss": 1.3614, "step": 3906 }, { "epoch": 0.999936, "step": 3906, "total_flos": 4458641369333760.0, "train_loss": 1.408147927589192, "train_runtime": 51696.0221, "train_samples_per_second": 14.508, "train_steps_per_second": 0.076 } ], "logging_steps": 1, "max_steps": 3906, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4458641369333760.0, "train_batch_size": 3, "trial_name": null, "trial_params": null }